1#ifndef TMVA_SOFIE_SOFIE_COMMON
2#define TMVA_SOFIE_SOFIE_COMMON
29 UNDEFINED = 0,
FLOAT = 1,
UINT8 = 2,
INT8 = 3,
UINT16 = 4,
INT16 = 5,
INT32 = 6,
INT64 = 7,
STRING = 8, BOOL = 9,
99 return !(*
this == rhs);
// Trait query: returns true for this specialization.
// NOTE(review): presumably the Dim-based (dynamic shape) specialization —
// confirm against the enclosing template, whose header is not visible here.
static bool IsDim() { return true; }
// Trait query: returns false for this specialization.
// NOTE(review): presumably the static (size_t) shape specialization —
// confirm against the enclosing template, whose header is not visible here.
static bool IsDim() { return false; }
// C++ type-name string ("float") emitted into generated SOFIE code.
static const std::string Name() { return "float"; }
// C++ type-name string ("double") emitted into generated SOFIE code.
static const std::string Name() { return "double"; }
// C++ type-name string ("int64_t") emitted into generated SOFIE code.
static const std::string Name() { return "int64_t"; }
// C++ type-name string ("int32_t") emitted into generated SOFIE code.
static const std::string Name() { return "int32_t"; }
// C++ type-name string ("uint32_t") emitted into generated SOFIE code.
static const std::string Name() { return "uint32_t"; }
// C++ type-name string ("uint64_t") emitted into generated SOFIE code.
static const std::string Name() { return "uint64_t"; }
// C++ type-name string ("bool") emitted into generated SOFIE code.
static const std::string Name() { return "bool"; }
// C++ type-name string ("int8_t") emitted into generated SOFIE code.
static const std::string Name() { return "int8_t"; }
// C++ type-name string ("uint8_t") emitted into generated SOFIE code.
static const std::string Name() { return "uint8_t"; }
182 throw std::invalid_argument(
"New size exceeds available tensor size.");
217 std::stringstream
ret;
218 if (std::is_floating_point_v<T>)
219 ret << std::setprecision(std::numeric_limits<T>::max_digits10);
228 std::stringstream
ret;
230 for (
size_t i = 0; i < std::min(
n,maxprint); i++) {
231 if (std::is_floating_point_v<T>)
232 ret << std::setprecision(std::numeric_limits<T>::max_digits10) << data[i];
237 if (i <
n-1)
ret <<
", ";
238 if (i <
n-1 && i == maxprint-1)
ret <<
"..... ";
274 template <
class T =
void>
277 return static_cast<T
const *
>(
fData.get());
285 for (std::size_t item :
fShape) {
286 fSize *=
static_cast<int>(item);
348std::string
Clean_name(std::string input_tensor_name);
351bool AreSameShape(
const std::vector<size_t>&,
const std::vector<size_t>&);
352bool AreSameShape(
const std::vector<size_t>&,
const std::vector<Dim>&);
353bool AreSameShape(
const std::vector<Dim>&,
const std::vector<Dim>&);
369T*
BroadcastConvBias(
const T* data,
const size_t channel,
const std::vector<size_t>& targetShape) {
370 size_t size = targetShape.size();
371 if (targetShape[1] != channel) {
372 std::stringstream ss;
373 ss <<
"TMVA::SOFIE - Error broadcasting Conv Bias of shape {";
374 ss << std::to_string(channel);
378 std::runtime_error(ss.str());
382 T* newData =
new T[targetLength];
384 if (targetLength == channel) {
385 std::copy(data, data + channel, newData);
391 for (
size_t i = 2; i <
size; i++)
392 cStride *= targetShape[i];
395 for (
size_t i = 0; i < channel; i++) {
396 std::fill(newData + i * cStride, newData + (i + 1) * cStride, data[i]);
399 size_t batch = targetShape[0];
400 size_t bStride = channel * cStride;
401 for (
size_t i = 1; i < batch; i++) {
402 std::copy(newData, newData + bStride, newData + i * bStride);
410template<
typename T,
class ConstContT = std::span<const T>>
411void BroadcastTensor(ConstContT data,
const std::vector<size_t>& shape,
const std::vector<size_t>& targetShape, T *broadcastedData) {
413 size_t size = shape.size();
415 size_t curLength = data.size();
417 if (
size > 1 && shape.front() == targetShape.front() && shape.back() == 1) {
418 size_t bsize = targetShape.back();
420 for (
int k =
int(
size)-2; k >=0; k--) {
421 if (shape[k] != 1)
break;
422 bsize *= targetShape[k];
424 for (
size_t i = 0; i < curLength; i++) {
425 std::fill(broadcastedData + i*bsize, broadcastedData + (i+1)*bsize , data[i]);
430 std::copy(data.begin(), data.end(), broadcastedData);
436 for (
size_t idx = 0; idx <
size; idx++) {
437 size_t dim = shape[idx];
438 size_t targetDim = targetShape[idx];
439 if (dim == 1 && targetDim > 1) {
441 size_t newLength = curLength * targetDim;
443 size_t arrayLength = curLength / arrayNum;
445 if (arrayLength > 1) {
447 for (
size_t arrayIdx = 0; arrayIdx < arrayNum; arrayIdx++) {
448 for (
size_t targetIdx = 0; targetIdx < targetDim; targetIdx++) {
449 size_t offset = arrayIdx * arrayLength * targetDim + targetIdx * arrayLength;
450 std::copy(broadcastedData + arrayIdx * arrayLength,
451 broadcastedData + (arrayIdx + 1) * arrayLength,
452 newData.begin() + offset);
457 for (
size_t arrayIdx = 0; arrayIdx < arrayNum; arrayIdx++) {
458 std::fill(newData.begin() + arrayIdx * targetDim,
459 newData.begin() + (arrayIdx + 1) * targetDim, broadcastedData[arrayIdx]);
463 curLength = newLength;
465 std::copy(newData.begin(), newData.begin() + newLength, broadcastedData);
468 arrayNum *= targetDim;
474T*
CreateBroadcastTensor(
const T* data,
const std::vector<size_t>& shape,
const std::vector<size_t>& targetShape,
size_t targetLength) {
476 T* broadcastedData =
new T[targetLength];
479 return broadcastedData;
486 if (shape.size() < targetShape.size()) {
487 size_t targetSize = targetShape.size();
488 std::vector<size_t> newShape(targetSize, 1);
489 size_t offset = targetSize - shape.size();
490 std::copy(shape.begin(), shape.end(), newShape.begin() + offset);
498void UnidirectionalBroadcast(
const T* data,
const std::vector<size_t>& shape,
const std::vector<size_t>& targetShape, T *broadcastedData) {
500 std::span<T> inData(
const_cast<T*
>(data), curLength);
502 if (shape.size() < targetShape.size()) {
503 size_t targetSize = targetShape.size();
504 std::vector<size_t> newShape(targetSize, 1);
505 size_t offset = targetSize - shape.size();
506 std::copy(shape.begin(), shape.end(), newShape.begin() + offset);
/// Check `0 <= a && a < b` with a single comparison.
/// Casting to unsigned maps any negative `a` to a value larger than any
/// valid non-negative `b`, so one `<` test covers both bounds.
/// (Signature reconstructed from the documented declaration
/// `bool is_a_ge_zero_and_a_lt_b(int a, int b)`; fragment was mangled.)
inline bool is_a_ge_zero_and_a_lt_b(int a, int b)
{
   return static_cast<unsigned>(a) < static_cast<unsigned>(b);
}
543void Im2col(
const T *data_im,
const int channels,
const int height,
const int width,
const int kernel_h,
544 const int kernel_w,
const int pad_h,
const int pad_w,
const int stride_h,
const int stride_w,
545 const int dilation_h,
const int dilation_w, T *data_col)
547 const int output_h = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
548 const int output_w = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
549 const int channel_size = height * width;
550 for (
int channel = channels; channel--; data_im += channel_size) {
551 for (
int kernel_row = 0; kernel_row < kernel_h; kernel_row++) {
552 for (
int kernel_col = 0; kernel_col < kernel_w; kernel_col++) {
553 int input_row = -pad_h + kernel_row * dilation_h;
554 for (
int output_rows = output_h; output_rows; output_rows--) {
556 for (
int output_cols = output_w; output_cols; output_cols--) {
560 int input_col = -pad_w + kernel_col * dilation_w;
561 for (
int output_col = output_w; output_col; output_col--) {
563 *(data_col++) = data_im[input_row * width + input_col];
567 input_col += stride_w;
570 input_row += stride_h;
580 const int depth,
const int height,
const int width,
581 const int kernel_d,
const int kernel_h,
const int kernel_w,
582 const int pad_d,
const int pad_h,
const int pad_w,
583 const int stride_d,
const int stride_h,
const int stride_w,
584 const int dilation_d,
const int dilation_h,
const int dilation_w, T *data_col)
586 const int output_h = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
587 const int output_w = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
588 const int output_d = (depth + 2 * pad_d - (dilation_d * (kernel_d - 1) + 1)) / stride_d + 1;
589 const int channel_size = height * width * depth;
591 for (
int channel = channels; channel--; data_im += channel_size) {
592 for (
int kernel_depth = 0; kernel_depth < kernel_d; kernel_depth++) {
593 for (
int kernel_row = 0; kernel_row < kernel_h; kernel_row++) {
594 for (
int kernel_col = 0; kernel_col < kernel_w; kernel_col++) {
595 int input_dep = -pad_d + kernel_depth * dilation_d;
596 for (
int output_dep = output_d; output_dep; output_dep--) {
598 for (
int output_rows = output_h; output_rows; output_rows--) {
599 for (
int output_cols = output_w; output_cols; output_cols--) {
604 int input_row = -pad_h + kernel_row * dilation_h;
605 for (
int output_rows = output_h; output_rows; output_rows--) {
607 for (
int output_cols = output_w; output_cols; output_cols--) {
611 int input_col = -pad_w + kernel_col * dilation_w;
612 for (
int output_col = output_w; output_col; output_col--) {
614 *(data_col++) = data_im[input_dep * width * height + input_row * width + input_col];
618 input_col += stride_w;
621 input_row += stride_h;
624 input_dep += stride_d;
632template <
typename Dtype>
633void col2im(
const Dtype* data_col,
const int channels,
634 const int height,
const int width,
const int kernel_h,
const int kernel_w,
635 const int pad_h,
const int pad_w,
636 const int stride_h,
const int stride_w,
637 const int dilation_h,
const int dilation_w,
640 std::fill(data_im, data_im + height * width * channels, 0.);
644 const int output_h = (height + 2 * pad_h -
645 (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
646 const int output_w = (width + 2 * pad_w -
647 (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
648 const int channel_size = height * width;
649 for (
int channel = channels; channel--; data_im += channel_size) {
650 for (
int kernel_row = 0; kernel_row < kernel_h; kernel_row++) {
651 for (
int kernel_col = 0; kernel_col < kernel_w; kernel_col++) {
652 int input_row = -pad_h + kernel_row * dilation_h;
653 for (
int output_rows = output_h; output_rows; output_rows--) {
655 data_col += output_w;
657 int input_col = -pad_w + kernel_col * dilation_w;
658 for (
int output_col = output_w; output_col; output_col--) {
665 data_im[input_row * width + input_col] += *data_col;
668 input_col += stride_w;
671 input_row += stride_h;
682extern "C" void sgemm_(
const char * transa,
const char * transb,
const int *
m,
const int *
n,
const int * k,
683 const float * alpha,
const float * A,
const int * lda,
const float * B,
const int * ldb,
684 const float * beta,
float * C,
const int * ldc);
706 throw std::runtime_error(
"TMVA RTensor Concatenate - tensors have different memory layout");
707 auto & shape1 =
t1.GetShape();
709 if (
t1.GetSize()/shape1[axis] != t2.
GetSize()/shape2[axis]) {
710 std::cout <<
"axis " << axis <<
" sizes " <<
t1.GetSize() <<
" " << t2.
GetSize() <<
" ";
713 throw std::runtime_error(
"TMVA RTensor Concatenate - tensors have incompatible shapes");
715 std::vector<size_t> outShape = shape1;
716 outShape[axis] = shape1[axis] + shape2[axis];
718 if (
t1.GetMemoryLayout() == TMVA::Experimental::MemoryLayout::ColumnMajor) {
719 throw std::runtime_error(
"TMVA RTensor Concatenate is not yet supported for column major tensors");
722 auto & stride1 =
t1.GetStrides();
726 size_t s1 = (axis > 0) ? stride1[axis-1] :
t1.GetSize();
727 size_t s2 = (axis > 0) ? stride2[axis-1] : t2.
GetSize();
728 size_t sout = (axis > 0) ? outStride[axis-1] : tout.
GetSize();
729 size_t nb =
t1.GetSize()/
s1;
730 for (
size_t i = 0; i < nb; i++) {
731 std::copy(
t1.GetData() + i*
s1,
t1.GetData() + (i+1)*
s1, tout.
GetData() + i * sout );
755 std::copy(data.node_data.GetData(), data.node_data.GetData()+ data.node_data.GetSize(), out.
node_data.
GetData());
756 std::copy(data.edge_data.GetData(), data.edge_data.GetData()+ data.edge_data.GetSize(), out.
edge_data.
GetData());
757 std::copy(data.global_data.GetData(), data.global_data.GetData()+ data.global_data.GetSize(), out.
global_data.
GetData());
758 std::copy(data.edge_index.GetData(), data.edge_index.GetData()+ data.edge_index.GetSize(), out.
edge_index.
GetData());
762inline void Gemm_Call(
float *output,
bool transa,
bool transb,
int m,
int n,
int k,
float alpha,
const float *A,
763 const float *B,
float beta,
const float *C)
767 const int *lda = transa ? &k : &
m;
768 const int *ldb = transb ? &
n : &k;
771 std::copy(C, C +
m *
n, output);
773 TMVA::Experimental::SOFIE::BLAS::sgemm_(transa ? &ct : &cn, transb ? &ct : &cn, &
m, &
n, &k, &alpha, A, lda, B, ldb,
/// Set the first `size` floats of `output` to `value`.
inline void Fill(float *output, float value, int size)
{
   std::fill(output, output + size, value);
}
/// Copy `size` floats from `input` into `output`
/// (std::copy precondition: `output` must not lie inside the source range).
inline void Copy(float *output, float const *input, int size)
{
   std::copy(input, input + size, output);
}
/// Element-wise ReLU: output[i] = input[i] if positive, else 0.
/// `output` and `input` may alias element-wise (each slot read before written).
inline void Relu(float *output, float const *input, int size)
{
   for (int i = 0; i < size; i++) {
      output[i] = (input[i] > 0.0f) ? input[i] : 0.0f;
   }
}
795 if (s ==
"inf")
return std::numeric_limits<float>::infinity();
796 if (s ==
"-inf")
return -std::numeric_limits<float>::infinity();
797 if (s ==
"nan")
return std::numeric_limits<float>::quiet_NaN();
802void ReadTensorFromStream(std::istream &is, T &target, std::string
const &expectedName, std::size_t expectedLength)
806 is >>
name >> length;
807 if (
name != expectedName) {
808 std::string err_msg =
809 "TMVA-SOFIE failed to read the correct tensor name; expected name is " + expectedName +
" , read " +
name;
810 throw std::runtime_error(err_msg);
812 if (length != expectedLength) {
813 std::string err_msg =
"TMVA-SOFIE failed to read the correct tensor size; expected size is " +
814 std::to_string(expectedLength) +
" , read " + std::to_string(length);
815 throw std::runtime_error(err_msg);
818 for (
size_t i = 0; i < length; ++i) {
823 throw std::runtime_error(
"TMVA-SOFIE failed to read the values for tensor " + expectedName);
828void EmitNestedLoops(std::stringstream &out,
size_t loopRank,
const std::vector<Dim> shape);
869 std::size_t result = 1;
870 for (std::size_t i = 0; i <
size; ++i) {
871 result *=
data[i].dim;
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
RTensor is a container with contiguous memory and shape information.
RTensor< Value_t, Container_t > Copy(MemoryLayout layout=MemoryLayout::RowMajor) const
Copy RTensor to new object.
RTensor is a container with contiguous memory and shape information.
MemoryLayout GetMemoryLayout() const
const Shape_t & GetStrides() const
std::size_t GetSize() const
const Shape_t & GetShape() const
bool IsWeightTensor() const
std::shared_ptr< void > const & sharedptr() const
std::shared_ptr< void > fData
! Transient shared data
InitializedTensor()=default
ETensorType fType
Encodes the type of the data.
std::vector< std::size_t > const & shape() const
char * fPersistentData
[fSize] Persistent version of the data
std::vector< std::size_t > fShape
The shape of the data in terms of elements in each dimension.
bool fIsNotWritable
Flag to indicate that tensor values do not need to be written as weight or generated code.
bool IsConstantTensor() const
void CastSharedToPersistent()
bool fConstant
Flag specifying if tensor is a Constant one (coming from a Constant operator).
ETensorType const & type() const
bool IsNotWritable() const
void CastPersistentToShared()
InitializedTensor(ETensorType type, std::span< std::size_t > shape, std::shared_ptr< void > data, bool typeConstant=false)
int fSize
The size of the persistent data in bytes (not number of elements!).
void sgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, const float *alpha, const float *A, const int *lda, const float *B, const int *ldb, const float *beta, float *C, const int *ldc)
bool AreSameShape(const std::vector< size_t > &, const std::vector< size_t > &)
void Im2col_3d(const T *data_im, const int channels, const int depth, const int height, const int width, const int kernel_d, const int kernel_h, const int kernel_w, const int pad_d, const int pad_h, const int pad_w, const int stride_d, const int stride_h, const int stride_w, const int dilation_d, const int dilation_h, const int dilation_w, T *data_col)
3d implementation
T * BroadcastConvBias(const T *data, const size_t channel, const std::vector< size_t > &targetShape)
std::vector< size_t > UnidirectionalBroadcastShape(std::vector< size_t > &, std::vector< size_t > &)
void col2im(const Dtype *data_col, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, Dtype *data_im)
void BroadcastTensor(ConstContT data, const std::vector< size_t > &shape, const std::vector< size_t > &targetShape, T *broadcastedData)
std::string Clean_name(std::string input_tensor_name)
bool is_a_ge_zero_and_a_lt_b(int a, int b)
function to check if a >= 0 and a < b using a single comparison — trick: cast both to unsigned so any negative a becomes larger than every valid b
std::vector< size_t > MultidirectionalBroadcastShape(std::vector< std::vector< size_t > >)
T * UnidirectionalBroadcast(const T *data, const std::vector< size_t > &shape, const std::vector< size_t > &targetShape)
void Im2col(const T *data_im, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, T *data_col)
im2col : efficient function to re-arrange input data of convolution to a matrix that can be used by BLAS gemm
T * CreateBroadcastTensor(const T *data, const std::vector< size_t > &shape, const std::vector< size_t > &targetShape, size_t targetLength)
std::vector< size_t > ComputeStrideFromShape(const std::vector< size_t > &shape)
compute stride of a tensor given its shape (assume layout is row-major)
MemoryResult OrganizeMemory(const std::vector< TensorLifeInfo > &tensorsInfo)
Greedy best-fit planner with coalescing free list.
constexpr TensorDims makeDims(Arr const &arr)
std::string ConvertDimShapeToString(const std::vector< Dim > &shape)
std::size_t ConvertShapeToLength(const std::vector< size_t > &shape)
void ReadTensorFromStream(std::istream &is, T &target, std::string const &expectedName, std::size_t expectedLength)
std::string ConvertValuesToString(size_t n, const T *data, size_t maxprint=-1)
std::vector< Dim > ConvertShapeToDim(const std::vector< size_t > &shape)
Convert shape from integer format to dynamic one (based on Dim).
constexpr size_t GetTypeSize(ETensorType type)
ETensorType GetTemplatedType(T)
void Gemm_Call(float *output, bool transa, bool transb, int m, int n, int k, float alpha, const float *A, const float *B, float beta, const float *C)
void Fill(float *output, float value, int size)
std::vector< size_t > ConvertShapeToInt(const std::vector< Dim > &shape)
Convert shape based on Dim to integer format.
std::string ConvertTypeToString(ETensorType type)
void Relu(float *output, float const *input, int size)
ETensorType ConvertStringToType(std::string type)
TMVA::Experimental::RTensor< T > Concatenate(TMVA::Experimental::RTensor< T > &t1, TMVA::Experimental::RTensor< T > &t2, int axis=0)
float ParseFloatToken(const std::string &s)
std::ostream & operator<<(std::ostream &os, const Dim &d)
std::string ConvertDimShapeToLength(const std::vector< Dim > &shape)
void EmitNestedLoops(std::stringstream &out, size_t loopRank, const std::vector< Dim > shape)
std::string ConvertShapeToString(const std::vector< size_t > &shape)
void CloseNestedLoops(std::stringstream &out, size_t loopRank)
std::string ConvertValToString(T value)
bool IsInteger(const std::string &s)
GNN_Data Copy(const GNN_Data &data)
create variable transformations
bool operator!=(const Dim &rhs) const
bool operator==(const Dim &rhs) const
Dim(const std::string &p, size_t d=0)
std::ostream & operator<<(std::ostream &os) const
std::string GetVal() const
RTensor< float > global_data
RTensor< float > edge_data
RTensor< int > edge_index
RTensor< float > node_data
std::map< size_t, TensorMemoryInfo > total_stack
std::map< size_t, size_t > available_stack
std::vector< size_t > offsets
constexpr SingleDim(std::size_t v)
constexpr SingleDim(const char *v)
constexpr std::size_t total_size() const
std::vector< size_t > shape
void merge(const TensorMemoryInfo &other)
std::string_view tensor_name
TensorMemoryInfo split(const std::string_view new_name, size_t new_size)
static const std::string Name()
static const std::string Name()
static const std::string Name()
static const std::string Name()
static const std::string Name()
static const std::string Name()
static const std::string Name()
static const std::string Name()
static const std::string Name()