// Convert a shape of Dim entries into a plain integer shape; a symbolic
// dimension whose parameter is not a numeric string makes this impossible.
std::vector<size_t> ret_shape(shape.size());
for (size_t i = 0; i < shape.size(); i++) {
   if (shape[i].isParam) {
      // the parameter string may still encode a plain number
      int val = -1;
      try {
         val = std::stoi(shape[i].param);
         if (val >= 0)
            ret_shape[i] = static_cast<size_t>(val);
         else {
            ret_shape.clear();   // negative values cannot form a valid shape
            break;
         }
      } catch (const std::invalid_argument &) {
         ret_shape.clear();      // truly symbolic: no integer shape exists
         break;
      }
   } else {
      ret_shape[i] = shape[i].dim;
   }
}
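
// Usage sketch. The enclosing function name and the Dim constructors are
// assumptions, not shown in this excerpt:
//    std::vector<Dim> s = { Dim{"batch_size"}, Dim{3}, Dim{4} };
//    auto a = ConvertShapeToInt(s);   // -> {} : "batch_size" is not numeric
//    std::vector<Dim> t = { Dim{"2"}, Dim{3} };
//    auto b = ConvertShapeToInt(t);   // -> {2, 3}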

// Build a string holding the total length (product of the dimensions) of a
// possibly symbolic shape, e.g. "batch_size * 12".
if (shape.empty())
   return "1";
std::string length;
int64_t int_length = -1;
for (size_t i = 0; i < shape.size(); i++) {
   if (shape[i].isParam) {
      if (!length.empty()) length += " * ";
      length += shape[i].param;
   } else {
      // accumulate the product of the integer dimensions
      if (int_length == -1)
         int_length = shape[i].dim;
      else
         int_length *= shape[i].dim;
   }
}
// append the integer product, skipping a redundant factor of 1
if (int_length >= 0) {
   if (!length.empty() && int_length > 1) {
      length += " * ";
      length += std::to_string(int_length);
   } else if (length.empty()) {
      length = std::to_string(int_length);
   }
}
return length;
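
// Usage sketch (function name assumed; not part of the excerpt):
//    ConvertShapeToLength({Dim{"bs"}, Dim{3}, Dim{4}})  -> "bs * 12"
//    ConvertShapeToLength({Dim{2}, Dim{5}})             -> "10"
//    ConvertShapeToLength({})                           -> "1"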

// Compute the common shape to which all the input shapes broadcast
// (multidirectional, ONNX-style broadcasting).
if (shape.size() < 2) {
   throw std::runtime_error(
      "TMVA::SOFIE - MultidirectionalBroadcastShape requires at least 2 input shapes.");
}
// number of input shapes to broadcast
size_t n = shape.size();
// the target rank is the largest rank among the inputs
size_t targetSize = shape[0].size();
for (size_t i = 1; i < n; i++) {
   targetSize = std::max(targetSize, shape[i].size());
}
// check whether all the inputs already have the target rank
bool sameSize = true;
for (size_t i = 0; i < n; i++) {
   if (shape[i].size() != targetSize) {
      sameSize = false;
      break;
   }
}
if (sameSize) {
   // check whether all the inputs have exactly the same shape
   bool sameShape = true;
   for (size_t i = 1; i < n && sameShape; i++) {
      for (size_t dim = 0; dim < shape[0].size(); dim++) {
         if (shape[i][dim] != shape[0][dim]) {
            sameShape = false;
            break;
         }
      }
   }
   if (sameShape)
      return shape[0];
   // the target shape takes the maximum of the inputs in every dimension
   std::vector<size_t> targetShape(targetSize, 1);
   for (size_t i = 0; i < n; i++) {
      for (size_t dim = 0; dim < targetSize; dim++) {
         targetShape[dim] = std::max(targetShape[dim], shape[i][dim]);
      }
   }
   // every input dimension must match the target or be 1
   bool broadcastable = true;
   for (size_t i = 0; i < n && broadcastable; i++) {
      for (size_t dim = 0; dim < targetSize; dim++) {
         if (shape[i][dim] != 1 && targetShape[dim] != 1 && shape[i][dim] != targetShape[dim]) {
            broadcastable = false;
            break;
         }
      }
   }
   if (!broadcastable) {
      std::stringstream ss;
      ss << "TMVA::SOFIE - Error multidirectional broadcasting shapes ";
      for (size_t i = 0; i < n; i++) {
         ss << ConvertShapeToString(shape[i]);
         if (n > 2 && i < n - 2) {
            ss << ", ";
         } else if (n >= 2 && i == n - 2) {
            ss << " and ";
         }
      }
      ss << " to the same shape.";
      throw std::runtime_error(ss.str());
   }
   return targetShape;
}
// the ranks differ: prepend ones to the shorter shapes
for (size_t i = 0; i < n; i++) {
   if (shape[i].size() < targetSize) {
      std::vector<size_t> newShape(targetSize, 1);
      size_t offset = targetSize - shape[i].size();
      std::copy(shape[i].begin(), shape[i].end(), newShape.begin() + offset);
      shape[i] = std::move(newShape);
   }
}
// recompute the target shape and the broadcastability check on the padded shapes
std::vector<size_t> targetShape(targetSize, 1);
for (size_t i = 0; i < n; i++) {
   for (size_t dim = 0; dim < targetSize; dim++) {
      targetShape[dim] = std::max(targetShape[dim], shape[i][dim]);
   }
}
bool broadcastable = true;
for (size_t i = 0; i < n && broadcastable; i++) {
   for (size_t dim = 0; dim < targetSize; dim++) {
      if (shape[i][dim] != targetShape[dim] && shape[i][dim] != 1 && targetShape[dim] != 1) {
         broadcastable = false;
         break;
      }
   }
}
if (!broadcastable) {
   std::stringstream ss;
   ss << "TMVA::SOFIE - Error multidirectional broadcasting shapes ";
   for (size_t i = 0; i < n; i++) {
      ss << ConvertShapeToString(shape[i]);
      if (n > 2 && i < n - 2) {
         ss << ", ";
      } else if (n >= 2 && i == n - 2) {
         ss << " and ";
      }
   }
   ss << " to the same shape.";
   throw std::runtime_error(ss.str());
}
return targetShape;
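
// Usage sketch (signature assumed: takes std::vector<std::vector<size_t>>,
// returns std::vector<size_t>):
//    MultidirectionalBroadcastShape({{2, 3, 4}, {3, 1}, {1, 4}})  -> {2, 3, 4}
//    MultidirectionalBroadcastShape({{2, 3}, {4, 3}})             -> throws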

// Broadcast two integer shapes to a common shape. The returned flag records
// which input needs broadcasting: bit 1 for shapeB, bit 2 for shapeA.
size_t sizeA = shapeA.size();
size_t sizeB = shapeB.size();
// identical shapes need no broadcasting
if (shapeA == shapeB)
   return std::make_pair(0, shapeA);
size_t size = std::max(sizeA, sizeB);
// prepend the shorter shape with ones
if (sizeA < size) {
   std::vector<size_t> newShapeA(size, 1);
   size_t offset = size - sizeA;
   std::copy(shapeA.begin(), shapeA.end(), newShapeA.begin() + offset);
   shapeA = std::move(newShapeA);
}
if (sizeB < size) {
   std::vector<size_t> newShapeB(size, 1);
   size_t offset = size - sizeB;
   std::copy(shapeB.begin(), shapeB.end(), newShapeB.begin() + offset);
   shapeB = std::move(newShapeB);
}
// broadcastable if, in every dimension, the shapes agree or one of them is 1
bool broadcastable = true;
for (size_t i = 0; i < size; i++) {
   if (shapeA[i] != shapeB[i] && shapeA[i] != 1 && shapeB[i] != 1) {
      broadcastable = false;
      break;
   }
}
int broadcastFlag = 0;
if (broadcastable) {
   std::vector<size_t> targetShape(size, 1);
   for (size_t i = 0; i < size; i++) {
      targetShape[i] = std::max(shapeA[i], shapeB[i]);
      if (shapeB[i] < targetShape[i]) broadcastFlag |= 1;
      if (shapeA[i] < targetShape[i]) broadcastFlag |= 2;
   }
   return std::make_pair(broadcastFlag, targetShape);
}
throw std::runtime_error("TMVA::SOFIE - Error multidirectional broadcasting tensors of shape "
                         + ConvertShapeToString(shapeA) + " and " + ConvertShapeToString(shapeB)
                         + " to a common shape.");

// The same broadcast for symbolic (Dim) shapes. Parametric dimensions cannot
// be compared at code-generation time, so the target dimension is sometimes
// emitted as a runtime expression.
size_t sizeA = shapeA.size();
size_t sizeB = shapeB.size();
// identical shapes need no broadcasting
if (shapeA == shapeB)
   return std::make_pair(0, shapeA);
size_t size = std::max(sizeA, sizeB);
// prepend the shorter shape with ones
if (sizeA < size) {
   std::vector<Dim> newShapeA(size, Dim{1});
   size_t offset = size - sizeA;
   std::copy(shapeA.begin(), shapeA.end(), newShapeA.begin() + offset);
   shapeA = std::move(newShapeA);
}
if (sizeB < size) {
   std::vector<Dim> newShapeB(size, Dim{1});
   size_t offset = size - sizeB;
   std::copy(shapeB.begin(), shapeB.end(), newShapeB.begin() + offset);
   shapeB = std::move(newShapeB);
}
// bit 1 of the flag marks a broadcast of B, bit 2 a broadcast of A
int broadcastFlag = 0;
std::vector<Dim> targetShape(size);
for (size_t i = 0; i < size; i++) {
   if (shapeA[i] == shapeB[i]) {
      targetShape[i] = shapeA[i];
   } else if (shapeA[i].isParam && shapeB[i].GetVal() == "1") {
      targetShape[i] = shapeA[i];
      broadcastFlag |= 1;
   } else if (shapeA[i].GetVal() == "1" && shapeB[i].isParam) {
      targetShape[i] = shapeB[i];
      broadcastFlag |= 2;
   } else if (!shapeA[i].isParam && !shapeB[i].isParam) {
      if (shapeB[i].dim == 1) {
         targetShape[i] = shapeA[i];
         broadcastFlag |= 1;
      } else if (shapeA[i].dim == 1) {
         targetShape[i] = shapeB[i];
         broadcastFlag |= 2;
      } else {
         // fixed, different and both larger than 1: not broadcastable
         broadcastFlag = -1;
         break;
      }
   } else if (shapeA[i].isParam && shapeB[i].isParam) {
      // both parametric: defer the choice of the larger one to run time
      std::stringstream s;
      s << "std::max(" << shapeA[i] << "," << shapeB[i] << ")";
      targetShape[i] = Dim{s.str(), static_cast<size_t>(-1)};
      broadcastFlag |= 3;
   } else if (shapeA[i].isParam && !shapeB[i].isParam) {
      // one parametric, one fixed: assume the fixed dimension is the target
      targetShape[i] = shapeB[i];
      broadcastFlag |= 2;
   } else if (!shapeA[i].isParam && shapeB[i].isParam) {
      targetShape[i] = shapeA[i];
      broadcastFlag |= 1;
   } else {
      // the cases above are exhaustive; reaching this point is a logic error
      throw std::runtime_error("TMVA::SOFIE - Fatal error in MultiDirectionalBroadCastDimShape");
   }
}
if (broadcastFlag == -1) {
   throw std::runtime_error("TMVA::SOFIE - Error multidirectional broadcasting tensors of shape "
                            + ConvertDynamicShapeToString(shapeA) + " and "
                            + ConvertDynamicShapeToString(shapeB) + " to a common shape.");
}
return std::make_pair(broadcastFlag, targetShape);
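
// Usage sketch (overload name assumed; "bs" is a symbolic batch dimension):
//    auto r = MultidirectionalBroadcastShape({Dim{"bs"}, Dim{3}},
//                                            {Dim{1},    Dim{3}});
//    // r.second == {Dim{"bs"}, Dim{3}}; bit 1 of r.first is set (B broadcast)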

// Sanitize a tensor name so it can be used in generated C++ identifiers:
// map '-' to '_' and drop every remaining non-alphanumeric character.
// (The cast to unsigned char avoids undefined behavior in std::isalnum
// for negative char values.)
std::string s(input_tensor_name);
std::replace(s.begin(), s.end(), '-', '_');
s.erase(std::remove_if(s.begin(), s.end(),
                       [](char const &c) -> bool {
                          return !std::isalnum(static_cast<unsigned char>(c)) && c != '_';
                       }),
        s.end());
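
// Usage sketch (function name assumed, e.g. SOFIE's UTILITY::Clean_name):
//    Clean_name("conv1/weights:0")  -> "conv1weights0"
//    Clean_name("my-tensor")        -> "my_tensor"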

// Plan a shared memory pool for tensors with known lifetimes: sweep the
// allocation/deallocation events in time order and place each tensor with a
// best-fit search over the free list, growing the pool only when nothing fits.
for (const auto &t : tensorsInfo) {
   if (!(t.end > t.begin)) {
      throw std::runtime_error("Each tensor must have end > begin.");
   }
}
// two events per tensor: its deallocation (type 0) and its allocation (type 1);
// with events ordered by (time, type), deallocations at a given time are
// processed before allocations, so freed memory can be reused immediately
std::vector<MemoryEvent> events;
events.reserve(tensorsInfo.size() * 2);
for (int i = 0; i < (int)tensorsInfo.size(); ++i) {
   events.push_back({tensorsInfo[i].end, 0, i});
   events.push_back({tensorsInfo[i].begin, 1, i});
}
std::sort(events.begin(), events.end());

std::vector<size_t> tensorsOffset(tensorsInfo.size());
// free blocks of the pool, ordered by offset
std::set<FreeBlock> free_list;
// size and offset of the tensors currently alive
std::unordered_map<int, std::size_t> live_size;
std::unordered_map<int, std::size_t> live_offset;
// total size of the pool grown so far
std::size_t total_bytes = 0;

// best fit: pick the smallest free block that can hold `need` bytes
auto allocate_best_fit = [&](std::size_t need) -> std::size_t {
   auto best = free_list.end();
   for (auto it = free_list.begin(); it != free_list.end(); ++it) {
      if (it->size >= need) {
         if (best == free_list.end() || it->size < best->size)
            best = it;
      }
   }
   if (best != free_list.end()) {
      std::size_t off = best->offset;
      if (best->size == need) {
         free_list.erase(best);
      } else {
         // keep the unused tail of the block on the free list
         FreeBlock updated{best->offset + need, best->size - need};
         free_list.erase(best);
         free_list.insert(updated);
      }
      return off;
   }
   // nothing fits: grow the pool
   std::size_t off = total_bytes;
   total_bytes += need;
   return off;
};

// merge a newly freed block with contiguous neighbours
auto try_coalesce = [&](std::set<FreeBlock>::iterator it) {
   if (it != free_list.begin()) {
      auto prev = std::prev(it);
      if (prev->offset + prev->size == it->offset) {
         FreeBlock merged{prev->offset, prev->size + it->size};
         free_list.erase(prev);
         it = free_list.erase(it);
         it = free_list.insert(merged).first;
      }
   }
   auto next = std::next(it);
   if (next != free_list.end() && it->offset + it->size == next->offset) {
      FreeBlock merged{it->offset, it->size + next->size};
      free_list.erase(next);
      it = free_list.erase(it);
      free_list.insert(merged);
   }
};

for (const auto &e : events) {
   // a deallocation event finds its tensor among the live ones
   auto it_sz = live_size.find(e.idx);
   auto it_off = live_offset.find(e.idx);
   if (it_sz != live_size.end() && it_off != live_offset.end()) {
      FreeBlock fb{it_off->second, it_sz->second};
      auto it = free_list.insert(fb).first;
      try_coalesce(it);
      live_size.erase(it_sz);
      live_offset.erase(it_off);
      continue;
   }
   // otherwise it is an allocation event: place the tensor
   auto &t = tensorsInfo[e.idx];
   std::size_t off = allocate_best_fit(t.size);
   tensorsOffset[e.idx] = off;
   live_size[e.idx] = t.size;
   live_offset[e.idx] = off;
}
return MemoryResult{total_bytes, std::move(tensorsOffset)};
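
// Usage sketch (tensor-info entries assumed to be {begin, end, size}):
//    A{0, 2, 64}, B{1, 3, 32}, C{2, 4, 64}
//    A lives [0,2), B lives [1,3), C lives [2,4). At time 2, A's deallocation
//    is processed before C's allocation, so C reuses A's 64-byte block and the
//    pool peaks at 96 bytes instead of 160.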