1#ifndef TMVA_SOFIE_ROPERATOR_RNN
2#define TMVA_SOFIE_ROPERATOR_RNN
73 std::vector<float> activation_beta,
74 std::vector<std::string> activations,
float clip,
75 std::string direction,
size_t hidden_size,
size_t layout,
76 std::string nameX, std::string nameW, std::string nameR,
77 std::string nameB, std::string nameSequence_lens,
78 std::string nameInitial_h, std::string nameY,
89 if (std::is_same<T, float>::value) {
92 throw std::runtime_error(
93 "TMVA SOFIE Encountered unsupported type parsing a RNN operator");
98 fInputTensorNames.emplace_back(fNB);
101 fInputTensorNames.emplace_back(fNSequence_lens);
104 fInputTensorNames.emplace_back(fNInitial_h);
109 fOutputTensorNames.emplace_back(fNY);
112 fOutputTensorNames.emplace_back(fNY_h);
120 std::vector<ETensorType>
TypeInference(std::vector<ETensorType> input)
override;
126 std::vector<std::vector<size_t>>
139 std::string
Generate(std::string OpName)
override;
146 std::vector<std::string>
GetBlasRoutines()
override {
return { std::string(
"Gemm"), std::string(
"Axpy") }; }
159 size_t num_directions = input[1][0];
160 size_t hidden_size = input[1][1];
162 size_t seq_length = input[0][0];
163 size_t batch_size = input[0][1];
164 std::vector<std::vector<size_t>>
ret(
165 {{seq_length, num_directions, batch_size, hidden_size}, {num_directions, batch_size, hidden_size}});
168 size_t batch_size = input[0][0];
169 size_t seq_length = input[0][1];
170 std::vector<std::vector<size_t>>
ret(
171 {{batch_size, seq_length, num_directions, hidden_size}, {batch_size, num_directions, hidden_size}});
181 if (!model.CheckIfTensorAlreadyExist(
fNX)) {
182 throw std::runtime_error(
"TMVA SOFIE RNN Op input tensor " +
fNX +
" is not found in model.");
186 throw std::runtime_error(
"TMVA SOFIE RNN Op input tensor " +
fNX +
" is not of 3 dimensions.");
188 if (!model.CheckIfTensorAlreadyExist(
fNW)) {
189 throw std::runtime_error(
"TMVA SOFIE RNN Op input tensor " +
fNW +
" is not found in model.");
193 throw std::runtime_error(
"TMVA SOFIE RNN Op input tensor " +
fNW +
" is not of 3 dimensions.");
195 if (!model.CheckIfTensorAlreadyExist(
fNR)) {
196 throw std::runtime_error(
"TMVA SOFIE RNN Op input tensor " +
fNR +
" is not found in model.");
200 throw std::runtime_error(
"TMVA SOFIE RNN Op input tensor " +
fNR +
" is not of 3 dimensions.");
203 if (!model.CheckIfTensorAlreadyExist(
fNB)) {
204 throw std::runtime_error(
"TMVA SOFIE RNN op input tensor " +
fNB +
" is not found in model.");
208 throw std::runtime_error(
"TMVA SOFIE RNN op input tensor " +
fNB +
" is not of 2 or 4 dimensions.");
212 auto original_data = model.GetInitializedTensorData(
fNB);
213 size_t num_directions =
fShapeW[0];
216 if (
fType ==
"float") {
217 float *original_bias =
static_cast<float *
>(original_data.get());
218 float *new_bias =
new float[num_directions * seq_length * batch_size *
fAttrHiddenSize];
220 for (
size_t direction = 0; direction < num_directions; direction++) {
225 for (
size_t seq = 0; seq < seq_length; seq++) {
226 for (
size_t batch = 0; batch < batch_size; batch++) {
227 size_t bias_offset = direction * seq_length * batch_size *
fAttrHiddenSize +
229 std::copy(
sum.begin(),
sum.end(), new_bias + bias_offset);
233 std::vector<size_t> new_bias_shape = {num_directions, seq_length, batch_size,
fAttrHiddenSize};
234 std::shared_ptr<void> new_bias_ptr(new_bias, std::default_delete<
float[]>());
235 model.UpdateInitializedTensor(
fNB, model.GetTensorType(
fNB), new_bias_shape, new_bias_ptr);
242 throw std::runtime_error(
"TMVA SOFIE RNN Op input tensor " +
fNSequence_lens +
"is not found in model.");
246 throw std::runtime_error(
"TMVA SOFIE RNN Op input tensor " +
fNSequence_lens +
" is not of 1 dimension.");
250 if (!model.CheckIfTensorAlreadyExist(
fNInitial_h)) {
251 throw std::runtime_error(
"TMVA SOFIE RNN Op input tensor " +
fNInitial_h +
" is not found in model.");
255 throw std::runtime_error(
"TMVA SOFIE RNN Op input tensor " +
fNInitial_h +
" is not of 3 dimensions.");
260 if (!model.CheckIfTensorAlreadyExist(
fNY)) {
261 model.AddIntermediateTensor(
fNY, model.GetTensorType(
fNX),
fShapeY);
264 if (!
fNY_h.empty()) {
266 if (!model.CheckIfTensorAlreadyExist(
fNY_h)) {
272 if (activation !=
"Relu" && activation !=
"Tanh" && activation !=
"Sigmoid" && activation !=
"Affine" &&
273 activation !=
"LeakyRelu" && activation !=
"ThresholdRelu" && activation !=
"ScaledTanh" &&
274 activation !=
"HardSigmoid" && activation !=
"Elu" && activation !=
"Softsign" && activation !=
"Softplus") {
275 throw std::runtime_error(
"TMVA SOFIE - Activation function " + activation +
" not implemented");
279 throw std::runtime_error(
"TMVA SOFIE - Invalid RNN direction fAttrDirection = " +
fAttrDirection);
282 throw std::runtime_error(
"TMVA SOFIE - fAttrHiddenSize must be equal to " + std::to_string(
fShapeW[1]));
285 throw std::runtime_error(
"TMVA SOFIE - Layout fAttrLayout = " + std::to_string(
fAttrLayout) +
286 " must be 0 (timewise) or 1 (batchwise)");
296 model.AddNeededStdLib(
"cmath");
303 opName =
"op_" + opName;
304 std::stringstream out;
306 size_t num_directions =
fShapeW[0];
309 size_t input_size =
fShapeX[2];
316 std::vector<Block> blocks;
319 blocks.push_back({
"input", seq_length * batch_size * input_size});
320 blocks.push_back({
"initial_hidden_state", num_directions * batch_size *
fAttrHiddenSize});
322 blocks.push_back({
"feedforward", seq_length * batch_size *
fAttrHiddenSize});
324 blocks.push_back({
"hidden_state", seq_length * num_directions * batch_size *
fAttrHiddenSize});
328 size_t total_size = 0;
329 for (
const auto &
b : blocks) {
330 total_size +=
b.size;
334 out <<
"std::vector<" <<
fType <<
"> fVec_" << opName <<
"_buffer = std::vector<" <<
fType <<
">(" << total_size
338 std::size_t offset = 0;
339 for (
const auto &
b : blocks) {
340 out <<
fType <<
"* fVec_" << opName <<
"_" <<
b.name <<
" = fVec_" << opName <<
"_buffer.data() + " << offset
354 OpName =
"op_" + OpName;
355 std::stringstream out;
359 size_t input_size =
fShapeX[2];
360 size_t num_directions =
fShapeW[0];
364 if (
fType ==
"float") {
365 out <<
SP <<
"float const*" << OpName <<
"_input = tensor_" <<
fNX <<
";\n";
369 out <<
SP <<
fType <<
" * " << OpName <<
"_input = this->fVec_" << OpName <<
"_input;\n";
371 out <<
SP <<
fType <<
" " << OpName <<
"_input[" << seq_length * batch_size * input_size <<
"];\n";
372 out <<
SP <<
"for(size_t seq = 0; seq < " << seq_length <<
"; seq++) {\n";
373 out <<
SP <<
SP <<
"for(size_t batch = 0; batch < " << batch_size <<
"; batch++) {\n";
374 out <<
SP <<
SP <<
SP <<
"for(size_t i = 0; i < " << input_size <<
"; i++) {\n";
375 out <<
SP <<
SP <<
SP <<
SP << OpName <<
"_input[seq * " << batch_size * input_size <<
" + batch * " << input_size
376 <<
" + i] = " <<
"tensor_" <<
fNX <<
"[batch * " << seq_length * input_size <<
" + seq * " << input_size
378 out <<
SP <<
SP <<
SP <<
"}\n";
379 out <<
SP <<
SP <<
"}\n";
386 out <<
SP <<
fType <<
" *" << OpName <<
"_initial_hidden_state = " <<
" tensor_" <<
fNInitial_h <<
";\n";
389 out <<
SP <<
fType <<
" * " << OpName <<
"_initial_hidden_state = this->fVec_" << OpName
390 <<
"_initial_hidden_state;\n";
392 out <<
fType <<
" " << OpName <<
"_initial_hidden_state[" << num_directions * batch_size *
fAttrHiddenSize
395 for (
size_t direction = 0; direction < num_directions; direction++) {
396 out <<
SP <<
"for(size_t batch = 0; batch < " << batch_size <<
"; batch++) {\n";
398 out <<
SP <<
SP <<
SP << OpName <<
"_initial_hidden_state[" << direction * batch_size *
fAttrHiddenSize
401 out <<
SP <<
SP <<
"}\n";
408 out <<
SP <<
fType <<
" * " << OpName <<
"_feedforward = this->fVec_" << OpName <<
"_feedforward;\n";
415 out <<
SP <<
fType <<
" *" << OpName <<
"_hidden_state = tensor_" <<
fNY <<
";\n";
418 out <<
SP <<
fType <<
" * " << OpName <<
"_hidden_state = this->fVec_" << OpName <<
"_hidden_state;\n";
420 out <<
SP <<
fType <<
" " << OpName <<
"_hidden_state["
421 << seq_length * num_directions * batch_size *
fAttrHiddenSize <<
"] = {0};\n";
424 out <<
SP <<
"char " << OpName <<
"_transA = 'N';\n";
425 out <<
SP <<
"char " << OpName <<
"_transB = 'T';\n";
426 out <<
SP <<
"int " << OpName <<
"_m = " << seq_length * batch_size <<
";\n";
428 out <<
SP <<
"int " << OpName <<
"_k = " << input_size <<
";\n";
429 if (
fType ==
"float") {
430 out <<
SP <<
"float " << OpName <<
"_alpha = 1.;\n";
431 out <<
SP <<
"float " << OpName <<
"_beta = .0;\n";
434 out <<
SP <<
"int " << OpName <<
"_bias_size = " << seq_length * batch_size *
fAttrHiddenSize <<
";\n";
435 out <<
SP <<
"int " << OpName <<
"_incx = 1;\n";
436 out <<
SP <<
"int " << OpName <<
"_incy = 1;\n";
439 for (
size_t direction = 0; direction < num_directions; direction++) {
441 if (
fType ==
"float") {
442 if (direction == 0) {
443 out <<
SP <<
"BLAS::sgemm_(&" << OpName <<
"_transB, &" << OpName <<
"_transA, &" << OpName <<
"_n, &"
444 << OpName <<
"_m, &" << OpName <<
"_k, &" << OpName <<
"_alpha, tensor_" <<
fNW <<
", &" << OpName
445 <<
"_k, " << OpName <<
"_input, &" << OpName <<
"_k, &" << OpName <<
"_beta, " << OpName
446 <<
"_feedforward, &" << OpName <<
"_n);\n";
448 out <<
SP <<
"size_t " << OpName <<
"_w_offset = " <<
fAttrHiddenSize * input_size <<
";\n";
449 out <<
SP <<
"BLAS::sgemm_(&" << OpName <<
"_transB, &" << OpName <<
"_transA, &" << OpName <<
"_n, &"
450 << OpName <<
"_m, &" << OpName <<
"_k, &" << OpName <<
"_alpha, tensor_" <<
fNW <<
" + " << OpName
451 <<
"_w_offset, &" << OpName <<
"_k, " << OpName <<
"_input, &" << OpName <<
"_k, &" << OpName
452 <<
"_beta, " << OpName <<
"_feedforward, &" << OpName <<
"_n);\n";
457 if (
fType ==
"float") {
458 if (direction == 0) {
459 out <<
SP <<
"BLAS::saxpy_(&" << OpName <<
"_bias_size, &" << OpName <<
"_alpha, tensor_" <<
fNB <<
", &"
460 << OpName <<
"_incx, " << OpName <<
"_feedforward, &" << OpName <<
"_incy);\n";
462 out <<
SP <<
"size_t " << OpName <<
"_bias_offset = " << seq_length * batch_size *
fAttrHiddenSize
464 out <<
SP <<
"BLAS::saxpy_(&" << OpName <<
"_bias_size, &" << OpName <<
"_alpha, tensor_" <<
fNB <<
" + "
465 << OpName <<
"_bias_offset, &" << OpName <<
"_incx, " << OpName <<
"_feedforward, &" << OpName
472 out <<
SP <<
"for (size_t seq = 0; seq < " << seq_length <<
"; seq++) {\n";
475 out <<
SP <<
SP <<
"size_t h_offset = seq * " << num_directions * batch_size *
fAttrHiddenSize <<
" + "
477 out <<
SP <<
SP <<
"std::copy(" << OpName <<
"_feedforward + offset, " << OpName
478 <<
"_feedforward + offset + size, " << OpName <<
"_hidden_state + h_offset);\n";
481 out <<
SP <<
"for (size_t seq = 0; seq < " << seq_length <<
"; seq++) {\n";
483 out <<
SP <<
SP <<
"size_t index = " << seq_length - 1 <<
" - seq;\n";
485 out <<
SP <<
SP <<
"size_t index = seq;\n";
488 out <<
SP <<
SP <<
"int m2 = " << batch_size <<
";\n";
489 out <<
SP <<
SP <<
"size_t offset = index * " << num_directions * batch_size *
fAttrHiddenSize <<
" + "
492 out <<
SP <<
SP <<
"if (seq == 0) {\n";
496 out <<
SP <<
SP <<
SP <<
"size_t initial_hidden_state_offset = " << direction * batch_size *
fAttrHiddenSize
498 if (
fType ==
"float") {
499 out <<
SP <<
SP <<
SP <<
"BLAS::sgemm_(&" << OpName <<
"_transB, &" << OpName <<
"_transA, &" << OpName
500 <<
"_n, &m2, &" << OpName <<
"_n, &" << OpName <<
"_alpha, tensor_" <<
fNR <<
" + r_offset, &" << OpName
501 <<
"_n, " << OpName <<
"_initial_hidden_state + initial_hidden_state_offset, &" << OpName <<
"_n, &"
502 << OpName <<
"_alpha, " << OpName <<
"_hidden_state + offset, &" << OpName <<
"_n);\n";
505 out <<
SP <<
SP <<
"} else {\n";
509 out <<
SP <<
SP <<
SP <<
"size_t previous_offset = (index + 1) * "
513 out <<
SP <<
SP <<
SP <<
"size_t previous_offset = (seq - 1) * "
517 if (
fType ==
"float") {
518 out <<
SP <<
SP <<
SP <<
"BLAS::sgemm_(&" << OpName <<
"_transB, &" << OpName <<
"_transA, &" << OpName
519 <<
"_n, &m2, &" << OpName <<
"_n, &" << OpName <<
"_alpha, tensor_" <<
fNR <<
" + r_offset, &" << OpName
520 <<
"_n, " << OpName <<
"_hidden_state + previous_offset, &" << OpName <<
"_n, &" << OpName <<
"_alpha, "
521 << OpName <<
"_hidden_state + offset, &" << OpName <<
"_n);\n";
523 out <<
SP <<
SP <<
"}\n";
527 out <<
SP <<
SP <<
"for (size_t i = offset; i < offset + size; i++) {\n";
528 if (
fType ==
"float") {
529 out <<
SP <<
SP <<
SP <<
"float x = (" << OpName <<
"_hidden_state[i] > " << -
fAttrClip <<
") ? " << OpName
530 <<
"_hidden_state[i] : " << -
fAttrClip <<
";\n";
533 out <<
SP <<
SP <<
"}\n";
538 out <<
SP <<
SP <<
"for (size_t i = offset; i < offset + size; i++) {\n";
539 out <<
SP <<
SP <<
SP <<
"if (" << OpName <<
"_hidden_state[i] < 0.)\n";
540 out <<
SP <<
SP <<
SP <<
SP << OpName <<
"_hidden_state[i] = 0.;\n";
541 out <<
SP <<
SP <<
"}\n";
543 out <<
SP <<
SP <<
"for (size_t i = offset; i < offset + size; i++) {\n";
544 if (
fType ==
"float") {
545 out <<
SP <<
SP <<
SP <<
"float ex = std::exp(-2 * " << OpName <<
"_hidden_state[i]);\n";
547 out <<
SP <<
SP <<
SP <<
SP << OpName <<
"_hidden_state[i] = (1. - ex) / (1. + ex);\n";
548 out <<
SP <<
SP <<
"}\n";
550 out <<
SP <<
SP <<
"for (size_t i = offset; i < offset + size; i++) {\n";
551 out <<
SP <<
SP <<
SP <<
SP << OpName <<
"_hidden_state[i] = 1. / (1. + std::exp(-" << OpName
552 <<
"_hidden_state[i]));\n";
553 out <<
SP <<
SP <<
"}\n";
555 out <<
SP <<
SP <<
"for (size_t i = offset; i < offset + size; i++) {\n";
558 out <<
SP <<
SP <<
"}\n";
560 out <<
SP <<
SP <<
"for (size_t i = offset; i < offset + size; i++) {\n";
561 if (
fType ==
"float") {
563 <<
"_hidden_state[i]);\n";
566 <<
" * (1. - ex) / (1. + ex);\n";
567 out <<
SP <<
SP <<
"}\n";
569 out <<
SP <<
SP <<
"for (size_t i = offset; i < offset + size; i++) {\n";
570 if (
fType ==
"float") {
573 out <<
SP <<
SP <<
SP <<
"float b = (a > 0.) ? a : 0.;\n";
575 out <<
SP <<
SP <<
SP <<
SP << OpName <<
"_hidden_state[i] = (b < 1.) ? b : 1.;\n";
576 out <<
SP <<
SP <<
"}\n";
578 out <<
SP <<
SP <<
"for (size_t i = offset; i < offset + size; i++) {\n";
579 out <<
SP <<
SP <<
SP <<
"if (" << OpName <<
"_hidden_state[i] < 0.)\n";
581 << OpName <<
"_hidden_state[i];\n";
582 out <<
SP <<
SP <<
"}\n";
584 out <<
SP <<
SP <<
"for (size_t i = offset; i < offset + size; i++) {\n";
586 out <<
SP <<
SP <<
SP <<
SP << OpName <<
"_hidden_state[i] = 0.;\n";
587 out <<
SP <<
SP <<
"}";
589 out <<
SP <<
SP <<
"for (size_t i = offset; i < offset + size; i++) {\n";
590 out <<
SP <<
SP <<
SP <<
"if (" << OpName <<
"_hidden_state[i] < 0.)\n";
592 <<
" * std::exp(" << OpName <<
"_hidden_state[i] - 1.);\n";
593 out <<
SP <<
SP <<
"}\n";
595 out <<
SP <<
SP <<
"for (size_t i = offset; i < offset + size; i++) {\n";
596 out <<
SP <<
SP <<
SP <<
SP << OpName <<
"_hidden_state[i] = " << OpName <<
"_hidden_state[i] / (1. + abs("
597 << OpName <<
"_hidden_state[i]));\n";
598 out <<
SP <<
SP <<
"}\n";
600 out <<
SP <<
SP <<
"for (size_t i = offset; i < offset + size; i++) {\n";
601 out <<
SP <<
SP <<
SP <<
SP << OpName <<
"_hidden_state[i] = log(1. + std::exp(" << OpName
602 <<
"_hidden_state[i]));\n";
603 out <<
SP <<
SP <<
"}\n";
611 out <<
SP <<
"for (size_t seq = 0; seq < " << seq_length <<
"; seq++) {\n";
612 out <<
SP <<
SP <<
"for (size_t batch = 0; batch < " << batch_size <<
"; batch++) {\n";
615 if (num_directions == 1) {
616 out <<
SP <<
SP <<
SP <<
SP <<
SP << OpName <<
"_hidden_state[seq * "
619 out <<
SP <<
SP <<
SP <<
SP <<
SP << OpName <<
"_hidden_state[seq * "
621 out <<
SP <<
SP <<
SP <<
SP <<
SP << OpName <<
"_hidden_state[seq * "
625 out <<
SP <<
SP <<
SP <<
SP <<
"}\n";
626 out <<
SP <<
SP <<
SP <<
"}\n";
627 out <<
SP <<
SP <<
"}\n";
633 if (!
fNY_h.empty()) {
637 out <<
SP <<
"std::copy(" << OpName <<
"_hidden_state, " << OpName <<
"_hidden_state + " << yh_size
638 <<
", tensor_" <<
fNY_h <<
");\n";
640 size_t offset = (seq_length - 1) * num_directions * batch_size *
fAttrHiddenSize;
641 out <<
SP <<
"std::copy(" << OpName <<
"_hidden_state + " << offset <<
", " << OpName
642 <<
"_hidden_state + " << offset <<
" + " << yh_size <<
", tensor_" <<
fNY_h <<
");\n";
644 if (num_directions == 2) {
645 out <<
SP <<
"std::copy(" << OpName <<
"_hidden_state + " << yh_size <<
", " << OpName
646 <<
"_hidden_state + " << 2 * yh_size <<
", tensor_" <<
fNY_h <<
" + " << yh_size <<
");\n";
650 out <<
SP <<
"for (size_t batch = 0; batch < " << batch_size <<
"; batch++) {\n";
652 out <<
SP <<
SP <<
"std::copy(" << OpName <<
"_hidden_state + offset, " << OpName
656 out <<
SP <<
"for (size_t batch = 0; batch < " << batch_size <<
"; batch++) {\n";
658 out <<
SP <<
SP <<
"size_t offset = seq * " << num_directions * batch_size *
fAttrHiddenSize
661 out <<
SP <<
SP <<
"std::copy(" << OpName <<
"_hidden_state + offset, " << OpName
662 <<
"_hidden_state + offset + " <<
fAttrHiddenSize <<
", tensor_" <<
fNY_h <<
" + yh_offset);\n";
665 if (num_directions == 2) {
666 out <<
SP <<
"for (size_t batch = 0; batch < " << batch_size <<
"; batch++) {\n";
671 out <<
SP <<
SP <<
"std::copy(" << OpName <<
"_hidden_state + offset, " << OpName
672 <<
"_hidden_state + offset + " <<
fAttrHiddenSize <<
", tensor_" <<
fNY_h <<
" + yh_offset);\n";
679 for (
size_t direction = 0; direction < num_directions; direction++) {
680 out <<
SP <<
"for (size_t seq = 0; seq < " << seq_length <<
"; seq++) {\n";
681 out <<
SP <<
SP <<
"for (size_t batch = 0; batch < " << batch_size <<
"; batch++) {\n";
682 out <<
SP <<
SP <<
SP <<
"size_t offset = seq * " << num_directions * batch_size *
fAttrHiddenSize <<
" + "
684 out <<
SP <<
SP <<
SP <<
"size_t y_offset = batch * " << seq_length * num_directions *
fAttrHiddenSize
686 out <<
SP <<
SP <<
SP <<
"std::copy(" << OpName <<
"_hidden_state + offset, " << OpName
687 <<
"_hidden_state + offset + " <<
fAttrHiddenSize <<
", tensor_" <<
fNY <<
" + y_offset);\n";
688 out <<
SP <<
SP <<
"}\n";
692 if (!
fNY_h.empty()) {
694 out <<
SP <<
"for (size_t batch = 0; batch < " << batch_size <<
"; batch++) {\n";
696 out <<
SP <<
SP <<
"size_t yh_offset = batch * " << num_directions *
fAttrHiddenSize <<
";\n";
697 out <<
SP <<
SP <<
"std::copy(" << OpName <<
"_hidden_state + offset, " << OpName
698 <<
"_hidden_state + offset + " <<
fAttrHiddenSize <<
", tensor_" <<
fNY_h <<
" + yh_offset);\n";
701 out <<
SP <<
"for (size_t batch = 0; batch < " << batch_size <<
"; batch++) {\n";
703 out <<
SP <<
SP <<
"size_t seq = " << seq_length - 1 <<
";\n";
707 out <<
SP <<
SP <<
"size_t offset = seq * " << num_directions * batch_size *
fAttrHiddenSize
709 out <<
SP <<
SP <<
"size_t yh_offset = batch * " << num_directions *
fAttrHiddenSize <<
";\n";
710 out <<
SP <<
SP <<
"std::copy(" << OpName <<
"_hidden_state + offset, " << OpName
711 <<
"_hidden_state + offset + " <<
fAttrHiddenSize <<
", tensor_" <<
fNY_h <<
" + yh_offset);\n";
714 if (num_directions == 2) {
715 out <<
SP <<
"for (size_t batch = 0; batch < " << batch_size <<
"; batch++) {\n";
718 out <<
SP <<
SP <<
"size_t yh_offset = batch * " << num_directions *
fAttrHiddenSize <<
" + "
720 out <<
SP <<
SP <<
"std::copy(" << OpName <<
"_hidden_state + offset, " << OpName
721 <<
"_hidden_state + offset + " <<
fAttrHiddenSize <<
", tensor_" <<
fNY_h <<
" + yh_offset);\n";
size_t size(const MatrixT &matrix)
Retrieve the size of a square matrix.
std::vector< size_t > fShapeB
Shape of the bias.
std::vector< float > fAttrActivationBeta
Scaling values used by some activation functions.
size_t fAttrHiddenSize
Number of hidden units (dimension of the hidden state).
std::string fNInitial_h
Name of the initial value of the hidden states.
ROperator_RNN(std::vector< float > activation_alpha, std::vector< float > activation_beta, std::vector< std::string > activations, float clip, std::string direction, size_t hidden_size, size_t layout, std::string nameX, std::string nameW, std::string nameR, std::string nameB, std::string nameSequence_lens, std::string nameInitial_h, std::string nameY, std::string nameY_h)
Constructor of ROperator_RNN from the attributes.
void Initialize(RModel &) override
Initialize the model.
std::vector< size_t > fShapeR
Shape of the recurrence.
std::string fNW
Name of the weights.
std::string fNB
Name of the bias.
std::vector< ETensorType > TypeInference(std::vector< ETensorType > input) override
Infers the type of the output tensors.
std::vector< size_t > fShapeY
Shape of the output.
float fAttrClip
Clip threshold.
std::string fType
Type of the tensors.
size_t fAttrLayout
Data layout.
std::string fNY
Name of the output.
std::string fNSequence_lens
Name of the length of the sequences.
std::vector< size_t > fShapeSequence_lens
Shape of the length of the sequences.
std::vector< std::string > GetBlasRoutines() override
Returns the blas routines needed to compile the generated code.
std::string fNR
Name of the recurrence.
std::string GenerateSessionMembersCode(std::string opName) override
std::vector< float > fAttrActivationAlpha
Scaling values used by some activation functions.
ROperator_RNN()
Default constructor of ROperator_RNN.
std::vector< size_t > fShapeX
Shape of the input.
std::string fAttrDirection
Direction of processing.
std::vector< std::vector< size_t > > ShapeInference(std::vector< std::vector< size_t > > input) override
Infers the shape of the output tensors.
std::string fNX
Name of the input.
std::string Generate(std::string OpName) override
Generates the inference code.
std::string fNY_h
Name of the last sequence of the output.
std::vector< size_t > fShapeInitial_h
Shape of the initial value of the hidden states.
std::vector< size_t > fShapeW
Shape of the weights.
std::vector< std::string > fAttrActivations
Activation functions.
std::vector< size_t > fShapeY_h
Shape of the last sequence of the output.
std::vector< std::string_view > fInputTensorNames
const std::string SP
Space used to correctly indent the generated C++ code.
bool fUseSession
flag to identify if using the session class
std::vector< std::string_view > fOutputTensorNames
static uint64_t sum(uint64_t i)