208 fUseSession = model.UseSession();
210 if (!model.CheckIfTensorAlreadyExist(fNX)) {
211 throw std::runtime_error(
"TMVA SOFIE LSTM Op input tensor " + fNX +
" is not found in model.");
213 fShapeX = model.GetTensorShape(fNX);
214 if (fShapeX.size() != 3) {
215 throw std::runtime_error(
"TMVA SOFIE LSTM Op input tensor " + fNX +
" is not of 3 dimensions.");
217 if (!model.CheckIfTensorAlreadyExist(fNW)) {
218 throw std::runtime_error(
"TMVA SOFIE LSTM Op input tensor " + fNW +
" is not found in model.");
220 fShapeW = model.GetTensorShape(fNW);
221 if (fShapeW.size() != 3) {
222 throw std::runtime_error(
"TMVA SOFIE LSTM Op input tensor " + fNW +
" is not of 3 dimensions.");
224 if (!model.CheckIfTensorAlreadyExist(fNR)) {
225 throw std::runtime_error(
"TMVA SOFIE LSTM Op input tensor " + fNR +
" is not found in model.");
227 fShapeR = model.GetTensorShape(fNR);
228 if (fShapeR.size() != 3) {
229 throw std::runtime_error(
"TMVA SOFIE LSTM Op input tensor " + fNR +
" is not of 3 dimensions.");
232 if (!model.CheckIfTensorAlreadyExist(fNB)) {
233 throw std::runtime_error(
"TMVA SOFIE LSTM op input tensor " + fNB +
" is not found in model.");
235 fShapeB = model.GetTensorShape(fNB);
236 if (fShapeB.size() != 2 && fShapeB.size() != 5) {
237 throw std::runtime_error(
"TMVA SOFIE LSTM op input tensor " + fNB +
" is not of 2 or 5 dimensions.");
239 if (fShapeB.size() == 2) {
243 size_t seq_length = (fAttrLayout == 0) ? fShapeX[0] : fShapeX[1];
244 size_t batch_size = (fAttrLayout == 0) ? fShapeX[1] : fShapeX[0];
245 if (fType ==
"float") {
249 std::vector<float>
sum(fAttrHiddenSize);
252 for (
size_t h = 0;
h < fAttrHiddenSize;
h++) {
268 fShapeB = model.GetTensorShape(fNB);
272 if (!fNSequence_lens.empty()) {
273 if (!model.CheckIfTensorAlreadyExist(fNSequence_lens)) {
274 throw std::runtime_error(
"TMVA SOFIE LSTM Op input tensor " + fNSequence_lens +
"is not found in model.");
276 fShapeSequence_lens = model.GetTensorShape(fNSequence_lens);
277 if (fShapeSequence_lens.size() != 1) {
278 throw std::runtime_error(
"TMVA SOFIE LSTM Op input tensor " + fNSequence_lens +
" is not of 1 dimension.");
281 if (!fNInitial_h.empty()) {
282 if (!model.CheckIfTensorAlreadyExist(fNInitial_h)) {
283 throw std::runtime_error(
"TMVA SOFIE LSTM Op input tensor " + fNInitial_h +
" is not found in model.");
285 fShapeInitial_h = model.GetTensorShape(fNInitial_h);
286 if (fShapeInitial_h.size() != 3) {
287 throw std::runtime_error(
"TMVA SOFIE LSTM Op input tensor " + fNInitial_h +
" is not of 3 dimensions.");
290 if (!fNInitial_c.empty()) {
291 if (!model.CheckIfTensorAlreadyExist(fNInitial_c)) {
292 throw std::runtime_error(
"TMVA SOFIE LSTM Op input tensor " + fNInitial_c +
" is not found in model.");
294 fShapeInitial_c = model.GetTensorShape(fNInitial_c);
295 if (fShapeInitial_c.size() != 3) {
296 throw std::runtime_error(
"TMVA SOFIE LSTM Op input tensor " + fNInitial_c +
" is not of 3 dimensions.");
300 if (!model.CheckIfTensorAlreadyExist(fNP)) {
301 throw std::runtime_error(
"TMVA SOFIE LSTM op input tensor " + fNP +
" is not found in model.");
303 fShapeP = model.GetTensorShape(fNP);
304 if (fShapeP.size() != 2 && fShapeP.size() != 4) {
305 throw std::runtime_error(
"TMVA SOFIE LSTM op input tensor " + fNP +
" is not of 2 or 4 dimensions.");
307 if (fShapeP.size() == 2) {
311 size_t batch_size = (fAttrLayout == 0) ? fShapeX[1] : fShapeX[0];
312 if (fType ==
"float") {
326 std::shared_ptr<void>
new_p_ptr(
new_p, std::default_delete<
float[]>());
328 fShapeP = model.GetTensorShape(fNP);
333 fShapeY = ShapeInference({fShapeX, fShapeW})[0];
334 if (!model.CheckIfTensorAlreadyExist(fNY)) {
335 model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShapeY);
338 if (!fNY_h.empty()) {
339 fShapeY_h = ShapeInference({fShapeX, fShapeW})[1];
340 if (!model.CheckIfTensorAlreadyExist(fNY_h)) {
341 model.AddIntermediateTensor(fNY_h, model.GetTensorType(fNX), fShapeY_h);
344 if (!fNY_c.empty()) {
345 fShapeY_c = ShapeInference({fShapeX, fShapeW})[2];
346 if (!model.CheckIfTensorAlreadyExist(fNY_c)) {
347 model.AddIntermediateTensor(fNY_c, model.GetTensorType(fNX), fShapeY_c);
355 throw std::runtime_error(
"TMVA SOFIE - Activation function " +
activation +
" not implemented");
358 if (fAttrDirection !=
"forward" && fAttrDirection !=
"backward" && fAttrDirection !=
"bidirectional") {
359 throw std::runtime_error(
"TMVA SOFIE - Invalid LSTM direction fAttrDirection = " + fAttrDirection);
361 if (4 * fAttrHiddenSize != fShapeW[1]) {
362 throw std::runtime_error(
"TMVA SOFIE - fAttrHiddenSize must be equal to " + std::to_string(fShapeW[1] / 4));
364 if (fAttrInputForget > 1) {
365 throw std::runtime_error(
"TMVA SOFIE - fAttrInputForget = " + std::to_string(fAttrInputForget) +
368 if (fAttrLayout > 1) {
369 throw std::runtime_error(
"TMVA SOFIE - Layout fAttrLayout = " + std::to_string(fAttrLayout) +
370 " must be 0 (timewise) or 1 (batchwise)");
372 if (fAttrActivations.empty()) {
373 if (fAttrDirection ==
"bidirectional") {
374 fAttrActivations = {
"Sigmoid",
"Tanh",
"Tanh",
"Sigmoid",
"Tanh",
"Tanh"};
376 fAttrActivations = {
"Sigmoid",
"Tanh",
"Tanh"};
443 std::stringstream out;
445 size_t seq_length = (fAttrLayout == 0) ? fShapeX[0] : fShapeX[1];
446 size_t batch_size = (fAttrLayout == 0) ? fShapeX[1] : fShapeX[0];
451 if (fAttrLayout == 0) {
452 out << SP << fType <<
" const *" <<
OpName <<
"_input = tensor_" << fNX <<
";\n";
455 out << SP << fType <<
" * " <<
OpName <<
"_input = this->fVec_" <<
OpName <<
"_input.data();\n";
459 out << SP <<
"for(size_t seq = 0; seq < " <<
seq_length <<
"; seq++) {\n";
460 out << SP << SP <<
"for(size_t batch = 0; batch < " <<
batch_size <<
"; batch++) {\n";
461 out << SP << SP << SP <<
"for(size_t i = 0; i < " <<
input_size <<
"; i++) {\n";
465 out << SP << SP << SP <<
"}\n";
466 out << SP << SP <<
"}\n";
471 if (!fNInitial_h.empty()) {
472 if (fAttrLayout == 0) {
473 out << SP << fType <<
" *" <<
OpName <<
"_initial_hidden_state = " <<
" tensor_" << fNInitial_h <<
";\n";
476 out << SP << fType <<
" * " <<
OpName <<
"_initial_hidden_state = this->fVec_" <<
OpName
477 <<
"_initial_hidden_state.data();\n";
479 out << SP << fType <<
" " <<
OpName <<
"_initial_hidden_state["
483 out << SP <<
"for(size_t batch = 0; batch < " <<
batch_size <<
"; batch++) {\n";
484 out << SP << SP <<
"for(size_t h = 0; h < " << fAttrHiddenSize <<
"; h++) {\n";
486 <<
" + batch * " << fAttrHiddenSize <<
" + h] = tensor_" << fNInitial_h <<
"[batch * "
488 out << SP << SP <<
"}\n";
495 if (!fNInitial_c.empty()) {
496 if (fAttrLayout == 0) {
497 out << SP << fType <<
" *" <<
OpName <<
"_initial_cell_state = " <<
" tensor_" << fNInitial_c <<
";\n";
500 out << SP << fType <<
" * " <<
OpName <<
"_initial_cell_state = this->fVec_" <<
OpName
501 <<
"_initial_cell_state.data();\n";
503 out << SP << fType <<
" " <<
OpName <<
"_initial_cell_state["
507 out << SP <<
"for(size_t batch = 0; batch < " <<
batch_size <<
"; batch++) {\n";
508 out << SP << SP <<
"for(size_t h = 0; h < " << fAttrHiddenSize <<
"; h++) {\n";
510 <<
" + batch * " << fAttrHiddenSize <<
" + h] = tensor_" << fNInitial_c <<
"[batch * "
512 out << SP << SP <<
"}\n";
521 out << SP << fType <<
" * " <<
OpName <<
"_ff_input_gate = this->fVec_" <<
OpName <<
"_ff_input_gate.data();\n";
522 out << SP << fType <<
" * " <<
OpName <<
"_ff_output_gate = this->fVec_" <<
OpName <<
"_ff_output_gate.data();\n";
523 out << SP << fType <<
" * " <<
OpName <<
"_ff_cell_gate = this->fVec_" <<
OpName <<
"_ff_cell_gate.data();\n";
524 if (fAttrInputForget == 0) {
525 out << SP << fType <<
" * " <<
OpName <<
"_ff_forget_gate = this->fVec_" <<
OpName
526 <<
"_ff_forget_gate.data();\n";
529 out << SP << fType <<
" " <<
OpName <<
"_ff_input_gate[" <<
ff_size <<
"] = {0};\n";
530 out << SP << fType <<
" " <<
OpName <<
"_ff_output_gate[" <<
ff_size <<
"] = {0};\n";
531 out << SP << fType <<
" " <<
OpName <<
"_ff_cell_gate[" <<
ff_size <<
"] = {0};\n";
532 if (fAttrInputForget == 0) {
533 out << SP << fType <<
" " <<
OpName <<
"_ff_forget_gate[" <<
ff_size <<
"] = {0};\n";
539 out << SP << fType <<
" * " <<
OpName <<
"_input_gate = this->fVec_" <<
OpName <<
"_input_gate.data();\n";
540 out << SP << fType <<
" * " <<
OpName <<
"_output_gate = this->fVec_" <<
OpName <<
"_output_gate.data();\n";
541 out << SP << fType <<
" * " <<
OpName <<
"_cell_gate = this->fVec_" <<
OpName <<
"_cell_gate.data();\n";
542 if (fAttrInputForget == 0) {
543 out << SP << fType <<
" * " <<
OpName <<
"_forget_gate = this->fVec_" <<
OpName <<
"_forget_gate.data();\n";
549 if (fAttrInputForget == 0) {
555 out << SP << fType <<
" * " <<
OpName <<
"_cell_state = this->fVec_" <<
OpName <<
"_cell_state.data();\n";
556 out << SP << fType <<
" * " <<
OpName <<
"_new_cell_state = this->fVec_" <<
OpName <<
"_new_cell_state.data();\n";
563 if (fAttrLayout == 0 && !fNY.empty()) {
564 out << SP << fType <<
" *" <<
OpName <<
"_hidden_state = tensor_" << fNY <<
";\n";
567 out << SP << fType <<
" * " <<
OpName <<
"_hidden_state = this->fVec_" <<
OpName <<
"_hidden_state.data();\n";
573 out << SP <<
"char " <<
OpName <<
"_transA = 'N';\n";
574 out << SP <<
"char " <<
OpName <<
"_transB = 'T';\n";
576 out << SP <<
"int " <<
OpName <<
"_n = " << fAttrHiddenSize <<
";\n";
578 if (fType ==
"float") {
579 out << SP << fType <<
" " <<
OpName <<
"_alpha = 1.;\n";
580 out << SP << fType <<
" " <<
OpName <<
"_beta = 0.;\n";
584 out << SP <<
"int " <<
OpName <<
"_incx = 1;\n";
585 out << SP <<
"int " <<
OpName <<
"_incy = 1;\n";
590 if (fType ==
"float") {
592 out << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &" <<
OpName <<
"_n, &"
595 <<
"_ff_input_gate, &" <<
OpName <<
"_n);\n";
598 out << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &" <<
OpName <<
"_n, &"
601 <<
OpName <<
"_ff_output_gate, &" <<
OpName <<
"_n);\n";
604 out << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &" <<
OpName <<
"_n, &"
610 if (fType ==
"float") {
613 out << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &" <<
OpName <<
"_n, &"
619 out << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &" <<
OpName <<
"_n, &"
622 <<
OpName <<
"_ff_output_gate, &" <<
OpName <<
"_n);\n";
625 out << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &" <<
OpName <<
"_n, &"
631 if (fAttrInputForget == 0) {
634 if (fType ==
"float") {
636 out << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &" <<
OpName <<
"_n, &"
639 <<
OpName <<
"_ff_forget_gate, &" <<
OpName <<
"_n);\n";
642 if (fType ==
"float") {
644 out << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &" <<
OpName <<
"_n, &"
647 <<
OpName <<
"_ff_forget_gate, &" <<
OpName <<
"_n);\n";
655 if (fType ==
"float") {
657 out << SP <<
"BLAS::saxpy_(&" <<
OpName <<
"_bias_size, &" <<
OpName <<
"_alpha, tensor_" << fNB <<
", &"
661 out << SP <<
"BLAS::saxpy_(&" <<
OpName <<
"_bias_size, &" <<
OpName <<
"_alpha, tensor_" << fNB <<
" + "
666 out << SP <<
"BLAS::saxpy_(&" <<
OpName <<
"_bias_size, &" <<
OpName <<
"_alpha, tensor_" << fNB <<
" + "
671 if (fType ==
"float") {
674 out << SP <<
"BLAS::saxpy_(&" <<
OpName <<
"_bias_size, &" <<
OpName <<
"_alpha, tensor_" << fNB <<
" + "
680 out << SP <<
"BLAS::saxpy_(&" <<
OpName <<
"_bias_size, &" <<
OpName <<
"_alpha, tensor_" << fNB <<
" + "
686 out << SP <<
"BLAS::saxpy_(&" <<
OpName <<
"_bias_size, &" <<
OpName <<
"_alpha, tensor_" << fNB <<
" + "
691 if (fAttrInputForget == 0) {
694 if (fType ==
"float") {
696 out << SP <<
"BLAS::saxpy_(&" <<
OpName <<
"_bias_size, &" <<
OpName <<
"_alpha, tensor_" << fNB
701 if (fType ==
"float") {
704 out << SP <<
"BLAS::saxpy_(&" <<
OpName <<
"_bias_size, &" <<
OpName <<
"_alpha, tensor_" << fNB
714 out << SP <<
"for (size_t seq = 0; seq < " <<
seq_length <<
"; seq++) {\n";
715 out << SP << SP <<
"size_t ff_offset = seq * " <<
batch_size * fAttrHiddenSize <<
";\n";
723 out << SP << SP <<
"std::copy(" <<
OpName <<
"_ff_input_gate + ff_offset, " <<
OpName
724 <<
"_ff_input_gate + ff_offset + " <<
ff_seq_size <<
", " <<
OpName <<
"_input_gate + gate_offset);\n";
725 out << SP << SP <<
"std::copy(" <<
OpName <<
"_ff_output_gate + ff_offset, " <<
OpName
726 <<
"_ff_output_gate + ff_offset + " <<
ff_seq_size <<
", " <<
OpName <<
"_output_gate + gate_offset);\n";
727 out << SP << SP <<
"std::copy(" <<
OpName <<
"_ff_cell_gate + ff_offset, " <<
OpName
728 <<
"_ff_cell_gate + ff_offset + " <<
ff_seq_size <<
", " <<
OpName <<
"_cell_gate + gate_offset);\n";
729 if (fAttrInputForget == 0) {
730 out << SP << SP <<
"std::copy(" <<
OpName <<
"_ff_forget_gate + ff_offset, " <<
OpName
731 <<
"_ff_forget_gate + ff_offset + " <<
ff_seq_size <<
", " <<
OpName <<
"_forget_gate + gate_offset);\n";
735 out << SP <<
"for (size_t seq = 0; seq < " <<
seq_length <<
"; seq++) {\n";
736 if (fAttrDirection ==
"backward" ||
direction == 1) {
737 out << SP << SP <<
"size_t index = " <<
seq_length - 1 <<
" - seq;\n";
739 out << SP << SP <<
"size_t index = seq;\n";
741 out << SP << SP <<
"int m2 = " <<
batch_size <<
";\n";
750 out << SP << SP <<
"if (seq == 0) {\n";
751 if (!fNInitial_h.empty()) {
753 if (fType ==
"float") {
754 out << SP << SP << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &" <<
OpName
755 <<
"_n, &m2, &" <<
OpName <<
"_n, &" <<
OpName <<
"_alpha, tensor_" << fNR <<
", &" <<
OpName
756 <<
"_n, " <<
OpName <<
"_initial_hidden_state, &" <<
OpName <<
"_n, &" <<
OpName <<
"_alpha, "
757 <<
OpName <<
"_input_gate + offset, &" <<
OpName <<
"_n);\n";
758 size_t ro_offset = fAttrHiddenSize * fAttrHiddenSize;
759 out << SP << SP << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &" <<
OpName
762 <<
"_alpha, " <<
OpName <<
"_output_gate + offset, &" <<
OpName <<
"_n);\n";
763 size_t rc_offset = 3 * fAttrHiddenSize * fAttrHiddenSize;
764 out << SP << SP << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &" <<
OpName
767 <<
"_alpha, " <<
OpName <<
"_cell_gate + offset, &" <<
OpName <<
"_n);\n";
768 if (fAttrInputForget == 0) {
769 size_t rf_offset = 2 * fAttrHiddenSize * fAttrHiddenSize;
770 out << SP << SP << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &"
771 <<
OpName <<
"_n, &m2, &" <<
OpName <<
"_n, &" <<
OpName <<
"_alpha, tensor_" << fNR <<
" + "
773 <<
"_n, &" <<
OpName <<
"_alpha, " <<
OpName <<
"_forget_gate + offset, &" <<
OpName <<
"_n);\n";
777 if (fType ==
"float") {
778 size_t ri_offset = 4 * fAttrHiddenSize * fAttrHiddenSize;
779 out << SP << SP << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &" <<
OpName
782 <<
"_alpha, " <<
OpName <<
"_input_gate + offset, &" <<
OpName <<
"_n);\n";
783 size_t ro_offset = 4 * fAttrHiddenSize * fAttrHiddenSize + 1 * fAttrHiddenSize * fAttrHiddenSize;
784 out << SP << SP << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &" <<
OpName
787 <<
"_alpha, " <<
OpName <<
"_output_gate + offset, &" <<
OpName <<
"_n);\n";
788 size_t rc_offset = 4 * fAttrHiddenSize * fAttrHiddenSize + 3 * fAttrHiddenSize * fAttrHiddenSize;
789 out << SP << SP << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &" <<
OpName
792 <<
"_alpha, " <<
OpName <<
"_cell_gate + offset, &" <<
OpName <<
"_n);\n";
793 if (fAttrInputForget == 0) {
794 size_t rf_offset = 4 * fAttrHiddenSize * fAttrHiddenSize + 2 * fAttrHiddenSize * fAttrHiddenSize;
795 out << SP << SP << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &"
796 <<
OpName <<
"_n, &m2, &" <<
OpName <<
"_n, &" <<
OpName <<
"_alpha, tensor_" << fNR <<
" + "
798 <<
"_n, &" <<
OpName <<
"_alpha, " <<
OpName <<
"_forget_gate + offset, &" <<
OpName <<
"_n);\n";
803 out << SP << SP <<
"} else {\n";
806 if (fAttrDirection ==
"backward") {
807 out << SP << SP << SP <<
"size_t previous_offset = (index + 1) * "
810 out << SP << SP << SP <<
"size_t previous_offset = (seq - 1) * "
813 if (fType ==
"float") {
814 out << SP << SP << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &" <<
OpName
815 <<
"_n, &m2, &" <<
OpName <<
"_n, &" <<
OpName <<
"_alpha, tensor_" << fNR <<
", &" <<
OpName <<
"_n, "
817 <<
"_input_gate + offset, &" <<
OpName <<
"_n);\n";
818 size_t ro_offset = 1 * fAttrHiddenSize * fAttrHiddenSize;
819 out << SP << SP << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &" <<
OpName
821 <<
", &" <<
OpName <<
"_n, " <<
OpName <<
"_hidden_state + previous_offset, &" <<
OpName <<
"_n, &"
822 <<
OpName <<
"_alpha, " <<
OpName <<
"_output_gate + offset, &" <<
OpName <<
"_n);\n";
823 size_t rc_offset = 3 * fAttrHiddenSize * fAttrHiddenSize;
824 out << SP << SP << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &" <<
OpName
826 <<
", &" <<
OpName <<
"_n, " <<
OpName <<
"_hidden_state + previous_offset, &" <<
OpName <<
"_n, &"
828 if (fAttrInputForget == 0) {
829 size_t rf_offset = 2 * fAttrHiddenSize * fAttrHiddenSize;
830 out << SP << SP << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &" <<
OpName
832 <<
", &" <<
OpName <<
"_n, " <<
OpName <<
"_hidden_state + previous_offset, &" <<
OpName <<
"_n, &"
833 <<
OpName <<
"_alpha, " <<
OpName <<
"_forget_gate + offset, &" <<
OpName <<
"_n);\n";
837 out << SP << SP << SP <<
"size_t previous_offset = (index + 1) * "
839 if (fType ==
"float") {
840 size_t ri_offset = 4 * fAttrHiddenSize * fAttrHiddenSize;
841 out << SP << SP << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &" <<
OpName
843 <<
", &" <<
OpName <<
"_n, " <<
OpName <<
"_hidden_state + previous_offset, &" <<
OpName <<
"_n, &"
845 size_t ro_offset = 4 * fAttrHiddenSize * fAttrHiddenSize + fAttrHiddenSize * fAttrHiddenSize;
846 out << SP << SP << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &" <<
OpName
848 <<
", &" <<
OpName <<
"_n, " <<
OpName <<
"_hidden_state + previous_offset, &" <<
OpName <<
"_n, &"
849 <<
OpName <<
"_alpha, " <<
OpName <<
"_output_gate + offset, &" <<
OpName <<
"_n);\n";
850 size_t rc_offset = 4 * fAttrHiddenSize * fAttrHiddenSize + 3 * fAttrHiddenSize * fAttrHiddenSize;
851 out << SP << SP << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &" <<
OpName
853 <<
", &" <<
OpName <<
"_n, " <<
OpName <<
"_hidden_state + previous_offset, &" <<
OpName <<
"_n, &"
855 if (fAttrInputForget == 0) {
856 size_t rf_offset = 4 * fAttrHiddenSize * fAttrHiddenSize + 2 * fAttrHiddenSize * fAttrHiddenSize;
857 out << SP << SP << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &" <<
OpName
859 <<
", &" <<
OpName <<
"_n, " <<
OpName <<
"_hidden_state + previous_offset, &" <<
OpName <<
"_n, &"
860 <<
OpName <<
"_alpha, " <<
OpName <<
"_forget_gate + offset, &" <<
OpName <<
"_n);\n";
864 out << SP << SP <<
"}\n";
867 if (fAttrClip > .0) {
868 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
869 if (fType ==
"float") {
870 out << SP << SP << SP <<
"float x = (" <<
OpName <<
"_cell_gate[i] > " << -fAttrClip <<
") ? " <<
OpName
871 <<
"_cell_gate[i] : " << -fAttrClip <<
";\n";
873 out << SP << SP << SP <<
OpName <<
"_cell_gate[i] = (x < " << fAttrClip <<
") ? x : " << fAttrClip <<
";\n";
874 out << SP << SP <<
"}\n";
877 if (fAttrActivations[
direction * 3 + 1] ==
"Relu") {
878 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
879 out << SP << SP << SP <<
"if (" <<
OpName <<
"_cell_gate[i] < 0.)\n";
880 out << SP << SP << SP << SP <<
OpName <<
"_cell_gate[i] = 0.;\n";
881 out << SP << SP <<
"}\n";
882 }
else if (fAttrActivations[
direction * 3 + 1] ==
"Tanh") {
883 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
884 if (fType ==
"float") {
885 out << SP << SP << SP <<
"float ex = exp(-2 * " <<
OpName <<
"_cell_gate[i]);\n";
887 out << SP << SP << SP << SP <<
OpName <<
"_cell_gate[i] = (1. - ex) / (1. + ex);\n";
888 out << SP << SP <<
"}\n";
889 }
else if (fAttrActivations[
direction * 3 + 1] ==
"Sigmoid") {
890 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
891 out << SP << SP << SP << SP <<
OpName <<
"_cell_gate[i] = 1. / (1. + exp(-" <<
OpName <<
"_cell_gate[i]));\n";
892 out << SP << SP <<
"}\n";
893 }
else if (fAttrActivations[
direction * 3 + 1] ==
"Affine") {
894 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
895 out << SP << SP << SP << SP <<
OpName <<
"_cell_gate[i] = " << fAttrActivationAlpha[
direction * 3 + 1] <<
" * "
896 <<
OpName <<
"_cell_gate[i] + " << fAttrActivationBeta[
direction * 3 + 1] <<
";\n";
897 out << SP << SP <<
"}\n";
898 }
else if (fAttrActivations[
direction * 3 + 1] ==
"ScaledTanh") {
899 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
900 if (fType ==
"float") {
901 out << SP << SP << SP <<
"float ex = exp(-2 * " << fAttrActivationBeta[
direction * 3 + 1] <<
" * " <<
OpName
902 <<
"_cell_gate[i]);\n";
904 out << SP << SP << SP << SP <<
OpName <<
"_cell_gate[i] = " << fAttrActivationAlpha[
direction * 3 + 1]
905 <<
" * (1. - ex) / (1. + ex);\n";
906 out << SP << SP <<
"}\n";
907 }
else if (fAttrActivations[
direction * 3 + 1] ==
"HardSigmoid") {
908 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
909 if (fType ==
"float") {
910 out << SP << SP << SP <<
"float a = " << fAttrActivationAlpha[
direction * 3 + 1] <<
" * " <<
OpName
911 <<
"_cell_gate[i] + " << fAttrActivationBeta[
direction * 3 + 1] <<
";\n";
912 out << SP << SP << SP <<
"float b = (a > 0.) ? a : 0.;\n";
914 out << SP << SP << SP << SP <<
OpName <<
"_cell_gate[i] = (b < 1.) ? b : 1.;\n";
915 out << SP << SP <<
"}\n";
916 }
else if (fAttrActivations[
direction * 3 + 1] ==
"LeakyRelu") {
917 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
918 out << SP << SP << SP <<
"if (" <<
OpName <<
"_cell_gate[i] < 0.)\n";
919 out << SP << SP << SP << SP <<
OpName <<
"_cell_gate[i] = " << fAttrActivationAlpha[
direction * 3 + 1] <<
" * "
920 <<
OpName <<
"_cell_gate[i];\n";
921 out << SP << SP <<
"}\n";
922 }
else if (fAttrActivations[
direction * 3 + 1] ==
"ThresholdRelu") {
923 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
924 out << SP << SP << SP <<
"if (" <<
OpName <<
"_cell_gate[i] < " << fAttrActivationAlpha[
direction * 3 + 1]
926 out << SP << SP << SP << SP <<
OpName <<
"_cell_gate[i] = 0.;\n";
927 out << SP << SP <<
"}";
928 }
else if (fAttrActivations[
direction * 3 + 1] ==
"Elu") {
929 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
930 out << SP << SP << SP <<
"if (" <<
OpName <<
"_cell_gate[i] < 0.)\n";
931 out << SP << SP << SP << SP <<
OpName <<
"_cell_gate[i] = " << fAttrActivationAlpha[
direction * 3 + 1]
932 <<
" * exp(" <<
OpName <<
"_cell_gate[i] - 1.);\n";
933 out << SP << SP <<
"}\n";
934 }
else if (fAttrActivations[
direction * 3 + 1] ==
"Softsign") {
935 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
936 out << SP << SP << SP << SP <<
OpName <<
"_cell_gate[i] = " <<
OpName <<
"_cell_gate[i] / (1. + abs(" <<
OpName
937 <<
"_cell_gate[i]));\n";
938 out << SP << SP <<
"}\n";
940 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
941 out << SP << SP << SP << SP <<
OpName <<
"_cell_gate[i] = log(1. + exp(" <<
OpName <<
"_cell_gate[i]));\n";
942 out << SP << SP <<
"}\n";
948 out << SP << SP <<
"if (seq == 0) {\n";
949 if (!fNInitial_c.empty()) {
951 out << SP << SP << SP <<
"for (size_t i = 0; i < " <<
size <<
"; i++) {\n";
952 out << SP << SP << SP << SP <<
OpName <<
"_input_gate[i + offset] += tensor_" << fNP <<
"[i] * "
953 <<
OpName <<
"_initial_cell_state[i];\n";
954 out << SP << SP << SP <<
"}\n";
955 if (fAttrInputForget == 0) {
957 out << SP << SP << SP <<
"for (size_t i = 0; i < " <<
size <<
"; i++) {\n";
958 out << SP << SP << SP << SP <<
OpName <<
"_forget_gate[i + offset] += tensor_" << fNP <<
"[i + "
960 out << SP << SP << SP <<
"}\n";
965 out << SP << SP << SP <<
"for (size_t i = 0; i < " <<
size <<
"; i++) {\n";
966 out << SP << SP << SP << SP <<
OpName <<
"_input_gate[i + offset] += tensor_" << fNP <<
"[i + "
968 out << SP << SP << SP <<
"}\n";
969 if (fAttrInputForget == 0) {
971 out << SP << SP << SP <<
"for (size_t i = 0; i < " <<
size <<
"; i++) {\n";
972 out << SP << SP << SP << SP <<
OpName <<
"_forget_gate[i + offset] += tensor_" << fNP <<
"[i + "
974 out << SP << SP << SP <<
"}\n";
978 out << SP << SP <<
"} else {\n";
980 if (fAttrDirection ==
"backward") {
981 out << SP << SP << SP <<
"size_t c_offset = (index + 1) * "
984 out << SP << SP << SP <<
"size_t c_offset = (seq - 1) * "
987 out << SP << SP << SP <<
"for (size_t i = 0; i < " <<
size <<
"; i++) {\n";
988 out << SP << SP << SP << SP <<
OpName <<
"_input_gate[i + offset] += tensor_" << fNP <<
"[i] * " <<
OpName
989 <<
"_cell_state[i + c_offset];\n";
990 out << SP << SP << SP <<
"}\n";
991 if (fAttrInputForget == 0) {
993 out << SP << SP << SP <<
"for (size_t i = 0; i < " <<
size <<
"; i++) {\n";
994 out << SP << SP << SP << SP <<
OpName <<
"_forget_gate[i + offset] += tensor_" << fNP <<
"[i + "
996 out << SP << SP << SP <<
"}\n";
1001 <<
" + " <<
batch_size * fAttrHiddenSize <<
";\n";
1002 out << SP << SP << SP <<
"for (size_t i = 0; i < " <<
size <<
"; i++) {\n";
1003 out << SP << SP << SP << SP <<
OpName <<
"_input_gate[i + offset] += tensor_" << fNP <<
"[i + " <<
pi_offset
1004 <<
"] * " <<
OpName <<
"_cell_state[i + c_offset];\n";
1005 out << SP << SP << SP <<
"}\n";
1006 if (fAttrInputForget == 0) {
1008 out << SP << SP << SP <<
"for (size_t i = 0; i < " <<
size <<
"; i++) {\n";
1009 out << SP << SP << SP << SP <<
OpName <<
"_forget_gate[i + offset] += tensor_" << fNP <<
"[i + "
1011 out << SP << SP << SP <<
"}\n";
1014 out << SP << SP <<
"}\n";
1018 if (fAttrClip > .0) {
1019 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1020 if (fType ==
"float") {
1021 out << SP << SP << SP <<
"float x = (" <<
OpName <<
"_input_gate[i] > " << -fAttrClip <<
") ? " <<
OpName
1022 <<
"_input_gate[i] : " << -fAttrClip <<
";\n";
1024 out << SP << SP << SP <<
OpName <<
"_input_gate[i] = (x < " << fAttrClip <<
") ? x : " << fAttrClip <<
";\n";
1025 out << SP << SP <<
"}\n";
1028 if (fAttrActivations[
direction * 3] ==
"Relu") {
1029 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1030 out << SP << SP << SP <<
"if (" <<
OpName <<
"_input_gate[i] < 0.)\n";
1031 out << SP << SP << SP << SP <<
OpName <<
"_input_gate[i] = 0.;\n";
1032 out << SP << SP <<
"}\n";
1033 }
else if (fAttrActivations[
direction * 3] ==
"Tanh") {
1034 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1035 if (fType ==
"float") {
1036 out << SP << SP << SP <<
"float ex = exp(-2 * " <<
OpName <<
"_input_gate[i]);\n";
1038 out << SP << SP << SP << SP <<
OpName <<
"_input_gate[i] = (1. - ex) / (1. + ex);\n";
1039 out << SP << SP <<
"}\n";
1040 }
else if (fAttrActivations[
direction * 3] ==
"Sigmoid") {
1041 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1042 out << SP << SP << SP << SP <<
OpName <<
"_input_gate[i] = 1. / (1. + exp(-" <<
OpName
1043 <<
"_input_gate[i]));\n";
1044 out << SP << SP <<
"}\n";
1045 }
else if (fAttrActivations[
direction * 3] ==
"Affine") {
1046 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1047 out << SP << SP << SP << SP <<
OpName <<
"_input_gate[i] = " << fAttrActivationAlpha[
direction * 3] <<
" * "
1048 <<
OpName <<
"_input_gate[i] + " << fAttrActivationBeta[
direction * 3] <<
";\n";
1049 out << SP << SP <<
"}\n";
1050 }
else if (fAttrActivations[
direction * 3] ==
"ScaledTanh") {
1051 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1052 if (fType ==
"float") {
1053 out << SP << SP << SP <<
"float ex = exp(-2 * " << fAttrActivationBeta[
direction * 3] <<
" * " <<
OpName
1054 <<
"_input_gate[i]);\n";
1056 out << SP << SP << SP << SP <<
OpName <<
"_input_gate[i] = " << fAttrActivationAlpha[
direction * 3]
1057 <<
" * (1. - ex) / (1. + ex);\n";
1058 out << SP << SP <<
"}\n";
1059 }
else if (fAttrActivations[
direction * 3] ==
"HardSigmoid") {
1060 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1061 if (fType ==
"float") {
1062 out << SP << SP << SP <<
"float a = " << fAttrActivationAlpha[
direction * 3] <<
" * " <<
OpName
1063 <<
"_input_gate[i] + " << fAttrActivationBeta[
direction * 3] <<
";\n";
1064 out << SP << SP << SP <<
"float b = (a > 0.) ? a : 0.;\n";
1066 out << SP << SP << SP << SP <<
OpName <<
"_input_gate[i] = (b < 1.) ? b : 1.;\n";
1067 out << SP << SP <<
"}\n";
1068 }
else if (fAttrActivations[
direction * 3] ==
"LeakyRelu") {
1069 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1070 out << SP << SP << SP <<
"if (" <<
OpName <<
"_input_gate[i] < 0.)\n";
1071 out << SP << SP << SP << SP <<
OpName <<
"_input_gate[i] = " << fAttrActivationAlpha[
direction * 3] <<
" * "
1072 <<
OpName <<
"_input_gate[i];\n";
1073 out << SP << SP <<
"}\n";
1074 }
else if (fAttrActivations[
direction * 3] ==
"ThresholdRelu") {
1075 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1076 out << SP << SP << SP <<
"if (" <<
OpName <<
"_input_gate[i] < " << fAttrActivationAlpha[
direction * 3]
1078 out << SP << SP << SP << SP <<
OpName <<
"_input_gate[i] = 0.;\n";
1079 out << SP << SP <<
"}";
1080 }
else if (fAttrActivations[
direction * 3] ==
"Elu") {
1081 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1082 out << SP << SP << SP <<
"if (" <<
OpName <<
"_input_gate[i] < 0.)\n";
1083 out << SP << SP << SP << SP <<
OpName <<
"_input_gate[i] = " << fAttrActivationAlpha[
direction * 3]
1084 <<
" * exp(" <<
OpName <<
"_input_gate[i] - 1.);\n";
1085 out << SP << SP <<
"}\n";
1086 }
else if (fAttrActivations[
direction * 3] ==
"Softsign") {
1087 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1088 out << SP << SP << SP << SP <<
OpName <<
"_input_gate[i] = " <<
OpName <<
"_input_gate[i] / (1. + abs("
1089 <<
OpName <<
"_input_gate[i]));\n";
1090 out << SP << SP <<
"}\n";
1092 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1093 out << SP << SP << SP << SP <<
OpName <<
"_input_gate[i] = log(1. + exp(" <<
OpName <<
"_input_gate[i]));\n";
1094 out << SP << SP <<
"}\n";
1097 if (fAttrInputForget == 0) {
1099 if (fAttrClip > .0) {
1100 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1101 if (fType ==
"float") {
1102 out << SP << SP << SP <<
"float x = (" <<
OpName <<
"_forget_gate[i] > " << -fAttrClip <<
") ? "
1103 <<
OpName <<
"_forget_gate[i] : " << -fAttrClip <<
";\n";
1105 out << SP << SP << SP <<
OpName <<
"_forget_gate[i] = (x < " << fAttrClip <<
") ? x : " << fAttrClip
1107 out << SP << SP <<
"}\n";
1110 if (fAttrActivations[
direction * 3] ==
"Relu") {
1111 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1112 out << SP << SP << SP <<
"if (" <<
OpName <<
"_forget_gate[i] < 0.)\n";
1113 out << SP << SP << SP << SP <<
OpName <<
"_forget_gate[i] = 0.;\n";
1114 out << SP << SP <<
"}\n";
1115 }
else if (fAttrActivations[
direction * 3] ==
"Tanh") {
1116 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1117 if (fType ==
"float") {
1118 out << SP << SP << SP <<
"float ex = exp(-2 * " <<
OpName <<
"_forget_gate[i]);\n";
1120 out << SP << SP << SP << SP <<
OpName <<
"_forget_gate[i] = (1. - ex) / (1. + ex);\n";
1121 out << SP << SP <<
"}\n";
1122 }
else if (fAttrActivations[
direction * 3] ==
"Sigmoid") {
1123 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1124 out << SP << SP << SP << SP <<
OpName <<
"_forget_gate[i] = 1. / (1. + exp(-" <<
OpName
1125 <<
"_forget_gate[i]));\n";
1126 out << SP << SP <<
"}\n";
1127 }
else if (fAttrActivations[
direction * 3] ==
"Affine") {
1128 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1129 out << SP << SP << SP << SP <<
OpName <<
"_forget_gate[i] = " << fAttrActivationAlpha[
direction * 3]
1130 <<
" * " <<
OpName <<
"_forget_gate[i] + " << fAttrActivationBeta[
direction * 3] <<
";\n";
1131 out << SP << SP <<
"}\n";
1132 }
else if (fAttrActivations[
direction * 3] ==
"ScaledTanh") {
1133 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1134 if (fType ==
"float") {
1135 out << SP << SP << SP <<
"float ex = exp(-2 * " << fAttrActivationBeta[
direction * 3] <<
" * " <<
OpName
1136 <<
"_forget_gate[i]);\n";
1138 out << SP << SP << SP << SP <<
OpName <<
"_forget_gate[i] = " << fAttrActivationAlpha[
direction * 3]
1139 <<
" * (1. - ex) / (1. + ex);\n";
1140 out << SP << SP <<
"}\n";
1141 }
else if (fAttrActivations[
direction * 3] ==
"HardSigmoid") {
1142 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1143 if (fType ==
"float") {
1144 out << SP << SP << SP <<
"float a = " << fAttrActivationAlpha[
direction * 3] <<
" * " <<
OpName
1145 <<
"_forget_gate[i] + " << fAttrActivationBeta[
direction * 3] <<
";\n";
1146 out << SP << SP << SP <<
"float b = (a > 0.) ? a : 0.;\n";
1148 out << SP << SP << SP << SP <<
OpName <<
"_forget_gate[i] = (b < 1.) ? b : 1.;\n";
1149 out << SP << SP <<
"}\n";
1150 }
else if (fAttrActivations[
direction * 3] ==
"LeakyRelu") {
1151 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1152 out << SP << SP << SP <<
"if (" <<
OpName <<
"_forget_gate[i] < 0.)\n";
1153 out << SP << SP << SP << SP <<
OpName <<
"_forget_gate[i] = " << fAttrActivationAlpha[
direction * 3]
1154 <<
" * " <<
OpName <<
"_forget_gate[i];\n";
1155 out << SP << SP <<
"}\n";
1156 }
else if (fAttrActivations[
direction * 3] ==
"ThresholdRelu") {
1157 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1158 out << SP << SP << SP <<
"if (" <<
OpName <<
"_forget_gate[i] < " << fAttrActivationAlpha[
direction * 3]
1160 out << SP << SP << SP << SP <<
OpName <<
"_forget_gate[i] = 0.;\n";
1161 out << SP << SP <<
"}";
1162 }
else if (fAttrActivations[
direction * 3] ==
"Elu") {
1163 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1164 out << SP << SP << SP <<
"if (" <<
OpName <<
"_forget_gate[i] < 0.)\n";
1165 out << SP << SP << SP << SP <<
OpName <<
"_forget_gate[i] = " << fAttrActivationAlpha[
direction * 3]
1166 <<
" * exp(" <<
OpName <<
"_forget_gate[i] - 1.);\n";
1167 out << SP << SP <<
"}\n";
1168 }
else if (fAttrActivations[
direction * 3] ==
"Softsign") {
1169 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1170 out << SP << SP << SP << SP <<
OpName <<
"_forget_gate[i] = " <<
OpName <<
"_forget_gate[i] / (1. + abs("
1171 <<
OpName <<
"_forget_gate[i]));\n";
1172 out << SP << SP <<
"}\n";
1174 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1175 out << SP << SP << SP << SP <<
OpName <<
"_forget_gate[i] = log(1. + exp(" <<
OpName
1176 <<
"_forget_gate[i]));\n";
1177 out << SP << SP <<
"}\n";
1182 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1183 out << SP << SP << SP <<
OpName <<
"_cell_state[i] = " <<
OpName <<
"_input_gate[i] * " <<
OpName
1184 <<
"_cell_gate[i];\n";
1185 out << SP << SP <<
"}\n";
1187 if (fAttrInputForget == 0) {
1188 out << SP << SP <<
"if (seq == 0) {\n";
1189 if (!fNInitial_c.empty()) {
1191 out << SP << SP << SP <<
"for (size_t i = 0; i < " <<
size <<
"; i++) {\n";
1192 out << SP << SP << SP << SP <<
OpName <<
"_cell_state[i + offset] += " <<
OpName
1193 <<
"_forget_gate[i + offset] * " <<
OpName <<
"_initial_cell_state[i];\n";
1194 out << SP << SP << SP <<
"}\n";
1196 out << SP << SP <<
"} else {\n";
1199 if (fAttrDirection ==
"backward") {
1200 out << SP << SP << SP <<
"size_t previous_offset = (index + 1) * "
1203 out << SP << SP << SP <<
"size_t previous_offset = (seq - 1) * "
1207 out << SP << SP << SP <<
"size_t previous_offset = (index + 1) * "
1210 out << SP << SP << SP <<
"for (size_t i = 0; i < " <<
size <<
"; i++) {\n";
1211 out << SP << SP << SP << SP <<
OpName <<
"_cell_state[i + offset] += " <<
OpName
1212 <<
"_forget_gate[i + offset] * " <<
OpName <<
"_cell_state[i + previous_offset];\n";
1213 out << SP << SP << SP <<
"}\n";
1214 out << SP << SP <<
"}\n";
1221 out << SP << SP << SP <<
"for (size_t i = 0; i < " <<
size <<
"; i++) {\n";
1222 out << SP << SP << SP << SP <<
OpName <<
"_output_gate[i + offset] += tensor_" << fNP <<
"[i + " <<
p_offset
1223 <<
"] * " <<
OpName <<
"_cell_state[i + offset];\n";
1224 out << SP << SP << SP <<
"}\n";
1227 out << SP << SP << SP <<
"for (size_t i = 0; i < " <<
size <<
"; i++) {\n";
1228 out << SP << SP << SP << SP <<
OpName <<
"_output_gate[i + offset] += tensor_" << fNP <<
"[i + " <<
p_offset
1229 <<
"] * " <<
OpName <<
"_cell_state[i + offset];\n";
1230 out << SP << SP << SP <<
"}\n";
1235 if (fAttrClip > .0) {
1236 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1237 if (fType ==
"float") {
1238 out << SP << SP << SP <<
"float x = (" <<
OpName <<
"_output_gate[i] > " << -fAttrClip <<
") ? " <<
OpName
1239 <<
"_output_gate[i] : " << -fAttrClip <<
";\n";
1241 out << SP << SP << SP <<
OpName <<
"_output_gate[i] = (x < " << fAttrClip <<
") ? x : " << fAttrClip <<
";\n";
1242 out << SP << SP <<
"}\n";
1245 if (fAttrActivations[
direction * 3] ==
"Relu") {
1246 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1247 out << SP << SP << SP <<
"if (" <<
OpName <<
"_output_gate[i] < 0.)\n";
1248 out << SP << SP << SP << SP <<
OpName <<
"_output_gate[i] = 0.;\n";
1249 out << SP << SP <<
"}\n";
1250 }
else if (fAttrActivations[
direction * 3] ==
"Tanh") {
1251 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1252 if (fType ==
"float") {
1253 out << SP << SP << SP <<
"float ex = exp(-2 * " <<
OpName <<
"_output_gate[i]);\n";
1255 out << SP << SP << SP << SP <<
OpName <<
"_output_gate[i] = (1. - ex) / (1. + ex);\n";
1256 out << SP << SP <<
"}\n";
1257 }
else if (fAttrActivations[
direction * 3] ==
"Sigmoid") {
1258 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1259 out << SP << SP << SP << SP <<
OpName <<
"_output_gate[i] = 1. / (1. + exp(-" <<
OpName
1260 <<
"_output_gate[i]));\n";
1261 out << SP << SP <<
"}\n";
1262 }
else if (fAttrActivations[
direction * 3] ==
"Affine") {
1263 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1264 out << SP << SP << SP << SP <<
OpName <<
"_output_gate[i] = " << fAttrActivationAlpha[
direction * 3] <<
" * "
1265 <<
OpName <<
"_output_gate[i] + " << fAttrActivationBeta[
direction * 3] <<
";\n";
1266 out << SP << SP <<
"}\n";
1267 }
else if (fAttrActivations[
direction * 3] ==
"ScaledTanh") {
1268 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1269 if (fType ==
"float") {
1270 out << SP << SP << SP <<
"float ex = exp(-2 * " << fAttrActivationBeta[
direction * 3] <<
" * " <<
OpName
1271 <<
"_output_gate[i]);\n";
1273 out << SP << SP << SP << SP <<
OpName <<
"_output_gate[i] = " << fAttrActivationAlpha[
direction * 3]
1274 <<
" * (1. - ex) / (1. + ex);\n";
1275 out << SP << SP <<
"}\n";
1276 }
else if (fAttrActivations[
direction * 3] ==
"HardSigmoid") {
1277 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1278 if (fType ==
"float") {
1279 out << SP << SP << SP <<
"float a = " << fAttrActivationAlpha[
direction * 3] <<
" * " <<
OpName
1280 <<
"_output_gate[i] + " << fAttrActivationBeta[
direction * 3] <<
";\n";
1281 out << SP << SP << SP <<
"float b = (a > 0.) ? a : 0.;\n";
1283 out << SP << SP << SP << SP <<
OpName <<
"_output_gate[i] = (b < 1.) ? b : 1.;\n";
1284 out << SP << SP <<
"}\n";
1285 }
else if (fAttrActivations[
direction * 3] ==
"LeakyRelu") {
1286 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1287 out << SP << SP << SP <<
"if (" <<
OpName <<
"_output_gate[i] < 0.)\n";
1288 out << SP << SP << SP << SP <<
OpName <<
"_output_gate[i] = " << fAttrActivationAlpha[
direction * 3] <<
" * "
1289 <<
OpName <<
"_output_gate[i];\n";
1290 out << SP << SP <<
"}\n";
1291 }
else if (fAttrActivations[
direction * 3] ==
"ThresholdRelu") {
1292 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1293 out << SP << SP << SP <<
"if (" <<
OpName <<
"_output_gate[i] < " << fAttrActivationAlpha[
direction * 3]
1295 out << SP << SP << SP << SP <<
OpName <<
"_output_gate[i] = 0.;\n";
1296 out << SP << SP <<
"}";
1297 }
else if (fAttrActivations[
direction * 3] ==
"Elu") {
1298 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1299 out << SP << SP << SP <<
"if (" <<
OpName <<
"_output_gate[i] < 0.)\n";
1300 out << SP << SP << SP << SP <<
OpName <<
"_output_gate[i] = " << fAttrActivationAlpha[
direction * 3]
1301 <<
" * exp(" <<
OpName <<
"_output_gate[i] - 1.);\n";
1302 out << SP << SP <<
"}\n";
1303 }
else if (fAttrActivations[
direction * 3] ==
"Softsign") {
1304 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1305 out << SP << SP << SP << SP <<
OpName <<
"_output_gate[i] = " <<
OpName <<
"_output_gate[i] / (1. + abs("
1306 <<
OpName <<
"_output_gate[i]));\n";
1307 out << SP << SP <<
"}\n";
1309 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1310 out << SP << SP << SP << SP <<
OpName <<
"_output_gate[i] = log(1. + exp(" <<
OpName <<
"_output_gate[i]));\n";
1311 out << SP << SP <<
"}\n";
1315 out << SP << SP <<
"std::copy(" <<
OpName <<
"_cell_state + offset, " <<
OpName <<
"_cell_state + offset + "
1316 <<
size <<
", " <<
OpName <<
"_new_cell_state + offset);\n";
1318 if (fAttrClip > .0) {
1319 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1320 if (fType ==
"float") {
1321 out << SP << SP << SP <<
"float x = (" <<
OpName <<
"_new_cell_state[i] > " << -fAttrClip <<
") ? "
1322 <<
OpName <<
"_new_cell_state[i] : " << -fAttrClip <<
";\n";
1324 out << SP << SP << SP <<
OpName <<
"_new_cell_state[i] = (x < " << fAttrClip <<
") ? x : " << fAttrClip
1326 out << SP << SP <<
"}\n";
1329 if (fAttrActivations[
direction * 3 + 2] ==
"Relu") {
1330 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1331 out << SP << SP << SP <<
"if (" <<
OpName <<
"_new_cell_state[i] < 0.)\n";
1332 out << SP << SP << SP << SP <<
OpName <<
"_new_cell_state[i] = 0.;\n";
1333 out << SP << SP <<
"}\n";
1334 }
else if (fAttrActivations[
direction * 3 + 2] ==
"Tanh") {
1335 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1336 if (fType ==
"float") {
1337 out << SP << SP << SP <<
"float ex = exp(-2 * " <<
OpName <<
"_new_cell_state[i]);\n";
1339 out << SP << SP << SP << SP <<
OpName <<
"_new_cell_state[i] = (1. - ex) / (1. + ex);\n";
1340 out << SP << SP <<
"}\n";
1341 }
else if (fAttrActivations[
direction * 3 + 2] ==
"Sigmoid") {
1342 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1343 out << SP << SP << SP << SP <<
OpName <<
"_new_cell_state[i] = 1. / (1. + exp(-" <<
OpName
1344 <<
"_new_cell_state[i]));\n";
1345 out << SP << SP <<
"}\n";
1346 }
else if (fAttrActivations[
direction * 3 + 2] ==
"Affine") {
1347 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1348 out << SP << SP << SP << SP <<
OpName <<
"_new_cell_state[i] = " << fAttrActivationAlpha[
direction * 3 + 2]
1349 <<
" * " <<
OpName <<
"_new_cell_state[i] + " << fAttrActivationBeta[
direction * 3 + 2] <<
";\n";
1350 out << SP << SP <<
"}\n";
1351 }
else if (fAttrActivations[
direction * 3 + 2] ==
"ScaledTanh") {
1352 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1353 if (fType ==
"float") {
1354 out << SP << SP << SP <<
"float ex = exp(-2 * " << fAttrActivationBeta[
direction * 3 + 2] <<
" * " <<
OpName
1355 <<
"_new_cell_state[i]);\n";
1357 out << SP << SP << SP << SP <<
OpName <<
"_new_cell_state[i] = " << fAttrActivationAlpha[
direction * 3 + 2]
1358 <<
" * (1. - ex) / (1. + ex);\n";
1359 out << SP << SP <<
"}\n";
1360 }
else if (fAttrActivations[
direction * 3 + 2] ==
"HardSigmoid") {
1361 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1362 if (fType ==
"float") {
1363 out << SP << SP << SP <<
"float a = " << fAttrActivationAlpha[
direction * 3 + 2] <<
" * " <<
OpName
1364 <<
"_new_cell_state[i] + " << fAttrActivationBeta[
direction * 3 + 2] <<
";\n";
1365 out << SP << SP << SP <<
"float b = (a > 0.) ? a : 0.;\n";
1367 out << SP << SP << SP << SP <<
OpName <<
"_new_cell_state[i] = (b < 1.) ? b : 1.;\n";
1368 out << SP << SP <<
"}\n";
1369 }
else if (fAttrActivations[
direction * 3 + 2] ==
"LeakyRelu") {
1370 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1371 out << SP << SP << SP <<
"if (" <<
OpName <<
"_new_cell_state[i] < 0.)\n";
1372 out << SP << SP << SP << SP <<
OpName <<
"_new_cell_state[i] = " << fAttrActivationAlpha[
direction * 3 + 2]
1373 <<
" * " <<
OpName <<
"_new_cell_state[i];\n";
1374 out << SP << SP <<
"}\n";
1375 }
else if (fAttrActivations[
direction * 3 + 2] ==
"ThresholdRelu") {
1376 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1377 out << SP << SP << SP <<
"if (" <<
OpName <<
"_new_cell_state[i] < " << fAttrActivationAlpha[
direction * 3 + 2]
1379 out << SP << SP << SP << SP <<
OpName <<
"_new_cell_state[i] = 0.;\n";
1380 out << SP << SP <<
"}";
1381 }
else if (fAttrActivations[
direction * 3 + 2] ==
"Elu") {
1382 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1383 out << SP << SP << SP <<
"if (" <<
OpName <<
"_new_cell_state[i] < 0.)\n";
1384 out << SP << SP << SP << SP <<
OpName <<
"_new_cell_state[i] = " << fAttrActivationAlpha[
direction * 3 + 2]
1385 <<
" * exp(" <<
OpName <<
"_new_cell_state[i] - 1.);\n";
1386 out << SP << SP <<
"}\n";
1387 }
else if (fAttrActivations[
direction * 3 + 2] ==
"Softsign") {
1388 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1389 out << SP << SP << SP << SP <<
OpName <<
"_new_cell_state[i] = " <<
OpName <<
"_new_cell_state[i] / (1. + abs("
1390 <<
OpName <<
"_new_cell_state[i]));\n";
1391 out << SP << SP <<
"}\n";
1393 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1394 out << SP << SP << SP << SP <<
OpName <<
"_new_cell_state[i] = log(1. + exp(" <<
OpName
1395 <<
"_new_cell_state[i]));\n";
1396 out << SP << SP <<
"}\n";
1400 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1401 out << SP << SP << SP <<
OpName <<
"_hidden_state[i] = " <<
OpName <<
"_output_gate[i] * " <<
OpName
1402 <<
"_new_cell_state[i];\n";
1403 out << SP << SP <<
"}\n";
1408 if (!fNSequence_lens.empty()) {
1409 out << SP <<
"for (size_t seq = 0; seq < " <<
seq_length <<
"; seq++) {\n";
1410 out << SP << SP <<
"for (size_t batch = 0; batch < " <<
batch_size <<
"; batch++) {\n";
1411 out << SP << SP << SP <<
"if (seq >= tensor_" << fNSequence_lens <<
"[batch]) {\n";
1413 out << SP << SP << SP << SP << SP <<
"for (size_t h = 0; h < " << fAttrHiddenSize <<
"; h++) {\n";
1414 out << SP << SP << SP << SP << SP << SP <<
"size_t idx = seq * "
1416 <<
" + batch * " << fAttrHiddenSize <<
" + h;\n";
1417 out << SP << SP << SP << SP << SP << SP <<
OpName <<
"_cell_state[idx] = 0.;\n";
1418 out << SP << SP << SP << SP << SP << SP <<
OpName <<
"_hidden_state[idx] = 0.;\n";
1419 out << SP << SP << SP << SP << SP <<
"}\n";
1421 out << SP << SP << SP <<
"}\n";
1422 out << SP << SP <<
"}\n";
1427 if (fAttrLayout == 0) {
1428 if (!fNY_h.empty()) {
1430 if (fNSequence_lens.empty()) {
1432 if (fAttrDirection ==
"backward") {
1433 out << SP <<
"std::copy(" <<
OpName <<
"_hidden_state, " <<
OpName <<
"_hidden_state + " <<
y_h_size
1434 <<
", tensor_" << fNY_h <<
");\n";
1437 out << SP <<
"std::copy(" <<
OpName <<
"_hidden_state + " <<
offset <<
", " <<
OpName
1438 <<
"_hidden_state + " <<
offset <<
" + " <<
y_h_size <<
", tensor_" << fNY_h <<
");\n";
1442 <<
"_hidden_state + " << 2 *
y_h_size <<
", tensor_" << fNY_h <<
" + " <<
y_h_size <<
");\n";
1445 if (fAttrDirection ==
"backward") {
1446 out << SP <<
"for (size_t batch = 0; batch < " <<
batch_size <<
"; batch++) {\n";
1447 out << SP << SP <<
"size_t offset = batch * " << fAttrHiddenSize <<
";\n";
1448 out << SP << SP <<
"std::copy(" <<
OpName <<
"_hidden_state + offset, " <<
OpName
1449 <<
"_hidden_state + offset + " << fAttrHiddenSize <<
", tensor_" << fNY_h <<
" + offset);\n";
1452 out << SP <<
"for (size_t batch = 0; batch < " <<
batch_size <<
"; batch++) {\n";
1453 out << SP << SP <<
"size_t seq = " <<
"tensor_" << fNSequence_lens <<
"[batch] - 1;\n";
1455 <<
" + batch * " << fAttrHiddenSize <<
";\n";
1456 out << SP << SP <<
"size_t y_h_offset = batch * " << fAttrHiddenSize <<
";\n";
1457 out << SP << SP <<
"std::copy(" <<
OpName <<
"_hidden_state + offset, " <<
OpName
1458 <<
"_hidden_state + offset + " << fAttrHiddenSize <<
", tensor_" << fNY_h <<
" + y_h_offset);\n";
1462 out << SP <<
"for (size_t batch = 0; batch < " <<
batch_size <<
"; batch++) {\n";
1463 out << SP << SP <<
"size_t offset = " <<
batch_size * fAttrHiddenSize <<
" + batch * " << fAttrHiddenSize
1465 out << SP << SP <<
"size_t y_h_offset = " <<
batch_size * fAttrHiddenSize <<
" + batch * "
1466 << fAttrHiddenSize <<
";\n";
1467 out << SP << SP <<
"std::copy(" <<
OpName <<
"_hidden_state + offset, " <<
OpName
1468 <<
"_hidden_state + offset + " << fAttrHiddenSize <<
", tensor_" << fNY_h <<
" + y_h_offset);\n";
1473 if (!fNY_c.empty()) {
1475 if (fNSequence_lens.empty()) {
1477 if (fAttrDirection ==
"backward") {
1478 out << SP <<
"std::copy(" <<
OpName <<
"_cell_state, " <<
OpName <<
"_hidden_state + " <<
y_h_size
1479 <<
", tensor_" << fNY_c <<
");\n";
1482 out << SP <<
"std::copy(" <<
OpName <<
"_cell_state + " <<
offset <<
", " <<
OpName <<
"_cell_state + "
1483 <<
offset <<
" + " <<
y_h_size <<
", tensor_" << fNY_c <<
");\n";
1486 out << SP <<
"std::copy(" <<
OpName <<
"_cell_state + " <<
y_h_size <<
", " <<
OpName <<
"_cell_state + "
1490 if (fAttrDirection ==
"backward") {
1491 out << SP <<
"for (size_t batch = 0; batch < " <<
batch_size <<
"; batch++) {\n";
1492 out << SP << SP <<
"size_t offset = batch * " << fAttrHiddenSize <<
";\n";
1493 out << SP << SP <<
"std::copy(" <<
OpName <<
"_cell_state + offset, " <<
OpName
1494 <<
"_cell_state + offset + " << fAttrHiddenSize <<
", tensor_" << fNY_c <<
" + offset);\n";
1497 out << SP <<
"for (size_t batch = 0; batch < " <<
batch_size <<
"; batch++) {\n";
1498 out << SP << SP <<
"size_t seq = " <<
"tensor_" << fNSequence_lens <<
"[batch] - 1;\n";
1500 <<
" + batch * " << fAttrHiddenSize <<
";\n";
1501 out << SP << SP <<
"size_t y_h_offset = batch * " << fAttrHiddenSize <<
";\n";
1502 out << SP << SP <<
"std::copy(" <<
OpName <<
"_cell_state + offset, " <<
OpName
1503 <<
"_cell_state + offset + " << fAttrHiddenSize <<
", tensor_" << fNY_c <<
" + y_h_offset);\n";
1507 out << SP <<
"for (size_t batch = 0; batch < " <<
batch_size <<
"; batch++) {\n";
1508 out << SP << SP <<
"size_t offset = " <<
batch_size * fAttrHiddenSize <<
" + batch * " << fAttrHiddenSize
1510 out << SP << SP <<
"size_t y_h_offset = " <<
batch_size * fAttrHiddenSize <<
" + batch * "
1511 << fAttrHiddenSize <<
";\n";
1512 out << SP << SP <<
"std::copy(" <<
OpName <<
"_cell_state + offset, " <<
OpName
1513 <<
"_cell_state + offset + " << fAttrHiddenSize <<
", tensor_" << fNY_c <<
" + y_h_offset);\n";
1522 out << SP <<
"for (size_t seq = 0; seq < " <<
seq_length <<
"; seq++) {\n";
1523 out << SP << SP <<
"for (size_t batch = 0; batch < " <<
batch_size <<
"; batch++) {\n";
1528 out << SP << SP << SP <<
"std::copy(" <<
OpName <<
"_hidden_state + offset, " <<
OpName
1529 <<
"_hidden_state + offset + " << fAttrHiddenSize <<
", tensor_" << fNY <<
" + y_offset);\n";
1530 out << SP << SP <<
"}\n";
1534 if (!fNY_h.empty()) {
1536 if (fAttrDirection ==
"backward") {
1537 out << SP <<
"for (size_t batch = 0; batch < " <<
batch_size <<
"; batch++) {\n";
1538 out << SP << SP <<
"size_t offset = batch * " << fAttrHiddenSize <<
";\n";
1539 out << SP << SP <<
"size_t y_h_offset = batch * " <<
num_directions * fAttrHiddenSize <<
";\n";
1540 out << SP << SP <<
"std::copy(" <<
OpName <<
"_hidden_state + offset, " <<
OpName
1541 <<
"_hidden_state + offset + " << fAttrHiddenSize <<
", tensor_" << fNY_h <<
" + y_h_offset);\n";
1544 out << SP <<
"for (size_t batch = 0; batch < " <<
batch_size <<
"; batch++) {\n";
1545 if (fNSequence_lens.empty()) {
1546 out << SP << SP <<
"size_t seq = " <<
seq_length - 1 <<
";\n";
1548 out << SP << SP <<
"size_t seq = " <<
"tensor_" << fNSequence_lens <<
"[batch] - 1;\n";
1551 <<
" + batch * " << fAttrHiddenSize <<
";\n";
1552 out << SP << SP <<
"size_t y_h_offset = batch * " <<
num_directions * fAttrHiddenSize <<
";\n";
1553 out << SP << SP <<
"std::copy(" <<
OpName <<
"_hidden_state + offset, " <<
OpName
1554 <<
"_hidden_state + offset + " << fAttrHiddenSize <<
", tensor_" << fNY_h <<
" + y_h_offset);\n";
1558 out << SP <<
"for (size_t batch = 0; batch < " <<
batch_size <<
"; batch++) {\n";
1559 out << SP << SP <<
"size_t offset = " <<
batch_size * fAttrHiddenSize <<
" + batch * " << fAttrHiddenSize
1561 out << SP << SP <<
"size_t y_h_offset = batch * " <<
num_directions * fAttrHiddenSize <<
" + "
1562 << fAttrHiddenSize <<
";\n";
1563 out << SP << SP <<
"std::copy(" <<
OpName <<
"_hidden_state + offset, " <<
OpName
1564 <<
"_hidden_state + offset + " << fAttrHiddenSize <<
", tensor_" << fNY_h <<
" + y_h_offset);\n";
1569 if (!fNY_c.empty()) {
1571 if (fAttrDirection ==
"backward") {
1572 out << SP <<
"for (size_t batch = 0; batch < " <<
batch_size <<
"; batch++) {\n";
1573 out << SP << SP <<
"size_t offset = batch * " << fAttrHiddenSize <<
";\n";
1574 out << SP << SP <<
"size_t y_h_offset = batch * " <<
num_directions * fAttrHiddenSize <<
";\n";
1575 out << SP << SP <<
"std::copy(" <<
OpName <<
"_cell_state + offset, " <<
OpName <<
"_cell_state + offset + "
1576 << fAttrHiddenSize <<
", tensor_" << fNY_c <<
" + y_h_offset);\n";
1579 out << SP <<
"for (size_t batch = 0; batch < " <<
batch_size <<
"; batch++) {\n";
1580 if (fNSequence_lens.empty()) {
1581 out << SP << SP <<
"size_t seq = " <<
seq_length - 1 <<
";\n";
1583 out << SP << SP <<
"size_t seq = " <<
"tensor_" << fNSequence_lens <<
"[batch] - 1;\n";
1586 <<
" + batch * " << fAttrHiddenSize <<
";\n";
1587 out << SP << SP <<
"size_t y_h_offset = batch * " <<
num_directions * fAttrHiddenSize <<
";\n";
1588 out << SP << SP <<
"std::copy(" <<
OpName <<
"_cell_state + offset, " <<
OpName <<
"_cell_state + offset + "
1589 << fAttrHiddenSize <<
", tensor_" << fNY_c <<
" + y_h_offset);\n";
1593 out << SP <<
"for (size_t batch = 0; batch < " <<
batch_size <<
"; batch++) {\n";
1594 out << SP << SP <<
"size_t offset = " <<
batch_size * fAttrHiddenSize <<
" + batch * " << fAttrHiddenSize
1596 out << SP << SP <<
"size_t y_h_offset = batch * " <<
num_directions * fAttrHiddenSize <<
" + "
1597 << fAttrHiddenSize <<
";\n";
1598 out << SP << SP <<
"std::copy(" <<
OpName <<
"_cell_state + offset, " <<
OpName <<
"_cell_state + offset + "
1599 << fAttrHiddenSize <<
", tensor_" << fNY_c <<
" + y_h_offset);\n";
ROperator_LSTM(std::vector< float > activation_alpha, std::vector< float > activation_beta, std::vector< std::string > activations, float clip, std::string direction, size_t hidden_size, size_t input_forget, size_t layout, std::string nameX, std::string nameW, std::string nameR, std::string nameB, std::string nameSequence_lens, std::string nameInitial_h, std::string nameInitial_c, std::string nameP, std::string nameY, std::string nameY_h, std::string nameY_c)
Constructor of ROperator_LSTM, initialized from the ONNX LSTM attribute values (activations, clip, direction, hidden size, input_forget, layout) and the names of the operator's input and output tensors.