208 fUseSession = model.UseSession();
210 if (!model.CheckIfTensorAlreadyExist(fNX)) {
211 throw std::runtime_error(
"TMVA SOFIE LSTM Op input tensor " + fNX +
" is not found in model.");
213 fShapeX = model.GetTensorShape(fNX);
214 if (fShapeX.size() != 3) {
215 throw std::runtime_error(
"TMVA SOFIE LSTM Op input tensor " + fNX +
" is not of 3 dimensions.");
217 if (!model.CheckIfTensorAlreadyExist(fNW)) {
218 throw std::runtime_error(
"TMVA SOFIE LSTM Op input tensor " + fNW +
" is not found in model.");
220 fShapeW = model.GetTensorShape(fNW);
221 if (fShapeW.size() != 3) {
222 throw std::runtime_error(
"TMVA SOFIE LSTM Op input tensor " + fNW +
" is not of 3 dimensions.");
224 if (!model.CheckIfTensorAlreadyExist(fNR)) {
225 throw std::runtime_error(
"TMVA SOFIE LSTM Op input tensor " + fNR +
" is not found in model.");
227 fShapeR = model.GetTensorShape(fNR);
228 if (fShapeR.size() != 3) {
229 throw std::runtime_error(
"TMVA SOFIE LSTM Op input tensor " + fNR +
" is not of 3 dimensions.");
232 if (!model.CheckIfTensorAlreadyExist(fNB)) {
233 throw std::runtime_error(
"TMVA SOFIE LSTM op input tensor " + fNB +
" is not found in model.");
235 fShapeB = model.GetTensorShape(fNB);
236 if (fShapeB.size() != 2 && fShapeB.size() != 5) {
237 throw std::runtime_error(
"TMVA SOFIE LSTM op input tensor " + fNB +
" is not of 2 or 5 dimensions.");
239 if (fShapeB.size() == 2) {
243 size_t seq_length = (fAttrLayout == 0) ? fShapeX[0] : fShapeX[1];
244 size_t batch_size = (fAttrLayout == 0) ? fShapeX[1] : fShapeX[0];
245 if (fType ==
"float") {
249 std::vector<float>
sum(fAttrHiddenSize);
252 for (
size_t h = 0;
h < fAttrHiddenSize;
h++) {
268 fShapeB = model.GetTensorShape(fNB);
272 if (!fNSequence_lens.empty()) {
273 if (!model.CheckIfTensorAlreadyExist(fNSequence_lens)) {
274 throw std::runtime_error(
"TMVA SOFIE LSTM Op input tensor " + fNSequence_lens +
"is not found in model.");
276 fShapeSequence_lens = model.GetTensorShape(fNSequence_lens);
277 if (fShapeSequence_lens.size() != 1) {
278 throw std::runtime_error(
"TMVA SOFIE LSTM Op input tensor " + fNSequence_lens +
" is not of 1 dimension.");
281 if (!fNInitial_h.empty()) {
282 if (!model.CheckIfTensorAlreadyExist(fNInitial_h)) {
283 throw std::runtime_error(
"TMVA SOFIE LSTM Op input tensor " + fNInitial_h +
" is not found in model.");
285 fShapeInitial_h = model.GetTensorShape(fNInitial_h);
286 if (fShapeInitial_h.size() != 3) {
287 throw std::runtime_error(
"TMVA SOFIE LSTM Op input tensor " + fNInitial_h +
" is not of 3 dimensions.");
290 if (!fNInitial_c.empty()) {
291 if (!model.CheckIfTensorAlreadyExist(fNInitial_c)) {
292 throw std::runtime_error(
"TMVA SOFIE LSTM Op input tensor " + fNInitial_c +
" is not found in model.");
294 fShapeInitial_c = model.GetTensorShape(fNInitial_c);
295 if (fShapeInitial_c.size() != 3) {
296 throw std::runtime_error(
"TMVA SOFIE LSTM Op input tensor " + fNInitial_c +
" is not of 3 dimensions.");
300 if (!model.CheckIfTensorAlreadyExist(fNP)) {
301 throw std::runtime_error(
"TMVA SOFIE LSTM op input tensor " + fNP +
" is not found in model.");
303 fShapeP = model.GetTensorShape(fNP);
304 if (fShapeP.size() != 2 && fShapeP.size() != 4) {
305 throw std::runtime_error(
"TMVA SOFIE LSTM op input tensor " + fNP +
" is not of 2 or 4 dimensions.");
307 if (fShapeP.size() == 2) {
311 size_t batch_size = (fAttrLayout == 0) ? fShapeX[1] : fShapeX[0];
312 if (fType ==
"float") {
326 std::shared_ptr<void>
new_p_ptr(
new_p, std::default_delete<
float[]>());
328 fShapeP = model.GetTensorShape(fNP);
333 fShapeY = ShapeInference({fShapeX, fShapeW})[0];
334 if (!model.CheckIfTensorAlreadyExist(fNY)) {
335 model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShapeY);
338 if (!fNY_h.empty()) {
339 fShapeY_h = ShapeInference({fShapeX, fShapeW})[1];
340 if (!model.CheckIfTensorAlreadyExist(fNY_h)) {
341 model.AddIntermediateTensor(fNY_h, model.GetTensorType(fNX), fShapeY_h);
344 if (!fNY_c.empty()) {
345 fShapeY_c = ShapeInference({fShapeX, fShapeW})[2];
346 if (!model.CheckIfTensorAlreadyExist(fNY_c)) {
347 model.AddIntermediateTensor(fNY_c, model.GetTensorType(fNX), fShapeY_c);
355 throw std::runtime_error(
"TMVA SOFIE - Activation function " +
activation +
" not implemented");
358 if (fAttrDirection !=
"forward" && fAttrDirection !=
"backward" && fAttrDirection !=
"bidirectional") {
359 throw std::runtime_error(
"TMVA SOFIE - Invalid LSTM direction fAttrDirection = " + fAttrDirection);
361 if (4 * fAttrHiddenSize != fShapeW[1]) {
362 throw std::runtime_error(
"TMVA SOFIE - fAttrHiddenSize must be equal to " + std::to_string(fShapeW[1] / 4));
364 if (fAttrInputForget > 1) {
365 throw std::runtime_error(
"TMVA SOFIE - fAttrInputForget = " + std::to_string(fAttrInputForget) +
368 if (fAttrLayout > 1) {
369 throw std::runtime_error(
"TMVA SOFIE - Layout fAttrLayout = " + std::to_string(fAttrLayout) +
370 " must be 0 (timewise) or 1 (batchwise)");
372 if (fAttrActivations.empty()) {
373 if (fAttrDirection ==
"bidirectional") {
374 fAttrActivations = {
"Sigmoid",
"Tanh",
"Tanh",
"Sigmoid",
"Tanh",
"Tanh"};
376 fAttrActivations = {
"Sigmoid",
"Tanh",
"Tanh"};
460 std::stringstream out;
462 size_t seq_length = (fAttrLayout == 0) ? fShapeX[0] : fShapeX[1];
463 size_t batch_size = (fAttrLayout == 0) ? fShapeX[1] : fShapeX[0];
468 if (fAttrLayout == 0) {
469 out << SP << fType <<
" const *" <<
OpName <<
"_input = tensor_" << fNX <<
";\n";
472 out << SP << fType <<
" * " <<
OpName <<
"_input = this->fVec_" <<
OpName <<
"_input;\n";
476 out << SP <<
"for(size_t seq = 0; seq < " <<
seq_length <<
"; seq++) {\n";
477 out << SP << SP <<
"for(size_t batch = 0; batch < " <<
batch_size <<
"; batch++) {\n";
478 out << SP << SP << SP <<
"for(size_t i = 0; i < " <<
input_size <<
"; i++) {\n";
482 out << SP << SP << SP <<
"}\n";
483 out << SP << SP <<
"}\n";
488 if (!fNInitial_h.empty()) {
489 if (fAttrLayout == 0) {
490 out << SP << fType <<
" const*" <<
OpName <<
"_initial_hidden_state = " <<
" tensor_" << fNInitial_h <<
";\n";
493 out << SP << fType <<
" const* " <<
OpName <<
"_initial_hidden_state = this->fVec_" <<
OpName
494 <<
"_initial_hidden_state;\n";
496 out << SP << fType <<
" " <<
OpName <<
"_initial_hidden_state["
500 out << SP <<
"for(size_t batch = 0; batch < " <<
batch_size <<
"; batch++) {\n";
501 out << SP << SP <<
"for(size_t h = 0; h < " << fAttrHiddenSize <<
"; h++) {\n";
503 <<
" + batch * " << fAttrHiddenSize <<
" + h] = tensor_" << fNInitial_h <<
"[batch * "
505 out << SP << SP <<
"}\n";
512 if (!fNInitial_c.empty()) {
513 if (fAttrLayout == 0) {
514 out << SP << fType <<
" const*" <<
OpName <<
"_initial_cell_state = " <<
" tensor_" << fNInitial_c <<
";\n";
517 out << SP << fType <<
" const* " <<
OpName <<
"_initial_cell_state = this->fVec_" <<
OpName
518 <<
"_initial_cell_state;\n";
520 out << SP << fType <<
" " <<
OpName <<
"_initial_cell_state["
524 out << SP <<
"for(size_t batch = 0; batch < " <<
batch_size <<
"; batch++) {\n";
525 out << SP << SP <<
"for(size_t h = 0; h < " << fAttrHiddenSize <<
"; h++) {\n";
527 <<
" + batch * " << fAttrHiddenSize <<
" + h] = tensor_" << fNInitial_c <<
"[batch * "
529 out << SP << SP <<
"}\n";
538 out << SP << fType <<
" * " <<
OpName <<
"_ff_input_gate = this->fVec_" <<
OpName <<
"_ff_input_gate;\n";
539 out << SP << fType <<
" * " <<
OpName <<
"_ff_output_gate = this->fVec_" <<
OpName <<
"_ff_output_gate;\n";
540 out << SP << fType <<
" * " <<
OpName <<
"_ff_cell_gate = this->fVec_" <<
OpName <<
"_ff_cell_gate;\n";
541 if (fAttrInputForget == 0) {
542 out << SP << fType <<
" * " <<
OpName <<
"_ff_forget_gate = this->fVec_" <<
OpName
543 <<
"_ff_forget_gate;\n";
546 out << SP << fType <<
" " <<
OpName <<
"_ff_input_gate[" <<
ff_size <<
"] = {0};\n";
547 out << SP << fType <<
" " <<
OpName <<
"_ff_output_gate[" <<
ff_size <<
"] = {0};\n";
548 out << SP << fType <<
" " <<
OpName <<
"_ff_cell_gate[" <<
ff_size <<
"] = {0};\n";
549 if (fAttrInputForget == 0) {
550 out << SP << fType <<
" " <<
OpName <<
"_ff_forget_gate[" <<
ff_size <<
"] = {0};\n";
556 out << SP << fType <<
" * " <<
OpName <<
"_input_gate = this->fVec_" <<
OpName <<
"_input_gate;\n";
557 out << SP << fType <<
" * " <<
OpName <<
"_output_gate = this->fVec_" <<
OpName <<
"_output_gate;\n";
558 out << SP << fType <<
" * " <<
OpName <<
"_cell_gate = this->fVec_" <<
OpName <<
"_cell_gate;\n";
559 if (fAttrInputForget == 0) {
560 out << SP << fType <<
" * " <<
OpName <<
"_forget_gate = this->fVec_" <<
OpName <<
"_forget_gate;\n";
566 if (fAttrInputForget == 0) {
572 out << SP << fType <<
" * " <<
OpName <<
"_cell_state = this->fVec_" <<
OpName <<
"_cell_state;\n";
573 out << SP << fType <<
" * " <<
OpName <<
"_new_cell_state = this->fVec_" <<
OpName <<
"_new_cell_state;\n";
580 if (fAttrLayout == 0 && !fNY.empty()) {
581 out << SP << fType <<
" *" <<
OpName <<
"_hidden_state = tensor_" << fNY <<
";\n";
584 out << SP << fType <<
" * " <<
OpName <<
"_hidden_state = this->fVec_" <<
OpName <<
"_hidden_state;\n";
590 out << SP <<
"char " <<
OpName <<
"_transA = 'N';\n";
591 out << SP <<
"char " <<
OpName <<
"_transB = 'T';\n";
593 out << SP <<
"int " <<
OpName <<
"_n = " << fAttrHiddenSize <<
";\n";
595 if (fType ==
"float") {
596 out << SP << fType <<
" " <<
OpName <<
"_alpha = 1.;\n";
597 out << SP << fType <<
" " <<
OpName <<
"_beta = 0.;\n";
601 out << SP <<
"int " <<
OpName <<
"_incx = 1;\n";
602 out << SP <<
"int " <<
OpName <<
"_incy = 1;\n";
606 std::stringstream
ss;
608 <<
"_m, &" <<
OpName <<
"_k, &" <<
OpName <<
"_alpha, tensor_" << fNW;
620 if (fType ==
"float") {
631 if (fType ==
"float") {
642 if (fAttrInputForget == 0) {
645 if (fType ==
"float") {
650 if (fType ==
"float") {
660 if (fType ==
"float") {
662 out << SP <<
"BLAS::saxpy_(&" <<
OpName <<
"_bias_size, &" <<
OpName <<
"_alpha, tensor_" << fNB <<
", &"
666 out << SP <<
"BLAS::saxpy_(&" <<
OpName <<
"_bias_size, &" <<
OpName <<
"_alpha, tensor_" << fNB <<
" + "
671 out << SP <<
"BLAS::saxpy_(&" <<
OpName <<
"_bias_size, &" <<
OpName <<
"_alpha, tensor_" << fNB <<
" + "
676 if (fType ==
"float") {
679 out << SP <<
"BLAS::saxpy_(&" <<
OpName <<
"_bias_size, &" <<
OpName <<
"_alpha, tensor_" << fNB <<
" + "
685 out << SP <<
"BLAS::saxpy_(&" <<
OpName <<
"_bias_size, &" <<
OpName <<
"_alpha, tensor_" << fNB <<
" + "
691 out << SP <<
"BLAS::saxpy_(&" <<
OpName <<
"_bias_size, &" <<
OpName <<
"_alpha, tensor_" << fNB <<
" + "
696 if (fAttrInputForget == 0) {
699 if (fType ==
"float") {
701 out << SP <<
"BLAS::saxpy_(&" <<
OpName <<
"_bias_size, &" <<
OpName <<
"_alpha, tensor_" << fNB
706 if (fType ==
"float") {
709 out << SP <<
"BLAS::saxpy_(&" <<
OpName <<
"_bias_size, &" <<
OpName <<
"_alpha, tensor_" << fNB
719 out << SP <<
"for (size_t seq = 0; seq < " <<
seq_length <<
"; seq++) {\n";
720 out << SP << SP <<
"size_t ff_offset = seq * " <<
batch_size * fAttrHiddenSize <<
";\n";
728 out << SP << SP <<
"std::copy(" <<
OpName <<
"_ff_input_gate + ff_offset, " <<
OpName
729 <<
"_ff_input_gate + ff_offset + " <<
ff_seq_size <<
", " <<
OpName <<
"_input_gate + gate_offset);\n";
730 out << SP << SP <<
"std::copy(" <<
OpName <<
"_ff_output_gate + ff_offset, " <<
OpName
731 <<
"_ff_output_gate + ff_offset + " <<
ff_seq_size <<
", " <<
OpName <<
"_output_gate + gate_offset);\n";
732 out << SP << SP <<
"std::copy(" <<
OpName <<
"_ff_cell_gate + ff_offset, " <<
OpName
733 <<
"_ff_cell_gate + ff_offset + " <<
ff_seq_size <<
", " <<
OpName <<
"_cell_gate + gate_offset);\n";
734 if (fAttrInputForget == 0) {
735 out << SP << SP <<
"std::copy(" <<
OpName <<
"_ff_forget_gate + ff_offset, " <<
OpName
736 <<
"_ff_forget_gate + ff_offset + " <<
ff_seq_size <<
", " <<
OpName <<
"_forget_gate + gate_offset);\n";
740 out << SP <<
"for (size_t seq = 0; seq < " <<
seq_length <<
"; seq++) {\n";
741 if (fAttrDirection ==
"backward" ||
direction == 1) {
742 out << SP << SP <<
"size_t index = " <<
seq_length - 1 <<
" - seq;\n";
744 out << SP << SP <<
"size_t index = seq;\n";
746 out << SP << SP <<
"int m2 = " <<
batch_size <<
";\n";
755 out << SP << SP <<
"if (seq == 0) {\n";
756 if (!fNInitial_h.empty()) {
758 if (fType ==
"float") {
759 out << SP << SP << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &" <<
OpName
760 <<
"_n, &m2, &" <<
OpName <<
"_n, &" <<
OpName <<
"_alpha, tensor_" << fNR <<
", &" <<
OpName
761 <<
"_n, " <<
OpName <<
"_initial_hidden_state, &" <<
OpName <<
"_n, &" <<
OpName <<
"_alpha, "
762 <<
OpName <<
"_input_gate + offset, &" <<
OpName <<
"_n);\n";
763 size_t ro_offset = fAttrHiddenSize * fAttrHiddenSize;
764 out << SP << SP << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &" <<
OpName
767 <<
"_alpha, " <<
OpName <<
"_output_gate + offset, &" <<
OpName <<
"_n);\n";
768 size_t rc_offset = 3 * fAttrHiddenSize * fAttrHiddenSize;
769 out << SP << SP << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &" <<
OpName
772 <<
"_alpha, " <<
OpName <<
"_cell_gate + offset, &" <<
OpName <<
"_n);\n";
773 if (fAttrInputForget == 0) {
774 size_t rf_offset = 2 * fAttrHiddenSize * fAttrHiddenSize;
775 out << SP << SP << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &"
776 <<
OpName <<
"_n, &m2, &" <<
OpName <<
"_n, &" <<
OpName <<
"_alpha, tensor_" << fNR <<
" + "
778 <<
"_n, &" <<
OpName <<
"_alpha, " <<
OpName <<
"_forget_gate + offset, &" <<
OpName <<
"_n);\n";
782 if (fType ==
"float") {
783 size_t ri_offset = 4 * fAttrHiddenSize * fAttrHiddenSize;
784 out << SP << SP << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &" <<
OpName
787 <<
"_alpha, " <<
OpName <<
"_input_gate + offset, &" <<
OpName <<
"_n);\n";
788 size_t ro_offset = 4 * fAttrHiddenSize * fAttrHiddenSize + 1 * fAttrHiddenSize * fAttrHiddenSize;
789 out << SP << SP << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &" <<
OpName
792 <<
"_alpha, " <<
OpName <<
"_output_gate + offset, &" <<
OpName <<
"_n);\n";
793 size_t rc_offset = 4 * fAttrHiddenSize * fAttrHiddenSize + 3 * fAttrHiddenSize * fAttrHiddenSize;
794 out << SP << SP << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &" <<
OpName
797 <<
"_alpha, " <<
OpName <<
"_cell_gate + offset, &" <<
OpName <<
"_n);\n";
798 if (fAttrInputForget == 0) {
799 size_t rf_offset = 4 * fAttrHiddenSize * fAttrHiddenSize + 2 * fAttrHiddenSize * fAttrHiddenSize;
800 out << SP << SP << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &"
801 <<
OpName <<
"_n, &m2, &" <<
OpName <<
"_n, &" <<
OpName <<
"_alpha, tensor_" << fNR <<
" + "
803 <<
"_n, &" <<
OpName <<
"_alpha, " <<
OpName <<
"_forget_gate + offset, &" <<
OpName <<
"_n);\n";
808 out << SP << SP <<
"} else {\n";
811 if (fAttrDirection ==
"backward") {
812 out << SP << SP << SP <<
"size_t previous_offset = (index + 1) * "
815 out << SP << SP << SP <<
"size_t previous_offset = (seq - 1) * "
818 if (fType ==
"float") {
819 out << SP << SP << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &" <<
OpName
820 <<
"_n, &m2, &" <<
OpName <<
"_n, &" <<
OpName <<
"_alpha, tensor_" << fNR <<
", &" <<
OpName <<
"_n, "
822 <<
"_input_gate + offset, &" <<
OpName <<
"_n);\n";
823 size_t ro_offset = 1 * fAttrHiddenSize * fAttrHiddenSize;
824 out << SP << SP << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &" <<
OpName
826 <<
", &" <<
OpName <<
"_n, " <<
OpName <<
"_hidden_state + previous_offset, &" <<
OpName <<
"_n, &"
827 <<
OpName <<
"_alpha, " <<
OpName <<
"_output_gate + offset, &" <<
OpName <<
"_n);\n";
828 size_t rc_offset = 3 * fAttrHiddenSize * fAttrHiddenSize;
829 out << SP << SP << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &" <<
OpName
831 <<
", &" <<
OpName <<
"_n, " <<
OpName <<
"_hidden_state + previous_offset, &" <<
OpName <<
"_n, &"
833 if (fAttrInputForget == 0) {
834 size_t rf_offset = 2 * fAttrHiddenSize * fAttrHiddenSize;
835 out << SP << SP << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &" <<
OpName
837 <<
", &" <<
OpName <<
"_n, " <<
OpName <<
"_hidden_state + previous_offset, &" <<
OpName <<
"_n, &"
838 <<
OpName <<
"_alpha, " <<
OpName <<
"_forget_gate + offset, &" <<
OpName <<
"_n);\n";
842 out << SP << SP << SP <<
"size_t previous_offset = (index + 1) * "
844 if (fType ==
"float") {
845 size_t ri_offset = 4 * fAttrHiddenSize * fAttrHiddenSize;
846 out << SP << SP << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &" <<
OpName
848 <<
", &" <<
OpName <<
"_n, " <<
OpName <<
"_hidden_state + previous_offset, &" <<
OpName <<
"_n, &"
850 size_t ro_offset = 4 * fAttrHiddenSize * fAttrHiddenSize + fAttrHiddenSize * fAttrHiddenSize;
851 out << SP << SP << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &" <<
OpName
853 <<
", &" <<
OpName <<
"_n, " <<
OpName <<
"_hidden_state + previous_offset, &" <<
OpName <<
"_n, &"
854 <<
OpName <<
"_alpha, " <<
OpName <<
"_output_gate + offset, &" <<
OpName <<
"_n);\n";
855 size_t rc_offset = 4 * fAttrHiddenSize * fAttrHiddenSize + 3 * fAttrHiddenSize * fAttrHiddenSize;
856 out << SP << SP << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &" <<
OpName
858 <<
", &" <<
OpName <<
"_n, " <<
OpName <<
"_hidden_state + previous_offset, &" <<
OpName <<
"_n, &"
860 if (fAttrInputForget == 0) {
861 size_t rf_offset = 4 * fAttrHiddenSize * fAttrHiddenSize + 2 * fAttrHiddenSize * fAttrHiddenSize;
862 out << SP << SP << SP <<
"BLAS::sgemm_(&" <<
OpName <<
"_transB, &" <<
OpName <<
"_transA, &" <<
OpName
864 <<
", &" <<
OpName <<
"_n, " <<
OpName <<
"_hidden_state + previous_offset, &" <<
OpName <<
"_n, &"
865 <<
OpName <<
"_alpha, " <<
OpName <<
"_forget_gate + offset, &" <<
OpName <<
"_n);\n";
869 out << SP << SP <<
"}\n";
872 if (fAttrClip > .0) {
873 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
874 if (fType ==
"float") {
875 out << SP << SP << SP <<
"float x = (" <<
OpName <<
"_cell_gate[i] > " << -fAttrClip <<
") ? " <<
OpName
876 <<
"_cell_gate[i] : " << -fAttrClip <<
";\n";
878 out << SP << SP << SP <<
OpName <<
"_cell_gate[i] = (x < " << fAttrClip <<
") ? x : " << fAttrClip <<
";\n";
879 out << SP << SP <<
"}\n";
882 if (fAttrActivations[
direction * 3 + 1] ==
"Relu") {
883 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
884 out << SP << SP << SP <<
"if (" <<
OpName <<
"_cell_gate[i] < 0.)\n";
885 out << SP << SP << SP << SP <<
OpName <<
"_cell_gate[i] = 0.;\n";
886 out << SP << SP <<
"}\n";
887 }
else if (fAttrActivations[
direction * 3 + 1] ==
"Tanh") {
888 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
889 if (fType ==
"float") {
890 out << SP << SP << SP <<
"float ex = exp(-2 * " <<
OpName <<
"_cell_gate[i]);\n";
892 out << SP << SP << SP << SP <<
OpName <<
"_cell_gate[i] = (1. - ex) / (1. + ex);\n";
893 out << SP << SP <<
"}\n";
894 }
else if (fAttrActivations[
direction * 3 + 1] ==
"Sigmoid") {
895 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
896 out << SP << SP << SP << SP <<
OpName <<
"_cell_gate[i] = 1. / (1. + exp(-" <<
OpName <<
"_cell_gate[i]));\n";
897 out << SP << SP <<
"}\n";
898 }
else if (fAttrActivations[
direction * 3 + 1] ==
"Affine") {
899 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
900 out << SP << SP << SP << SP <<
OpName <<
"_cell_gate[i] = " << fAttrActivationAlpha[
direction * 3 + 1] <<
" * "
901 <<
OpName <<
"_cell_gate[i] + " << fAttrActivationBeta[
direction * 3 + 1] <<
";\n";
902 out << SP << SP <<
"}\n";
903 }
else if (fAttrActivations[
direction * 3 + 1] ==
"ScaledTanh") {
904 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
905 if (fType ==
"float") {
906 out << SP << SP << SP <<
"float ex = exp(-2 * " << fAttrActivationBeta[
direction * 3 + 1] <<
" * " <<
OpName
907 <<
"_cell_gate[i]);\n";
909 out << SP << SP << SP << SP <<
OpName <<
"_cell_gate[i] = " << fAttrActivationAlpha[
direction * 3 + 1]
910 <<
" * (1. - ex) / (1. + ex);\n";
911 out << SP << SP <<
"}\n";
912 }
else if (fAttrActivations[
direction * 3 + 1] ==
"HardSigmoid") {
913 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
914 if (fType ==
"float") {
915 out << SP << SP << SP <<
"float a = " << fAttrActivationAlpha[
direction * 3 + 1] <<
" * " <<
OpName
916 <<
"_cell_gate[i] + " << fAttrActivationBeta[
direction * 3 + 1] <<
";\n";
917 out << SP << SP << SP <<
"float b = (a > 0.) ? a : 0.;\n";
919 out << SP << SP << SP << SP <<
OpName <<
"_cell_gate[i] = (b < 1.) ? b : 1.;\n";
920 out << SP << SP <<
"}\n";
921 }
else if (fAttrActivations[
direction * 3 + 1] ==
"LeakyRelu") {
922 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
923 out << SP << SP << SP <<
"if (" <<
OpName <<
"_cell_gate[i] < 0.)\n";
924 out << SP << SP << SP << SP <<
OpName <<
"_cell_gate[i] = " << fAttrActivationAlpha[
direction * 3 + 1] <<
" * "
925 <<
OpName <<
"_cell_gate[i];\n";
926 out << SP << SP <<
"}\n";
927 }
else if (fAttrActivations[
direction * 3 + 1] ==
"ThresholdRelu") {
928 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
929 out << SP << SP << SP <<
"if (" <<
OpName <<
"_cell_gate[i] < " << fAttrActivationAlpha[
direction * 3 + 1]
931 out << SP << SP << SP << SP <<
OpName <<
"_cell_gate[i] = 0.;\n";
932 out << SP << SP <<
"}";
933 }
else if (fAttrActivations[
direction * 3 + 1] ==
"Elu") {
934 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
935 out << SP << SP << SP <<
"if (" <<
OpName <<
"_cell_gate[i] < 0.)\n";
936 out << SP << SP << SP << SP <<
OpName <<
"_cell_gate[i] = " << fAttrActivationAlpha[
direction * 3 + 1]
937 <<
" * exp(" <<
OpName <<
"_cell_gate[i] - 1.);\n";
938 out << SP << SP <<
"}\n";
939 }
else if (fAttrActivations[
direction * 3 + 1] ==
"Softsign") {
940 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
941 out << SP << SP << SP << SP <<
OpName <<
"_cell_gate[i] = " <<
OpName <<
"_cell_gate[i] / (1. + abs(" <<
OpName
942 <<
"_cell_gate[i]));\n";
943 out << SP << SP <<
"}\n";
945 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
946 out << SP << SP << SP << SP <<
OpName <<
"_cell_gate[i] = log(1. + exp(" <<
OpName <<
"_cell_gate[i]));\n";
947 out << SP << SP <<
"}\n";
953 out << SP << SP <<
"if (seq == 0) {\n";
954 if (!fNInitial_c.empty()) {
956 out << SP << SP << SP <<
"for (size_t i = 0; i < " <<
size <<
"; i++) {\n";
957 out << SP << SP << SP << SP <<
OpName <<
"_input_gate[i + offset] += tensor_" << fNP <<
"[i] * "
958 <<
OpName <<
"_initial_cell_state[i];\n";
959 out << SP << SP << SP <<
"}\n";
960 if (fAttrInputForget == 0) {
962 out << SP << SP << SP <<
"for (size_t i = 0; i < " <<
size <<
"; i++) {\n";
963 out << SP << SP << SP << SP <<
OpName <<
"_forget_gate[i + offset] += tensor_" << fNP <<
"[i + "
965 out << SP << SP << SP <<
"}\n";
970 out << SP << SP << SP <<
"for (size_t i = 0; i < " <<
size <<
"; i++) {\n";
971 out << SP << SP << SP << SP <<
OpName <<
"_input_gate[i + offset] += tensor_" << fNP <<
"[i + "
973 out << SP << SP << SP <<
"}\n";
974 if (fAttrInputForget == 0) {
976 out << SP << SP << SP <<
"for (size_t i = 0; i < " <<
size <<
"; i++) {\n";
977 out << SP << SP << SP << SP <<
OpName <<
"_forget_gate[i + offset] += tensor_" << fNP <<
"[i + "
979 out << SP << SP << SP <<
"}\n";
983 out << SP << SP <<
"} else {\n";
985 if (fAttrDirection ==
"backward") {
986 out << SP << SP << SP <<
"size_t c_offset = (index + 1) * "
989 out << SP << SP << SP <<
"size_t c_offset = (seq - 1) * "
992 out << SP << SP << SP <<
"for (size_t i = 0; i < " <<
size <<
"; i++) {\n";
993 out << SP << SP << SP << SP <<
OpName <<
"_input_gate[i + offset] += tensor_" << fNP <<
"[i] * " <<
OpName
994 <<
"_cell_state[i + c_offset];\n";
995 out << SP << SP << SP <<
"}\n";
996 if (fAttrInputForget == 0) {
998 out << SP << SP << SP <<
"for (size_t i = 0; i < " <<
size <<
"; i++) {\n";
999 out << SP << SP << SP << SP <<
OpName <<
"_forget_gate[i + offset] += tensor_" << fNP <<
"[i + "
1001 out << SP << SP << SP <<
"}\n";
1006 <<
" + " <<
batch_size * fAttrHiddenSize <<
";\n";
1007 out << SP << SP << SP <<
"for (size_t i = 0; i < " <<
size <<
"; i++) {\n";
1008 out << SP << SP << SP << SP <<
OpName <<
"_input_gate[i + offset] += tensor_" << fNP <<
"[i + " <<
pi_offset
1009 <<
"] * " <<
OpName <<
"_cell_state[i + c_offset];\n";
1010 out << SP << SP << SP <<
"}\n";
1011 if (fAttrInputForget == 0) {
1013 out << SP << SP << SP <<
"for (size_t i = 0; i < " <<
size <<
"; i++) {\n";
1014 out << SP << SP << SP << SP <<
OpName <<
"_forget_gate[i + offset] += tensor_" << fNP <<
"[i + "
1016 out << SP << SP << SP <<
"}\n";
1019 out << SP << SP <<
"}\n";
1023 if (fAttrClip > .0) {
1024 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1025 if (fType ==
"float") {
1026 out << SP << SP << SP <<
"float x = (" <<
OpName <<
"_input_gate[i] > " << -fAttrClip <<
") ? " <<
OpName
1027 <<
"_input_gate[i] : " << -fAttrClip <<
";\n";
1029 out << SP << SP << SP <<
OpName <<
"_input_gate[i] = (x < " << fAttrClip <<
") ? x : " << fAttrClip <<
";\n";
1030 out << SP << SP <<
"}\n";
1033 if (fAttrActivations[
direction * 3] ==
"Relu") {
1034 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1035 out << SP << SP << SP <<
"if (" <<
OpName <<
"_input_gate[i] < 0.)\n";
1036 out << SP << SP << SP << SP <<
OpName <<
"_input_gate[i] = 0.;\n";
1037 out << SP << SP <<
"}\n";
1038 }
else if (fAttrActivations[
direction * 3] ==
"Tanh") {
1039 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1040 if (fType ==
"float") {
1041 out << SP << SP << SP <<
"float ex = exp(-2 * " <<
OpName <<
"_input_gate[i]);\n";
1043 out << SP << SP << SP << SP <<
OpName <<
"_input_gate[i] = (1. - ex) / (1. + ex);\n";
1044 out << SP << SP <<
"}\n";
1045 }
else if (fAttrActivations[
direction * 3] ==
"Sigmoid") {
1046 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1047 out << SP << SP << SP << SP <<
OpName <<
"_input_gate[i] = 1. / (1. + exp(-" <<
OpName
1048 <<
"_input_gate[i]));\n";
1049 out << SP << SP <<
"}\n";
1050 }
else if (fAttrActivations[
direction * 3] ==
"Affine") {
1051 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1052 out << SP << SP << SP << SP <<
OpName <<
"_input_gate[i] = " << fAttrActivationAlpha[
direction * 3] <<
" * "
1053 <<
OpName <<
"_input_gate[i] + " << fAttrActivationBeta[
direction * 3] <<
";\n";
1054 out << SP << SP <<
"}\n";
1055 }
else if (fAttrActivations[
direction * 3] ==
"ScaledTanh") {
1056 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1057 if (fType ==
"float") {
1058 out << SP << SP << SP <<
"float ex = exp(-2 * " << fAttrActivationBeta[
direction * 3] <<
" * " <<
OpName
1059 <<
"_input_gate[i]);\n";
1061 out << SP << SP << SP << SP <<
OpName <<
"_input_gate[i] = " << fAttrActivationAlpha[
direction * 3]
1062 <<
" * (1. - ex) / (1. + ex);\n";
1063 out << SP << SP <<
"}\n";
1064 }
else if (fAttrActivations[
direction * 3] ==
"HardSigmoid") {
1065 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1066 if (fType ==
"float") {
1067 out << SP << SP << SP <<
"float a = " << fAttrActivationAlpha[
direction * 3] <<
" * " <<
OpName
1068 <<
"_input_gate[i] + " << fAttrActivationBeta[
direction * 3] <<
";\n";
1069 out << SP << SP << SP <<
"float b = (a > 0.) ? a : 0.;\n";
1071 out << SP << SP << SP << SP <<
OpName <<
"_input_gate[i] = (b < 1.) ? b : 1.;\n";
1072 out << SP << SP <<
"}\n";
1073 }
else if (fAttrActivations[
direction * 3] ==
"LeakyRelu") {
1074 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1075 out << SP << SP << SP <<
"if (" <<
OpName <<
"_input_gate[i] < 0.)\n";
1076 out << SP << SP << SP << SP <<
OpName <<
"_input_gate[i] = " << fAttrActivationAlpha[
direction * 3] <<
" * "
1077 <<
OpName <<
"_input_gate[i];\n";
1078 out << SP << SP <<
"}\n";
1079 }
else if (fAttrActivations[
direction * 3] ==
"ThresholdRelu") {
1080 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1081 out << SP << SP << SP <<
"if (" <<
OpName <<
"_input_gate[i] < " << fAttrActivationAlpha[
direction * 3]
1083 out << SP << SP << SP << SP <<
OpName <<
"_input_gate[i] = 0.;\n";
1084 out << SP << SP <<
"}";
1085 }
else if (fAttrActivations[
direction * 3] ==
"Elu") {
1086 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1087 out << SP << SP << SP <<
"if (" <<
OpName <<
"_input_gate[i] < 0.)\n";
1088 out << SP << SP << SP << SP <<
OpName <<
"_input_gate[i] = " << fAttrActivationAlpha[
direction * 3]
1089 <<
" * exp(" <<
OpName <<
"_input_gate[i] - 1.);\n";
1090 out << SP << SP <<
"}\n";
1091 }
else if (fAttrActivations[
direction * 3] ==
"Softsign") {
1092 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1093 out << SP << SP << SP << SP <<
OpName <<
"_input_gate[i] = " <<
OpName <<
"_input_gate[i] / (1. + abs("
1094 <<
OpName <<
"_input_gate[i]));\n";
1095 out << SP << SP <<
"}\n";
1097 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1098 out << SP << SP << SP << SP <<
OpName <<
"_input_gate[i] = log(1. + exp(" <<
OpName <<
"_input_gate[i]));\n";
1099 out << SP << SP <<
"}\n";
1102 if (fAttrInputForget == 0) {
1104 if (fAttrClip > .0) {
1105 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1106 if (fType ==
"float") {
1107 out << SP << SP << SP <<
"float x = (" <<
OpName <<
"_forget_gate[i] > " << -fAttrClip <<
") ? "
1108 <<
OpName <<
"_forget_gate[i] : " << -fAttrClip <<
";\n";
1110 out << SP << SP << SP <<
OpName <<
"_forget_gate[i] = (x < " << fAttrClip <<
") ? x : " << fAttrClip
1112 out << SP << SP <<
"}\n";
1115 if (fAttrActivations[
direction * 3] ==
"Relu") {
1116 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1117 out << SP << SP << SP <<
"if (" <<
OpName <<
"_forget_gate[i] < 0.)\n";
1118 out << SP << SP << SP << SP <<
OpName <<
"_forget_gate[i] = 0.;\n";
1119 out << SP << SP <<
"}\n";
1120 }
else if (fAttrActivations[
direction * 3] ==
"Tanh") {
1121 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1122 if (fType ==
"float") {
1123 out << SP << SP << SP <<
"float ex = exp(-2 * " <<
OpName <<
"_forget_gate[i]);\n";
1125 out << SP << SP << SP << SP <<
OpName <<
"_forget_gate[i] = (1. - ex) / (1. + ex);\n";
1126 out << SP << SP <<
"}\n";
1127 }
else if (fAttrActivations[
direction * 3] ==
"Sigmoid") {
1128 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1129 out << SP << SP << SP << SP <<
OpName <<
"_forget_gate[i] = 1. / (1. + exp(-" <<
OpName
1130 <<
"_forget_gate[i]));\n";
1131 out << SP << SP <<
"}\n";
1132 }
else if (fAttrActivations[
direction * 3] ==
"Affine") {
1133 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1134 out << SP << SP << SP << SP <<
OpName <<
"_forget_gate[i] = " << fAttrActivationAlpha[
direction * 3]
1135 <<
" * " <<
OpName <<
"_forget_gate[i] + " << fAttrActivationBeta[
direction * 3] <<
";\n";
1136 out << SP << SP <<
"}\n";
1137 }
else if (fAttrActivations[
direction * 3] ==
"ScaledTanh") {
1138 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1139 if (fType ==
"float") {
1140 out << SP << SP << SP <<
"float ex = exp(-2 * " << fAttrActivationBeta[
direction * 3] <<
" * " <<
OpName
1141 <<
"_forget_gate[i]);\n";
1143 out << SP << SP << SP << SP <<
OpName <<
"_forget_gate[i] = " << fAttrActivationAlpha[
direction * 3]
1144 <<
" * (1. - ex) / (1. + ex);\n";
1145 out << SP << SP <<
"}\n";
1146 }
else if (fAttrActivations[
direction * 3] ==
"HardSigmoid") {
1147 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1148 if (fType ==
"float") {
1149 out << SP << SP << SP <<
"float a = " << fAttrActivationAlpha[
direction * 3] <<
" * " <<
OpName
1150 <<
"_forget_gate[i] + " << fAttrActivationBeta[
direction * 3] <<
";\n";
1151 out << SP << SP << SP <<
"float b = (a > 0.) ? a : 0.;\n";
1153 out << SP << SP << SP << SP <<
OpName <<
"_forget_gate[i] = (b < 1.) ? b : 1.;\n";
1154 out << SP << SP <<
"}\n";
1155 }
else if (fAttrActivations[
direction * 3] ==
"LeakyRelu") {
1156 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1157 out << SP << SP << SP <<
"if (" <<
OpName <<
"_forget_gate[i] < 0.)\n";
1158 out << SP << SP << SP << SP <<
OpName <<
"_forget_gate[i] = " << fAttrActivationAlpha[
direction * 3]
1159 <<
" * " <<
OpName <<
"_forget_gate[i];\n";
1160 out << SP << SP <<
"}\n";
1161 }
else if (fAttrActivations[
direction * 3] ==
"ThresholdRelu") {
1162 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1163 out << SP << SP << SP <<
"if (" <<
OpName <<
"_forget_gate[i] < " << fAttrActivationAlpha[
direction * 3]
1165 out << SP << SP << SP << SP <<
OpName <<
"_forget_gate[i] = 0.;\n";
1166 out << SP << SP <<
"}";
1167 }
else if (fAttrActivations[
direction * 3] ==
"Elu") {
1168 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1169 out << SP << SP << SP <<
"if (" <<
OpName <<
"_forget_gate[i] < 0.)\n";
1170 out << SP << SP << SP << SP <<
OpName <<
"_forget_gate[i] = " << fAttrActivationAlpha[
direction * 3]
1171 <<
" * exp(" <<
OpName <<
"_forget_gate[i] - 1.);\n";
1172 out << SP << SP <<
"}\n";
1173 }
else if (fAttrActivations[
direction * 3] ==
"Softsign") {
1174 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1175 out << SP << SP << SP << SP <<
OpName <<
"_forget_gate[i] = " <<
OpName <<
"_forget_gate[i] / (1. + abs("
1176 <<
OpName <<
"_forget_gate[i]));\n";
1177 out << SP << SP <<
"}\n";
1179 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1180 out << SP << SP << SP << SP <<
OpName <<
"_forget_gate[i] = log(1. + exp(" <<
OpName
1181 <<
"_forget_gate[i]));\n";
1182 out << SP << SP <<
"}\n";
1187 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1188 out << SP << SP << SP <<
OpName <<
"_cell_state[i] = " <<
OpName <<
"_input_gate[i] * " <<
OpName
1189 <<
"_cell_gate[i];\n";
1190 out << SP << SP <<
"}\n";
1192 if (fAttrInputForget == 0) {
1193 out << SP << SP <<
"if (seq == 0) {\n";
1194 if (!fNInitial_c.empty()) {
1196 out << SP << SP << SP <<
"for (size_t i = 0; i < " <<
size <<
"; i++) {\n";
1197 out << SP << SP << SP << SP <<
OpName <<
"_cell_state[i + offset] += " <<
OpName
1198 <<
"_forget_gate[i + offset] * " <<
OpName <<
"_initial_cell_state[i];\n";
1199 out << SP << SP << SP <<
"}\n";
1201 out << SP << SP <<
"} else {\n";
1204 if (fAttrDirection ==
"backward") {
1205 out << SP << SP << SP <<
"size_t previous_offset = (index + 1) * "
1208 out << SP << SP << SP <<
"size_t previous_offset = (seq - 1) * "
1212 out << SP << SP << SP <<
"size_t previous_offset = (index + 1) * "
1215 out << SP << SP << SP <<
"for (size_t i = 0; i < " <<
size <<
"; i++) {\n";
1216 out << SP << SP << SP << SP <<
OpName <<
"_cell_state[i + offset] += " <<
OpName
1217 <<
"_forget_gate[i + offset] * " <<
OpName <<
"_cell_state[i + previous_offset];\n";
1218 out << SP << SP << SP <<
"}\n";
1219 out << SP << SP <<
"}\n";
1226 out << SP << SP << SP <<
"for (size_t i = 0; i < " <<
size <<
"; i++) {\n";
1227 out << SP << SP << SP << SP <<
OpName <<
"_output_gate[i + offset] += tensor_" << fNP <<
"[i + " <<
p_offset
1228 <<
"] * " <<
OpName <<
"_cell_state[i + offset];\n";
1229 out << SP << SP << SP <<
"}\n";
1232 out << SP << SP << SP <<
"for (size_t i = 0; i < " <<
size <<
"; i++) {\n";
1233 out << SP << SP << SP << SP <<
OpName <<
"_output_gate[i + offset] += tensor_" << fNP <<
"[i + " <<
p_offset
1234 <<
"] * " <<
OpName <<
"_cell_state[i + offset];\n";
1235 out << SP << SP << SP <<
"}\n";
1240 if (fAttrClip > .0) {
1241 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1242 if (fType ==
"float") {
1243 out << SP << SP << SP <<
"float x = (" <<
OpName <<
"_output_gate[i] > " << -fAttrClip <<
") ? " <<
OpName
1244 <<
"_output_gate[i] : " << -fAttrClip <<
";\n";
1246 out << SP << SP << SP <<
OpName <<
"_output_gate[i] = (x < " << fAttrClip <<
") ? x : " << fAttrClip <<
";\n";
1247 out << SP << SP <<
"}\n";
1250 if (fAttrActivations[
direction * 3] ==
"Relu") {
1251 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1252 out << SP << SP << SP <<
"if (" <<
OpName <<
"_output_gate[i] < 0.)\n";
1253 out << SP << SP << SP << SP <<
OpName <<
"_output_gate[i] = 0.;\n";
1254 out << SP << SP <<
"}\n";
1255 }
else if (fAttrActivations[
direction * 3] ==
"Tanh") {
1256 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1257 if (fType ==
"float") {
1258 out << SP << SP << SP <<
"float ex = exp(-2 * " <<
OpName <<
"_output_gate[i]);\n";
1260 out << SP << SP << SP << SP <<
OpName <<
"_output_gate[i] = (1. - ex) / (1. + ex);\n";
1261 out << SP << SP <<
"}\n";
1262 }
else if (fAttrActivations[
direction * 3] ==
"Sigmoid") {
1263 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1264 out << SP << SP << SP << SP <<
OpName <<
"_output_gate[i] = 1. / (1. + exp(-" <<
OpName
1265 <<
"_output_gate[i]));\n";
1266 out << SP << SP <<
"}\n";
1267 }
else if (fAttrActivations[
direction * 3] ==
"Affine") {
1268 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1269 out << SP << SP << SP << SP <<
OpName <<
"_output_gate[i] = " << fAttrActivationAlpha[
direction * 3] <<
" * "
1270 <<
OpName <<
"_output_gate[i] + " << fAttrActivationBeta[
direction * 3] <<
";\n";
1271 out << SP << SP <<
"}\n";
1272 }
else if (fAttrActivations[
direction * 3] ==
"ScaledTanh") {
1273 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1274 if (fType ==
"float") {
1275 out << SP << SP << SP <<
"float ex = exp(-2 * " << fAttrActivationBeta[
direction * 3] <<
" * " <<
OpName
1276 <<
"_output_gate[i]);\n";
1278 out << SP << SP << SP << SP <<
OpName <<
"_output_gate[i] = " << fAttrActivationAlpha[
direction * 3]
1279 <<
" * (1. - ex) / (1. + ex);\n";
1280 out << SP << SP <<
"}\n";
1281 }
else if (fAttrActivations[
direction * 3] ==
"HardSigmoid") {
1282 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1283 if (fType ==
"float") {
1284 out << SP << SP << SP <<
"float a = " << fAttrActivationAlpha[
direction * 3] <<
" * " <<
OpName
1285 <<
"_output_gate[i] + " << fAttrActivationBeta[
direction * 3] <<
";\n";
1286 out << SP << SP << SP <<
"float b = (a > 0.) ? a : 0.;\n";
1288 out << SP << SP << SP << SP <<
OpName <<
"_output_gate[i] = (b < 1.) ? b : 1.;\n";
1289 out << SP << SP <<
"}\n";
1290 }
else if (fAttrActivations[
direction * 3] ==
"LeakyRelu") {
1291 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1292 out << SP << SP << SP <<
"if (" <<
OpName <<
"_output_gate[i] < 0.)\n";
1293 out << SP << SP << SP << SP <<
OpName <<
"_output_gate[i] = " << fAttrActivationAlpha[
direction * 3] <<
" * "
1294 <<
OpName <<
"_output_gate[i];\n";
1295 out << SP << SP <<
"}\n";
1296 }
else if (fAttrActivations[
direction * 3] ==
"ThresholdRelu") {
1297 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1298 out << SP << SP << SP <<
"if (" <<
OpName <<
"_output_gate[i] < " << fAttrActivationAlpha[
direction * 3]
1300 out << SP << SP << SP << SP <<
OpName <<
"_output_gate[i] = 0.;\n";
1301 out << SP << SP <<
"}";
1302 }
else if (fAttrActivations[
direction * 3] ==
"Elu") {
1303 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1304 out << SP << SP << SP <<
"if (" <<
OpName <<
"_output_gate[i] < 0.)\n";
1305 out << SP << SP << SP << SP <<
OpName <<
"_output_gate[i] = " << fAttrActivationAlpha[
direction * 3]
1306 <<
" * exp(" <<
OpName <<
"_output_gate[i] - 1.);\n";
1307 out << SP << SP <<
"}\n";
1308 }
else if (fAttrActivations[
direction * 3] ==
"Softsign") {
1309 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1310 out << SP << SP << SP << SP <<
OpName <<
"_output_gate[i] = " <<
OpName <<
"_output_gate[i] / (1. + abs("
1311 <<
OpName <<
"_output_gate[i]));\n";
1312 out << SP << SP <<
"}\n";
1314 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1315 out << SP << SP << SP << SP <<
OpName <<
"_output_gate[i] = log(1. + exp(" <<
OpName <<
"_output_gate[i]));\n";
1316 out << SP << SP <<
"}\n";
1320 out << SP << SP <<
"std::copy(" <<
OpName <<
"_cell_state + offset, " <<
OpName <<
"_cell_state + offset + "
1321 <<
size <<
", " <<
OpName <<
"_new_cell_state + offset);\n";
1323 if (fAttrClip > .0) {
1324 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1325 if (fType ==
"float") {
1326 out << SP << SP << SP <<
"float x = (" <<
OpName <<
"_new_cell_state[i] > " << -fAttrClip <<
") ? "
1327 <<
OpName <<
"_new_cell_state[i] : " << -fAttrClip <<
";\n";
1329 out << SP << SP << SP <<
OpName <<
"_new_cell_state[i] = (x < " << fAttrClip <<
") ? x : " << fAttrClip
1331 out << SP << SP <<
"}\n";
1334 if (fAttrActivations[
direction * 3 + 2] ==
"Relu") {
1335 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1336 out << SP << SP << SP <<
"if (" <<
OpName <<
"_new_cell_state[i] < 0.)\n";
1337 out << SP << SP << SP << SP <<
OpName <<
"_new_cell_state[i] = 0.;\n";
1338 out << SP << SP <<
"}\n";
1339 }
else if (fAttrActivations[
direction * 3 + 2] ==
"Tanh") {
1340 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1341 if (fType ==
"float") {
1342 out << SP << SP << SP <<
"float ex = exp(-2 * " <<
OpName <<
"_new_cell_state[i]);\n";
1344 out << SP << SP << SP << SP <<
OpName <<
"_new_cell_state[i] = (1. - ex) / (1. + ex);\n";
1345 out << SP << SP <<
"}\n";
1346 }
else if (fAttrActivations[
direction * 3 + 2] ==
"Sigmoid") {
1347 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1348 out << SP << SP << SP << SP <<
OpName <<
"_new_cell_state[i] = 1. / (1. + exp(-" <<
OpName
1349 <<
"_new_cell_state[i]));\n";
1350 out << SP << SP <<
"}\n";
1351 }
else if (fAttrActivations[
direction * 3 + 2] ==
"Affine") {
1352 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1353 out << SP << SP << SP << SP <<
OpName <<
"_new_cell_state[i] = " << fAttrActivationAlpha[
direction * 3 + 2]
1354 <<
" * " <<
OpName <<
"_new_cell_state[i] + " << fAttrActivationBeta[
direction * 3 + 2] <<
";\n";
1355 out << SP << SP <<
"}\n";
1356 }
else if (fAttrActivations[
direction * 3 + 2] ==
"ScaledTanh") {
1357 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1358 if (fType ==
"float") {
1359 out << SP << SP << SP <<
"float ex = exp(-2 * " << fAttrActivationBeta[
direction * 3 + 2] <<
" * " <<
OpName
1360 <<
"_new_cell_state[i]);\n";
1362 out << SP << SP << SP << SP <<
OpName <<
"_new_cell_state[i] = " << fAttrActivationAlpha[
direction * 3 + 2]
1363 <<
" * (1. - ex) / (1. + ex);\n";
1364 out << SP << SP <<
"}\n";
1365 }
else if (fAttrActivations[
direction * 3 + 2] ==
"HardSigmoid") {
1366 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1367 if (fType ==
"float") {
1368 out << SP << SP << SP <<
"float a = " << fAttrActivationAlpha[
direction * 3 + 2] <<
" * " <<
OpName
1369 <<
"_new_cell_state[i] + " << fAttrActivationBeta[
direction * 3 + 2] <<
";\n";
1370 out << SP << SP << SP <<
"float b = (a > 0.) ? a : 0.;\n";
1372 out << SP << SP << SP << SP <<
OpName <<
"_new_cell_state[i] = (b < 1.) ? b : 1.;\n";
1373 out << SP << SP <<
"}\n";
1374 }
else if (fAttrActivations[
direction * 3 + 2] ==
"LeakyRelu") {
1375 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1376 out << SP << SP << SP <<
"if (" <<
OpName <<
"_new_cell_state[i] < 0.)\n";
1377 out << SP << SP << SP << SP <<
OpName <<
"_new_cell_state[i] = " << fAttrActivationAlpha[
direction * 3 + 2]
1378 <<
" * " <<
OpName <<
"_new_cell_state[i];\n";
1379 out << SP << SP <<
"}\n";
1380 }
else if (fAttrActivations[
direction * 3 + 2] ==
"ThresholdRelu") {
1381 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1382 out << SP << SP << SP <<
"if (" <<
OpName <<
"_new_cell_state[i] < " << fAttrActivationAlpha[
direction * 3 + 2]
1384 out << SP << SP << SP << SP <<
OpName <<
"_new_cell_state[i] = 0.;\n";
1385 out << SP << SP <<
"}";
1386 }
else if (fAttrActivations[
direction * 3 + 2] ==
"Elu") {
1387 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1388 out << SP << SP << SP <<
"if (" <<
OpName <<
"_new_cell_state[i] < 0.)\n";
1389 out << SP << SP << SP << SP <<
OpName <<
"_new_cell_state[i] = " << fAttrActivationAlpha[
direction * 3 + 2]
1390 <<
" * exp(" <<
OpName <<
"_new_cell_state[i] - 1.);\n";
1391 out << SP << SP <<
"}\n";
1392 }
else if (fAttrActivations[
direction * 3 + 2] ==
"Softsign") {
1393 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1394 out << SP << SP << SP << SP <<
OpName <<
"_new_cell_state[i] = " <<
OpName <<
"_new_cell_state[i] / (1. + abs("
1395 <<
OpName <<
"_new_cell_state[i]));\n";
1396 out << SP << SP <<
"}\n";
1398 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1399 out << SP << SP << SP << SP <<
OpName <<
"_new_cell_state[i] = log(1. + exp(" <<
OpName
1400 <<
"_new_cell_state[i]));\n";
1401 out << SP << SP <<
"}\n";
1405 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1406 out << SP << SP << SP <<
OpName <<
"_hidden_state[i] = " <<
OpName <<
"_output_gate[i] * " <<
OpName
1407 <<
"_new_cell_state[i];\n";
1408 out << SP << SP <<
"}\n";
1413 if (!fNSequence_lens.empty()) {
1414 out << SP <<
"for (size_t seq = 0; seq < " <<
seq_length <<
"; seq++) {\n";
1415 out << SP << SP <<
"for (size_t batch = 0; batch < " <<
batch_size <<
"; batch++) {\n";
1416 out << SP << SP << SP <<
"if (seq >= tensor_" << fNSequence_lens <<
"[batch]) {\n";
1418 out << SP << SP << SP << SP << SP <<
"for (size_t h = 0; h < " << fAttrHiddenSize <<
"; h++) {\n";
1419 out << SP << SP << SP << SP << SP << SP <<
"size_t idx = seq * "
1421 <<
" + batch * " << fAttrHiddenSize <<
" + h;\n";
1422 out << SP << SP << SP << SP << SP << SP <<
OpName <<
"_cell_state[idx] = 0.;\n";
1423 out << SP << SP << SP << SP << SP << SP <<
OpName <<
"_hidden_state[idx] = 0.;\n";
1424 out << SP << SP << SP << SP << SP <<
"}\n";
1426 out << SP << SP << SP <<
"}\n";
1427 out << SP << SP <<
"}\n";
1432 if (fAttrLayout == 0) {
1433 if (!fNY_h.empty()) {
1435 if (fNSequence_lens.empty()) {
1437 if (fAttrDirection ==
"backward") {
1438 out << SP <<
"std::copy(" <<
OpName <<
"_hidden_state, " <<
OpName <<
"_hidden_state + " <<
y_h_size
1439 <<
", tensor_" << fNY_h <<
");\n";
1442 out << SP <<
"std::copy(" <<
OpName <<
"_hidden_state + " <<
offset <<
", " <<
OpName
1443 <<
"_hidden_state + " <<
offset <<
" + " <<
y_h_size <<
", tensor_" << fNY_h <<
");\n";
1447 <<
"_hidden_state + " << 2 *
y_h_size <<
", tensor_" << fNY_h <<
" + " <<
y_h_size <<
");\n";
1450 if (fAttrDirection ==
"backward") {
1451 out << SP <<
"for (size_t batch = 0; batch < " <<
batch_size <<
"; batch++) {\n";
1452 out << SP << SP <<
"size_t offset = batch * " << fAttrHiddenSize <<
";\n";
1453 out << SP << SP <<
"std::copy(" <<
OpName <<
"_hidden_state + offset, " <<
OpName
1454 <<
"_hidden_state + offset + " << fAttrHiddenSize <<
", tensor_" << fNY_h <<
" + offset);\n";
1457 out << SP <<
"for (size_t batch = 0; batch < " <<
batch_size <<
"; batch++) {\n";
1458 out << SP << SP <<
"size_t seq = " <<
"tensor_" << fNSequence_lens <<
"[batch] - 1;\n";
1460 <<
" + batch * " << fAttrHiddenSize <<
";\n";
1461 out << SP << SP <<
"size_t y_h_offset = batch * " << fAttrHiddenSize <<
";\n";
1462 out << SP << SP <<
"std::copy(" <<
OpName <<
"_hidden_state + offset, " <<
OpName
1463 <<
"_hidden_state + offset + " << fAttrHiddenSize <<
", tensor_" << fNY_h <<
" + y_h_offset);\n";
1467 out << SP <<
"for (size_t batch = 0; batch < " <<
batch_size <<
"; batch++) {\n";
1468 out << SP << SP <<
"size_t offset = " <<
batch_size * fAttrHiddenSize <<
" + batch * " << fAttrHiddenSize
1470 out << SP << SP <<
"size_t y_h_offset = " <<
batch_size * fAttrHiddenSize <<
" + batch * "
1471 << fAttrHiddenSize <<
";\n";
1472 out << SP << SP <<
"std::copy(" <<
OpName <<
"_hidden_state + offset, " <<
OpName
1473 <<
"_hidden_state + offset + " << fAttrHiddenSize <<
", tensor_" << fNY_h <<
" + y_h_offset);\n";
1478 if (!fNY_c.empty()) {
1480 if (fNSequence_lens.empty()) {
1482 if (fAttrDirection ==
"backward") {
1483 out << SP <<
"std::copy(" <<
OpName <<
"_cell_state, " <<
OpName <<
"_hidden_state + " <<
y_h_size
1484 <<
", tensor_" << fNY_c <<
");\n";
1487 out << SP <<
"std::copy(" <<
OpName <<
"_cell_state + " <<
offset <<
", " <<
OpName <<
"_cell_state + "
1488 <<
offset <<
" + " <<
y_h_size <<
", tensor_" << fNY_c <<
");\n";
1491 out << SP <<
"std::copy(" <<
OpName <<
"_cell_state + " <<
y_h_size <<
", " <<
OpName <<
"_cell_state + "
1495 if (fAttrDirection ==
"backward") {
1496 out << SP <<
"for (size_t batch = 0; batch < " <<
batch_size <<
"; batch++) {\n";
1497 out << SP << SP <<
"size_t offset = batch * " << fAttrHiddenSize <<
";\n";
1498 out << SP << SP <<
"std::copy(" <<
OpName <<
"_cell_state + offset, " <<
OpName
1499 <<
"_cell_state + offset + " << fAttrHiddenSize <<
", tensor_" << fNY_c <<
" + offset);\n";
1502 out << SP <<
"for (size_t batch = 0; batch < " <<
batch_size <<
"; batch++) {\n";
1503 out << SP << SP <<
"size_t seq = " <<
"tensor_" << fNSequence_lens <<
"[batch] - 1;\n";
1505 <<
" + batch * " << fAttrHiddenSize <<
";\n";
1506 out << SP << SP <<
"size_t y_h_offset = batch * " << fAttrHiddenSize <<
";\n";
1507 out << SP << SP <<
"std::copy(" <<
OpName <<
"_cell_state + offset, " <<
OpName
1508 <<
"_cell_state + offset + " << fAttrHiddenSize <<
", tensor_" << fNY_c <<
" + y_h_offset);\n";
1512 out << SP <<
"for (size_t batch = 0; batch < " <<
batch_size <<
"; batch++) {\n";
1513 out << SP << SP <<
"size_t offset = " <<
batch_size * fAttrHiddenSize <<
" + batch * " << fAttrHiddenSize
1515 out << SP << SP <<
"size_t y_h_offset = " <<
batch_size * fAttrHiddenSize <<
" + batch * "
1516 << fAttrHiddenSize <<
";\n";
1517 out << SP << SP <<
"std::copy(" <<
OpName <<
"_cell_state + offset, " <<
OpName
1518 <<
"_cell_state + offset + " << fAttrHiddenSize <<
", tensor_" << fNY_c <<
" + y_h_offset);\n";
1527 out << SP <<
"for (size_t seq = 0; seq < " <<
seq_length <<
"; seq++) {\n";
1528 out << SP << SP <<
"for (size_t batch = 0; batch < " <<
batch_size <<
"; batch++) {\n";
1533 out << SP << SP << SP <<
"std::copy(" <<
OpName <<
"_hidden_state + offset, " <<
OpName
1534 <<
"_hidden_state + offset + " << fAttrHiddenSize <<
", tensor_" << fNY <<
" + y_offset);\n";
1535 out << SP << SP <<
"}\n";
1539 if (!fNY_h.empty()) {
1541 if (fAttrDirection ==
"backward") {
1542 out << SP <<
"for (size_t batch = 0; batch < " <<
batch_size <<
"; batch++) {\n";
1543 out << SP << SP <<
"size_t offset = batch * " << fAttrHiddenSize <<
";\n";
1544 out << SP << SP <<
"size_t y_h_offset = batch * " <<
num_directions * fAttrHiddenSize <<
";\n";
1545 out << SP << SP <<
"std::copy(" <<
OpName <<
"_hidden_state + offset, " <<
OpName
1546 <<
"_hidden_state + offset + " << fAttrHiddenSize <<
", tensor_" << fNY_h <<
" + y_h_offset);\n";
1549 out << SP <<
"for (size_t batch = 0; batch < " <<
batch_size <<
"; batch++) {\n";
1550 if (fNSequence_lens.empty()) {
1551 out << SP << SP <<
"size_t seq = " <<
seq_length - 1 <<
";\n";
1553 out << SP << SP <<
"size_t seq = " <<
"tensor_" << fNSequence_lens <<
"[batch] - 1;\n";
1556 <<
" + batch * " << fAttrHiddenSize <<
";\n";
1557 out << SP << SP <<
"size_t y_h_offset = batch * " <<
num_directions * fAttrHiddenSize <<
";\n";
1558 out << SP << SP <<
"std::copy(" <<
OpName <<
"_hidden_state + offset, " <<
OpName
1559 <<
"_hidden_state + offset + " << fAttrHiddenSize <<
", tensor_" << fNY_h <<
" + y_h_offset);\n";
1563 out << SP <<
"for (size_t batch = 0; batch < " <<
batch_size <<
"; batch++) {\n";
1564 out << SP << SP <<
"size_t offset = " <<
batch_size * fAttrHiddenSize <<
" + batch * " << fAttrHiddenSize
1566 out << SP << SP <<
"size_t y_h_offset = batch * " <<
num_directions * fAttrHiddenSize <<
" + "
1567 << fAttrHiddenSize <<
";\n";
1568 out << SP << SP <<
"std::copy(" <<
OpName <<
"_hidden_state + offset, " <<
OpName
1569 <<
"_hidden_state + offset + " << fAttrHiddenSize <<
", tensor_" << fNY_h <<
" + y_h_offset);\n";
1574 if (!fNY_c.empty()) {
1576 if (fAttrDirection ==
"backward") {
1577 out << SP <<
"for (size_t batch = 0; batch < " <<
batch_size <<
"; batch++) {\n";
1578 out << SP << SP <<
"size_t offset = batch * " << fAttrHiddenSize <<
";\n";
1579 out << SP << SP <<
"size_t y_h_offset = batch * " <<
num_directions * fAttrHiddenSize <<
";\n";
1580 out << SP << SP <<
"std::copy(" <<
OpName <<
"_cell_state + offset, " <<
OpName <<
"_cell_state + offset + "
1581 << fAttrHiddenSize <<
", tensor_" << fNY_c <<
" + y_h_offset);\n";
1584 out << SP <<
"for (size_t batch = 0; batch < " <<
batch_size <<
"; batch++) {\n";
1585 if (fNSequence_lens.empty()) {
1586 out << SP << SP <<
"size_t seq = " <<
seq_length - 1 <<
";\n";
1588 out << SP << SP <<
"size_t seq = " <<
"tensor_" << fNSequence_lens <<
"[batch] - 1;\n";
1591 <<
" + batch * " << fAttrHiddenSize <<
";\n";
1592 out << SP << SP <<
"size_t y_h_offset = batch * " <<
num_directions * fAttrHiddenSize <<
";\n";
1593 out << SP << SP <<
"std::copy(" <<
OpName <<
"_cell_state + offset, " <<
OpName <<
"_cell_state + offset + "
1594 << fAttrHiddenSize <<
", tensor_" << fNY_c <<
" + y_h_offset);\n";
1598 out << SP <<
"for (size_t batch = 0; batch < " <<
batch_size <<
"; batch++) {\n";
1599 out << SP << SP <<
"size_t offset = " <<
batch_size * fAttrHiddenSize <<
" + batch * " << fAttrHiddenSize
1601 out << SP << SP <<
"size_t y_h_offset = batch * " <<
num_directions * fAttrHiddenSize <<
" + "
1602 << fAttrHiddenSize <<
";\n";
1603 out << SP << SP <<
"std::copy(" <<
OpName <<
"_cell_state + offset, " <<
OpName <<
"_cell_state + offset + "
1604 << fAttrHiddenSize <<
", tensor_" << fNY_c <<
" + y_h_offset);\n";
ROperator_LSTM(std::vector< float > activation_alpha, std::vector< float > activation_beta, std::vector< std::string > activations, float clip, std::string direction, size_t hidden_size, size_t input_forget, size_t layout, std::string nameX, std::string nameW, std::string nameR, std::string nameB, std::string nameSequence_lens, std::string nameInitial_h, std::string nameInitial_c, std::string nameP, std::string nameY, std::string nameY_h, std::string nameY_c)
Constructor of ROperator_LSTM: builds the LSTM operator from its ONNX attribute values (activation functions and their alpha/beta parameters, clip threshold, direction, hidden size, input_forget, layout) and the names of its input and output tensors.