1#ifndef TMVA_SOFIE_ROPERATOR_LSTM_I
2#define TMVA_SOFIE_ROPERATOR_LSTM_I
5namespace Experimental {
10-> std::vector<ETensorType> {
17-> std::vector<std::vector<size_t>> {
// Computes the three output shapes of the operator. Callers in this file pass
// {fShapeX, fShapeW} and index the result as [0] -> fShapeY, [1] -> fShapeY_h,
// [2] -> fShapeY_c, so input[0] is the X shape and input[1] is the W shape.
// The first dimension of W is taken as the number of directions.
18 size_t num_directions =
input[1][0];
// The initialisation code requires fShapeW[1] == 4 * fAttrHiddenSize, hence the division by 4.
19 size_t hidden_size =
input[1][1] / 4;
// Layout 0 ("timewise" in this file's error message): X is read as (seq_length, batch_size, ...).
20 if (fAttrLayout == 0) {
21 size_t seq_length =
input[0][0];
22 size_t batch_size =
input[0][1];
// Entry 0 is the per-step output; entries 1 and 2 share the shape (num_directions, batch_size, hidden_size).
23 std::vector<std::vector<size_t>> ret(
24 {{seq_length, num_directions, batch_size, hidden_size},
25 {num_directions, batch_size, hidden_size},
26 {num_directions, batch_size, hidden_size}});
// Other layout ("batchwise"): the first two dimensions of X swap roles, and batch_size
// becomes the leading dimension of all three output shapes.
29 size_t batch_size =
input[0][0];
30 size_t seq_length =
input[0][1];
31 std::vector<std::vector<size_t>> ret(
32 {{batch_size, seq_length, num_directions, hidden_size},
33 {batch_size, num_directions, hidden_size},
34 {batch_size, num_directions, hidden_size}});
// Validates the operator's tensors and attributes against `model`, rewrites the fNB and
// fNP initialised tensors into expanded layouts, registers the output tensors, and fills in
// default activations. Every failed check throws std::runtime_error.
42 fUseSession = model.UseSession();
// Input X: must already be known to the model and have exactly 3 dimensions.
44 if (!model.CheckIfTensorAlreadyExist(fNX)) {
45 throw std::runtime_error(
"TMVA SOFIE LSTM Op input tensor " + fNX +
" is not found in model.");
47 fShapeX = model.GetTensorShape(fNX);
48 if (fShapeX.size() != 3) {
49 throw std::runtime_error(
"TMVA SOFIE LSTM Op input tensor " + fNX +
" is not of 3 dimensions.");
// Tensor W: same existence and rank-3 requirements as X.
51 if (!model.CheckIfTensorAlreadyExist(fNW)) {
52 throw std::runtime_error(
"TMVA SOFIE LSTM Op input tensor " + fNW +
" is not found in model.");
54 fShapeW = model.GetTensorShape(fNW);
55 if (fShapeW.size() != 3) {
56 throw std::runtime_error(
"TMVA SOFIE LSTM Op input tensor " + fNW +
" is not of 3 dimensions.");
// Tensor R: same existence and rank-3 requirements as X.
58 if (!model.CheckIfTensorAlreadyExist(fNR)) {
59 throw std::runtime_error(
"TMVA SOFIE LSTM Op input tensor " + fNR +
" is not found in model.");
61 fShapeR = model.GetTensorShape(fNR);
62 if (fShapeR.size() != 3) {
63 throw std::runtime_error(
"TMVA SOFIE LSTM Op input tensor " + fNR +
" is not of 3 dimensions.");
// Bias tensor B: accepted with rank 2 (original form) or rank 5 (already expanded below).
66 if (!model.CheckIfTensorAlreadyExist(fNB)) {
67 throw std::runtime_error(
"TMVA SOFIE LSTM op input tensor " + fNB +
" is not found in model.");
69 fShapeB = model.GetTensorShape(fNB);
70 if (fShapeB.size() != 2 && fShapeB.size() != 5) {
71 throw std::runtime_error(
"TMVA SOFIE LSTM op input tensor " + fNB +
" is not of 2 or 5 dimensions.");
// Rank-2 bias: replicate it into a dense tensor of shape
// {4, num_directions, seq_length, batch_size, fAttrHiddenSize} and store it back in the model.
73 if (fShapeB.size() == 2) {
75 auto original_data = model.GetInitializedTensorData(fNB);
76 size_t num_directions = fShapeW[0];
// The positions of seq_length and batch_size in X depend on the layout attribute.
77 size_t seq_length = (fAttrLayout == 0)? fShapeX[0] : fShapeX[1];
78 size_t batch_size = (fAttrLayout == 0)? fShapeX[1] : fShapeX[0];
// Only the "float" element type is handled here.
79 if (fType ==
"float") {
80 float *original_bias =
static_cast<float*
>(original_data.get());
// Raw allocation; ownership is handed to a shared_ptr with an array deleter further down.
81 float *new_bias =
new float[4 * num_directions * seq_length * batch_size * fAttrHiddenSize];
82 for (
size_t gate = 0; gate < 4; gate++) {
// NOTE(review): if fAttrHiddenSize is a runtime value this is a variable-length array,
// which is a compiler extension rather than standard C++ - confirm, or use std::vector.
83 float sum[fAttrHiddenSize];
84 for (
size_t direction = 0; direction < num_directions; direction++) {
// Source offset: each direction spans 8 * fAttrHiddenSize values in the original bias,
// and `gate` selects one fAttrHiddenSize-wide block inside it.
85 size_t offset = direction * 8 * fAttrHiddenSize + gate * fAttrHiddenSize;
// Per-element pass over the hidden dimension; presumably this fills `sum` from
// original_bias at `offset` - TODO confirm against the loop body.
86 for (
size_t h = 0;
h < fAttrHiddenSize;
h++) {
// Copy the same fAttrHiddenSize values into every (seq, batch) slot for this gate and direction.
89 for (
size_t seq = 0; seq < seq_length; seq++) {
90 for (
size_t batch = 0; batch < batch_size; batch++) {
// Destination index is ordered gate, direction, seq, batch (slowest to fastest), matching new_bias_shape.
91 size_t bias_offset = gate * num_directions * seq_length * batch_size * fAttrHiddenSize
92 + direction * seq_length * batch_size * fAttrHiddenSize
93 + seq * batch_size * fAttrHiddenSize + batch * fAttrHiddenSize;
94 std::copy(
sum,
sum + fAttrHiddenSize, new_bias + bias_offset);
// Replace the model's stored bias with the expanded one and refresh the cached shape.
99 std::vector<size_t> new_bias_shape = {4, num_directions, seq_length, batch_size, fAttrHiddenSize};
100 std::shared_ptr<void> new_bias_ptr(new_bias, std::default_delete<
float[]>());
101 model.UpdateInitializedTensor(fNB, model.GetTensorType(fNB), new_bias_shape, new_bias_ptr);
102 fShapeB = model.GetTensorShape(fNB);
// Optional sequence_lens input (checked only when a name was given): must exist and have rank 1.
106 if (!fNSequence_lens.empty()) {
107 if (!model.CheckIfTensorAlreadyExist(fNSequence_lens)) {
// NOTE(review): the literal "is not found in model." below has no leading space, unlike
// the sibling messages, so the tensor name and "is" will run together in the message.
108 throw std::runtime_error(
"TMVA SOFIE LSTM Op input tensor " +
110 "is not found in model.");
112 fShapeSequence_lens = model.GetTensorShape(fNSequence_lens);
113 if (fShapeSequence_lens.size() != 1) {
114 throw std::runtime_error(
"TMVA SOFIE LSTM Op input tensor " +
116 " is not of 1 dimension.");
// Optional initial_h input: must exist and have rank 3.
119 if (!fNInitial_h.empty()) {
120 if (!model.CheckIfTensorAlreadyExist(fNInitial_h)) {
121 throw std::runtime_error(
"TMVA SOFIE LSTM Op input tensor " +
122 fNInitial_h +
" is not found in model.");
124 fShapeInitial_h = model.GetTensorShape(fNInitial_h);
125 if (fShapeInitial_h.size() != 3) {
126 throw std::runtime_error(
"TMVA SOFIE LSTM Op input tensor " +
127 fNInitial_h +
" is not of 3 dimensions.");
// Optional initial_c input: must exist and have rank 3.
130 if (!fNInitial_c.empty()) {
131 if (!model.CheckIfTensorAlreadyExist(fNInitial_c)) {
132 throw std::runtime_error(
"TMVA SOFIE LSTM Op input tensor " +
133 fNInitial_c +
" is not found in model.");
135 fShapeInitial_c = model.GetTensorShape(fNInitial_c);
136 if (fShapeInitial_c.size() != 3) {
137 throw std::runtime_error(
"TMVA SOFIE LSTM Op input tensor " +
138 fNInitial_c +
" is not of 3 dimensions.");
// Tensor P (presumably the peephole weights - confirm): accepted with rank 2 or rank 4.
142 if (!model.CheckIfTensorAlreadyExist(fNP)) {
143 throw std::runtime_error(
"TMVA SOFIE LSTM op input tensor " + fNP +
" is not found in model.");
145 fShapeP = model.GetTensorShape(fNP);
146 if (fShapeP.size() != 2 && fShapeP.size() != 4) {
147 throw std::runtime_error(
"TMVA SOFIE LSTM op input tensor " + fNP +
" is not of 2 or 4 dimensions.");
// Rank-2 P: replicate each (direction, gate) row across the batch dimension, giving
// shape {num_directions, 3, batch_size, fAttrHiddenSize}.
149 if (fShapeP.size() == 2) {
151 auto original_data = model.GetInitializedTensorData(fNP);
152 size_t num_directions = fShapeW[0];
153 size_t batch_size = (fAttrLayout == 0)? fShapeX[1] : fShapeX[0];
154 if (fType ==
"float") {
155 float *original_p =
static_cast<float*
>(original_data.get());
156 float *new_p =
new float[num_directions * 3 * batch_size * fAttrHiddenSize];
157 for (
size_t direction = 0; direction < num_directions; direction++) {
158 for (
size_t gate = 0; gate < 3; gate++) {
// Source row for this (direction, gate) in the original rank-2 tensor.
159 size_t p_offset = direction * 3 * fAttrHiddenSize + gate * fAttrHiddenSize;
160 for (
size_t batch = 0; batch < batch_size; batch++) {
// Destination slot for this (direction, gate, batch) in the expanded tensor.
161 size_t offset = direction * 3 * batch_size * fAttrHiddenSize
162 + gate * batch_size * fAttrHiddenSize + batch * fAttrHiddenSize;
163 std::copy(original_p + p_offset, original_p + p_offset + fAttrHiddenSize,
// Replace the model's stored P with the expanded one and refresh the cached shape.
168 std::vector<size_t> new_p_shape = {num_directions, 3, batch_size, fAttrHiddenSize};
169 std::shared_ptr<void> new_p_ptr(new_p, std::default_delete<
float[]>());
170 model.UpdateInitializedTensor(fNP, model.GetTensorType(fNP), new_p_shape, new_p_ptr);
171 fShapeP = model.GetTensorShape(fNP);
// Output Y: shape comes from ShapeInference; it is registered with X's element type
// unless the model already has a tensor of that name.
176 fShapeY = ShapeInference({fShapeX, fShapeW})[0];
177 if (!model.CheckIfTensorAlreadyExist(fNY)) {
178 model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShapeY);
// Output Y_h: handled only when a name was given.
181 if (!fNY_h.empty()) {
182 fShapeY_h = ShapeInference({fShapeX, fShapeW})[1];
183 if (!model.CheckIfTensorAlreadyExist(fNY_h)) {
184 model.AddIntermediateTensor(fNY_h, model.GetTensorType(fNX), fShapeY_h);
// Output Y_c: handled only when a name was given.
187 if (!fNY_c.empty()) {
188 fShapeY_c = ShapeInference({fShapeX, fShapeW})[2];
189 if (!model.CheckIfTensorAlreadyExist(fNY_c)) {
190 model.AddIntermediateTensor(fNY_c, model.GetTensorType(fNX), fShapeY_c);
// Reject any activation name outside the supported list. An empty list passes this loop
// and receives its defaults at the end of this routine.
194 for (
auto &activation : fAttrActivations) {
195 if (activation !=
"Relu" && activation !=
"Tanh" &&
196 activation !=
"Sigmoid" && activation !=
"Affine" &&
197 activation !=
"LeakyRelu" && activation !=
"ThresholdRelu" &&
198 activation !=
"ScaledTanh" && activation !=
"HardSigmoid" &&
199 activation !=
"Elu" && activation !=
"Softsign" &&
200 activation !=
"Softplus") {
201 throw std::runtime_error(
"TMVA SOFIE - Activation function " +
202 activation +
" not implemented");
// Direction must be one of "forward", "backward" or "bidirectional".
205 if (fAttrDirection !=
"forward" && fAttrDirection !=
"backward" &&
206 fAttrDirection !=
"bidirectional") {
207 throw std::runtime_error(
208 "TMVA SOFIE - Invalid LSTM direction fAttrDirection = " +
// The hidden size must be consistent with W: fShapeW[1] == 4 * fAttrHiddenSize.
// NOTE(review): this and the following attribute checks run after fAttrHiddenSize and
// fAttrLayout were already used in the expansions above - consider validating first.
211 if (4 * fAttrHiddenSize != fShapeW[1]) {
212 throw std::runtime_error(
213 "TMVA SOFIE - fAttrHiddenSize must be equal to " +
214 std::to_string(fShapeW[1] / 4));
// input_forget may only be 0 or 1.
216 if (fAttrInputForget > 1) {
217 throw std::runtime_error(
218 "TMVA SOFIE - fAttrInputForget = " + std::to_string(fAttrInputForget)
219 +
" must be 0 or 1.");
// layout may only be 0 or 1.
221 if (fAttrLayout > 1) {
222 throw std::runtime_error(
"TMVA SOFIE - Layout fAttrLayout = " +
223 std::to_string(fAttrLayout) +
224 " must be 0 (timewise) or 1 (batchwise)");
// Default activations when none were supplied: the triple Sigmoid/Tanh/Tanh, listed twice
// for the bidirectional case.
226 if (fAttrActivations.empty()) {
227 if (fAttrDirection ==
"bidirectional") {
228 fAttrActivations = {
"Sigmoid",
"Tanh",
"Tanh",
"Sigmoid",
"Tanh",
"Tanh"};
// Three-entry default; presumably the non-bidirectional branch - confirm.
230 fAttrActivations = {
"Sigmoid",
"Tanh",
"Tanh"};
// Writes into `out` the C++ source text declaring std::vector<fType> buffers named
// fVec_<opName>_*. Buffer sizes are computed here from the tensor shapes and emitted as
// integer literals in the generated code.
239 opName =
"op_" + opName;
240 std::stringstream out;
// Problem dimensions; the positions of seq_length and batch_size in X depend on the layout attribute.
242 size_t num_directions = fShapeW[0];
243 size_t seq_length = (fAttrLayout == 0) ? fShapeX[0] : fShapeX[1];
244 size_t batch_size = (fAttrLayout == 0) ? fShapeX[1] : fShapeX[0];
245 size_t input_size = fShapeX[2];
// Non-zero layout: declare buffers for the input and the initial hidden/cell states. The
// inference code copies the model tensors into these in reordered form, whereas for
// layout 0 it points at the tensors directly.
247 if (fAttrLayout != 0) {
248 out <<
"std::vector<" << fType <<
"> fVec_" << opName <<
"_input = std::vector<" << fType <<
">("
249 << seq_length * batch_size * input_size <<
");\n";
250 out <<
"std::vector<" << fType <<
"> fVec_" << opName <<
"_initial_hidden_state = std::vector<" << fType <<
">("
251 << num_directions * batch_size * fAttrHiddenSize <<
");\n";
252 out <<
"std::vector<" << fType <<
"> fVec_" << opName <<
"_initial_cell_state = std::vector<" << fType <<
">("
253 << num_directions * batch_size * fAttrHiddenSize <<
");\n";
// The "_ff_*" buffers hold one direction at a time, so their size has no num_directions factor.
256 size_t ff_size = seq_length * batch_size * fAttrHiddenSize;
257 out <<
"std::vector<" << fType <<
"> fVec_" << opName <<
"_ff_input_gate = std::vector<" << fType <<
">(" << ff_size <<
");\n";
258 out <<
"std::vector<" << fType <<
"> fVec_" << opName <<
"_ff_output_gate = std::vector<" << fType <<
">(" << ff_size <<
");\n";
259 out <<
"std::vector<" << fType <<
"> fVec_" << opName <<
"_ff_cell_gate = std::vector<" << fType <<
">(" << ff_size <<
");\n";
// The forget-gate buffer is emitted only when fAttrInputForget == 0.
260 if (fAttrInputForget == 0)
261 out <<
"std::vector<" << fType <<
"> fVec_" << opName <<
"_ff_forget_gate = std::vector<" << fType <<
">(" << ff_size <<
");\n";
// Gate and state buffers covering every sequence step and every direction.
263 size_t hs_size = seq_length * num_directions * batch_size * fAttrHiddenSize;
264 out <<
"std::vector<" << fType <<
"> fVec_" << opName <<
"_input_gate = std::vector<" << fType <<
">(" << hs_size <<
");\n";
265 out <<
"std::vector<" << fType <<
"> fVec_" << opName <<
"_output_gate = std::vector<" << fType <<
">(" << hs_size <<
");\n";
266 out <<
"std::vector<" << fType <<
"> fVec_" << opName <<
"_cell_gate = std::vector<" << fType <<
">(" << hs_size <<
");\n";
// As above, the forget-gate buffer exists only when fAttrInputForget == 0.
267 if (fAttrInputForget == 0)
268 out <<
"std::vector<" << fType <<
"> fVec_" << opName <<
"_forget_gate = std::vector<" << fType <<
">(" << hs_size <<
");\n";
// Cell-state buffers ("_cell_state" and "_new_cell_state"), same size as the gate buffers.
270 out <<
"std::vector<" << fType <<
"> fVec_" << opName <<
"_cell_state = std::vector<" << fType <<
">(" << hs_size <<
");\n";
271 out <<
"std::vector<" << fType <<
"> fVec_" << opName <<
"_new_cell_state = std::vector<" << fType <<
">(" << hs_size <<
");\n";
// A separate hidden-state buffer is declared only when the layout is non-zero or no Y
// output name was given; otherwise the inference code uses tensor_<fNY> directly.
273 if (fAttrLayout != 0 || fNY.empty()) {
274 out <<
"std::vector<" << fType <<
"> fVec_" << opName <<
"_hidden_state = std::vector<" << fType <<
">(" << hs_size <<
");\n";
285 OpName =
"op_" + OpName;
286 std::stringstream out;
288 size_t seq_length = (fAttrLayout == 0) ? fShapeX[0] : fShapeX[1];
289 size_t batch_size = (fAttrLayout == 0) ? fShapeX[1] : fShapeX[0];
290 size_t input_size = fShapeX[2];
291 size_t num_directions = fShapeW[0];
294 if (fAttrLayout == 0) {
295 out << SP << fType <<
" *" << OpName <<
"_input = tensor_" << fNX <<
";\n";
298 out << SP << fType <<
" * " << OpName <<
"_input = fVec_" << OpName <<
"_input.data();\n";
300 out << SP << fType <<
" " << OpName <<
"_input[" << seq_length * batch_size * input_size <<
"] = {0};\n";
302 out << SP <<
"for(size_t seq = 0; seq < " << seq_length <<
"; seq++) {\n";
303 out << SP << SP <<
"for(size_t batch = 0; batch < " << batch_size <<
"; batch++) {\n";
304 out << SP << SP << SP <<
"for(size_t i = 0; i < " << input_size <<
"; i++) {\n";
305 out << SP << SP << SP << SP << OpName <<
"_input[seq * " << batch_size * input_size
306 <<
" + batch * " << input_size <<
" + i] = " <<
"tensor_" << fNX <<
"[batch * "
307 << seq_length * input_size <<
" + seq * " << input_size <<
" + i];\n";
308 out << SP << SP << SP <<
"}\n";
309 out << SP << SP <<
"}\n";
314 if (!fNInitial_h.empty()) {
315 if (fAttrLayout == 0) {
316 out << SP << fType <<
" *" << OpName <<
"_initial_hidden_state = " <<
" tensor_"
317 << fNInitial_h <<
";\n";
320 out << SP << fType <<
" * " << OpName <<
"_initial_hidden_state = fVec_" << OpName
321 <<
"_initial_hidden_state.data();\n";
323 out << SP << fType <<
" " << OpName <<
"_initial_hidden_state[" << num_directions * batch_size *
324 fAttrHiddenSize <<
"] = {0};\n";
326 for (
size_t direction = 0; direction < num_directions; direction++) {
327 out << SP <<
"for(size_t batch = 0; batch < " << batch_size <<
"; batch++) {\n";
328 out << SP << SP <<
"for(size_t h = 0; h < " << fAttrHiddenSize <<
"; h++) {\n";
329 out << SP << SP << SP << OpName <<
"_initial_hidden_state["
330 << direction * batch_size * fAttrHiddenSize <<
" + batch * " << fAttrHiddenSize
331 <<
" + h] = tensor_" << fNInitial_h <<
"[batch * " << num_directions * fAttrHiddenSize
332 <<
" + " << direction * fAttrHiddenSize <<
" + h];\n";
333 out << SP << SP <<
"}\n";
340 if (!fNInitial_c.empty()) {
341 if (fAttrLayout == 0) {
342 out << SP << fType <<
" *" << OpName <<
"_initial_cell_state = " <<
" tensor_"
343 << fNInitial_c <<
";\n";
346 out << SP << fType <<
" * " << OpName <<
"_initial_cell_state = fVec_" << OpName
347 <<
"_initial_cell_state.data();\n";
349 out << SP << fType <<
" " << OpName <<
"_initial_cell_state[" << num_directions * batch_size *
350 fAttrHiddenSize <<
"] = {0};\n";
352 for (
size_t direction = 0; direction < num_directions; direction++) {
353 out << SP <<
"for(size_t batch = 0; batch < " << batch_size <<
"; batch++) {\n";
354 out << SP << SP <<
"for(size_t h = 0; h < " << fAttrHiddenSize <<
"; h++) {\n";
355 out << SP << SP << SP << OpName <<
"_initial_cell_state["
356 << direction * batch_size * fAttrHiddenSize <<
" + batch * " << fAttrHiddenSize
357 <<
" + h] = tensor_" << fNInitial_c <<
"[batch * " << num_directions * fAttrHiddenSize
358 <<
" + " << direction * fAttrHiddenSize <<
" + h];\n";
359 out << SP << SP <<
"}\n";
366 size_t ff_size = seq_length * batch_size * fAttrHiddenSize;
368 out << SP << fType <<
" * " << OpName <<
"_ff_input_gate = fVec_" << OpName <<
"_ff_input_gate.data();\n";
369 out << SP << fType <<
" * " << OpName <<
"_ff_output_gate = fVec_" << OpName <<
"_ff_output_gate.data();\n";
370 out << SP << fType <<
" * " << OpName <<
"_ff_cell_gate = fVec_" << OpName <<
"_ff_cell_gate.data();\n";
371 if (fAttrInputForget == 0) {
372 out << SP << fType <<
" * " << OpName <<
"_ff_forget_gate = fVec_" << OpName <<
"_ff_forget_gate.data();\n";
375 out << SP << fType <<
" " << OpName <<
"_ff_input_gate[" << ff_size <<
"] = {0};\n";
376 out << SP << fType <<
" " << OpName <<
"_ff_output_gate[" << ff_size <<
"] = {0};\n";
377 out << SP << fType <<
" " << OpName <<
"_ff_cell_gate[" << ff_size <<
"] = {0};\n";
378 if (fAttrInputForget == 0) {
379 out << SP << fType <<
" " << OpName <<
"_ff_forget_gate[" << ff_size <<
"] = {0};\n";
383 size_t hidden_state_size = seq_length * num_directions * batch_size * fAttrHiddenSize;
385 out << SP << fType <<
" * " << OpName <<
"_input_gate = fVec_" << OpName <<
"_input_gate.data();\n";
386 out << SP << fType <<
" * " << OpName <<
"_output_gate = fVec_" << OpName <<
"_output_gate.data();\n";
387 out << SP << fType <<
" * " << OpName <<
"_cell_gate = fVec_" << OpName <<
"_cell_gate.data();\n";
388 if (fAttrInputForget == 0) {
389 out << SP << fType <<
" * " << OpName <<
"_forget_gate = fVec_" << OpName <<
"_forget_gate.data();\n";
392 out << SP << fType <<
" " << OpName <<
"_input_gate[" << hidden_state_size <<
"] = {0};\n";
393 out << SP << fType <<
" " << OpName <<
"_output_gate[" << hidden_state_size <<
"] = {0};\n";
394 out << SP << fType <<
" " << OpName <<
"_cell_gate[" << hidden_state_size <<
"] = {0};\n";
395 if (fAttrInputForget == 0) {
396 out << SP << fType <<
" " << OpName <<
"_forget_gate[" << hidden_state_size <<
"] = {0};\n";
401 out << SP << fType <<
" * " << OpName <<
"_cell_state = fVec_" << OpName <<
"_cell_state.data();\n";
402 out << SP << fType <<
" * " << OpName <<
"_new_cell_state = fVec_" << OpName <<
"_new_cell_state.data();\n";
404 out << SP << fType <<
" " << OpName <<
"_cell_state[" << hidden_state_size <<
"] = {0};\n";
405 out << SP << fType <<
" " << OpName <<
"_new_cell_state[" << hidden_state_size <<
"] = {0};\n";
409 if (fAttrLayout == 0 && !fNY.empty()) {
410 out << SP << fType <<
" *" << OpName <<
"_hidden_state = tensor_" << fNY <<
";\n";
413 out << SP << fType <<
" * " << OpName <<
"_hidden_state = fVec_" << OpName <<
"_hidden_state.data();\n";
415 out << SP << fType <<
" " << OpName <<
"_hidden_state[" << hidden_state_size <<
"] = {0};\n";
419 out << SP <<
"char " << OpName <<
"_transA = 'N';\n";
420 out << SP <<
"char " << OpName <<
"_transB = 'T';\n";
421 out << SP <<
"int " << OpName <<
"_m = " << seq_length * batch_size <<
";\n";
422 out << SP <<
"int " << OpName <<
"_n = " << fAttrHiddenSize <<
";\n";
423 out << SP <<
"int " << OpName <<
"_k = " << input_size <<
";\n";
424 if (fType ==
"float") {
425 out << SP << fType <<
" " << OpName <<
"_alpha = 1.;\n";
426 out << SP << fType <<
" " << OpName <<
"_beta = 0.;\n";
429 out << SP <<
"int " << OpName <<
"_bias_size = " << seq_length * batch_size * fAttrHiddenSize <<
";\n";
430 out << SP <<
"int " << OpName <<
"_incx = 1;\n";
431 out << SP <<
"int " << OpName <<
"_incy = 1;\n";
434 for (
size_t direction = 0; direction < num_directions; direction++) {
435 if (direction == 0) {
436 if (fType ==
"float") {
438 out << SP <<
"BLAS::sgemm_(&" << OpName <<
"_transB, &" << OpName <<
"_transA, &"
439 << OpName <<
"_n, &" << OpName <<
"_m, &" << OpName <<
"_k, &" << OpName <<
"_alpha, tensor_"
440 << fNW <<
", &" << OpName <<
"_k, " << OpName <<
"_input, &" << OpName <<
"_k, &"
441 << OpName <<
"_beta, " << OpName <<
"_ff_input_gate, &" << OpName <<
"_n);\n";
443 size_t wo_offset = fAttrHiddenSize * input_size;
444 out << SP <<
"BLAS::sgemm_(&" << OpName <<
"_transB, &" << OpName <<
"_transA, &"
445 << OpName <<
"_n, &" << OpName <<
"_m, &" << OpName <<
"_k, &" << OpName <<
"_alpha, tensor_"
446 << fNW <<
" + " << wo_offset <<
", &" << OpName <<
"_k, " << OpName <<
"_input, &"
447 << OpName <<
"_k, &" << OpName <<
"_beta, " << OpName <<
"_ff_output_gate, &" << OpName <<
"_n);\n";
449 size_t wc_offset = 3 * fAttrHiddenSize * input_size;
450 out << SP <<
"BLAS::sgemm_(&" << OpName <<
"_transB, &" << OpName <<
"_transA, &"
451 << OpName <<
"_n, &" << OpName <<
"_m, &" << OpName <<
"_k, &" << OpName <<
"_alpha, tensor_"
452 << fNW <<
" + " << wc_offset <<
", &" << OpName <<
"_k, " << OpName <<
"_input, &"
453 << OpName <<
"_k, &" << OpName <<
"_beta, " << OpName <<
"_ff_cell_gate, &" << OpName <<
"_n);\n";
456 if (fType ==
"float") {
458 size_t wi_offset = 4 * fAttrHiddenSize * input_size;
459 out << SP <<
"BLAS::sgemm_(&" << OpName <<
"_transB, &" << OpName <<
"_transA, &"
460 << OpName <<
"_n, &" << OpName <<
"_m, &" << OpName <<
"_k, &" << OpName <<
"_alpha, tensor_"
461 << fNW <<
" + " << wi_offset <<
", &" << OpName <<
"_k, " << OpName <<
"_input, &"
462 << OpName <<
"_k, &" << OpName <<
"_beta, " << OpName <<
"_ff_input_gate, &" << OpName <<
"_n);\n";
464 size_t wo_offset = 4 * fAttrHiddenSize * input_size + 1 * fAttrHiddenSize * input_size;
465 out << SP <<
"BLAS::sgemm_(&" << OpName <<
"_transB, &" << OpName <<
"_transA, &"
466 << OpName <<
"_n, &" << OpName <<
"_m, &" << OpName <<
"_k, &" << OpName <<
"_alpha, tensor_"
467 << fNW <<
" + " << wo_offset <<
", &" << OpName <<
"_k, " << OpName <<
"_input, &"
468 << OpName <<
"_k, &" << OpName <<
"_beta, " << OpName <<
"_ff_output_gate, &" << OpName <<
"_n);\n";
470 size_t wc_offset = 4 * fAttrHiddenSize * input_size + 3 * fAttrHiddenSize * input_size;
471 out << SP <<
"BLAS::sgemm_(&" << OpName <<
"_transB, &" << OpName <<
"_transA, &"
472 << OpName <<
"_n, &" << OpName <<
"_m, &" << OpName <<
"_k, &" << OpName <<
"_alpha, tensor_"
473 << fNW <<
" + " << wc_offset <<
", &" << OpName <<
"_k, " << OpName <<
"_input, &"
474 << OpName <<
"_k, &" << OpName <<
"_beta, " << OpName <<
"_ff_cell_gate, &" << OpName <<
"_n);\n";
477 if (fAttrInputForget == 0) {
479 if (direction == 0) {
480 if (fType ==
"float") {
481 size_t wf_offset = 2 * fAttrHiddenSize * input_size;
482 out << SP <<
"BLAS::sgemm_(&" << OpName <<
"_transB, &" << OpName <<
"_transA, &"
483 << OpName <<
"_n, &" << OpName <<
"_m, &" << OpName <<
"_k, &" << OpName <<
"_alpha, tensor_"
484 << fNW <<
" + " << wf_offset <<
", &" << OpName <<
"_k, " << OpName <<
"_input, &"
485 << OpName <<
"_k, &" << OpName <<
"_beta, " << OpName <<
"_ff_forget_gate, &" << OpName <<
"_n);\n";
488 if (fType ==
"float") {
489 size_t wf_offset = 4 * fAttrHiddenSize * input_size + 2 * fAttrHiddenSize * input_size;
490 out << SP <<
"BLAS::sgemm_(&" << OpName <<
"_transB, &" << OpName <<
"_transA, &"
491 << OpName <<
"_n, &" << OpName <<
"_m, &" << OpName <<
"_k, &" << OpName <<
"_alpha, tensor_"
492 << fNW <<
" + " << wf_offset <<
", &" << OpName <<
"_k, " << OpName <<
"_input, &"
493 << OpName <<
"_k, &" << OpName <<
"_beta, " << OpName <<
"_ff_forget_gate, &" << OpName <<
"_n);\n";
500 if (direction == 0) {
501 if (fType ==
"float") {
503 out << SP <<
"BLAS::saxpy_(&" << OpName <<
"_bias_size, &" << OpName <<
"_alpha, tensor_"
504 << fNB <<
", &" << OpName <<
"_incx, " << OpName <<
"_ff_input_gate, &" << OpName <<
"_incy);\n";
506 size_t bo_offset = seq_length * batch_size * fAttrHiddenSize;
507 out << SP <<
"BLAS::saxpy_(&" << OpName <<
"_bias_size, &" << OpName <<
"_alpha, tensor_"
508 << fNB <<
" + " << bo_offset <<
", &" << OpName <<
"_incx, " << OpName <<
"_ff_output_gate, &"
509 << OpName <<
"_incy);\n";
511 size_t bc_offset = 3 * seq_length * batch_size * fAttrHiddenSize;
512 out << SP <<
"BLAS::saxpy_(&" << OpName <<
"_bias_size, &" << OpName <<
"_alpha, tensor_"
513 << fNB <<
" + " << bc_offset <<
", &" << OpName <<
"_incx, " << OpName <<
"_ff_cell_gate, &"
514 << OpName <<
"_incy);\n";
517 if (fType ==
"float") {
519 size_t bi_offset = 4 * seq_length * batch_size * fAttrHiddenSize;
520 out << SP <<
"BLAS::saxpy_(&" << OpName <<
"_bias_size, &" << OpName <<
"_alpha, tensor_"
521 << fNB <<
" + " << bi_offset <<
", &" << OpName <<
"_incx, " << OpName <<
"_ff_input_gate, &"
522 << OpName <<
"_incy);\n";
524 size_t bo_offset = 4 * seq_length * batch_size * fAttrHiddenSize
525 + seq_length * batch_size * fAttrHiddenSize;
526 out << SP <<
"BLAS::saxpy_(&" << OpName <<
"_bias_size, &" << OpName <<
"_alpha, tensor_"
527 << fNB <<
" + " << bo_offset <<
", &" << OpName <<
"_incx, " << OpName <<
"_ff_output_gate, &"
528 << OpName <<
"_incy);\n";
530 size_t bc_offset = 4 * num_directions * seq_length * batch_size * fAttrHiddenSize
531 + 3 * seq_length * batch_size * fAttrHiddenSize;
532 out << SP <<
"BLAS::saxpy_(&" << OpName <<
"_bias_size, &" << OpName <<
"_alpha, tensor_"
533 << fNB <<
" + " << bc_offset <<
", &" << OpName <<
"_incx, " << OpName <<
"_ff_cell_gate, &"
534 << OpName <<
"_incy);\n";
537 if (fAttrInputForget == 0) {
539 if (direction == 0) {
540 if (fType ==
"float") {
541 size_t bo_offset = 2 * seq_length * batch_size * fAttrHiddenSize;
542 out << SP <<
"BLAS::saxpy_(&" << OpName <<
"_bias_size, &" << OpName <<
"_alpha, tensor_"
543 << fNB <<
" + " << bo_offset <<
", &" << OpName <<
"_incx, " << OpName <<
"_ff_forget_gate, &"
544 << OpName <<
"_incy);\n";
547 if (fType ==
"float") {
548 size_t bo_offset = 4 * seq_length * batch_size * fAttrHiddenSize
549 + 2 * seq_length * batch_size * fAttrHiddenSize;
550 out << SP <<
"BLAS::saxpy_(&" << OpName <<
"_bias_size, &" << OpName <<
"_alpha, tensor_"
551 << fNB <<
" + " << bo_offset <<
", &" << OpName <<
"_incx, " << OpName <<
"_ff_forget_gate, &"
552 << OpName <<
"_incy);\n";
560 out << SP <<
"for (size_t seq = 0; seq < " << seq_length <<
"; seq++) {\n";
561 out << SP << SP <<
"size_t ff_offset = seq * " << batch_size * fAttrHiddenSize <<
";\n";
562 if (direction == 0) {
563 out << SP << SP <<
"size_t gate_offset = seq * " << num_directions * batch_size * fAttrHiddenSize
566 out << SP << SP <<
"size_t gate_offset = seq * " << num_directions * batch_size * fAttrHiddenSize
567 <<
" + " << batch_size * fAttrHiddenSize <<
";\n";
569 size_t ff_seq_size = batch_size * fAttrHiddenSize;
570 out << SP << SP <<
"std::copy(" << OpName <<
"_ff_input_gate + ff_offset, " << OpName
571 <<
"_ff_input_gate + ff_offset + " << ff_seq_size <<
", " << OpName <<
"_input_gate + gate_offset);\n";
572 out << SP << SP <<
"std::copy(" << OpName <<
"_ff_output_gate + ff_offset, " << OpName
573 <<
"_ff_output_gate + ff_offset + " << ff_seq_size <<
", " << OpName <<
"_output_gate + gate_offset);\n";
574 out << SP << SP <<
"std::copy(" << OpName <<
"_ff_cell_gate + ff_offset, " << OpName
575 <<
"_ff_cell_gate + ff_offset + " << ff_seq_size <<
", " << OpName <<
"_cell_gate + gate_offset);\n";
576 if (fAttrInputForget == 0) {
577 out << SP << SP <<
"std::copy(" << OpName <<
"_ff_forget_gate + ff_offset, " << OpName
578 <<
"_ff_forget_gate + ff_offset + " << ff_seq_size <<
", " << OpName <<
"_forget_gate + gate_offset);\n";
582 out << SP <<
"for (size_t seq = 0; seq < " << seq_length <<
"; seq++) {\n";
583 if (fAttrDirection ==
"backward" || direction == 1) {
584 out << SP << SP <<
"size_t index = " << seq_length - 1 <<
" - seq;\n";
586 out << SP << SP <<
"size_t index = seq;\n";
588 out << SP << SP <<
"int m2 = " << batch_size <<
";\n";
589 if (direction == 0) {
590 out << SP << SP <<
"size_t offset = index * " << num_directions * batch_size * fAttrHiddenSize
593 out << SP << SP <<
"size_t offset = index * " << num_directions * batch_size * fAttrHiddenSize
594 <<
" + " << batch_size * fAttrHiddenSize <<
";\n";
596 size_t size = batch_size * fAttrHiddenSize;
598 out << SP << SP <<
"if (seq == 0) {\n";
599 if (!fNInitial_h.empty()) {
600 if (direction == 0) {
601 if (fType ==
"float") {
602 out << SP << SP << SP <<
"BLAS::sgemm_(&" << OpName <<
"_transB, &" << OpName <<
"_transA, &"
603 << OpName <<
"_n, &m2, &" << OpName <<
"_n, &" << OpName <<
"_alpha, tensor_" << fNR <<
", &"
604 << OpName <<
"_n, " << OpName <<
"_initial_hidden_state, &" << OpName <<
"_n, &" << OpName
605 <<
"_alpha, " << OpName <<
"_input_gate + offset, &" << OpName <<
"_n);\n";
606 size_t ro_offset = fAttrHiddenSize * fAttrHiddenSize;
607 out << SP << SP << SP <<
"BLAS::sgemm_(&" << OpName <<
"_transB, &" << OpName <<
"_transA, &"
608 << OpName <<
"_n, &m2, &" << OpName <<
"_n, &" << OpName <<
"_alpha, tensor_" << fNR <<
" + "
609 << ro_offset <<
", &" << OpName <<
"_n, " << OpName <<
"_initial_hidden_state, &" << OpName
610 <<
"_n, &" << OpName <<
"_alpha, " << OpName <<
"_output_gate + offset, &" << OpName <<
"_n);\n";
611 size_t rc_offset = 3 * fAttrHiddenSize * fAttrHiddenSize;
612 out << SP << SP << SP <<
"BLAS::sgemm_(&" << OpName <<
"_transB, &" << OpName <<
"_transA, &"
613 << OpName <<
"_n, &m2, &" << OpName <<
"_n, &" << OpName <<
"_alpha, tensor_" << fNR <<
" + "
614 << rc_offset <<
", &" << OpName <<
"_n, " << OpName <<
"_initial_hidden_state, &" << OpName
615 <<
"_n, &" << OpName <<
"_alpha, " << OpName <<
"_cell_gate + offset, &" << OpName <<
"_n);\n";
616 if (fAttrInputForget == 0) {
617 size_t rf_offset = 2 * fAttrHiddenSize * fAttrHiddenSize;
618 out << SP << SP << SP <<
"BLAS::sgemm_(&" << OpName <<
"_transB, &" << OpName <<
"_transA, &"
619 << OpName <<
"_n, &m2, &" << OpName <<
"_n, &" << OpName <<
"_alpha, tensor_" << fNR <<
" + "
620 << rf_offset <<
", &" << OpName <<
"_n, " << OpName <<
"_initial_hidden_state, &" << OpName
621 <<
"_n, &" << OpName <<
"_alpha, " << OpName <<
"_forget_gate + offset, &" << OpName <<
"_n);\n";
625 if (fType ==
"float") {
626 size_t ri_offset = 4 * fAttrHiddenSize * fAttrHiddenSize;
627 out << SP << SP << SP <<
"BLAS::sgemm_(&" << OpName <<
"_transB, &" << OpName <<
"_transA, &"
628 << OpName <<
"_n, &m2, &" << OpName <<
"_n, &" << OpName <<
"_alpha, tensor_" << fNR <<
" + "
629 << ri_offset <<
", &" << OpName <<
"_n, " << OpName <<
"_initial_hidden_state, &" << OpName
630 <<
"_n, &" << OpName <<
"_alpha, " << OpName <<
"_input_gate + offset, &" << OpName <<
"_n);\n";
631 size_t ro_offset = 4 * fAttrHiddenSize * fAttrHiddenSize + 1 * fAttrHiddenSize * fAttrHiddenSize;
632 out << SP << SP << SP <<
"BLAS::sgemm_(&" << OpName <<
"_transB, &" << OpName <<
"_transA, &"
633 << OpName <<
"_n, &m2, &" << OpName <<
"_n, &" << OpName <<
"_alpha, tensor_" << fNR <<
" + "
634 << ro_offset <<
", &" << OpName <<
"_n, " << OpName <<
"_initial_hidden_state, &" << OpName
635 <<
"_n, &" << OpName <<
"_alpha, " << OpName <<
"_output_gate + offset, &" << OpName <<
"_n);\n";
636 size_t rc_offset = 4 * fAttrHiddenSize * fAttrHiddenSize + 3 * fAttrHiddenSize * fAttrHiddenSize;
637 out << SP << SP << SP <<
"BLAS::sgemm_(&" << OpName <<
"_transB, &" << OpName <<
"_transA, &"
638 << OpName <<
"_n, &m2, &" << OpName <<
"_n, &" << OpName <<
"_alpha, tensor_" << fNR <<
" + "
639 << rc_offset <<
", &" << OpName <<
"_n, " << OpName <<
"_initial_hidden_state, &" << OpName
640 <<
"_n, &" << OpName <<
"_alpha, " << OpName <<
"_cell_gate + offset, &" << OpName <<
"_n);\n";
641 if (fAttrInputForget == 0) {
642 size_t rf_offset = 4 * fAttrHiddenSize * fAttrHiddenSize + 2 * fAttrHiddenSize * fAttrHiddenSize;
643 out << SP << SP << SP <<
"BLAS::sgemm_(&" << OpName <<
"_transB, &" << OpName <<
"_transA, &"
644 << OpName <<
"_n, &m2, &" << OpName <<
"_n, &" << OpName <<
"_alpha, tensor_" << fNR <<
" + "
645 << rf_offset <<
", &" << OpName <<
"_n, " << OpName <<
"_initial_hidden_state, &" << OpName
646 <<
"_n, &" << OpName <<
"_alpha, " << OpName <<
"_forget_gate + offset, &" << OpName <<
"_n);\n";
651 out << SP << SP <<
"} else {\n";
653 if (direction == 0) {
654 if (fAttrDirection ==
"backward") {
655 out << SP << SP << SP <<
"size_t previous_offset = (index + 1) * "
656 << num_directions * batch_size * fAttrHiddenSize <<
";\n";
658 out << SP << SP << SP <<
"size_t previous_offset = (seq - 1) * "
659 << num_directions * batch_size * fAttrHiddenSize <<
";\n";
661 if (fType ==
"float") {
662 out << SP << SP << SP <<
"BLAS::sgemm_(&" << OpName <<
"_transB, &" << OpName <<
"_transA, &"
663 << OpName <<
"_n, &m2, &" << OpName <<
"_n, &" << OpName <<
"_alpha, tensor_" << fNR <<
", &"
664 << OpName <<
"_n, " << OpName <<
"_hidden_state + previous_offset, &" << OpName <<
"_n, &"
665 << OpName <<
"_alpha, " << OpName <<
"_input_gate + offset, &" << OpName <<
"_n);\n";
666 size_t ro_offset = 1 * fAttrHiddenSize * fAttrHiddenSize;
667 out << SP << SP << SP <<
"BLAS::sgemm_(&" << OpName <<
"_transB, &" << OpName <<
"_transA, &"
668 << OpName <<
"_n, &m2, &" << OpName <<
"_n, &" << OpName <<
"_alpha, tensor_" << fNR <<
" + "
669 << ro_offset <<
", &" << OpName <<
"_n, " << OpName <<
"_hidden_state + previous_offset, &"
670 << OpName <<
"_n, &" << OpName <<
"_alpha, " << OpName <<
"_output_gate + offset, &"
671 << OpName <<
"_n);\n";
672 size_t rc_offset = 3 * fAttrHiddenSize * fAttrHiddenSize;
673 out << SP << SP << SP <<
"BLAS::sgemm_(&" << OpName <<
"_transB, &" << OpName <<
"_transA, &"
674 << OpName <<
"_n, &m2, &" << OpName <<
"_n, &" << OpName <<
"_alpha, tensor_" << fNR <<
" + "
675 << rc_offset <<
", &" << OpName <<
"_n, " << OpName <<
"_hidden_state + previous_offset, &"
676 << OpName <<
"_n, &" << OpName <<
"_alpha, " << OpName <<
"_cell_gate + offset, &"
677 << OpName <<
"_n);\n";
678 if (fAttrInputForget == 0) {
679 size_t rf_offset = 2 * fAttrHiddenSize * fAttrHiddenSize;
680 out << SP << SP << SP <<
"BLAS::sgemm_(&" << OpName <<
"_transB, &" << OpName <<
"_transA, &"
681 << OpName <<
"_n, &m2, &" << OpName <<
"_n, &" << OpName <<
"_alpha, tensor_" << fNR <<
" + "
682 << rf_offset <<
", &" << OpName <<
"_n, " << OpName <<
"_hidden_state + previous_offset, &"
683 << OpName <<
"_n, &" << OpName <<
"_alpha, " << OpName <<
"_forget_gate + offset, &"
684 << OpName <<
"_n);\n";
688 out << SP << SP << SP <<
"size_t previous_offset = (index + 1) * "
689 << num_directions * batch_size * fAttrHiddenSize <<
" + " << batch_size * fAttrHiddenSize <<
";\n";
690 if (fType ==
"float") {
691 size_t ri_offset = 4 * fAttrHiddenSize * fAttrHiddenSize;
692 out << SP << SP << SP <<
"BLAS::sgemm_(&" << OpName <<
"_transB, &" << OpName <<
"_transA, &"
693 << OpName <<
"_n, &m2, &" << OpName <<
"_n, &" << OpName <<
"_alpha, tensor_" << fNR <<
" + "
694 << ri_offset <<
", &" << OpName <<
"_n, " << OpName <<
"_hidden_state + previous_offset, &"
695 << OpName <<
"_n, &" << OpName <<
"_alpha, " << OpName <<
"_input_gate + offset, &"
696 << OpName <<
"_n);\n";
697 size_t ro_offset = 4 * fAttrHiddenSize * fAttrHiddenSize + fAttrHiddenSize * fAttrHiddenSize;
698 out << SP << SP << SP <<
"BLAS::sgemm_(&" << OpName <<
"_transB, &" << OpName <<
"_transA, &"
699 << OpName <<
"_n, &m2, &" << OpName <<
"_n, &" << OpName <<
"_alpha, tensor_" << fNR <<
" + "
700 << ro_offset <<
", &" << OpName <<
"_n, " << OpName <<
"_hidden_state + previous_offset, &"
701 << OpName <<
"_n, &" << OpName <<
"_alpha, " << OpName <<
"_output_gate + offset, &"
702 << OpName <<
"_n);\n";
703 size_t rc_offset = 4 * fAttrHiddenSize * fAttrHiddenSize + 3 * fAttrHiddenSize * fAttrHiddenSize;
704 out << SP << SP << SP <<
"BLAS::sgemm_(&" << OpName <<
"_transB, &" << OpName <<
"_transA, &"
705 << OpName <<
"_n, &m2, &" << OpName <<
"_n, &" << OpName <<
"_alpha, tensor_" << fNR <<
" + "
706 << rc_offset <<
", &" << OpName <<
"_n, " << OpName <<
"_hidden_state + previous_offset, &"
707 << OpName <<
"_n, &" << OpName <<
"_alpha, " << OpName <<
"_cell_gate + offset, &"
708 << OpName <<
"_n);\n";
709 if (fAttrInputForget == 0) {
710 size_t rf_offset = 4 * fAttrHiddenSize * fAttrHiddenSize + 2 * fAttrHiddenSize * fAttrHiddenSize;
711 out << SP << SP << SP <<
"BLAS::sgemm_(&" << OpName <<
"_transB, &" << OpName <<
"_transA, &"
712 << OpName <<
"_n, &m2, &" << OpName <<
"_n, &" << OpName <<
"_alpha, tensor_" << fNR <<
" + "
713 << rf_offset <<
", &" << OpName <<
"_n, " << OpName <<
"_hidden_state + previous_offset, &"
714 << OpName <<
"_n, &" << OpName <<
"_alpha, " << OpName <<
"_forget_gate + offset, &"
715 << OpName <<
"_n);\n";
719 out << SP << SP <<
"}\n";
722 if (fAttrClip > .0) {
723 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
724 if (fType ==
"float") {
725 out << SP << SP << SP <<
"float x = (" << OpName <<
"_cell_gate[i] > " << -fAttrClip <<
") ? "
726 << OpName <<
"_cell_gate[i] : " << -fAttrClip <<
";\n";
728 out << SP << SP << SP << OpName <<
"_cell_gate[i] = (x < " << fAttrClip <<
") ? x : "
729 << fAttrClip <<
";\n";
730 out << SP << SP <<
"}\n";
733 if (fAttrActivations[direction * 3 + 1] ==
"Relu") {
734 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
735 out << SP << SP << SP <<
"if (" << OpName <<
"_cell_gate[i] < 0.)\n";
736 out << SP << SP << SP << SP << OpName <<
"_cell_gate[i] = 0.;\n";
737 out << SP << SP <<
"}\n";
738 }
else if (fAttrActivations[direction * 3 + 1] ==
"Tanh") {
739 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
740 if (fType ==
"float") {
741 out << SP << SP << SP <<
"float ex = exp(-2 * " << OpName <<
"_cell_gate[i]);\n";
743 out << SP << SP << SP << SP << OpName <<
"_cell_gate[i] = (1. - ex) / (1. + ex);\n";
744 out << SP << SP <<
"}\n";
745 }
else if (fAttrActivations[direction * 3 + 1] ==
"Sigmoid") {
746 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
747 out << SP << SP << SP << SP << OpName <<
"_cell_gate[i] = 1. / (1. + exp(-" << OpName
748 <<
"_cell_gate[i]));\n";
749 out << SP << SP <<
"}\n";
750 }
else if (fAttrActivations[direction * 3 + 1] ==
"Affine") {
751 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
752 out << SP << SP << SP << SP << OpName <<
"_cell_gate[i] = "
753 << fAttrActivationAlpha[direction * 3 + 1] <<
" * " << OpName <<
"_cell_gate[i] + "
754 << fAttrActivationBeta[direction * 3 + 1] <<
";\n";
755 out << SP << SP <<
"}\n";
756 }
else if (fAttrActivations[direction * 3 + 1] ==
"ScaledTanh") {
757 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
758 if (fType ==
"float") {
759 out << SP << SP << SP <<
"float ex = exp(-2 * " << fAttrActivationBeta[direction * 3 + 1]
760 <<
" * "<< OpName <<
"_cell_gate[i]);\n";
762 out << SP << SP << SP << SP << OpName <<
"_cell_gate[i] = "
763 << fAttrActivationAlpha[direction * 3 + 1] <<
" * (1. - ex) / (1. + ex);\n";
764 out << SP << SP <<
"}\n";
765 }
else if (fAttrActivations[direction * 3 + 1] ==
"HardSigmoid") {
766 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
767 if (fType ==
"float") {
768 out << SP << SP << SP <<
"float a = " << fAttrActivationAlpha[direction * 3 + 1] <<
" * "
769 << OpName <<
"_cell_gate[i] + " << fAttrActivationBeta[direction * 3 + 1] <<
";\n";
770 out << SP << SP << SP <<
"float b = (a > 0.) ? a : 0.;\n";
772 out << SP << SP << SP << SP << OpName <<
"_cell_gate[i] = (b < 1.) ? b : 1.;\n";
773 out << SP << SP <<
"}\n";
774 }
else if (fAttrActivations[direction * 3 + 1] ==
"LeakyRelu") {
775 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
776 out << SP << SP << SP <<
"if (" << OpName <<
"_cell_gate[i] < 0.)\n";
777 out << SP << SP << SP << SP << OpName <<
"_cell_gate[i] = "
778 << fAttrActivationAlpha[direction * 3 + 1] <<
" * " << OpName <<
"_cell_gate[i];\n";
779 out << SP << SP <<
"}\n";
780 }
else if (fAttrActivations[direction * 3 + 1] ==
"ThresholdRelu") {
781 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
782 out << SP << SP << SP <<
"if (" << OpName <<
"_cell_gate[i] < "
783 << fAttrActivationAlpha[direction * 3 + 1] <<
")\n";
784 out << SP << SP << SP << SP << OpName <<
"_cell_gate[i] = 0.;\n";
785 out << SP << SP <<
"}";
786 }
else if (fAttrActivations[direction * 3 + 1] ==
"Elu") {
787 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
788 out << SP << SP << SP <<
"if (" << OpName <<
"_cell_gate[i] < 0.)\n";
789 out << SP << SP << SP << SP << OpName <<
"_cell_gate[i] = "
790 << fAttrActivationAlpha[direction * 3 + 1] <<
" * exp(" << OpName <<
"_cell_gate[i] - 1.);\n";
791 out << SP << SP <<
"}\n";
792 }
else if (fAttrActivations[direction * 3 + 1] ==
"Softsign") {
793 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
794 out << SP << SP << SP << SP << OpName <<
"_cell_gate[i] = " << OpName
795 <<
"_cell_gate[i] / (1. + abs(" << OpName <<
"_cell_gate[i]));\n";
796 out << SP << SP <<
"}\n";
798 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
799 out << SP << SP << SP << SP << OpName <<
"_cell_gate[i] = log(1. + exp("
800 << OpName <<
"_cell_gate[i]));\n";
801 out << SP << SP <<
"}\n";
807 out << SP << SP <<
"if (seq == 0) {\n";
808 if (!fNInitial_c.empty()) {
809 if (direction == 0) {
810 out << SP << SP << SP <<
"for (size_t i = 0; i < " <<
size <<
"; i++) {\n";
811 out << SP << SP << SP << SP << OpName <<
"_input_gate[i + offset] += tensor_" << fNP
812 <<
"[i] * " << OpName <<
"_initial_cell_state[i];\n";
813 out << SP << SP << SP <<
"}\n";
814 if (fAttrInputForget == 0) {
815 size_t pf_offset = batch_size * fAttrHiddenSize;
816 out << SP << SP << SP <<
"for (size_t i = 0; i < " <<
size <<
"; i++) {\n";
817 out << SP << SP << SP << SP << OpName <<
"_forget_gate[i + offset] += tensor_" << fNP
818 <<
"[i + " << pf_offset <<
"] * " << OpName <<
"_initial_cell_state[i];\n";
819 out << SP << SP << SP <<
"}\n";
822 size_t pi_offset = 3 * batch_size * fAttrHiddenSize;
823 size_t initial_c_offset = batch_size * fAttrHiddenSize;
824 out << SP << SP << SP <<
"for (size_t i = 0; i < " <<
size <<
"; i++) {\n";
825 out << SP << SP << SP << SP << OpName <<
"_input_gate[i + offset] += tensor_" << fNP
826 <<
"[i + " << pi_offset <<
"] * " << OpName <<
"_initial_cell_state[i + " << initial_c_offset
828 out << SP << SP << SP <<
"}\n";
829 if (fAttrInputForget == 0) {
830 size_t pf_offset = 3 * batch_size * fAttrHiddenSize + batch_size * fAttrHiddenSize;
831 out << SP << SP << SP <<
"for (size_t i = 0; i < " <<
size <<
"; i++) {\n";
832 out << SP << SP << SP << SP << OpName <<
"_forget_gate[i + offset] += tensor_" << fNP
833 <<
"[i + " << pf_offset <<
"] * " << OpName <<
"_initial_cell_state[i + " << initial_c_offset
835 out << SP << SP << SP <<
"}\n";
839 out << SP << SP <<
"} else {\n";
840 if (direction == 0) {
841 if (fAttrDirection ==
"backward") {
842 out << SP << SP << SP <<
"size_t c_offset = (index + 1) * "
843 << num_directions * batch_size * fAttrHiddenSize <<
";\n";
845 out << SP << SP << SP <<
"size_t c_offset = (seq - 1) * "
846 << num_directions * batch_size * fAttrHiddenSize <<
";\n";
848 out << SP << SP << SP <<
"for (size_t i = 0; i < " <<
size <<
"; i++) {\n";
849 out << SP << SP << SP << SP << OpName <<
"_input_gate[i + offset] += tensor_" << fNP
850 <<
"[i] * " << OpName <<
"_cell_state[i + c_offset];\n";
851 out << SP << SP << SP <<
"}\n";
852 if (fAttrInputForget == 0) {
853 size_t pf_offset = batch_size * fAttrHiddenSize;
854 out << SP << SP << SP <<
"for (size_t i = 0; i < " <<
size <<
"; i++) {\n";
855 out << SP << SP << SP << SP << OpName <<
"_forget_gate[i + offset] += tensor_" << fNP
856 <<
"[i + " << pf_offset <<
"] * " << OpName <<
"_cell_state[i + c_offset];\n";
857 out << SP << SP << SP <<
"}\n";
860 size_t pi_offset = 3 * batch_size * fAttrHiddenSize;
861 out << SP << SP << SP <<
"size_t c_offset = (index + 1) * "
862 << num_directions * batch_size * fAttrHiddenSize <<
" + " << batch_size * fAttrHiddenSize <<
";\n";
863 out << SP << SP << SP <<
"for (size_t i = 0; i < " <<
size <<
"; i++) {\n";
864 out << SP << SP << SP << SP << OpName <<
"_input_gate[i + offset] += tensor_" << fNP
865 <<
"[i + " << pi_offset <<
"] * " << OpName <<
"_cell_state[i + c_offset];\n";
866 out << SP << SP << SP <<
"}\n";
867 if (fAttrInputForget == 0) {
868 size_t pf_offset = 3 * batch_size * fAttrHiddenSize + batch_size * fAttrHiddenSize;
869 out << SP << SP << SP <<
"for (size_t i = 0; i < " <<
size <<
"; i++) {\n";
870 out << SP << SP << SP << SP << OpName <<
"_forget_gate[i + offset] += tensor_" << fNP
871 <<
"[i + " << pf_offset <<
"] * " << OpName <<
"_cell_state[i + c_offset];\n";
872 out << SP << SP << SP <<
"}\n";
875 out << SP << SP <<
"}\n";
879 if (fAttrClip > .0) {
880 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
881 if (fType ==
"float") {
882 out << SP << SP << SP <<
"float x = (" << OpName <<
"_input_gate[i] > " << -fAttrClip <<
") ? "
883 << OpName <<
"_input_gate[i] : " << -fAttrClip <<
";\n";
885 out << SP << SP << SP << OpName <<
"_input_gate[i] = (x < " << fAttrClip <<
") ? x : "
886 << fAttrClip <<
";\n";
887 out << SP << SP <<
"}\n";
890 if (fAttrActivations[direction * 3] ==
"Relu") {
891 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
892 out << SP << SP << SP <<
"if (" << OpName <<
"_input_gate[i] < 0.)\n";
893 out << SP << SP << SP << SP << OpName <<
"_input_gate[i] = 0.;\n";
894 out << SP << SP <<
"}\n";
895 }
else if (fAttrActivations[direction * 3] ==
"Tanh") {
896 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
897 if (fType ==
"float") {
898 out << SP << SP << SP <<
"float ex = exp(-2 * " << OpName <<
"_input_gate[i]);\n";
900 out << SP << SP << SP << SP << OpName <<
"_input_gate[i] = (1. - ex) / (1. + ex);\n";
901 out << SP << SP <<
"}\n";
902 }
else if (fAttrActivations[direction * 3] ==
"Sigmoid") {
903 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
904 out << SP << SP << SP << SP << OpName <<
"_input_gate[i] = 1. / (1. + exp(-" << OpName
905 <<
"_input_gate[i]));\n";
906 out << SP << SP <<
"}\n";
907 }
else if (fAttrActivations[direction * 3] ==
"Affine") {
908 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
909 out << SP << SP << SP << SP << OpName <<
"_input_gate[i] = "
910 << fAttrActivationAlpha[direction * 3] <<
" * " << OpName <<
"_input_gate[i] + "
911 << fAttrActivationBeta[direction * 3] <<
";\n";
912 out << SP << SP <<
"}\n";
913 }
else if (fAttrActivations[direction * 3] ==
"ScaledTanh") {
914 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
915 if (fType ==
"float") {
916 out << SP << SP << SP <<
"float ex = exp(-2 * " << fAttrActivationBeta[direction * 3]
917 <<
" * "<< OpName <<
"_input_gate[i]);\n";
919 out << SP << SP << SP << SP << OpName <<
"_input_gate[i] = "
920 << fAttrActivationAlpha[direction * 3] <<
" * (1. - ex) / (1. + ex);\n";
921 out << SP << SP <<
"}\n";
922 }
else if (fAttrActivations[direction * 3] ==
"HardSigmoid") {
923 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
924 if (fType ==
"float") {
925 out << SP << SP << SP <<
"float a = " << fAttrActivationAlpha[direction * 3] <<
" * "
926 << OpName <<
"_input_gate[i] + " << fAttrActivationBeta[direction * 3] <<
";\n";
927 out << SP << SP << SP <<
"float b = (a > 0.) ? a : 0.;\n";
929 out << SP << SP << SP << SP << OpName <<
"_input_gate[i] = (b < 1.) ? b : 1.;\n";
930 out << SP << SP <<
"}\n";
931 }
else if (fAttrActivations[direction * 3] ==
"LeakyRelu") {
932 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
933 out << SP << SP << SP <<
"if (" << OpName <<
"_input_gate[i] < 0.)\n";
934 out << SP << SP << SP << SP << OpName <<
"_input_gate[i] = "
935 << fAttrActivationAlpha[direction * 3] <<
" * " << OpName <<
"_input_gate[i];\n";
936 out << SP << SP <<
"}\n";
937 }
else if (fAttrActivations[direction * 3] ==
"ThresholdRelu") {
938 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
939 out << SP << SP << SP <<
"if (" << OpName <<
"_input_gate[i] < "
940 << fAttrActivationAlpha[direction * 3] <<
")\n";
941 out << SP << SP << SP << SP << OpName <<
"_input_gate[i] = 0.;\n";
942 out << SP << SP <<
"}";
943 }
else if (fAttrActivations[direction * 3] ==
"Elu") {
944 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
945 out << SP << SP << SP <<
"if (" << OpName <<
"_input_gate[i] < 0.)\n";
946 out << SP << SP << SP << SP << OpName <<
"_input_gate[i] = "
947 << fAttrActivationAlpha[direction * 3] <<
" * exp(" << OpName <<
"_input_gate[i] - 1.);\n";
948 out << SP << SP <<
"}\n";
949 }
else if (fAttrActivations[direction * 3] ==
"Softsign") {
950 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
951 out << SP << SP << SP << SP << OpName <<
"_input_gate[i] = " << OpName
952 <<
"_input_gate[i] / (1. + abs(" << OpName <<
"_input_gate[i]));\n";
953 out << SP << SP <<
"}\n";
955 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
956 out << SP << SP << SP << SP << OpName <<
"_input_gate[i] = log(1. + exp("
957 << OpName <<
"_input_gate[i]));\n";
958 out << SP << SP <<
"}\n";
961 if (fAttrInputForget == 0) {
963 if (fAttrClip > .0) {
964 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
965 if (fType ==
"float") {
966 out << SP << SP << SP <<
"float x = (" << OpName <<
"_forget_gate[i] > "
967 << -fAttrClip <<
") ? " << OpName <<
"_forget_gate[i] : " << -fAttrClip <<
";\n";
969 out << SP << SP << SP << OpName <<
"_forget_gate[i] = (x < " << fAttrClip
970 <<
") ? x : " << fAttrClip <<
";\n";
971 out << SP << SP <<
"}\n";
974 if (fAttrActivations[direction * 3] ==
"Relu") {
975 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
976 out << SP << SP << SP <<
"if (" << OpName <<
"_forget_gate[i] < 0.)\n";
977 out << SP << SP << SP << SP << OpName <<
"_forget_gate[i] = 0.;\n";
978 out << SP << SP <<
"}\n";
979 }
else if (fAttrActivations[direction * 3] ==
"Tanh") {
980 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
981 if (fType ==
"float") {
982 out << SP << SP << SP <<
"float ex = exp(-2 * " << OpName <<
"_forget_gate[i]);\n";
984 out << SP << SP << SP << SP << OpName <<
"_forget_gate[i] = (1. - ex) / (1. + ex);\n";
985 out << SP << SP <<
"}\n";
986 }
else if (fAttrActivations[direction * 3] ==
"Sigmoid") {
987 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
988 out << SP << SP << SP << SP << OpName <<
"_forget_gate[i] = 1. / (1. + exp(-"
989 << OpName <<
"_forget_gate[i]));\n";
990 out << SP << SP <<
"}\n";
991 }
else if (fAttrActivations[direction * 3] ==
"Affine") {
992 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
993 out << SP << SP << SP << SP << OpName <<
"_forget_gate[i] = "
994 << fAttrActivationAlpha[direction * 3] <<
" * " << OpName <<
"_forget_gate[i] + "
995 << fAttrActivationBeta[direction * 3] <<
";\n";
996 out << SP << SP <<
"}\n";
997 }
else if (fAttrActivations[direction * 3] ==
"ScaledTanh") {
998 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
999 if (fType ==
"float") {
1000 out << SP << SP << SP <<
"float ex = exp(-2 * " << fAttrActivationBeta[direction * 3]
1001 <<
" * "<< OpName <<
"_forget_gate[i]);\n";
1003 out << SP << SP << SP << SP << OpName <<
"_forget_gate[i] = "
1004 << fAttrActivationAlpha[direction * 3] <<
" * (1. - ex) / (1. + ex);\n";
1005 out << SP << SP <<
"}\n";
1006 }
else if (fAttrActivations[direction * 3] ==
"HardSigmoid") {
1007 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1008 if (fType ==
"float") {
1009 out << SP << SP << SP <<
"float a = " << fAttrActivationAlpha[direction * 3] <<
" * "
1010 << OpName <<
"_forget_gate[i] + " << fAttrActivationBeta[direction * 3] <<
";\n";
1011 out << SP << SP << SP <<
"float b = (a > 0.) ? a : 0.;\n";
1013 out << SP << SP << SP << SP << OpName <<
"_forget_gate[i] = (b < 1.) ? b : 1.;\n";
1014 out << SP << SP <<
"}\n";
1015 }
else if (fAttrActivations[direction * 3] ==
"LeakyRelu") {
1016 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1017 out << SP << SP << SP <<
"if (" << OpName <<
"_forget_gate[i] < 0.)\n";
1018 out << SP << SP << SP << SP << OpName <<
"_forget_gate[i] = "
1019 << fAttrActivationAlpha[direction * 3] <<
" * " << OpName <<
"_forget_gate[i];\n";
1020 out << SP << SP <<
"}\n";
1021 }
else if (fAttrActivations[direction * 3] ==
"ThresholdRelu") {
1022 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1023 out << SP << SP << SP <<
"if (" << OpName <<
"_forget_gate[i] < "
1024 << fAttrActivationAlpha[direction * 3] <<
")\n";
1025 out << SP << SP << SP << SP << OpName <<
"_forget_gate[i] = 0.;\n";
1026 out << SP << SP <<
"}";
1027 }
else if (fAttrActivations[direction * 3] ==
"Elu") {
1028 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1029 out << SP << SP << SP <<
"if (" << OpName <<
"_forget_gate[i] < 0.)\n";
1030 out << SP << SP << SP << SP << OpName <<
"_forget_gate[i] = "
1031 << fAttrActivationAlpha[direction * 3] <<
" * exp(" << OpName <<
"_forget_gate[i] - 1.);\n";
1032 out << SP << SP <<
"}\n";
1033 }
else if (fAttrActivations[direction * 3] ==
"Softsign") {
1034 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1035 out << SP << SP << SP << SP << OpName <<
"_forget_gate[i] = " << OpName
1036 <<
"_forget_gate[i] / (1. + abs(" << OpName <<
"_forget_gate[i]));\n";
1037 out << SP << SP <<
"}\n";
1039 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1040 out << SP << SP << SP << SP << OpName <<
"_forget_gate[i] = log(1. + exp("
1041 << OpName <<
"_forget_gate[i]));\n";
1042 out << SP << SP <<
"}\n";
1047 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1048 out << SP << SP << SP << OpName <<
"_cell_state[i] = " << OpName <<
"_input_gate[i] * "
1049 << OpName <<
"_cell_gate[i];\n";
1050 out << SP << SP <<
"}\n";
1052 if (fAttrInputForget == 0) {
1053 out << SP << SP <<
"if (seq == 0) {\n";
1054 if (!fNInitial_c.empty()) {
1056 out << SP << SP << SP <<
"for (size_t i = 0; i < " <<
size <<
"; i++) {\n";
1057 out << SP << SP << SP << SP << OpName <<
"_cell_state[i + offset] += "
1058 << OpName <<
"_forget_gate[i + offset] * " << OpName <<
"_initial_cell_state[i];\n";
1059 out << SP << SP << SP <<
"}\n";
1061 out << SP << SP <<
"} else {\n";
1063 if (direction == 0) {
1064 if (fAttrDirection ==
"backward") {
1065 out << SP << SP << SP <<
"size_t previous_offset = (index + 1) * "
1066 << num_directions * batch_size * fAttrHiddenSize <<
";\n";
1068 out << SP << SP << SP <<
"size_t previous_offset = (seq - 1) * "
1069 << num_directions * batch_size * fAttrHiddenSize <<
";\n";
1072 out << SP << SP << SP <<
"size_t previous_offset = (index + 1) * "
1073 << num_directions * batch_size * fAttrHiddenSize <<
" + " << batch_size * fAttrHiddenSize <<
";\n";
1075 out << SP << SP << SP <<
"for (size_t i = 0; i < " <<
size <<
"; i++) {\n";
1076 out << SP << SP << SP << SP << OpName <<
"_cell_state[i + offset] += "
1077 << OpName <<
"_forget_gate[i + offset] * " << OpName <<
"_cell_state[i + previous_offset];\n";
1078 out << SP << SP << SP <<
"}\n";
1079 out << SP << SP <<
"}\n";
1084 if (direction == 0) {
1085 size_t p_offset = 2 * batch_size * fAttrHiddenSize;
1086 out << SP << SP << SP <<
"for (size_t i = 0; i < " <<
size <<
"; i++) {\n";
1087 out << SP << SP << SP << SP << OpName <<
"_output_gate[i + offset] += tensor_"
1088 << fNP <<
"[i + " << p_offset <<
"] * " << OpName <<
"_cell_state[i + offset];\n";
1089 out << SP << SP << SP <<
"}\n";
1091 size_t p_offset = 3 * batch_size * fAttrHiddenSize + 2 * batch_size * fAttrHiddenSize;
1092 out << SP << SP << SP <<
"for (size_t i = 0; i < " <<
size <<
"; i++) {\n";
1093 out << SP << SP << SP << SP << OpName <<
"_output_gate[i + offset] += tensor_"
1094 << fNP <<
"[i + " << p_offset <<
"] * " << OpName <<
"_cell_state[i + offset];\n";
1095 out << SP << SP << SP <<
"}\n";
1100 if (fAttrClip > .0) {
1101 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1102 if (fType ==
"float") {
1103 out << SP << SP << SP <<
"float x = (" << OpName <<
"_output_gate[i] > " << -fAttrClip
1104 <<
") ? " << OpName <<
"_output_gate[i] : " << -fAttrClip <<
";\n";
1106 out << SP << SP << SP << OpName <<
"_output_gate[i] = (x < " << fAttrClip <<
") ? x : "
1107 << fAttrClip <<
";\n";
1108 out << SP << SP <<
"}\n";
1111 if (fAttrActivations[direction * 3] ==
"Relu") {
1112 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1113 out << SP << SP << SP <<
"if (" << OpName <<
"_output_gate[i] < 0.)\n";
1114 out << SP << SP << SP << SP << OpName <<
"_output_gate[i] = 0.;\n";
1115 out << SP << SP <<
"}\n";
1116 }
else if (fAttrActivations[direction * 3] ==
"Tanh") {
1117 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1118 if (fType ==
"float") {
1119 out << SP << SP << SP <<
"float ex = exp(-2 * " << OpName <<
"_output_gate[i]);\n";
1121 out << SP << SP << SP << SP << OpName <<
"_output_gate[i] = (1. - ex) / (1. + ex);\n";
1122 out << SP << SP <<
"}\n";
1123 }
else if (fAttrActivations[direction * 3] ==
"Sigmoid") {
1124 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1125 out << SP << SP << SP << SP << OpName <<
"_output_gate[i] = 1. / (1. + exp(-" << OpName
1126 <<
"_output_gate[i]));\n";
1127 out << SP << SP <<
"}\n";
1128 }
else if (fAttrActivations[direction * 3] ==
"Affine") {
1129 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1130 out << SP << SP << SP << SP << OpName <<
"_output_gate[i] = "
1131 << fAttrActivationAlpha[direction * 3] <<
" * " << OpName <<
"_output_gate[i] + "
1132 << fAttrActivationBeta[direction * 3] <<
";\n";
1133 out << SP << SP <<
"}\n";
1134 }
else if (fAttrActivations[direction * 3] ==
"ScaledTanh") {
1135 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1136 if (fType ==
"float") {
1137 out << SP << SP << SP <<
"float ex = exp(-2 * " << fAttrActivationBeta[direction * 3]
1138 <<
" * "<< OpName <<
"_output_gate[i]);\n";
1140 out << SP << SP << SP << SP << OpName <<
"_output_gate[i] = "
1141 << fAttrActivationAlpha[direction * 3] <<
" * (1. - ex) / (1. + ex);\n";
1142 out << SP << SP <<
"}\n";
1143 }
else if (fAttrActivations[direction * 3] ==
"HardSigmoid") {
1144 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1145 if (fType ==
"float") {
1146 out << SP << SP << SP <<
"float a = " << fAttrActivationAlpha[direction * 3] <<
" * "
1147 << OpName <<
"_output_gate[i] + " << fAttrActivationBeta[direction * 3] <<
";\n";
1148 out << SP << SP << SP <<
"float b = (a > 0.) ? a : 0.;\n";
1150 out << SP << SP << SP << SP << OpName <<
"_output_gate[i] = (b < 1.) ? b : 1.;\n";
1151 out << SP << SP <<
"}\n";
1152 }
else if (fAttrActivations[direction * 3] ==
"LeakyRelu") {
1153 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1154 out << SP << SP << SP <<
"if (" << OpName <<
"_output_gate[i] < 0.)\n";
1155 out << SP << SP << SP << SP << OpName <<
"_output_gate[i] = "
1156 << fAttrActivationAlpha[direction * 3] <<
" * " << OpName <<
"_output_gate[i];\n";
1157 out << SP << SP <<
"}\n";
1158 }
else if (fAttrActivations[direction * 3] ==
"ThresholdRelu") {
1159 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1160 out << SP << SP << SP <<
"if (" << OpName <<
"_output_gate[i] < "
1161 << fAttrActivationAlpha[direction * 3] <<
")\n";
1162 out << SP << SP << SP << SP << OpName <<
"_output_gate[i] = 0.;\n";
1163 out << SP << SP <<
"}";
1164 }
else if (fAttrActivations[direction * 3] ==
"Elu") {
1165 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1166 out << SP << SP << SP <<
"if (" << OpName <<
"_output_gate[i] < 0.)\n";
1167 out << SP << SP << SP << SP << OpName <<
"_output_gate[i] = "
1168 << fAttrActivationAlpha[direction * 3] <<
" * exp(" << OpName <<
"_output_gate[i] - 1.);\n";
1169 out << SP << SP <<
"}\n";
1170 }
else if (fAttrActivations[direction * 3] ==
"Softsign") {
1171 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1172 out << SP << SP << SP << SP << OpName <<
"_output_gate[i] = " << OpName
1173 <<
"_output_gate[i] / (1. + abs(" << OpName <<
"_output_gate[i]));\n";
1174 out << SP << SP <<
"}\n";
1176 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1177 out << SP << SP << SP << SP << OpName <<
"_output_gate[i] = log(1. + exp("
1178 << OpName <<
"_output_gate[i]));\n";
1179 out << SP << SP <<
"}\n";
1183 out << SP << SP <<
"std::copy(" << OpName <<
"_cell_state + offset, " << OpName
1184 <<
"_cell_state + offset + " <<
size <<
", "<< OpName <<
"_new_cell_state + offset);\n";
1186 if (fAttrClip > .0) {
1187 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1188 if (fType ==
"float") {
1189 out << SP << SP << SP <<
"float x = (" << OpName <<
"_new_cell_state[i] > " << -fAttrClip
1190 <<
") ? " << OpName <<
"_new_cell_state[i] : " << -fAttrClip <<
";\n";
1192 out << SP << SP << SP << OpName <<
"_new_cell_state[i] = (x < " << fAttrClip <<
") ? x : "
1193 << fAttrClip <<
";\n";
1194 out << SP << SP <<
"}\n";
1197 if (fAttrActivations[direction * 3 + 2] ==
"Relu") {
1198 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1199 out << SP << SP << SP <<
"if (" << OpName <<
"_new_cell_state[i] < 0.)\n";
1200 out << SP << SP << SP << SP << OpName <<
"_new_cell_state[i] = 0.;\n";
1201 out << SP << SP <<
"}\n";
1202 }
else if (fAttrActivations[direction * 3 + 2] ==
"Tanh") {
1203 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1204 if (fType ==
"float") {
1205 out << SP << SP << SP <<
"float ex = exp(-2 * " << OpName <<
"_new_cell_state[i]);\n";
1207 out << SP << SP << SP << SP << OpName <<
"_new_cell_state[i] = (1. - ex) / (1. + ex);\n";
1208 out << SP << SP <<
"}\n";
1209 }
else if (fAttrActivations[direction * 3 + 2] ==
"Sigmoid") {
1210 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1211 out << SP << SP << SP << SP << OpName <<
"_new_cell_state[i] = 1. / (1. + exp(-" << OpName
1212 <<
"_new_cell_state[i]));\n";
1213 out << SP << SP <<
"}\n";
1214 }
else if (fAttrActivations[direction * 3 + 2] ==
"Affine") {
1215 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1216 out << SP << SP << SP << SP << OpName <<
"_new_cell_state[i] = "
1217 << fAttrActivationAlpha[direction * 3 + 2] <<
" * " << OpName <<
"_new_cell_state[i] + "
1218 << fAttrActivationBeta[direction * 3 + 2] <<
";\n";
1219 out << SP << SP <<
"}\n";
1220 }
else if (fAttrActivations[direction * 3 + 2] ==
"ScaledTanh") {
1221 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1222 if (fType ==
"float") {
1223 out << SP << SP << SP <<
"float ex = exp(-2 * " << fAttrActivationBeta[direction * 3 + 2]
1224 <<
" * "<< OpName <<
"_new_cell_state[i]);\n";
1226 out << SP << SP << SP << SP << OpName <<
"_new_cell_state[i] = "
1227 << fAttrActivationAlpha[direction * 3 + 2] <<
" * (1. - ex) / (1. + ex);\n";
1228 out << SP << SP <<
"}\n";
1229 }
else if (fAttrActivations[direction * 3 + 2] ==
"HardSigmoid") {
1230 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1231 if (fType ==
"float") {
1232 out << SP << SP << SP <<
"float a = " << fAttrActivationAlpha[direction * 3 + 2] <<
" * "
1233 << OpName <<
"_new_cell_state[i] + " << fAttrActivationBeta[direction * 3 + 2] <<
";\n";
1234 out << SP << SP << SP <<
"float b = (a > 0.) ? a : 0.;\n";
1236 out << SP << SP << SP << SP << OpName <<
"_new_cell_state[i] = (b < 1.) ? b : 1.;\n";
1237 out << SP << SP <<
"}\n";
1238 }
else if (fAttrActivations[direction * 3 + 2] ==
"LeakyRelu") {
1239 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1240 out << SP << SP << SP <<
"if (" << OpName <<
"_new_cell_state[i] < 0.)\n";
1241 out << SP << SP << SP << SP << OpName <<
"_new_cell_state[i] = "
1242 << fAttrActivationAlpha[direction * 3 + 2] <<
" * " << OpName <<
"_new_cell_state[i];\n";
1243 out << SP << SP <<
"}\n";
1244 }
else if (fAttrActivations[direction * 3 + 2] ==
"ThresholdRelu") {
1245 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1246 out << SP << SP << SP <<
"if (" << OpName <<
"_new_cell_state[i] < "
1247 << fAttrActivationAlpha[direction * 3 + 2] <<
")\n";
1248 out << SP << SP << SP << SP << OpName <<
"_new_cell_state[i] = 0.;\n";
1249 out << SP << SP <<
"}";
1250 }
else if (fAttrActivations[direction * 3 + 2] ==
"Elu") {
1251 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1252 out << SP << SP << SP <<
"if (" << OpName <<
"_new_cell_state[i] < 0.)\n";
1253 out << SP << SP << SP << SP << OpName <<
"_new_cell_state[i] = "
1254 << fAttrActivationAlpha[direction * 3 + 2] <<
" * exp(" << OpName <<
"_new_cell_state[i] - 1.);\n";
1255 out << SP << SP <<
"}\n";
1256 }
else if (fAttrActivations[direction * 3 + 2] ==
"Softsign") {
1257 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1258 out << SP << SP << SP << SP << OpName <<
"_new_cell_state[i] = " << OpName
1259 <<
"_new_cell_state[i] / (1. + abs(" << OpName <<
"_new_cell_state[i]));\n";
1260 out << SP << SP <<
"}\n";
1262 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1263 out << SP << SP << SP << SP << OpName <<
"_new_cell_state[i] = log(1. + exp("
1264 << OpName <<
"_new_cell_state[i]));\n";
1265 out << SP << SP <<
"}\n";
1269 out << SP << SP <<
"for (size_t i = offset; i < offset + " <<
size <<
"; i++) {\n";
1270 out << SP << SP << SP << OpName <<
"_hidden_state[i] = " << OpName <<
"_output_gate[i] * "
1271 << OpName <<
"_new_cell_state[i];\n";
1272 out << SP << SP <<
"}\n";
1277 if (!fNSequence_lens.empty()) {
1278 out << SP <<
"for (size_t seq = 0; seq < " << seq_length <<
"; seq++) {\n";
1279 out << SP << SP <<
"for (size_t batch = 0; batch < " << batch_size <<
"; batch++) {\n";
1280 out << SP << SP << SP <<
"if (seq >= tensor_" << fNSequence_lens <<
"[batch]) {\n";
1281 for (
size_t direction = 0; direction < num_directions; direction++) {
1282 out << SP << SP << SP << SP << SP <<
"for (size_t h = 0; h < " << fAttrHiddenSize <<
"; h++) {\n";
1283 out << SP << SP << SP << SP << SP << SP <<
"size_t idx = seq * "
1284 << num_directions * batch_size * fAttrHiddenSize + direction * batch_size * fAttrHiddenSize
1285 <<
" + batch * " << fAttrHiddenSize <<
" + h;\n";
1286 out << SP << SP << SP << SP << SP << SP << OpName <<
"_cell_state[idx] = 0.;\n";
1287 out << SP << SP << SP << SP << SP << SP << OpName <<
"_hidden_state[idx] = 0.;\n";
1288 out << SP << SP << SP << SP << SP <<
"}\n";
1290 out << SP << SP << SP <<
"}\n";
1291 out << SP << SP <<
"}\n";
1296 if (fAttrLayout == 0) {
1297 if (!fNY_h.empty()) {
1299 if (fNSequence_lens.empty()) {
1300 size_t y_h_size = batch_size * fAttrHiddenSize;
1301 if (fAttrDirection ==
"backward") {
1302 out << SP <<
"std::copy(" << OpName <<
"_hidden_state, " << OpName <<
"_hidden_state + "
1303 << y_h_size <<
", tensor_" << fNY_h <<
");\n";
1305 size_t offset = (seq_length - 1) * num_directions * batch_size * fAttrHiddenSize;
1306 out << SP <<
"std::copy(" << OpName <<
"_hidden_state + " <<
offset <<
", " << OpName
1307 <<
"_hidden_state + " <<
offset <<
" + " << y_h_size <<
", tensor_" << fNY_h <<
");\n";
1309 if (num_directions == 2) {
1310 out << SP <<
"std::copy(" << OpName <<
"_hidden_state + " << y_h_size <<
", " << OpName
1311 <<
"_hidden_state + " << 2 * y_h_size <<
", tensor_" << fNY_h <<
" + " << y_h_size <<
");\n";
1314 if (fAttrDirection ==
"backward") {
1315 out << SP <<
"for (size_t batch = 0; batch < " << batch_size <<
"; batch++) {\n";
1316 out << SP << SP <<
"size_t offset = batch * " << fAttrHiddenSize <<
";\n";
1317 out << SP << SP <<
"std::copy(" << OpName <<
"_hidden_state + offset, " << OpName
1318 <<
"_hidden_state + offset + " << fAttrHiddenSize <<
", tensor_" << fNY_h <<
" + offset);\n";
1321 out << SP <<
"for (size_t batch = 0; batch < " << batch_size <<
"; batch++) {\n";
1322 out << SP << SP <<
"size_t seq = " <<
"tensor_" << fNSequence_lens <<
"[batch] - 1;\n";
1323 out << SP << SP <<
"size_t offset = seq * " << num_directions * batch_size * fAttrHiddenSize
1324 <<
" + batch * " << fAttrHiddenSize <<
";\n";
1325 out << SP << SP <<
"size_t y_h_offset = batch * " << fAttrHiddenSize <<
";\n";
1326 out << SP << SP <<
"std::copy(" << OpName <<
"_hidden_state + offset, " << OpName
1327 <<
"_hidden_state + offset + " << fAttrHiddenSize <<
", tensor_" << fNY_h <<
" + y_h_offset);\n";
1330 if (num_directions == 2) {
1331 out << SP <<
"for (size_t batch = 0; batch < " << batch_size <<
"; batch++) {\n";
1332 out << SP << SP <<
"size_t offset = " << batch_size * fAttrHiddenSize
1333 <<
" + batch * " << fAttrHiddenSize <<
";\n";
1334 out << SP << SP <<
"size_t y_h_offset = " << batch_size * fAttrHiddenSize
1335 <<
" + batch * " << fAttrHiddenSize <<
";\n";
1336 out << SP << SP <<
"std::copy(" << OpName <<
"_hidden_state + offset, " << OpName
1337 <<
"_hidden_state + offset + " << fAttrHiddenSize <<
", tensor_" << fNY_h <<
" + y_h_offset);\n";
1342 if (!fNY_c.empty()) {
1344 if (fNSequence_lens.empty()) {
1345 size_t y_h_size = batch_size * fAttrHiddenSize;
1346 if (fAttrDirection ==
"backward") {
1347 out << SP <<
"std::copy(" << OpName <<
"_cell_state, " << OpName <<
"_hidden_state + "
1348 << y_h_size <<
", tensor_" << fNY_c <<
");\n";
1350 size_t offset = (seq_length - 1) * num_directions * batch_size * fAttrHiddenSize;
1351 out << SP <<
"std::copy(" << OpName <<
"_cell_state + " <<
offset <<
", " << OpName
1352 <<
"_cell_state + " <<
offset <<
" + " << y_h_size <<
", tensor_" << fNY_c <<
");\n";
1354 if (num_directions == 2) {
1355 out << SP <<
"std::copy(" << OpName <<
"_cell_state + " << y_h_size <<
", " << OpName
1356 <<
"_cell_state + " << 2 * y_h_size <<
", tensor_" << fNY_c <<
" + " << y_h_size <<
");\n";
1359 if (fAttrDirection ==
"backward") {
1360 out << SP <<
"for (size_t batch = 0; batch < " << batch_size <<
"; batch++) {\n";
1361 out << SP << SP <<
"size_t offset = batch * " << fAttrHiddenSize <<
";\n";
1362 out << SP << SP <<
"std::copy(" << OpName <<
"_cell_state + offset, " << OpName
1363 <<
"_cell_state + offset + " << fAttrHiddenSize <<
", tensor_" << fNY_c <<
" + offset);\n";
1366 out << SP <<
"for (size_t batch = 0; batch < " << batch_size <<
"; batch++) {\n";
1367 out << SP << SP <<
"size_t seq = " <<
"tensor_" << fNSequence_lens <<
"[batch] - 1;\n";
1368 out << SP << SP <<
"size_t offset = seq * " << num_directions * batch_size * fAttrHiddenSize
1369 <<
" + batch * " << fAttrHiddenSize <<
";\n";
1370 out << SP << SP <<
"size_t y_h_offset = batch * " << fAttrHiddenSize <<
";\n";
1371 out << SP << SP <<
"std::copy(" << OpName <<
"_cell_state + offset, " << OpName
1372 <<
"_cell_state + offset + " << fAttrHiddenSize <<
", tensor_" << fNY_c <<
" + y_h_offset);\n";
1375 if (num_directions == 2) {
1376 out << SP <<
"for (size_t batch = 0; batch < " << batch_size <<
"; batch++) {\n";
1377 out << SP << SP <<
"size_t offset = " << batch_size * fAttrHiddenSize
1378 <<
" + batch * " << fAttrHiddenSize <<
";\n";
1379 out << SP << SP <<
"size_t y_h_offset = " << batch_size * fAttrHiddenSize
1380 <<
" + batch * " << fAttrHiddenSize <<
";\n";
1381 out << SP << SP <<
"std::copy(" << OpName <<
"_cell_state + offset, " << OpName
1382 <<
"_cell_state + offset + " << fAttrHiddenSize <<
", tensor_" << fNY_c <<
" + y_h_offset);\n";
1390 for (
size_t direction = 0; direction < num_directions; direction++) {
1391 out << SP <<
"for (size_t seq = 0; seq < " << seq_length <<
"; seq++) {\n";
1392 out << SP << SP <<
"for (size_t batch = 0; batch < " << batch_size <<
"; batch++) {\n";
1393 out << SP << SP << SP <<
"size_t offset = seq * " << num_directions * batch_size * fAttrHiddenSize
1394 <<
" + " << direction * batch_size * fAttrHiddenSize <<
" + batch * " << fAttrHiddenSize <<
";\n";
1395 out << SP << SP << SP <<
"size_t y_offset = batch * " << seq_length * num_directions * fAttrHiddenSize
1396 <<
" + seq * " << num_directions * fAttrHiddenSize <<
" + " << direction * fAttrHiddenSize <<
";\n";
1397 out << SP << SP << SP <<
"std::copy(" << OpName <<
"_hidden_state + offset, " << OpName
1398 <<
"_hidden_state + offset + " << fAttrHiddenSize <<
", tensor_" << fNY <<
" + y_offset);\n";
1399 out << SP << SP <<
"}\n";
1403 if (!fNY_h.empty()) {
1405 if (fAttrDirection ==
"backward") {
1406 out << SP <<
"for (size_t batch = 0; batch < " << batch_size <<
"; batch++) {\n";
1407 out << SP << SP <<
"size_t offset = batch * " << fAttrHiddenSize <<
";\n";
1408 out << SP << SP <<
"size_t y_h_offset = batch * " << num_directions * fAttrHiddenSize <<
";\n";
1409 out << SP << SP <<
"std::copy(" << OpName <<
"_hidden_state + offset, " << OpName
1410 <<
"_hidden_state + offset + " << fAttrHiddenSize <<
", tensor_" << fNY_h <<
" + y_h_offset);\n";
1413 out << SP <<
"for (size_t batch = 0; batch < " << batch_size <<
"; batch++) {\n";
1414 if (fNSequence_lens.empty()) {
1415 out << SP << SP <<
"size_t seq = " << seq_length - 1 <<
";\n";
1417 out << SP << SP <<
"size_t seq = " <<
"tensor_" << fNSequence_lens <<
"[batch] - 1;\n";
1419 out << SP << SP <<
"size_t offset = seq * " << num_directions * batch_size * fAttrHiddenSize
1420 <<
" + batch * " << fAttrHiddenSize <<
";\n";
1421 out << SP << SP <<
"size_t y_h_offset = batch * " << num_directions * fAttrHiddenSize <<
";\n";
1422 out << SP << SP <<
"std::copy(" << OpName <<
"_hidden_state + offset, " << OpName
1423 <<
"_hidden_state + offset + " << fAttrHiddenSize <<
", tensor_" << fNY_h <<
" + y_h_offset);\n";
1426 if (num_directions == 2) {
1427 out << SP <<
"for (size_t batch = 0; batch < " << batch_size <<
"; batch++) {\n";
1428 out << SP << SP <<
"size_t offset = " << batch_size * fAttrHiddenSize <<
" + batch * "
1429 << fAttrHiddenSize <<
";\n";
1430 out << SP << SP <<
"size_t y_h_offset = batch * " << num_directions * fAttrHiddenSize <<
" + "
1431 << fAttrHiddenSize <<
";\n";
1432 out << SP << SP <<
"std::copy(" << OpName <<
"_hidden_state + offset, " << OpName
1433 <<
"_hidden_state + offset + " << fAttrHiddenSize <<
", tensor_" << fNY_h <<
" + y_h_offset);\n";
1438 if (!fNY_c.empty()) {
1440 if (fAttrDirection ==
"backward") {
1441 out << SP <<
"for (size_t batch = 0; batch < " << batch_size <<
"; batch++) {\n";
1442 out << SP << SP <<
"size_t offset = batch * " << fAttrHiddenSize <<
";\n";
1443 out << SP << SP <<
"size_t y_h_offset = batch * " << num_directions * fAttrHiddenSize <<
";\n";
1444 out << SP << SP <<
"std::copy(" << OpName <<
"_cell_state + offset, " << OpName
1445 <<
"_cell_state + offset + " << fAttrHiddenSize <<
", tensor_" << fNY_c <<
" + y_h_offset);\n";
1448 out << SP <<
"for (size_t batch = 0; batch < " << batch_size <<
"; batch++) {\n";
1449 if (fNSequence_lens.empty()) {
1450 out << SP << SP <<
"size_t seq = " << seq_length - 1 <<
";\n";
1452 out << SP << SP <<
"size_t seq = " <<
"tensor_" << fNSequence_lens <<
"[batch] - 1;\n";
1454 out << SP << SP <<
"size_t offset = seq * " << num_directions * batch_size * fAttrHiddenSize
1455 <<
" + batch * " << fAttrHiddenSize <<
";\n";
1456 out << SP << SP <<
"size_t y_h_offset = batch * " << num_directions * fAttrHiddenSize <<
";\n";
1457 out << SP << SP <<
"std::copy(" << OpName <<
"_cell_state + offset, " << OpName
1458 <<
"_cell_state + offset + " << fAttrHiddenSize <<
", tensor_" << fNY_c <<
" + y_h_offset);\n";
1461 if (num_directions == 2) {
1462 out << SP <<
"for (size_t batch = 0; batch < " << batch_size <<
"; batch++) {\n";
1463 out << SP << SP <<
"size_t offset = " << batch_size * fAttrHiddenSize <<
" + batch * "
1464 << fAttrHiddenSize <<
";\n";
1465 out << SP << SP <<
"size_t y_h_offset = batch * " << num_directions * fAttrHiddenSize <<
" + "
1466 << fAttrHiddenSize <<
";\n";
1467 out << SP << SP <<
"std::copy(" << OpName <<
"_cell_state + offset, " << OpName
1468 <<
"_cell_state + offset + " << fAttrHiddenSize <<
", tensor_" << fNY_c <<
" + y_h_offset);\n";
size_t size(const MatrixT &matrix)
Retrieves the size of a square matrix.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
std::vector< std::vector< size_t > > ShapeInference(std::vector< std::vector< size_t > > input)
Infers the shape of the output tensors.
std::vector< ETensorType > TypeInference(std::vector< ETensorType > input)
Infers the type of the output tensors.
std::string GenerateSessionMembersCode(std::string opName)
Generate the code for the Session internal data vectors.
void Initialize(RModel &model)
Initialize the model.
std::string Generate(std::string OpName)
Generate the inference code.
Creates variable transformations.
static uint64_t sum(uint64_t i)