ROperator_LSTM.icc
#ifndef TMVA_SOFIE_ROPERATOR_LSTM_I
#define TMVA_SOFIE_ROPERATOR_LSTM_I
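
// Implementation of the SOFIE ROperator_LSTM operator: type and shape
// inference, and generation of the C++ inference code for the ONNX LSTM
// operator.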

namespace TMVA {
namespace Experimental {
namespace SOFIE {

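// The operator has two type-inferred outputs; both simply propagate the
// element type of the input tensor X.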
template<typename T>
auto ROperator_LSTM<T>::TypeInference(std::vector<ETensorType> input)
-> std::vector<ETensorType> {
   ETensorType out = input[0];
   return {out, out};
}

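// Per the ONNX LSTM specification the weight tensor W has shape
// {num_directions, 4 * hidden_size, input_size}, so hidden_size is recovered
// as W.shape[1] / 4. The returned shapes are those of the outputs Y, Y_h and
// Y_c; they depend on the layout attribute (0 = sequence-major,
// 1 = batch-major).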
template<typename T>
auto ROperator_LSTM<T>::ShapeInference(std::vector<std::vector<size_t>> input)
-> std::vector<std::vector<size_t>> {
   size_t num_directions = input[1][0];
   size_t hidden_size = input[1][1] / 4;
   if (fAttrLayout == 0) {
      size_t seq_length = input[0][0];
      size_t batch_size = input[0][1];
      std::vector<std::vector<size_t>> ret(
          {{seq_length, num_directions, batch_size, hidden_size},
           {num_directions, batch_size, hidden_size},
           {num_directions, batch_size, hidden_size}});
      return ret;
   } else {
      size_t batch_size = input[0][0];
      size_t seq_length = input[0][1];
      std::vector<std::vector<size_t>> ret(
          {{batch_size, seq_length, num_directions, hidden_size},
           {batch_size, num_directions, hidden_size},
           {batch_size, num_directions, hidden_size}});
      return ret;
   }
}
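// Example (illustrative values): with layout 0, an input X of shape
// {seq_length = 5, batch_size = 2, input_size = 3} and W of shape {1, 16, 3}
// give hidden_size = 16 / 4 = 4, hence Y = {5, 1, 2, 4} and
// Y_h = Y_c = {1, 2, 4}.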
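// Initialize validates the input tensors, broadcasts the optional bias and
// peephole tensors into the layouts expected by the generated code, registers
// the requested output tensors, and checks the operator attributes.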
template<typename T>
auto ROperator_LSTM<T>::Initialize(RModel& model)
-> void {
   fUseSession = model.UseSession();
   // Check the input and output tensors
   if (!model.CheckIfTensorAlreadyExist(fNX)) {
      throw std::runtime_error("TMVA SOFIE LSTM Op input tensor " + fNX + " is not found in model.");
   }
   fShapeX = model.GetTensorShape(fNX);
   if (fShapeX.size() != 3) {
      throw std::runtime_error("TMVA SOFIE LSTM Op input tensor " + fNX + " is not of 3 dimensions.");
   }
   if (!model.CheckIfTensorAlreadyExist(fNW)) {
      throw std::runtime_error("TMVA SOFIE LSTM Op input tensor " + fNW + " is not found in model.");
   }
   fShapeW = model.GetTensorShape(fNW);
   if (fShapeW.size() != 3) {
      throw std::runtime_error("TMVA SOFIE LSTM Op input tensor " + fNW + " is not of 3 dimensions.");
   }
   if (!model.CheckIfTensorAlreadyExist(fNR)) {
      throw std::runtime_error("TMVA SOFIE LSTM Op input tensor " + fNR + " is not found in model.");
   }
   fShapeR = model.GetTensorShape(fNR);
   if (fShapeR.size() != 3) {
      throw std::runtime_error("TMVA SOFIE LSTM Op input tensor " + fNR + " is not of 3 dimensions.");
   }
   if (!fNB.empty()) {
      if (!model.CheckIfTensorAlreadyExist(fNB)) {
         throw std::runtime_error("TMVA SOFIE LSTM op input tensor " + fNB + " is not found in model.");
      }
      fShapeB = model.GetTensorShape(fNB);
      if (fShapeB.size() != 2 && fShapeB.size() != 5) {
         throw std::runtime_error("TMVA SOFIE LSTM op input tensor " + fNB + " is not of 2 or 5 dimensions.");
      }
      if (fShapeB.size() == 2) {
         // Broadcasting the bias
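         // Per the ONNX specification the bias B has shape
         // {num_directions, 8 * hidden_size}: the input biases Wb followed by
         // the recurrence biases Rb, each concatenated over the gates in the
         // order [input, output, forget, cell]. Wb + Rb is precomputed per
         // gate and tiled over (seq_length, batch_size), so that each gate
         // bias can later be added with a single saxpy call on a contiguous
         // block.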
         auto original_data = model.GetInitializedTensorData(fNB);
         size_t num_directions = fShapeW[0];
         size_t seq_length = (fAttrLayout == 0) ? fShapeX[0] : fShapeX[1];
         size_t batch_size = (fAttrLayout == 0) ? fShapeX[1] : fShapeX[0];
         if (fType == "float") {
            float *original_bias = static_cast<float*>(original_data.get());
            float *new_bias = new float[4 * num_directions * seq_length * batch_size * fAttrHiddenSize];
            for (size_t gate = 0; gate < 4; gate++) {
               // Wb + Rb for this gate, broadcast below over (seq, batch)
               std::vector<float> sum(fAttrHiddenSize);
               for (size_t direction = 0; direction < num_directions; direction++) {
                  size_t offset = direction * 8 * fAttrHiddenSize + gate * fAttrHiddenSize;
                  for (size_t h = 0; h < fAttrHiddenSize; h++) {
                     sum[h] = original_bias[offset + h] + original_bias[offset + h + 4 * fAttrHiddenSize];
                  }
                  for (size_t seq = 0; seq < seq_length; seq++) {
                     for (size_t batch = 0; batch < batch_size; batch++) {
                        size_t bias_offset = gate * num_directions * seq_length * batch_size * fAttrHiddenSize
                           + direction * seq_length * batch_size * fAttrHiddenSize
                           + seq * batch_size * fAttrHiddenSize + batch * fAttrHiddenSize;
                        std::copy(sum.begin(), sum.end(), new_bias + bias_offset);
                     }
                  }
               }
            }
            std::vector<size_t> new_bias_shape = {4, num_directions, seq_length, batch_size, fAttrHiddenSize};
            std::shared_ptr<void> new_bias_ptr(new_bias, std::default_delete<float[]>());
            model.UpdateInitializedTensor(fNB, model.GetTensorType(fNB), new_bias_shape, new_bias_ptr);
            fShapeB = model.GetTensorShape(fNB);
         }
      }
   }
   if (!fNSequence_lens.empty()) {
      if (!model.CheckIfTensorAlreadyExist(fNSequence_lens)) {
         throw std::runtime_error("TMVA SOFIE LSTM Op input tensor " +
                                  fNSequence_lens +
                                  " is not found in model.");
      }
      fShapeSequence_lens = model.GetTensorShape(fNSequence_lens);
      if (fShapeSequence_lens.size() != 1) {
         throw std::runtime_error("TMVA SOFIE LSTM Op input tensor " +
                                  fNSequence_lens +
                                  " is not of 1 dimension.");
      }
   }
   if (!fNInitial_h.empty()) {
      if (!model.CheckIfTensorAlreadyExist(fNInitial_h)) {
         throw std::runtime_error("TMVA SOFIE LSTM Op input tensor " +
                                  fNInitial_h + " is not found in model.");
      }
      fShapeInitial_h = model.GetTensorShape(fNInitial_h);
      if (fShapeInitial_h.size() != 3) {
         throw std::runtime_error("TMVA SOFIE LSTM Op input tensor " +
                                  fNInitial_h + " is not of 3 dimensions.");
      }
   }
   if (!fNInitial_c.empty()) {
      if (!model.CheckIfTensorAlreadyExist(fNInitial_c)) {
         throw std::runtime_error("TMVA SOFIE LSTM Op input tensor " +
                                  fNInitial_c + " is not found in model.");
      }
      fShapeInitial_c = model.GetTensorShape(fNInitial_c);
      if (fShapeInitial_c.size() != 3) {
         throw std::runtime_error("TMVA SOFIE LSTM Op input tensor " +
                                  fNInitial_c + " is not of 3 dimensions.");
      }
   }
   if (!fNP.empty()) {
      if (!model.CheckIfTensorAlreadyExist(fNP)) {
         throw std::runtime_error("TMVA SOFIE LSTM op input tensor " + fNP + " is not found in model.");
      }
      fShapeP = model.GetTensorShape(fNP);
      if (fShapeP.size() != 2 && fShapeP.size() != 4) {
         throw std::runtime_error("TMVA SOFIE LSTM op input tensor " + fNP + " is not of 2 or 4 dimensions.");
      }
      if (fShapeP.size() == 2) {
         // Broadcasting the weight for peepholes
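         // The ONNX peephole tensor P has shape
         // {num_directions, 3 * hidden_size}; it is tiled over the batch so
         // the generated code can address it with a
         // {direction, gate, batch, hidden} layout.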
         auto original_data = model.GetInitializedTensorData(fNP);
         size_t num_directions = fShapeW[0];
         size_t batch_size = (fAttrLayout == 0) ? fShapeX[1] : fShapeX[0];
         if (fType == "float") {
            float *original_p = static_cast<float*>(original_data.get());
            float *new_p = new float[num_directions * 3 * batch_size * fAttrHiddenSize];
            for (size_t direction = 0; direction < num_directions; direction++) {
               for (size_t gate = 0; gate < 3; gate++) {
                  size_t p_offset = direction * 3 * fAttrHiddenSize + gate * fAttrHiddenSize;
                  for (size_t batch = 0; batch < batch_size; batch++) {
                     size_t offset = direction * 3 * batch_size * fAttrHiddenSize
                        + gate * batch_size * fAttrHiddenSize + batch * fAttrHiddenSize;
                     std::copy(original_p + p_offset, original_p + p_offset + fAttrHiddenSize,
                               new_p + offset);
                  }
               }
            }
            std::vector<size_t> new_p_shape = {num_directions, 3, batch_size, fAttrHiddenSize};
            std::shared_ptr<void> new_p_ptr(new_p, std::default_delete<float[]>());
            model.UpdateInitializedTensor(fNP, model.GetTensorType(fNP), new_p_shape, new_p_ptr);
            fShapeP = model.GetTensorShape(fNP);
         }
      }
   }
   if (!fNY.empty()) {
      fShapeY = ShapeInference({fShapeX, fShapeW})[0];
      if (!model.CheckIfTensorAlreadyExist(fNY)) {
         model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShapeY);
      }
   }
   if (!fNY_h.empty()) {
      fShapeY_h = ShapeInference({fShapeX, fShapeW})[1];
      if (!model.CheckIfTensorAlreadyExist(fNY_h)) {
         model.AddIntermediateTensor(fNY_h, model.GetTensorType(fNX), fShapeY_h);
      }
   }
   if (!fNY_c.empty()) {
      fShapeY_c = ShapeInference({fShapeX, fShapeW})[2];
      if (!model.CheckIfTensorAlreadyExist(fNY_c)) {
         model.AddIntermediateTensor(fNY_c, model.GetTensorType(fNX), fShapeY_c);
      }
   }
   // Check the attributes
   for (auto &activation : fAttrActivations) {
      if (activation != "Relu" && activation != "Tanh" &&
          activation != "Sigmoid" && activation != "Affine" &&
          activation != "LeakyRelu" && activation != "ThresholdRelu" &&
          activation != "ScaledTanh" && activation != "HardSigmoid" &&
          activation != "Elu" && activation != "Softsign" &&
          activation != "Softplus") {
         throw std::runtime_error("TMVA SOFIE - Activation function " +
                                  activation + " not implemented");
      }
   }
   if (fAttrDirection != "forward" && fAttrDirection != "backward" &&
       fAttrDirection != "bidirectional") {
      throw std::runtime_error(
          "TMVA SOFIE - Invalid LSTM direction fAttrDirection = " +
          fAttrDirection);
   }
   if (4 * fAttrHiddenSize != fShapeW[1]) {
      throw std::runtime_error(
          "TMVA SOFIE - fAttrHiddenSize must be equal to " +
          std::to_string(fShapeW[1] / 4));
   }
   if (fAttrInputForget > 1) {
      throw std::runtime_error(
          "TMVA SOFIE - fAttrInputForget = " + std::to_string(fAttrInputForget)
          + " must be 0 or 1.");
   }
   if (fAttrLayout > 1) {
      throw std::runtime_error("TMVA SOFIE - Layout fAttrLayout = " +
                               std::to_string(fAttrLayout) +
                               " must be 0 (timewise) or 1 (batchwise)");
   }
   if (fAttrActivations.empty()) {
      if (fAttrDirection == "bidirectional") {
         fAttrActivations = {"Sigmoid", "Tanh", "Tanh", "Sigmoid", "Tanh", "Tanh"};
      } else {
         fAttrActivations = {"Sigmoid", "Tanh", "Tanh"};
      }
   }
}

// generate code for Session data members (e.g. internal vectors)
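// When a Session is used, the scratch buffers of the generated code are kept
// as std::vector data members of the Session class instead of stack arrays,
// avoiding large stack allocations for long sequences or big batches.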
template <typename T>
std::string ROperator_LSTM<T>::GenerateSessionMembersCode(std::string opName)
{
   opName = "op_" + opName;
   std::stringstream out;

   size_t num_directions = fShapeW[0];
   size_t seq_length = (fAttrLayout == 0) ? fShapeX[0] : fShapeX[1];
   size_t batch_size = (fAttrLayout == 0) ? fShapeX[1] : fShapeX[0];
   size_t input_size = fShapeX[2];

   if (fAttrLayout != 0) {
      out << "std::vector<" << fType << "> fVec_" << opName << "_input = std::vector<" << fType << ">("
          << seq_length * batch_size * input_size << ");\n";
      out << "std::vector<" << fType << "> fVec_" << opName << "_initial_hidden_state = std::vector<" << fType << ">("
          << num_directions * batch_size * fAttrHiddenSize << ");\n";
      out << "std::vector<" << fType << "> fVec_" << opName << "_initial_cell_state = std::vector<" << fType << ">("
          << num_directions * batch_size * fAttrHiddenSize << ");\n";
   }
   // Set the feedforward
   size_t ff_size = seq_length * batch_size * fAttrHiddenSize;
   out << "std::vector<" << fType << "> fVec_" << opName << "_ff_input_gate = std::vector<" << fType << ">(" << ff_size << ");\n";
   out << "std::vector<" << fType << "> fVec_" << opName << "_ff_output_gate = std::vector<" << fType << ">(" << ff_size << ");\n";
   out << "std::vector<" << fType << "> fVec_" << opName << "_ff_cell_gate = std::vector<" << fType << ">(" << ff_size << ");\n";
   if (fAttrInputForget == 0)
      out << "std::vector<" << fType << "> fVec_" << opName << "_ff_forget_gate = std::vector<" << fType << ">(" << ff_size << ");\n";
   // gate results
   size_t hs_size = seq_length * num_directions * batch_size * fAttrHiddenSize;
   out << "std::vector<" << fType << "> fVec_" << opName << "_input_gate = std::vector<" << fType << ">(" << hs_size << ");\n";
   out << "std::vector<" << fType << "> fVec_" << opName << "_output_gate = std::vector<" << fType << ">(" << hs_size << ");\n";
   out << "std::vector<" << fType << "> fVec_" << opName << "_cell_gate = std::vector<" << fType << ">(" << hs_size << ");\n";
   if (fAttrInputForget == 0)
      out << "std::vector<" << fType << "> fVec_" << opName << "_forget_gate = std::vector<" << fType << ">(" << hs_size << ");\n";
   // cell state
   out << "std::vector<" << fType << "> fVec_" << opName << "_cell_state = std::vector<" << fType << ">(" << hs_size << ");\n";
   out << "std::vector<" << fType << "> fVec_" << opName << "_new_cell_state = std::vector<" << fType << ">(" << hs_size << ");\n";
   // hidden state
   if (fAttrLayout != 0 || fNY.empty()) {
      out << "std::vector<" << fType << "> fVec_" << opName << "_hidden_state = std::vector<" << fType << ">(" << hs_size << ");\n";
   }

   out << "\n";

   return out.str();
}
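// Generate emits the unrolled inference code for the LSTM. Inside W, R and B
// the gates are stored in the ONNX order [input, output, forget, cell], which
// is why the offsets below are multiples of hidden_size * input_size (for W)
// and of hidden_size * hidden_size (for R). The generated code evaluates the
// ONNX LSTM equations ("o" denotes the element-wise product):
//   it = f(Xt . Wi^T + Ht-1 . Ri^T + Pi o Ct-1 + Wbi + Rbi)
//   ft = f(Xt . Wf^T + Ht-1 . Rf^T + Pf o Ct-1 + Wbf + Rbf)
//   ct = g(Xt . Wc^T + Ht-1 . Rc^T + Wbc + Rbc)
//   Ct = ft o Ct-1 + it o ct
//   ot = f(Xt . Wo^T + Ht-1 . Ro^T + Po o Ct + Wbo + Rbo)
//   Ht = ot o h(Ct)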
template<typename T>
auto ROperator_LSTM<T>::Generate(std::string OpName)
-> std::string {
   OpName = "op_" + OpName;
   std::stringstream out;

   size_t seq_length = (fAttrLayout == 0) ? fShapeX[0] : fShapeX[1];
   size_t batch_size = (fAttrLayout == 0) ? fShapeX[1] : fShapeX[0];
   size_t input_size = fShapeX[2];
   size_t num_directions = fShapeW[0];

   // set the input
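   // With layout 1 the input X is batch-major, {batch_size, seq_length,
   // input_size}; the generated loop below transposes it into a
   // sequence-major scratch buffer so the remaining code can assume layout 0.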
   if (fAttrLayout == 0) {
      out << SP << fType << " *" << OpName << "_input = tensor_" << fNX << ";\n";
   } else {
      if (fUseSession)
         out << SP << fType << " * " << OpName << "_input = fVec_" << OpName << "_input.data();\n";
      else
         out << SP << fType << " " << OpName << "_input[" << seq_length * batch_size * input_size << "] = {0};\n";

      out << SP << "for(size_t seq = 0; seq < " << seq_length << "; seq++) {\n";
      out << SP << SP << "for(size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
      out << SP << SP << SP << "for(size_t i = 0; i < " << input_size << "; i++) {\n";
      out << SP << SP << SP << SP << OpName << "_input[seq * " << batch_size * input_size
          << " + batch * " << input_size << " + i] = " << "tensor_" << fNX << "[batch * "
          << seq_length * input_size << " + seq * " << input_size << " + i];\n";
      out << SP << SP << SP << "}\n";
      out << SP << SP << "}\n";
      out << SP << "}\n";
   }

   // Set the initial hidden state
   if (!fNInitial_h.empty()) {
      if (fAttrLayout == 0) {
         out << SP << fType << " *" << OpName << "_initial_hidden_state = tensor_"
             << fNInitial_h << ";\n";
      } else {
         if (fUseSession)
            out << SP << fType << " * " << OpName << "_initial_hidden_state = fVec_" << OpName
                << "_initial_hidden_state.data();\n";
         else
            out << SP << fType << " " << OpName << "_initial_hidden_state[" << num_directions * batch_size *
                fAttrHiddenSize << "] = {0};\n";

         for (size_t direction = 0; direction < num_directions; direction++) {
            out << SP << "for(size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
            out << SP << SP << "for(size_t h = 0; h < " << fAttrHiddenSize << "; h++) {\n";
            out << SP << SP << SP << OpName << "_initial_hidden_state["
                << direction * batch_size * fAttrHiddenSize << " + batch * " << fAttrHiddenSize
                << " + h] = tensor_" << fNInitial_h << "[batch * " << num_directions * fAttrHiddenSize
                << " + " << direction * fAttrHiddenSize << " + h];\n";
            out << SP << SP << "}\n";
            out << SP << "}\n";
         }
      }
   }

   // Set the initial cell state
   if (!fNInitial_c.empty()) {
      if (fAttrLayout == 0) {
         out << SP << fType << " *" << OpName << "_initial_cell_state = tensor_"
             << fNInitial_c << ";\n";
      } else {
         if (fUseSession)
            out << SP << fType << " * " << OpName << "_initial_cell_state = fVec_" << OpName
                << "_initial_cell_state.data();\n";
         else
            out << SP << fType << " " << OpName << "_initial_cell_state[" << num_directions * batch_size *
                fAttrHiddenSize << "] = {0};\n";

         for (size_t direction = 0; direction < num_directions; direction++) {
            out << SP << "for(size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
            out << SP << SP << "for(size_t h = 0; h < " << fAttrHiddenSize << "; h++) {\n";
            out << SP << SP << SP << OpName << "_initial_cell_state["
                << direction * batch_size * fAttrHiddenSize << " + batch * " << fAttrHiddenSize
                << " + h] = tensor_" << fNInitial_c << "[batch * " << num_directions * fAttrHiddenSize
                << " + " << direction * fAttrHiddenSize << " + h];\n";
            out << SP << SP << "}\n";
            out << SP << "}\n";
         }
      }
   }

   // Set the feedforward
   size_t ff_size = seq_length * batch_size * fAttrHiddenSize;
   if (fUseSession) {
      out << SP << fType << " * " << OpName << "_ff_input_gate = fVec_" << OpName << "_ff_input_gate.data();\n";
      out << SP << fType << " * " << OpName << "_ff_output_gate = fVec_" << OpName << "_ff_output_gate.data();\n";
      out << SP << fType << " * " << OpName << "_ff_cell_gate = fVec_" << OpName << "_ff_cell_gate.data();\n";
      if (fAttrInputForget == 0) {
         out << SP << fType << " * " << OpName << "_ff_forget_gate = fVec_" << OpName << "_ff_forget_gate.data();\n";
      }
   } else {
      out << SP << fType << " " << OpName << "_ff_input_gate[" << ff_size << "] = {0};\n";
      out << SP << fType << " " << OpName << "_ff_output_gate[" << ff_size << "] = {0};\n";
      out << SP << fType << " " << OpName << "_ff_cell_gate[" << ff_size << "] = {0};\n";
      if (fAttrInputForget == 0) {
         out << SP << fType << " " << OpName << "_ff_forget_gate[" << ff_size << "] = {0};\n";
      }
   }
   // Set the gates
   size_t hidden_state_size = seq_length * num_directions * batch_size * fAttrHiddenSize;
   if (fUseSession) {
      out << SP << fType << " * " << OpName << "_input_gate = fVec_" << OpName << "_input_gate.data();\n";
      out << SP << fType << " * " << OpName << "_output_gate = fVec_" << OpName << "_output_gate.data();\n";
      out << SP << fType << " * " << OpName << "_cell_gate = fVec_" << OpName << "_cell_gate.data();\n";
      if (fAttrInputForget == 0) {
         out << SP << fType << " * " << OpName << "_forget_gate = fVec_" << OpName << "_forget_gate.data();\n";
      }
   } else {
      out << SP << fType << " " << OpName << "_input_gate[" << hidden_state_size << "] = {0};\n";
      out << SP << fType << " " << OpName << "_output_gate[" << hidden_state_size << "] = {0};\n";
      out << SP << fType << " " << OpName << "_cell_gate[" << hidden_state_size << "] = {0};\n";
      if (fAttrInputForget == 0) {
         out << SP << fType << " " << OpName << "_forget_gate[" << hidden_state_size << "] = {0};\n";
      }
   }
   // Set the cell state and the new cell state = h(cell state)
   if (fUseSession) {
      out << SP << fType << " * " << OpName << "_cell_state = fVec_" << OpName << "_cell_state.data();\n";
      out << SP << fType << " * " << OpName << "_new_cell_state = fVec_" << OpName << "_new_cell_state.data();\n";
   } else {
      out << SP << fType << " " << OpName << "_cell_state[" << hidden_state_size << "] = {0};\n";
      out << SP << fType << " " << OpName << "_new_cell_state[" << hidden_state_size << "] = {0};\n";
   }

   // Set the hidden state
   if (fAttrLayout == 0 && !fNY.empty()) {
      out << SP << fType << " *" << OpName << "_hidden_state = tensor_" << fNY << ";\n";
   } else {
      if (fUseSession) {
         out << SP << fType << " * " << OpName << "_hidden_state = fVec_" << OpName << "_hidden_state.data();\n";
      } else {
         out << SP << fType << " " << OpName << "_hidden_state[" << hidden_state_size << "] = {0};\n";
      }
   }

   out << SP << "char " << OpName << "_transA = 'N';\n";
   out << SP << "char " << OpName << "_transB = 'T';\n";
   out << SP << "int " << OpName << "_m = " << seq_length * batch_size << ";\n";
   out << SP << "int " << OpName << "_n = " << fAttrHiddenSize << ";\n";
   out << SP << "int " << OpName << "_k = " << input_size << ";\n";
   if (fType == "float") {
      out << SP << fType << " " << OpName << "_alpha = 1.;\n";
      out << SP << fType << " " << OpName << "_beta = 0.;\n";
   }
   if (!fNB.empty()) {
      out << SP << "int " << OpName << "_bias_size = " << seq_length * batch_size * fAttrHiddenSize << ";\n";
      out << SP << "int " << OpName << "_incx = 1;\n";
      out << SP << "int " << OpName << "_incy = 1;\n";
   }

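   // Note on the BLAS calls: sgemm_ is Fortran and therefore column-major, so
   // the row-major product input(m x k) * W(n x k)^T is obtained by passing
   // the operands in swapped order with swapped transpose flags; the result
   // lands in the gate buffer directly in row-major (m x n) layout.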
   for (size_t direction = 0; direction < num_directions; direction++) {
      if (direction == 0) {
         if (fType == "float") {
            // input_gate = input * weight_i^T
            out << SP << "BLAS::sgemm_(&" << OpName << "_transB, &" << OpName << "_transA, &"
                << OpName << "_n, &" << OpName << "_m, &" << OpName << "_k, &" << OpName << "_alpha, tensor_"
                << fNW << ", &" << OpName << "_k, " << OpName << "_input, &" << OpName << "_k, &"
                << OpName << "_beta, " << OpName << "_ff_input_gate, &" << OpName << "_n);\n";
            // output_gate = input * weight_o^T
            size_t wo_offset = fAttrHiddenSize * input_size;
            out << SP << "BLAS::sgemm_(&" << OpName << "_transB, &" << OpName << "_transA, &"
                << OpName << "_n, &" << OpName << "_m, &" << OpName << "_k, &" << OpName << "_alpha, tensor_"
                << fNW << " + " << wo_offset << ", &" << OpName << "_k, " << OpName << "_input, &"
                << OpName << "_k, &" << OpName << "_beta, " << OpName << "_ff_output_gate, &" << OpName << "_n);\n";
            // cell_gate = input * weight_c^T
            size_t wc_offset = 3 * fAttrHiddenSize * input_size;
            out << SP << "BLAS::sgemm_(&" << OpName << "_transB, &" << OpName << "_transA, &"
                << OpName << "_n, &" << OpName << "_m, &" << OpName << "_k, &" << OpName << "_alpha, tensor_"
                << fNW << " + " << wc_offset << ", &" << OpName << "_k, " << OpName << "_input, &"
                << OpName << "_k, &" << OpName << "_beta, " << OpName << "_ff_cell_gate, &" << OpName << "_n);\n";
         }
      } else {
         if (fType == "float") {
            // input_gate = input * weight_i^T
            size_t wi_offset = 4 * fAttrHiddenSize * input_size;
            out << SP << "BLAS::sgemm_(&" << OpName << "_transB, &" << OpName << "_transA, &"
                << OpName << "_n, &" << OpName << "_m, &" << OpName << "_k, &" << OpName << "_alpha, tensor_"
                << fNW << " + " << wi_offset << ", &" << OpName << "_k, " << OpName << "_input, &"
                << OpName << "_k, &" << OpName << "_beta, " << OpName << "_ff_input_gate, &" << OpName << "_n);\n";
            // output_gate = input * weight_o^T
            size_t wo_offset = 4 * fAttrHiddenSize * input_size + 1 * fAttrHiddenSize * input_size;
            out << SP << "BLAS::sgemm_(&" << OpName << "_transB, &" << OpName << "_transA, &"
                << OpName << "_n, &" << OpName << "_m, &" << OpName << "_k, &" << OpName << "_alpha, tensor_"
                << fNW << " + " << wo_offset << ", &" << OpName << "_k, " << OpName << "_input, &"
                << OpName << "_k, &" << OpName << "_beta, " << OpName << "_ff_output_gate, &" << OpName << "_n);\n";
            // cell_gate = input * weight_c^T
            size_t wc_offset = 4 * fAttrHiddenSize * input_size + 3 * fAttrHiddenSize * input_size;
            out << SP << "BLAS::sgemm_(&" << OpName << "_transB, &" << OpName << "_transA, &"
                << OpName << "_n, &" << OpName << "_m, &" << OpName << "_k, &" << OpName << "_alpha, tensor_"
                << fNW << " + " << wc_offset << ", &" << OpName << "_k, " << OpName << "_input, &"
                << OpName << "_k, &" << OpName << "_beta, " << OpName << "_ff_cell_gate, &" << OpName << "_n);\n";
         }
      }
      if (fAttrInputForget == 0) {
         // forget_gate = input * weight_f^T
         if (direction == 0) {
            if (fType == "float") {
               size_t wf_offset = 2 * fAttrHiddenSize * input_size;
               out << SP << "BLAS::sgemm_(&" << OpName << "_transB, &" << OpName << "_transA, &"
                   << OpName << "_n, &" << OpName << "_m, &" << OpName << "_k, &" << OpName << "_alpha, tensor_"
                   << fNW << " + " << wf_offset << ", &" << OpName << "_k, " << OpName << "_input, &"
                   << OpName << "_k, &" << OpName << "_beta, " << OpName << "_ff_forget_gate, &" << OpName << "_n);\n";
            }
         } else {
            if (fType == "float") {
               size_t wf_offset = 4 * fAttrHiddenSize * input_size + 2 * fAttrHiddenSize * input_size;
               out << SP << "BLAS::sgemm_(&" << OpName << "_transB, &" << OpName << "_transA, &"
                   << OpName << "_n, &" << OpName << "_m, &" << OpName << "_k, &" << OpName << "_alpha, tensor_"
                   << fNW << " + " << wf_offset << ", &" << OpName << "_k, " << OpName << "_input, &"
                   << OpName << "_k, &" << OpName << "_beta, " << OpName << "_ff_forget_gate, &" << OpName << "_n);\n";
            }
         }
      }

      // Add the bias
      if (!fNB.empty()) {
         if (direction == 0) {
            if (fType == "float") {
               // ff_input_gate += bias_i
               out << SP << "BLAS::saxpy_(&" << OpName << "_bias_size, &" << OpName << "_alpha, tensor_"
                   << fNB << ", &" << OpName << "_incx, " << OpName << "_ff_input_gate, &" << OpName << "_incy);\n";
               // ff_output_gate += bias_o
               size_t bo_offset = seq_length * batch_size * fAttrHiddenSize;
               out << SP << "BLAS::saxpy_(&" << OpName << "_bias_size, &" << OpName << "_alpha, tensor_"
                   << fNB << " + " << bo_offset << ", &" << OpName << "_incx, " << OpName << "_ff_output_gate, &"
                   << OpName << "_incy);\n";
               // ff_cell_gate += bias_c
               size_t bc_offset = 3 * seq_length * batch_size * fAttrHiddenSize;
               out << SP << "BLAS::saxpy_(&" << OpName << "_bias_size, &" << OpName << "_alpha, tensor_"
                   << fNB << " + " << bc_offset << ", &" << OpName << "_incx, " << OpName << "_ff_cell_gate, &"
                   << OpName << "_incy);\n";
            }
         } else {
            if (fType == "float") {
               // ff_input_gate += bias_i
               size_t bi_offset = 4 * seq_length * batch_size * fAttrHiddenSize;
               out << SP << "BLAS::saxpy_(&" << OpName << "_bias_size, &" << OpName << "_alpha, tensor_"
                   << fNB << " + " << bi_offset << ", &" << OpName << "_incx, " << OpName << "_ff_input_gate, &"
                   << OpName << "_incy);\n";
               // ff_output_gate += bias_o
               size_t bo_offset = 4 * seq_length * batch_size * fAttrHiddenSize
                  + seq_length * batch_size * fAttrHiddenSize;
               out << SP << "BLAS::saxpy_(&" << OpName << "_bias_size, &" << OpName << "_alpha, tensor_"
                   << fNB << " + " << bo_offset << ", &" << OpName << "_incx, " << OpName << "_ff_output_gate, &"
                   << OpName << "_incy);\n";
               // ff_cell_gate += bias_c
               size_t bc_offset = 4 * seq_length * batch_size * fAttrHiddenSize
                  + 3 * seq_length * batch_size * fAttrHiddenSize;
               out << SP << "BLAS::saxpy_(&" << OpName << "_bias_size, &" << OpName << "_alpha, tensor_"
                   << fNB << " + " << bc_offset << ", &" << OpName << "_incx, " << OpName << "_ff_cell_gate, &"
                   << OpName << "_incy);\n";
            }
         }
         if (fAttrInputForget == 0) {
            // ff_forget_gate += bias_f
            if (direction == 0) {
               if (fType == "float") {
                  size_t bf_offset = 2 * seq_length * batch_size * fAttrHiddenSize;
                  out << SP << "BLAS::saxpy_(&" << OpName << "_bias_size, &" << OpName << "_alpha, tensor_"
                      << fNB << " + " << bf_offset << ", &" << OpName << "_incx, " << OpName << "_ff_forget_gate, &"
                      << OpName << "_incy);\n";
               }
            } else {
               if (fType == "float") {
                  size_t bf_offset = 4 * seq_length * batch_size * fAttrHiddenSize
                     + 2 * seq_length * batch_size * fAttrHiddenSize;
                  out << SP << "BLAS::saxpy_(&" << OpName << "_bias_size, &" << OpName << "_alpha, tensor_"
                      << fNB << " + " << bf_offset << ", &" << OpName << "_incx, " << OpName << "_ff_forget_gate, &"
                      << OpName << "_incy);\n";
               }
            }
         }
      }

      // Copy ff_input_gate, ff_output_gate, ff_cell_gate and ff_forget_gate into input_gate, output_gate,
      // cell_gate and forget_gate
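      // The ff_* buffers are per direction, with layout {seq_length,
      // batch_size, hidden_size}, while the gate buffers are laid out as
      // {seq_length, num_directions, batch_size, hidden_size}; hence the
      // strided copy below.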
      out << SP << "for (size_t seq = 0; seq < " << seq_length << "; seq++) {\n";
      out << SP << SP << "size_t ff_offset = seq * " << batch_size * fAttrHiddenSize << ";\n";
      if (direction == 0) {
         out << SP << SP << "size_t gate_offset = seq * " << num_directions * batch_size * fAttrHiddenSize
             << ";\n";
      } else {
         out << SP << SP << "size_t gate_offset = seq * " << num_directions * batch_size * fAttrHiddenSize
             << " + " << batch_size * fAttrHiddenSize << ";\n";
      }
      size_t ff_seq_size = batch_size * fAttrHiddenSize;
      out << SP << SP << "std::copy(" << OpName << "_ff_input_gate + ff_offset, " << OpName
          << "_ff_input_gate + ff_offset + " << ff_seq_size << ", " << OpName << "_input_gate + gate_offset);\n";
      out << SP << SP << "std::copy(" << OpName << "_ff_output_gate + ff_offset, " << OpName
          << "_ff_output_gate + ff_offset + " << ff_seq_size << ", " << OpName << "_output_gate + gate_offset);\n";
      out << SP << SP << "std::copy(" << OpName << "_ff_cell_gate + ff_offset, " << OpName
          << "_ff_cell_gate + ff_offset + " << ff_seq_size << ", " << OpName << "_cell_gate + gate_offset);\n";
      if (fAttrInputForget == 0) {
         out << SP << SP << "std::copy(" << OpName << "_ff_forget_gate + ff_offset, " << OpName
             << "_ff_forget_gate + ff_offset + " << ff_seq_size << ", " << OpName << "_forget_gate + gate_offset);\n";
      }
      out << SP << "}\n";

      out << SP << "for (size_t seq = 0; seq < " << seq_length << "; seq++) {\n";
      if (fAttrDirection == "backward" || direction == 1) {
         out << SP << SP << "size_t index = " << seq_length - 1 << " - seq;\n";
      } else {
         out << SP << SP << "size_t index = seq;\n";
      }
      out << SP << SP << "int m2 = " << batch_size << ";\n";
      if (direction == 0) {
         out << SP << SP << "size_t offset = index * " << num_directions * batch_size * fAttrHiddenSize
             << ";\n";
      } else {
         out << SP << SP << "size_t offset = index * " << num_directions * batch_size * fAttrHiddenSize
             << " + " << batch_size * fAttrHiddenSize << ";\n";
      }
      size_t size = batch_size * fAttrHiddenSize;
      // gate = gate + initial_hidden_state * Recurrence^T
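      // The recurrent term is accumulated on top of the feedforward and bias
      // contributions already stored in the gate buffers: the sgemm calls
      // below pass alpha (1.0) in place of beta, so the product is added to
      // the existing values instead of overwriting them.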
      out << SP << SP << "if (seq == 0) {\n";
      if (!fNInitial_h.empty()) {
         if (direction == 0) {
            if (fType == "float") {
               out << SP << SP << SP << "BLAS::sgemm_(&" << OpName << "_transB, &" << OpName << "_transA, &"
                   << OpName << "_n, &m2, &" << OpName << "_n, &" << OpName << "_alpha, tensor_" << fNR << ", &"
                   << OpName << "_n, " << OpName << "_initial_hidden_state, &" << OpName << "_n, &" << OpName
                   << "_alpha, " << OpName << "_input_gate + offset, &" << OpName << "_n);\n";
               size_t ro_offset = fAttrHiddenSize * fAttrHiddenSize;
               out << SP << SP << SP << "BLAS::sgemm_(&" << OpName << "_transB, &" << OpName << "_transA, &"
                   << OpName << "_n, &m2, &" << OpName << "_n, &" << OpName << "_alpha, tensor_" << fNR << " + "
                   << ro_offset << ", &" << OpName << "_n, " << OpName << "_initial_hidden_state, &" << OpName
                   << "_n, &" << OpName << "_alpha, " << OpName << "_output_gate + offset, &" << OpName << "_n);\n";
               size_t rc_offset = 3 * fAttrHiddenSize * fAttrHiddenSize;
               out << SP << SP << SP << "BLAS::sgemm_(&" << OpName << "_transB, &" << OpName << "_transA, &"
                   << OpName << "_n, &m2, &" << OpName << "_n, &" << OpName << "_alpha, tensor_" << fNR << " + "
                   << rc_offset << ", &" << OpName << "_n, " << OpName << "_initial_hidden_state, &" << OpName
                   << "_n, &" << OpName << "_alpha, " << OpName << "_cell_gate + offset, &" << OpName << "_n);\n";
               if (fAttrInputForget == 0) {
                  size_t rf_offset = 2 * fAttrHiddenSize * fAttrHiddenSize;
                  out << SP << SP << SP << "BLAS::sgemm_(&" << OpName << "_transB, &" << OpName << "_transA, &"
                      << OpName << "_n, &m2, &" << OpName << "_n, &" << OpName << "_alpha, tensor_" << fNR << " + "
                      << rf_offset << ", &" << OpName << "_n, " << OpName << "_initial_hidden_state, &" << OpName
                      << "_n, &" << OpName << "_alpha, " << OpName << "_forget_gate + offset, &" << OpName << "_n);\n";
               }
            }
         } else { // direction=1
            if (fType == "float") {
               size_t ri_offset = 4 * fAttrHiddenSize * fAttrHiddenSize;
               out << SP << SP << SP << "BLAS::sgemm_(&" << OpName << "_transB, &" << OpName << "_transA, &"
                   << OpName << "_n, &m2, &" << OpName << "_n, &" << OpName << "_alpha, tensor_" << fNR << " + "
                   << ri_offset << ", &" << OpName << "_n, " << OpName << "_initial_hidden_state, &" << OpName
                   << "_n, &" << OpName << "_alpha, " << OpName << "_input_gate + offset, &" << OpName << "_n);\n";
               size_t ro_offset = 4 * fAttrHiddenSize * fAttrHiddenSize + 1 * fAttrHiddenSize * fAttrHiddenSize;
               out << SP << SP << SP << "BLAS::sgemm_(&" << OpName << "_transB, &" << OpName << "_transA, &"
                   << OpName << "_n, &m2, &" << OpName << "_n, &" << OpName << "_alpha, tensor_" << fNR << " + "
                   << ro_offset << ", &" << OpName << "_n, " << OpName << "_initial_hidden_state, &" << OpName
                   << "_n, &" << OpName << "_alpha, " << OpName << "_output_gate + offset, &" << OpName << "_n);\n";
               size_t rc_offset = 4 * fAttrHiddenSize * fAttrHiddenSize + 3 * fAttrHiddenSize * fAttrHiddenSize;
               out << SP << SP << SP << "BLAS::sgemm_(&" << OpName << "_transB, &" << OpName << "_transA, &"
                   << OpName << "_n, &m2, &" << OpName << "_n, &" << OpName << "_alpha, tensor_" << fNR << " + "
                   << rc_offset << ", &" << OpName << "_n, " << OpName << "_initial_hidden_state, &" << OpName
                   << "_n, &" << OpName << "_alpha, " << OpName << "_cell_gate + offset, &" << OpName << "_n);\n";
               if (fAttrInputForget == 0) {
                  size_t rf_offset = 4 * fAttrHiddenSize * fAttrHiddenSize + 2 * fAttrHiddenSize * fAttrHiddenSize;
                  out << SP << SP << SP << "BLAS::sgemm_(&" << OpName << "_transB, &" << OpName << "_transA, &"
                      << OpName << "_n, &m2, &" << OpName << "_n, &" << OpName << "_alpha, tensor_" << fNR << " + "
                      << rf_offset << ", &" << OpName << "_n, " << OpName << "_initial_hidden_state, &" << OpName
                      << "_n, &" << OpName << "_alpha, " << OpName << "_forget_gate + offset, &" << OpName << "_n);\n";
               }
            }
         }
      }
      out << SP << SP << "} else {\n";
      // gate = gate + previous_hidden_state * Recurrence^T
      if (direction == 0) {
         if (fAttrDirection == "backward") {
            out << SP << SP << SP << "size_t previous_offset = (index + 1) * "
                << num_directions * batch_size * fAttrHiddenSize << ";\n";
         } else {
            out << SP << SP << SP << "size_t previous_offset = (seq - 1) * "
                << num_directions * batch_size * fAttrHiddenSize << ";\n";
         }
         if (fType == "float") {
            out << SP << SP << SP << "BLAS::sgemm_(&" << OpName << "_transB, &" << OpName << "_transA, &"
                << OpName << "_n, &m2, &" << OpName << "_n, &" << OpName << "_alpha, tensor_" << fNR << ", &"
                << OpName << "_n, " << OpName << "_hidden_state + previous_offset, &" << OpName << "_n, &"
                << OpName << "_alpha, " << OpName << "_input_gate + offset, &" << OpName << "_n);\n";
            size_t ro_offset = 1 * fAttrHiddenSize * fAttrHiddenSize;
            out << SP << SP << SP << "BLAS::sgemm_(&" << OpName << "_transB, &" << OpName << "_transA, &"
                << OpName << "_n, &m2, &" << OpName << "_n, &" << OpName << "_alpha, tensor_" << fNR << " + "
                << ro_offset << ", &" << OpName << "_n, " << OpName << "_hidden_state + previous_offset, &"
                << OpName << "_n, &" << OpName << "_alpha, " << OpName << "_output_gate + offset, &"
                << OpName << "_n);\n";
            size_t rc_offset = 3 * fAttrHiddenSize * fAttrHiddenSize;
            out << SP << SP << SP << "BLAS::sgemm_(&" << OpName << "_transB, &" << OpName << "_transA, &"
                << OpName << "_n, &m2, &" << OpName << "_n, &" << OpName << "_alpha, tensor_" << fNR << " + "
                << rc_offset << ", &" << OpName << "_n, " << OpName << "_hidden_state + previous_offset, &"
                << OpName << "_n, &" << OpName << "_alpha, " << OpName << "_cell_gate + offset, &"
                << OpName << "_n);\n";
            if (fAttrInputForget == 0) {
               size_t rf_offset = 2 * fAttrHiddenSize * fAttrHiddenSize;
               out << SP << SP << SP << "BLAS::sgemm_(&" << OpName << "_transB, &" << OpName << "_transA, &"
                   << OpName << "_n, &m2, &" << OpName << "_n, &" << OpName << "_alpha, tensor_" << fNR << " + "
                   << rf_offset << ", &" << OpName << "_n, " << OpName << "_hidden_state + previous_offset, &"
                   << OpName << "_n, &" << OpName << "_alpha, " << OpName << "_forget_gate + offset, &"
                   << OpName << "_n);\n";
            }
         }
      } else {
         out << SP << SP << SP << "size_t previous_offset = (index + 1) * "
             << num_directions * batch_size * fAttrHiddenSize << " + " << batch_size * fAttrHiddenSize << ";\n";
         if (fType == "float") {
            size_t ri_offset = 4 * fAttrHiddenSize * fAttrHiddenSize;
            out << SP << SP << SP << "BLAS::sgemm_(&" << OpName << "_transB, &" << OpName << "_transA, &"
                << OpName << "_n, &m2, &" << OpName << "_n, &" << OpName << "_alpha, tensor_" << fNR << " + "
                << ri_offset << ", &" << OpName << "_n, " << OpName << "_hidden_state + previous_offset, &"
                << OpName << "_n, &" << OpName << "_alpha, " << OpName << "_input_gate + offset, &"
                << OpName << "_n);\n";
            size_t ro_offset = 4 * fAttrHiddenSize * fAttrHiddenSize + fAttrHiddenSize * fAttrHiddenSize;
            out << SP << SP << SP << "BLAS::sgemm_(&" << OpName << "_transB, &" << OpName << "_transA, &"
                << OpName << "_n, &m2, &" << OpName << "_n, &" << OpName << "_alpha, tensor_" << fNR << " + "
                << ro_offset << ", &" << OpName << "_n, " << OpName << "_hidden_state + previous_offset, &"
                << OpName << "_n, &" << OpName << "_alpha, " << OpName << "_output_gate + offset, &"
                << OpName << "_n);\n";
            size_t rc_offset = 4 * fAttrHiddenSize * fAttrHiddenSize + 3 * fAttrHiddenSize * fAttrHiddenSize;
            out << SP << SP << SP << "BLAS::sgemm_(&" << OpName << "_transB, &" << OpName << "_transA, &"
                << OpName << "_n, &m2, &" << OpName << "_n, &" << OpName << "_alpha, tensor_" << fNR << " + "
                << rc_offset << ", &" << OpName << "_n, " << OpName << "_hidden_state + previous_offset, &"
                << OpName << "_n, &" << OpName << "_alpha, " << OpName << "_cell_gate + offset, &"
                << OpName << "_n);\n";
            if (fAttrInputForget == 0) {
               size_t rf_offset = 4 * fAttrHiddenSize * fAttrHiddenSize + 2 * fAttrHiddenSize * fAttrHiddenSize;
               out << SP << SP << SP << "BLAS::sgemm_(&" << OpName << "_transB, &" << OpName << "_transA, &"
                   << OpName << "_n, &m2, &" << OpName << "_n, &" << OpName << "_alpha, tensor_" << fNR << " + "
                   << rf_offset << ", &" << OpName << "_n, " << OpName << "_hidden_state + previous_offset, &"
                   << OpName << "_n, &" << OpName << "_alpha, " << OpName << "_forget_gate + offset, &"
                   << OpName << "_n);\n";
            }
         }
      }
      out << SP << SP << "}\n";

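      // fAttrActivations holds one {f, g, h} triple per direction, as in the
      // ONNX specification: index direction * 3 selects f (applied to the
      // input, output and forget gates) and direction * 3 + 1 selects g
      // (applied to the cell gate).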
      // Clip the elements of the cell gate into the range [-fAttrClip, fAttrClip]
      if (fAttrClip > .0) {
         out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
         if (fType == "float") {
            out << SP << SP << SP << "float x = (" << OpName << "_cell_gate[i] > " << -fAttrClip << ") ? "
                << OpName << "_cell_gate[i] : " << -fAttrClip << ";\n";
         }
         out << SP << SP << SP << OpName << "_cell_gate[i] = (x < " << fAttrClip << ") ? x : "
             << fAttrClip << ";\n";
         out << SP << SP << "}\n";
      }
      // Apply the activation function to the cell gate, cell_gate = g(cell_gate)
      if (fAttrActivations[direction * 3 + 1] == "Relu") {
         out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
         out << SP << SP << SP << "if (" << OpName << "_cell_gate[i] < 0.)\n";
         out << SP << SP << SP << SP << OpName << "_cell_gate[i] = 0.;\n";
         out << SP << SP << "}\n";
      } else if (fAttrActivations[direction * 3 + 1] == "Tanh") {
         out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
         if (fType == "float") {
            out << SP << SP << SP << "float ex = exp(-2 * " << OpName << "_cell_gate[i]);\n";
         }
         out << SP << SP << SP << SP << OpName << "_cell_gate[i] = (1. - ex) / (1. + ex);\n";
         out << SP << SP << "}\n";
      } else if (fAttrActivations[direction * 3 + 1] == "Sigmoid") {
         out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
         out << SP << SP << SP << SP << OpName << "_cell_gate[i] = 1. / (1. + exp(-" << OpName
             << "_cell_gate[i]));\n";
         out << SP << SP << "}\n";
      } else if (fAttrActivations[direction * 3 + 1] == "Affine") {
         out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
         out << SP << SP << SP << SP << OpName << "_cell_gate[i] = "
             << fAttrActivationAlpha[direction * 3 + 1] << " * " << OpName << "_cell_gate[i] + "
             << fAttrActivationBeta[direction * 3 + 1] << ";\n";
         out << SP << SP << "}\n";
      } else if (fAttrActivations[direction * 3 + 1] == "ScaledTanh") {
         out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
         if (fType == "float") {
            out << SP << SP << SP << "float ex = exp(-2 * " << fAttrActivationBeta[direction * 3 + 1]
                << " * " << OpName << "_cell_gate[i]);\n";
         }
         out << SP << SP << SP << SP << OpName << "_cell_gate[i] = "
             << fAttrActivationAlpha[direction * 3 + 1] << " * (1. - ex) / (1. + ex);\n";
         out << SP << SP << "}\n";
      } else if (fAttrActivations[direction * 3 + 1] == "HardSigmoid") {
         out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
         if (fType == "float") {
            out << SP << SP << SP << "float a = " << fAttrActivationAlpha[direction * 3 + 1] << " * "
                << OpName << "_cell_gate[i] + " << fAttrActivationBeta[direction * 3 + 1] << ";\n";
            out << SP << SP << SP << "float b = (a > 0.) ? a : 0.;\n";
         }
         out << SP << SP << SP << SP << OpName << "_cell_gate[i] = (b < 1.) ? b : 1.;\n";
         out << SP << SP << "}\n";
      } else if (fAttrActivations[direction * 3 + 1] == "LeakyRelu") {
         out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
         out << SP << SP << SP << "if (" << OpName << "_cell_gate[i] < 0.)\n";
         out << SP << SP << SP << SP << OpName << "_cell_gate[i] = "
             << fAttrActivationAlpha[direction * 3 + 1] << " * " << OpName << "_cell_gate[i];\n";
         out << SP << SP << "}\n";
      } else if (fAttrActivations[direction * 3 + 1] == "ThresholdRelu") {
         out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
         out << SP << SP << SP << "if (" << OpName << "_cell_gate[i] < "
             << fAttrActivationAlpha[direction * 3 + 1] << ")\n";
         out << SP << SP << SP << SP << OpName << "_cell_gate[i] = 0.;\n";
         out << SP << SP << "}\n";
      } else if (fAttrActivations[direction * 3 + 1] == "Elu") {
         out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
         out << SP << SP << SP << "if (" << OpName << "_cell_gate[i] < 0.)\n";
         out << SP << SP << SP << SP << OpName << "_cell_gate[i] = "
             << fAttrActivationAlpha[direction * 3 + 1] << " * (exp(" << OpName << "_cell_gate[i]) - 1.);\n";
         out << SP << SP << "}\n";
      } else if (fAttrActivations[direction * 3 + 1] == "Softsign") {
         out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
         out << SP << SP << SP << SP << OpName << "_cell_gate[i] = " << OpName
             << "_cell_gate[i] / (1. + abs(" << OpName << "_cell_gate[i]));\n";
         out << SP << SP << "}\n";
      } else { // fAttrActivations[direction * 3 + 1] = Softplus
         out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
         out << SP << SP << SP << SP << OpName << "_cell_gate[i] = log(1. + exp("
             << OpName << "_cell_gate[i]));\n";
         out << SP << SP << "}\n";
      }

      // Peephole connections for the input gate and the forget gate
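      // The peephole terms use the broadcast peephole tensor prepared in
      // Initialize, of shape {num_directions, 3, batch_size, hidden_size}.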
      if (!fNP.empty()) {
         // gate = gate + P o previous_cell_state (element-wise product)
         out << SP << SP << "if (seq == 0) {\n";
         if (!fNInitial_c.empty()) {
            if (direction == 0) {
               out << SP << SP << SP << "for (size_t i = 0; i < " << size << "; i++) {\n";
               out << SP << SP << SP << SP << OpName << "_input_gate[i + offset] += tensor_" << fNP
                   << "[i] * " << OpName << "_initial_cell_state[i];\n";
               out << SP << SP << SP << "}\n";
               if (fAttrInputForget == 0) {
                  size_t pf_offset = batch_size * fAttrHiddenSize;
                  out << SP << SP << SP << "for (size_t i = 0; i < " << size << "; i++) {\n";
                  out << SP << SP << SP << SP << OpName << "_forget_gate[i + offset] += tensor_" << fNP
                      << "[i + " << pf_offset << "] * " << OpName << "_initial_cell_state[i];\n";
                  out << SP << SP << SP << "}\n";
               }
            } else {
               size_t pi_offset = 3 * batch_size * fAttrHiddenSize;
               size_t initial_c_offset = batch_size * fAttrHiddenSize;
               out << SP << SP << SP << "for (size_t i = 0; i < " << size << "; i++) {\n";
               out << SP << SP << SP << SP << OpName << "_input_gate[i + offset] += tensor_" << fNP
                   << "[i + " << pi_offset << "] * " << OpName << "_initial_cell_state[i + " << initial_c_offset
                   << "];\n";
               out << SP << SP << SP << "}\n";
               if (fAttrInputForget == 0) {
                  size_t pf_offset = 3 * batch_size * fAttrHiddenSize + batch_size * fAttrHiddenSize;
                  out << SP << SP << SP << "for (size_t i = 0; i < " << size << "; i++) {\n";
                  out << SP << SP << SP << SP << OpName << "_forget_gate[i + offset] += tensor_" << fNP
                      << "[i + " << pf_offset << "] * " << OpName << "_initial_cell_state[i + " << initial_c_offset
                      << "];\n";
                  out << SP << SP << SP << "}\n";
               }
            }
         }
         out << SP << SP << "} else {\n";
         if (direction == 0) {
            if (fAttrDirection == "backward") {
               out << SP << SP << SP << "size_t c_offset = (index + 1) * "
                   << num_directions * batch_size * fAttrHiddenSize << ";\n";
            } else {
               out << SP << SP << SP << "size_t c_offset = (seq - 1) * "
                   << num_directions * batch_size * fAttrHiddenSize << ";\n";
            }
            out << SP << SP << SP << "for (size_t i = 0; i < " << size << "; i++) {\n";
            out << SP << SP << SP << SP << OpName << "_input_gate[i + offset] += tensor_" << fNP
                << "[i] * " << OpName << "_cell_state[i + c_offset];\n";
            out << SP << SP << SP << "}\n";
            if (fAttrInputForget == 0) {
               size_t pf_offset = batch_size * fAttrHiddenSize;
               out << SP << SP << SP << "for (size_t i = 0; i < " << size << "; i++) {\n";
               out << SP << SP << SP << SP << OpName << "_forget_gate[i + offset] += tensor_" << fNP
                   << "[i + " << pf_offset << "] * " << OpName << "_cell_state[i + c_offset];\n";
               out << SP << SP << SP << "}\n";
            }
         } else { // direction=1
            size_t pi_offset = 3 * batch_size * fAttrHiddenSize;
            out << SP << SP << SP << "size_t c_offset = (index + 1) * "
                << num_directions * batch_size * fAttrHiddenSize << " + " << batch_size * fAttrHiddenSize << ";\n";
            out << SP << SP << SP << "for (size_t i = 0; i < " << size << "; i++) {\n";
            out << SP << SP << SP << SP << OpName << "_input_gate[i + offset] += tensor_" << fNP
                << "[i + " << pi_offset << "] * " << OpName << "_cell_state[i + c_offset];\n";
            out << SP << SP << SP << "}\n";
            if (fAttrInputForget == 0) {
               size_t pf_offset = 3 * batch_size * fAttrHiddenSize + batch_size * fAttrHiddenSize;
               out << SP << SP << SP << "for (size_t i = 0; i < " << size << "; i++) {\n";
               out << SP << SP << SP << SP << OpName << "_forget_gate[i + offset] += tensor_" << fNP
                   << "[i + " << pf_offset << "] * " << OpName << "_cell_state[i + c_offset];\n";
               out << SP << SP << SP << "}\n";
            }
         }
         out << SP << SP << "}\n";
      }

      // Clip the elements of the input gate into the range [-fAttrClip, fAttrClip]
      if (fAttrClip > .0) {
         out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
         if (fType == "float") {
            out << SP << SP << SP << "float x = (" << OpName << "_input_gate[i] > " << -fAttrClip << ") ? "
                << OpName << "_input_gate[i] : " << -fAttrClip << ";\n";
         }
         out << SP << SP << SP << OpName << "_input_gate[i] = (x < " << fAttrClip << ") ? x : "
             << fAttrClip << ";\n";
         out << SP << SP << "}\n";
      }
      // Apply the activation function to the input gate
      if (fAttrActivations[direction * 3] == "Relu") {
         out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
         out << SP << SP << SP << "if (" << OpName << "_input_gate[i] < 0.)\n";
         out << SP << SP << SP << SP << OpName << "_input_gate[i] = 0.;\n";
         out << SP << SP << "}\n";
      } else if (fAttrActivations[direction * 3] == "Tanh") {
         out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
         if (fType == "float") {
            out << SP << SP << SP << "float ex = exp(-2 * " << OpName << "_input_gate[i]);\n";
         }
         out << SP << SP << SP << SP << OpName << "_input_gate[i] = (1. - ex) / (1. + ex);\n";
         out << SP << SP << "}\n";
      } else if (fAttrActivations[direction * 3] == "Sigmoid") {
         out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
         out << SP << SP << SP << SP << OpName << "_input_gate[i] = 1. / (1. + exp(-" << OpName
             << "_input_gate[i]));\n";
         out << SP << SP << "}\n";
      } else if (fAttrActivations[direction * 3] == "Affine") {
         out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
         out << SP << SP << SP << SP << OpName << "_input_gate[i] = "
             << fAttrActivationAlpha[direction * 3] << " * " << OpName << "_input_gate[i] + "
             << fAttrActivationBeta[direction * 3] << ";\n";
         out << SP << SP << "}\n";
      } else if (fAttrActivations[direction * 3] == "ScaledTanh") {
         out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
         if (fType == "float") {
            out << SP << SP << SP << "float ex = exp(-2 * " << fAttrActivationBeta[direction * 3]
                << " * " << OpName << "_input_gate[i]);\n";
         }
         out << SP << SP << SP << SP << OpName << "_input_gate[i] = "
             << fAttrActivationAlpha[direction * 3] << " * (1. - ex) / (1. + ex);\n";
         out << SP << SP << "}\n";
      } else if (fAttrActivations[direction * 3] == "HardSigmoid") {
         out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
         if (fType == "float") {
            out << SP << SP << SP << "float a = " << fAttrActivationAlpha[direction * 3] << " * "
                << OpName << "_input_gate[i] + " << fAttrActivationBeta[direction * 3] << ";\n";
            out << SP << SP << SP << "float b = (a > 0.) ? a : 0.;\n";
         }
         out << SP << SP << SP << SP << OpName << "_input_gate[i] = (b < 1.) ? b : 1.;\n";
         out << SP << SP << "}\n";
      } else if (fAttrActivations[direction * 3] == "LeakyRelu") {
         out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
         out << SP << SP << SP << "if (" << OpName << "_input_gate[i] < 0.)\n";
         out << SP << SP << SP << SP << OpName << "_input_gate[i] = "
             << fAttrActivationAlpha[direction * 3] << " * " << OpName << "_input_gate[i];\n";
         out << SP << SP << "}\n";
      } else if (fAttrActivations[direction * 3] == "ThresholdRelu") {
         out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
         out << SP << SP << SP << "if (" << OpName << "_input_gate[i] < "
             << fAttrActivationAlpha[direction * 3] << ")\n";
         out << SP << SP << SP << SP << OpName << "_input_gate[i] = 0.;\n";
         out << SP << SP << "}\n";
      } else if (fAttrActivations[direction * 3] == "Elu") {
         out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
         out << SP << SP << SP << "if (" << OpName << "_input_gate[i] < 0.)\n";
         out << SP << SP << SP << SP << OpName << "_input_gate[i] = "
             << fAttrActivationAlpha[direction * 3] << " * (exp(" << OpName << "_input_gate[i]) - 1.);\n";
         out << SP << SP << "}\n";
      } else if (fAttrActivations[direction * 3] == "Softsign") {
         out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
         out << SP << SP << SP << SP << OpName << "_input_gate[i] = " << OpName
             << "_input_gate[i] / (1. + abs(" << OpName << "_input_gate[i]));\n";
         out << SP << SP << "}\n";
      } else { // fAttrActivations[direction * 3] = Softplus
         out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
         out << SP << SP << SP << SP << OpName << "_input_gate[i] = log(1. + exp("
             << OpName << "_input_gate[i]));\n";
         out << SP << SP << "}\n";
      }

      if (fAttrInputForget == 0) {
         // Clip the elements of the forget gate into the range [-fAttrClip, fAttrClip]
         if (fAttrClip > .0) {
            out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
            if (fType == "float") {
               out << SP << SP << SP << "float x = (" << OpName << "_forget_gate[i] > "
                   << -fAttrClip << ") ? " << OpName << "_forget_gate[i] : " << -fAttrClip << ";\n";
            }
            out << SP << SP << SP << OpName << "_forget_gate[i] = (x < " << fAttrClip
                << ") ? x : " << fAttrClip << ";\n";
            out << SP << SP << "}\n";
         }
         // Apply the activation function to the forget gate, forget_gate = f(forget_gate)
         if (fAttrActivations[direction * 3] == "Relu") {
            out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
            out << SP << SP << SP << "if (" << OpName << "_forget_gate[i] < 0.)\n";
            out << SP << SP << SP << SP << OpName << "_forget_gate[i] = 0.;\n";
            out << SP << SP << "}\n";
         } else if (fAttrActivations[direction * 3] == "Tanh") {
            out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
            if (fType == "float") {
               out << SP << SP << SP << "float ex = exp(-2 * " << OpName << "_forget_gate[i]);\n";
            }
            out << SP << SP << SP << SP << OpName << "_forget_gate[i] = (1. - ex) / (1. + ex);\n";
            out << SP << SP << "}\n";
         } else if (fAttrActivations[direction * 3] == "Sigmoid") {
            out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
            out << SP << SP << SP << SP << OpName << "_forget_gate[i] = 1. / (1. + exp(-"
                << OpName << "_forget_gate[i]));\n";
            out << SP << SP << "}\n";
         } else if (fAttrActivations[direction * 3] == "Affine") {
            out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
            out << SP << SP << SP << SP << OpName << "_forget_gate[i] = "
                << fAttrActivationAlpha[direction * 3] << " * " << OpName << "_forget_gate[i] + "
                << fAttrActivationBeta[direction * 3] << ";\n";
            out << SP << SP << "}\n";
         } else if (fAttrActivations[direction * 3] == "ScaledTanh") {
            out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
            if (fType == "float") {
               out << SP << SP << SP << "float ex = exp(-2 * " << fAttrActivationBeta[direction * 3]
                   << " * " << OpName << "_forget_gate[i]);\n";
            }
            out << SP << SP << SP << SP << OpName << "_forget_gate[i] = "
                << fAttrActivationAlpha[direction * 3] << " * (1. - ex) / (1. + ex);\n";
            out << SP << SP << "}\n";
         } else if (fAttrActivations[direction * 3] == "HardSigmoid") {
            out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
            if (fType == "float") {
               out << SP << SP << SP << "float a = " << fAttrActivationAlpha[direction * 3] << " * "
                   << OpName << "_forget_gate[i] + " << fAttrActivationBeta[direction * 3] << ";\n";
               out << SP << SP << SP << "float b = (a > 0.) ? a : 0.;\n";
            }
            out << SP << SP << SP << SP << OpName << "_forget_gate[i] = (b < 1.) ? b : 1.;\n";
            out << SP << SP << "}\n";
         } else if (fAttrActivations[direction * 3] == "LeakyRelu") {
            out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
            out << SP << SP << SP << "if (" << OpName << "_forget_gate[i] < 0.)\n";
            out << SP << SP << SP << SP << OpName << "_forget_gate[i] = "
                << fAttrActivationAlpha[direction * 3] << " * " << OpName << "_forget_gate[i];\n";
            out << SP << SP << "}\n";
         } else if (fAttrActivations[direction * 3] == "ThresholdRelu") {
            out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
            out << SP << SP << SP << "if (" << OpName << "_forget_gate[i] < "
                << fAttrActivationAlpha[direction * 3] << ")\n";
            out << SP << SP << SP << SP << OpName << "_forget_gate[i] = 0.;\n";
            out << SP << SP << "}\n";
         } else if (fAttrActivations[direction * 3] == "Elu") {
            out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
            out << SP << SP << SP << "if (" << OpName << "_forget_gate[i] < 0.)\n";
            out << SP << SP << SP << SP << OpName << "_forget_gate[i] = "
                << fAttrActivationAlpha[direction * 3] << " * (exp(" << OpName << "_forget_gate[i]) - 1.);\n";
            out << SP << SP << "}\n";
         } else if (fAttrActivations[direction * 3] == "Softsign") {
            out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
            out << SP << SP << SP << SP << OpName << "_forget_gate[i] = " << OpName
                << "_forget_gate[i] / (1. + abs(" << OpName << "_forget_gate[i]));\n";
            out << SP << SP << "}\n";
         } else { // fAttrActivations[direction * 3] = Softplus
            out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
            out << SP << SP << SP << SP << OpName << "_forget_gate[i] = log(1. + exp("
                << OpName << "_forget_gate[i]));\n";
            out << SP << SP << "}\n";
         }
      }
1045
1046 // cell_state = input_gate o cell_gate
1047 out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
1048 out << SP << SP << SP << OpName << "_cell_state[i] = " << OpName << "_input_gate[i] * "
1049 << OpName << "_cell_gate[i];\n";
1050 out << SP << SP << "}\n";
1051
1052 if (fAttrInputForget == 0) {
1053 out << SP << SP << "if (seq == 0) {\n";
1054 if (!fNInitial_c.empty()) {
1055 // cell_state += forget_gate o initial_cell_state
1056 out << SP << SP << SP << "for (size_t i = 0; i < " << size << "; i++) {\n";
1057 out << SP << SP << SP << SP << OpName << "_cell_state[i + offset] += "
1058 << OpName << "_forget_gate[i + offset] * " << OpName << "_initial_cell_state[i];\n";
1059 out << SP << SP << SP << "}\n";
1060 }
1061 out << SP << SP << "} else {\n";
1062 // cell_state += forget_gate o previous_cell_state
1063 if (direction == 0) {
1064 if (fAttrDirection == "backward") {
1065 out << SP << SP << SP << "size_t previous_offset = (index + 1) * "
1066 << num_directions * batch_size * fAttrHiddenSize << ";\n";
1067 } else {
1068 out << SP << SP << SP << "size_t previous_offset = (seq - 1) * "
1069 << num_directions * batch_size * fAttrHiddenSize << ";\n";
1070 }
1071 } else { // direction=1
1072 out << SP << SP << SP << "size_t previous_offset = (index + 1) * "
1073 << num_directions * batch_size * fAttrHiddenSize << " + " << batch_size * fAttrHiddenSize << ";\n";
1074 }
1075 out << SP << SP << SP << "for (size_t i = 0; i < " << size << "; i++) {\n";
1076 out << SP << SP << SP << SP << OpName << "_cell_state[i + offset] += "
1077 << OpName << "_forget_gate[i + offset] * " << OpName << "_cell_state[i + previous_offset];\n";
1078 out << SP << SP << SP << "}\n";
1079 out << SP << SP << "}\n";
1080 }
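 // Note: previous_offset addresses the state of the previously processed
 // timestep: (seq - 1) when running forward, (index + 1) when the sequence
 // is traversed backwards, shifted by batch_size * fAttrHiddenSize for the
 // second direction of a bidirectional model.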
1081
1082 if (!fNP.empty()) {
1083 // Peephole connection for the output gate
1084 if (direction == 0) {
1085 size_t p_offset = 2 * batch_size * fAttrHiddenSize;
1086 out << SP << SP << SP << "for (size_t i = 0; i < " << size << "; i++) {\n";
1087 out << SP << SP << SP << SP << OpName << "_output_gate[i + offset] += tensor_"
1088 << fNP << "[i + " << p_offset << "] * " << OpName << "_cell_state[i + offset];\n";
1089 out << SP << SP << SP << "}\n";
1090 } else { // direction=1
1091 size_t p_offset = 3 * batch_size * fAttrHiddenSize + 2 * batch_size * fAttrHiddenSize;
1092 out << SP << SP << SP << "for (size_t i = 0; i < " << size << "; i++) {\n";
1093 out << SP << SP << SP << SP << OpName << "_output_gate[i + offset] += tensor_"
1094 << fNP << "[i + " << p_offset << "] * " << OpName << "_cell_state[i + offset];\n";
1095 out << SP << SP << SP << "}\n";
1096 }
1097 }
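 // The constant offsets above assume that the peephole tensor fNP was
 // broadcast along the batch dimension during initialization, so each gate
 // slot spans batch_size * fAttrHiddenSize elements and each direction
 // spans three such slots.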
1098
1099 // Clip the elements of the output gate into the range [-fAttrClip, fAttrClip]
1100 if (fAttrClip > .0) {
1101 out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
1102 if (fType == "float") {
1103 out << SP << SP << SP << "float x = (" << OpName << "_output_gate[i] > " << -fAttrClip
1104 << ") ? " << OpName << "_output_gate[i] : " << -fAttrClip << ";\n";
1105 }
1106 out << SP << SP << SP << OpName << "_output_gate[i] = (x < " << fAttrClip << ") ? x : "
1107 << fAttrClip << ";\n";
1108 out << SP << SP << "}\n";
1109 }
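 // Sketch of the emitted clipping code for a hypothetical fAttrClip == 1
 // and operator name "op", assuming fType == "float":
 //   for (size_t i = offset; i < offset + size; i++) {
 //     float x = (op_output_gate[i] > -1) ? op_output_gate[i] : -1;
 //     op_output_gate[i] = (x < 1) ? x : 1;
 //   }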
1110 // Apply the activation function to the output gate
1111 if (fAttrActivations[direction * 3] == "Relu") {
1112 out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
1113 out << SP << SP << SP << "if (" << OpName << "_output_gate[i] < 0.)\n";
1114 out << SP << SP << SP << SP << OpName << "_output_gate[i] = 0.;\n";
1115 out << SP << SP << "}\n";
1116 } else if (fAttrActivations[direction * 3] == "Tanh") {
1117 out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
1118 if (fType == "float") {
1119 out << SP << SP << SP << "float ex = exp(-2 * " << OpName << "_output_gate[i]);\n";
1120 }
1121 out << SP << SP << SP << SP << OpName << "_output_gate[i] = (1. - ex) / (1. + ex);\n";
1122 out << SP << SP << "}\n";
1123 } else if (fAttrActivations[direction * 3] == "Sigmoid") {
1124 out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
1125 out << SP << SP << SP << SP << OpName << "_output_gate[i] = 1. / (1. + exp(-" << OpName
1126 << "_output_gate[i]));\n";
1127 out << SP << SP << "}\n";
1128 } else if (fAttrActivations[direction * 3] == "Affine") {
1129 out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
1130 out << SP << SP << SP << SP << OpName << "_output_gate[i] = "
1131 << fAttrActivationAlpha[direction * 3] << " * " << OpName << "_output_gate[i] + "
1132 << fAttrActivationBeta[direction * 3] << ";\n";
1133 out << SP << SP << "}\n";
1134 } else if (fAttrActivations[direction * 3] == "ScaledTanh") {
1135 out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
1136 if (fType == "float") {
1137 out << SP << SP << SP << "float ex = exp(-2 * " << fAttrActivationBeta[direction * 3]
1138 << " * "<< OpName << "_output_gate[i]);\n";
1139 }
1140 out << SP << SP << SP << SP << OpName << "_output_gate[i] = "
1141 << fAttrActivationAlpha[direction * 3] << " * (1. - ex) / (1. + ex);\n";
1142 out << SP << SP << "}\n";
1143 } else if (fAttrActivations[direction * 3] == "HardSigmoid") {
1144 out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
1145 if (fType == "float") {
1146 out << SP << SP << SP << "float a = " << fAttrActivationAlpha[direction * 3] << " * "
1147 << OpName << "_output_gate[i] + " << fAttrActivationBeta[direction * 3] << ";\n";
1148 out << SP << SP << SP << "float b = (a > 0.) ? a : 0.;\n";
1149 }
1150 out << SP << SP << SP << SP << OpName << "_output_gate[i] = (b < 1.) ? b : 1.;\n";
1151 out << SP << SP << "}\n";
1152 } else if (fAttrActivations[direction * 3] == "LeakyRelu") {
1153 out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
1154 out << SP << SP << SP << "if (" << OpName << "_output_gate[i] < 0.)\n";
1155 out << SP << SP << SP << SP << OpName << "_output_gate[i] = "
1156 << fAttrActivationAlpha[direction * 3] << " * " << OpName << "_output_gate[i];\n";
1157 out << SP << SP << "}\n";
1158 } else if (fAttrActivations[direction * 3] == "ThresholdRelu") {
1159 out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
1160 out << SP << SP << SP << "if (" << OpName << "_output_gate[i] < "
1161 << fAttrActivationAlpha[direction * 3] << ")\n";
1162 out << SP << SP << SP << SP << OpName << "_output_gate[i] = 0.;\n";
1163 out << SP << SP << "}\n";
1164 } else if (fAttrActivations[direction * 3] == "Elu") {
1165 out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
1166 out << SP << SP << SP << "if (" << OpName << "_output_gate[i] < 0.)\n";
1167 out << SP << SP << SP << SP << OpName << "_output_gate[i] = "
1168 << fAttrActivationAlpha[direction * 3] << " * (exp(" << OpName << "_output_gate[i]) - 1.);\n";
1169 out << SP << SP << "}\n";
1170 } else if (fAttrActivations[direction * 3] == "Softsign") {
1171 out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
1172 out << SP << SP << SP << SP << OpName << "_output_gate[i] = " << OpName
1173 << "_output_gate[i] / (1. + abs(" << OpName << "_output_gate[i]));\n";
1174 out << SP << SP << "}\n";
1175 } else { // fAttrActivations[direction * 3] == "Softplus"
1176 out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
1177 out << SP << SP << SP << SP << OpName << "_output_gate[i] = log(1. + exp("
1178 << OpName << "_output_gate[i]));\n";
1179 out << SP << SP << "}\n";
1180 }
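 // In the ONNX activations attribute each direction contributes the triple
 // [f, g, h]; index direction * 3 selects the gate activation f, which is
 // applied to the input, output and forget gates alike (Sigmoid by default).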
1181
1182 // copy cell_state into new_cell_state
1183 out << SP << SP << "std::copy(" << OpName << "_cell_state + offset, " << OpName
1184 << "_cell_state + offset + " << size << ", "<< OpName << "_new_cell_state + offset);\n";
1185 // Clip the elements of the new_cell_state into the range [-fAttrClip, fAttrClip]
1186 if (fAttrClip > .0) {
1187 out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
1188 if (fType == "float") {
1189 out << SP << SP << SP << "float x = (" << OpName << "_new_cell_state[i] > " << -fAttrClip
1190 << ") ? " << OpName << "_new_cell_state[i] : " << -fAttrClip << ";\n";
1191 }
1192 out << SP << SP << SP << OpName << "_new_cell_state[i] = (x < " << fAttrClip << ") ? x : "
1193 << fAttrClip << ";\n";
1194 out << SP << SP << "}\n";
1195 }
1196 // Apply the activation function to the new cell state
1197 if (fAttrActivations[direction * 3 + 2] == "Relu") {
1198 out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
1199 out << SP << SP << SP << "if (" << OpName << "_new_cell_state[i] < 0.)\n";
1200 out << SP << SP << SP << SP << OpName << "_new_cell_state[i] = 0.;\n";
1201 out << SP << SP << "}\n";
1202 } else if (fAttrActivations[direction * 3 + 2] == "Tanh") {
1203 out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
1204 if (fType == "float") {
1205 out << SP << SP << SP << "float ex = exp(-2 * " << OpName << "_new_cell_state[i]);\n";
1206 }
1207 out << SP << SP << SP << SP << OpName << "_new_cell_state[i] = (1. - ex) / (1. + ex);\n";
1208 out << SP << SP << "}\n";
1209 } else if (fAttrActivations[direction * 3 + 2] == "Sigmoid") {
1210 out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
1211 out << SP << SP << SP << SP << OpName << "_new_cell_state[i] = 1. / (1. + exp(-" << OpName
1212 << "_new_cell_state[i]));\n";
1213 out << SP << SP << "}\n";
1214 } else if (fAttrActivations[direction * 3 + 2] == "Affine") {
1215 out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
1216 out << SP << SP << SP << SP << OpName << "_new_cell_state[i] = "
1217 << fAttrActivationAlpha[direction * 3 + 2] << " * " << OpName << "_new_cell_state[i] + "
1218 << fAttrActivationBeta[direction * 3 + 2] << ";\n";
1219 out << SP << SP << "}\n";
1220 } else if (fAttrActivations[direction * 3 + 2] == "ScaledTanh") {
1221 out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
1222 if (fType == "float") {
1223 out << SP << SP << SP << "float ex = exp(-2 * " << fAttrActivationBeta[direction * 3 + 2]
1224 << " * "<< OpName << "_new_cell_state[i]);\n";
1225 }
1226 out << SP << SP << SP << SP << OpName << "_new_cell_state[i] = "
1227 << fAttrActivationAlpha[direction * 3 + 2] << " * (1. - ex) / (1. + ex);\n";
1228 out << SP << SP << "}\n";
1229 } else if (fAttrActivations[direction * 3 + 2] == "HardSigmoid") {
1230 out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
1231 if (fType == "float") {
1232 out << SP << SP << SP << "float a = " << fAttrActivationAlpha[direction * 3 + 2] << " * "
1233 << OpName << "_new_cell_state[i] + " << fAttrActivationBeta[direction * 3 + 2] << ";\n";
1234 out << SP << SP << SP << "float b = (a > 0.) ? a : 0.;\n";
1235 }
1236 out << SP << SP << SP << SP << OpName << "_new_cell_state[i] = (b < 1.) ? b : 1.;\n";
1237 out << SP << SP << "}\n";
1238 } else if (fAttrActivations[direction * 3 + 2] == "LeakyRelu") {
1239 out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
1240 out << SP << SP << SP << "if (" << OpName << "_new_cell_state[i] < 0.)\n";
1241 out << SP << SP << SP << SP << OpName << "_new_cell_state[i] = "
1242 << fAttrActivationAlpha[direction * 3 + 2] << " * " << OpName << "_new_cell_state[i];\n";
1243 out << SP << SP << "}\n";
1244 } else if (fAttrActivations[direction * 3 + 2] == "ThresholdRelu") {
1245 out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
1246 out << SP << SP << SP << "if (" << OpName << "_new_cell_state[i] < "
1247 << fAttrActivationAlpha[direction * 3 + 2] << ")\n";
1248 out << SP << SP << SP << SP << OpName << "_new_cell_state[i] = 0.;\n";
1249 out << SP << SP << "}\n";
1250 } else if (fAttrActivations[direction * 3 + 2] == "Elu") {
1251 out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
1252 out << SP << SP << SP << "if (" << OpName << "_new_cell_state[i] < 0.)\n";
1253 out << SP << SP << SP << SP << OpName << "_new_cell_state[i] = "
1254 << fAttrActivationAlpha[direction * 3 + 2] << " * (exp(" << OpName << "_new_cell_state[i]) - 1.);\n";
1255 out << SP << SP << "}\n";
1256 } else if (fAttrActivations[direction * 3 + 2] == "Softsign") {
1257 out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
1258 out << SP << SP << SP << SP << OpName << "_new_cell_state[i] = " << OpName
1259 << "_new_cell_state[i] / (1. + abs(" << OpName << "_new_cell_state[i]));\n";
1260 out << SP << SP << "}\n";
1261 } else { // fAttrActivations[direction * 3 + 2] == "Softplus"
1262 out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
1263 out << SP << SP << SP << SP << OpName << "_new_cell_state[i] = log(1. + exp("
1264 << OpName << "_new_cell_state[i]));\n";
1265 out << SP << SP << "}\n";
1266 }
1267
1268 // hidden_state = output_gate o new_cell_state
1269 out << SP << SP << "for (size_t i = offset; i < offset + " << size << "; i++) {\n";
1270 out << SP << SP << SP << OpName << "_hidden_state[i] = " << OpName << "_output_gate[i] * "
1271 << OpName << "_new_cell_state[i];\n";
1272 out << SP << SP << "}\n";
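 // This completes the ONNX recurrence for one timestep: Ht = ot o h(Ct),
 // with h the cell activation selected by index direction * 3 + 2
 // (Tanh by default).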
1273 out << SP << "}\n";
1274 }
1275
1276 // Padding the hidden state for LSTM with different sequence lengths
1277 if (!fNSequence_lens.empty()) {
1278 out << SP << "for (size_t seq = 0; seq < " << seq_length << "; seq++) {\n";
1279 out << SP << SP << "for (size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
1280 out << SP << SP << SP << "if (seq >= tensor_" << fNSequence_lens << "[batch]) {\n";
1281 for (size_t direction = 0; direction < num_directions; direction++) {
1282 out << SP << SP << SP << SP << SP << "for (size_t h = 0; h < " << fAttrHiddenSize << "; h++) {\n";
1283 out << SP << SP << SP << SP << SP << SP << "size_t idx = seq * "
1284 << num_directions * batch_size * fAttrHiddenSize << " + "
1285 << direction * batch_size * fAttrHiddenSize << " + batch * " << fAttrHiddenSize << " + h;\n";
1286 out << SP << SP << SP << SP << SP << SP << OpName << "_cell_state[idx] = 0.;\n";
1287 out << SP << SP << SP << SP << SP << SP << OpName << "_hidden_state[idx] = 0.;\n";
1288 out << SP << SP << SP << SP << SP << "}\n";
1289 }
1290 out << SP << SP << SP << "}\n";
1291 out << SP << SP << "}\n";
1292 out << SP << "}\n";
1293 }
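 // For timesteps past the end of a batch entry's sequence
 // (seq >= sequence_lens[batch]) both states are zeroed, so the padded
 // positions contribute nothing to the outputs Y, Y_h and Y_c.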
1294
1295 // Copy the hidden state into y and y_h and copy cell_state into y_c
1296 if (fAttrLayout == 0) {
1297 if (!fNY_h.empty()) {
1298 // Copy hidden_state into Y_h
1299 if (fNSequence_lens.empty()) {
1300 size_t y_h_size = batch_size * fAttrHiddenSize;
1301 if (fAttrDirection == "backward") {
1302 out << SP << "std::copy(" << OpName << "_hidden_state, " << OpName << "_hidden_state + "
1303 << y_h_size << ", tensor_" << fNY_h << ");\n";
1304 } else {
1305 size_t offset = (seq_length - 1) * num_directions * batch_size * fAttrHiddenSize;
1306 out << SP << "std::copy(" << OpName << "_hidden_state + " << offset << ", " << OpName
1307 << "_hidden_state + " << offset << " + " << y_h_size << ", tensor_" << fNY_h << ");\n";
1308 }
1309 if (num_directions == 2) {
1310 out << SP << "std::copy(" << OpName << "_hidden_state + " << y_h_size << ", " << OpName
1311 << "_hidden_state + " << 2 * y_h_size << ", tensor_" << fNY_h << " + " << y_h_size << ");\n";
1312 }
1313 } else { // LSTM with different sequence lengths
1314 if (fAttrDirection == "backward") {
1315 out << SP << "for (size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
1316 out << SP << SP << "size_t offset = batch * " << fAttrHiddenSize << ";\n";
1317 out << SP << SP << "std::copy(" << OpName << "_hidden_state + offset, " << OpName
1318 << "_hidden_state + offset + " << fAttrHiddenSize << ", tensor_" << fNY_h << " + offset);\n";
1319 out << SP << "}\n";
1320 } else {
1321 out << SP << "for (size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
1322 out << SP << SP << "size_t seq = " << "tensor_" << fNSequence_lens << "[batch] - 1;\n";
1323 out << SP << SP << "size_t offset = seq * " << num_directions * batch_size * fAttrHiddenSize
1324 << " + batch * " << fAttrHiddenSize << ";\n";
1325 out << SP << SP << "size_t y_h_offset = batch * " << fAttrHiddenSize << ";\n";
1326 out << SP << SP << "std::copy(" << OpName << "_hidden_state + offset, " << OpName
1327 << "_hidden_state + offset + " << fAttrHiddenSize << ", tensor_" << fNY_h << " + y_h_offset);\n";
1328 out << SP << "}\n";
1329 }
1330 if (num_directions == 2) {
1331 out << SP << "for (size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
1332 out << SP << SP << "size_t offset = " << batch_size * fAttrHiddenSize
1333 << " + batch * " << fAttrHiddenSize << ";\n";
1334 out << SP << SP << "size_t y_h_offset = " << batch_size * fAttrHiddenSize
1335 << " + batch * " << fAttrHiddenSize << ";\n";
1336 out << SP << SP << "std::copy(" << OpName << "_hidden_state + offset, " << OpName
1337 << "_hidden_state + offset + " << fAttrHiddenSize << ", tensor_" << fNY_h << " + y_h_offset);\n";
1338 out << SP << "}\n";
1339 }
1340 }
1341 }
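 // Sketch of the copy emitted for a forward, unidirectional model without
 // sequence_lens, using hypothetical values seq_length = 2, batch_size = 1,
 // fAttrHiddenSize = 4 and operator name "op":
 //   std::copy(op_hidden_state + 4, op_hidden_state + 4 + 4, tensor_Y_h);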
1342 if (!fNY_c.empty()) {
1343 // Copy cell_state into Y_c
1344 if (fNSequence_lens.empty()) {
1345 size_t y_h_size = batch_size * fAttrHiddenSize;
1346 if (fAttrDirection == "backward") {
1347 out << SP << "std::copy(" << OpName << "_cell_state, " << OpName << "_hidden_state + "
1348 << y_h_size << ", tensor_" << fNY_c << ");\n";
1349 } else {
1350 size_t offset = (seq_length - 1) * num_directions * batch_size * fAttrHiddenSize;
1351 out << SP << "std::copy(" << OpName << "_cell_state + " << offset << ", " << OpName
1352 << "_cell_state + " << offset << " + " << y_h_size << ", tensor_" << fNY_c << ");\n";
1353 }
1354 if (num_directions == 2) {
1355 out << SP << "std::copy(" << OpName << "_cell_state + " << y_h_size << ", " << OpName
1356 << "_cell_state + " << 2 * y_h_size << ", tensor_" << fNY_c << " + " << y_h_size << ");\n";
1357 }
1358 } else { // LSTM with different sequence lengths
1359 if (fAttrDirection == "backward") {
1360 out << SP << "for (size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
1361 out << SP << SP << "size_t offset = batch * " << fAttrHiddenSize << ";\n";
1362 out << SP << SP << "std::copy(" << OpName << "_cell_state + offset, " << OpName
1363 << "_cell_state + offset + " << fAttrHiddenSize << ", tensor_" << fNY_c << " + offset);\n";
1364 out << SP << "}\n";
1365 } else {
1366 out << SP << "for (size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
1367 out << SP << SP << "size_t seq = " << "tensor_" << fNSequence_lens << "[batch] - 1;\n";
1368 out << SP << SP << "size_t offset = seq * " << num_directions * batch_size * fAttrHiddenSize
1369 << " + batch * " << fAttrHiddenSize << ";\n";
1370 out << SP << SP << "size_t y_h_offset = batch * " << fAttrHiddenSize << ";\n";
1371 out << SP << SP << "std::copy(" << OpName << "_cell_state + offset, " << OpName
1372 << "_cell_state + offset + " << fAttrHiddenSize << ", tensor_" << fNY_c << " + y_h_offset);\n";
1373 out << SP << "}\n";
1374 }
1375 if (num_directions == 2) {
1376 out << SP << "for (size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
1377 out << SP << SP << "size_t offset = " << batch_size * fAttrHiddenSize
1378 << " + batch * " << fAttrHiddenSize << ";\n";
1379 out << SP << SP << "size_t y_h_offset = " << batch_size * fAttrHiddenSize
1380 << " + batch * " << fAttrHiddenSize << ";\n";
1381 out << SP << SP << "std::copy(" << OpName << "_cell_state + offset, " << OpName
1382 << "_cell_state + offset + " << fAttrHiddenSize << ", tensor_" << fNY_c << " + y_h_offset);\n";
1383 out << SP << "}\n";
1384 }
1385 }
1386 }
1387 } else { // fAttrLayout=1
1388 if (!fNY.empty()) {
1389 // Copy hidden_state into Y
1390 for (size_t direction = 0; direction < num_directions; direction++) {
1391 out << SP << "for (size_t seq = 0; seq < " << seq_length << "; seq++) {\n";
1392 out << SP << SP << "for (size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
1393 out << SP << SP << SP << "size_t offset = seq * " << num_directions * batch_size * fAttrHiddenSize
1394 << " + " << direction * batch_size * fAttrHiddenSize << " + batch * " << fAttrHiddenSize << ";\n";
1395 out << SP << SP << SP << "size_t y_offset = batch * " << seq_length * num_directions * fAttrHiddenSize
1396 << " + seq * " << num_directions * fAttrHiddenSize << " + " << direction * fAttrHiddenSize << ";\n";
1397 out << SP << SP << SP << "std::copy(" << OpName << "_hidden_state + offset, " << OpName
1398 << "_hidden_state + offset + " << fAttrHiddenSize << ", tensor_" << fNY << " + y_offset);\n";
1399 out << SP << SP << "}\n";
1400 out << SP << "}\n";
1401 }
1402 }
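 // With fAttrLayout == 1 the internal buffer is indexed as
 // [seq, direction, batch, hidden] while Y is laid out as
 // [batch, seq, direction, hidden]; the offset / y_offset pair above
 // performs exactly this transposition, one hidden vector at a time.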
1403 if (!fNY_h.empty()) {
1404 // Copy the hidden_state into Y_h
1405 if (fAttrDirection == "backward") {
1406 out << SP << "for (size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
1407 out << SP << SP << "size_t offset = batch * " << fAttrHiddenSize << ";\n";
1408 out << SP << SP << "size_t y_h_offset = batch * " << num_directions * fAttrHiddenSize << ";\n";
1409 out << SP << SP << "std::copy(" << OpName << "_hidden_state + offset, " << OpName
1410 << "_hidden_state + offset + " << fAttrHiddenSize << ", tensor_" << fNY_h << " + y_h_offset);\n";
1411 out << SP << "}\n";
1412 } else {
1413 out << SP << "for (size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
1414 if (fNSequence_lens.empty()) {
1415 out << SP << SP << "size_t seq = " << seq_length - 1 << ";\n";
1416 } else {
1417 out << SP << SP << "size_t seq = " << "tensor_" << fNSequence_lens << "[batch] - 1;\n";
1418 }
1419 out << SP << SP << "size_t offset = seq * " << num_directions * batch_size * fAttrHiddenSize
1420 << " + batch * " << fAttrHiddenSize << ";\n";
1421 out << SP << SP << "size_t y_h_offset = batch * " << num_directions * fAttrHiddenSize << ";\n";
1422 out << SP << SP << "std::copy(" << OpName << "_hidden_state + offset, " << OpName
1423 << "_hidden_state + offset + " << fAttrHiddenSize << ", tensor_" << fNY_h << " + y_h_offset);\n";
1424 out << SP << "}\n";
1425 }
1426 if (num_directions == 2) {
1427 out << SP << "for (size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
1428 out << SP << SP << "size_t offset = " << batch_size * fAttrHiddenSize << " + batch * "
1429 << fAttrHiddenSize << ";\n";
1430 out << SP << SP << "size_t y_h_offset = batch * " << num_directions * fAttrHiddenSize << " + "
1431 << fAttrHiddenSize << ";\n";
1432 out << SP << SP << "std::copy(" << OpName << "_hidden_state + offset, " << OpName
1433 << "_hidden_state + offset + " << fAttrHiddenSize << ", tensor_" << fNY_h << " + y_h_offset);\n";
1434 out << SP << "}\n";
1435 }
1436 }
1437
1438 if (!fNY_c.empty()) {
1439 // copy the cell_state into Y_c
1440 if (fAttrDirection == "backward") {
1441 out << SP << "for (size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
1442 out << SP << SP << "size_t offset = batch * " << fAttrHiddenSize << ";\n";
1443 out << SP << SP << "size_t y_h_offset = batch * " << num_directions * fAttrHiddenSize << ";\n";
1444 out << SP << SP << "std::copy(" << OpName << "_cell_state + offset, " << OpName
1445 << "_cell_state + offset + " << fAttrHiddenSize << ", tensor_" << fNY_c << " + y_h_offset);\n";
1446 out << SP << "}\n";
1447 } else {
1448 out << SP << "for (size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
1449 if (fNSequence_lens.empty()) {
1450 out << SP << SP << "size_t seq = " << seq_length - 1 << ";\n";
1451 } else {
1452 out << SP << SP << "size_t seq = " << "tensor_" << fNSequence_lens << "[batch] - 1;\n";
1453 }
1454 out << SP << SP << "size_t offset = seq * " << num_directions * batch_size * fAttrHiddenSize
1455 << " + batch * " << fAttrHiddenSize << ";\n";
1456 out << SP << SP << "size_t y_h_offset = batch * " << num_directions * fAttrHiddenSize << ";\n";
1457 out << SP << SP << "std::copy(" << OpName << "_cell_state + offset, " << OpName
1458 << "_cell_state + offset + " << fAttrHiddenSize << ", tensor_" << fNY_c << " + y_h_offset);\n";
1459 out << SP << "}\n";
1460 }
1461 if (num_directions == 2) {
1462 out << SP << "for (size_t batch = 0; batch < " << batch_size << "; batch++) {\n";
1463 out << SP << SP << "size_t offset = " << batch_size * fAttrHiddenSize << " + batch * "
1464 << fAttrHiddenSize << ";\n";
1465 out << SP << SP << "size_t y_h_offset = batch * " << num_directions * fAttrHiddenSize << " + "
1466 << fAttrHiddenSize << ";\n";
1467 out << SP << SP << "std::copy(" << OpName << "_cell_state + offset, " << OpName
1468 << "_cell_state + offset + " << fAttrHiddenSize << ", tensor_" << fNY_c << " + y_h_offset);\n";
1469 out << SP << "}\n";
1470 }
1471 }
1472 }
1473
1474 return out.str();
1475}
1476
1477} // namespace SOFIE
1478} // namespace Experimental
1479} // namespace TMVA
1480
1481#endif