Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
ROperator_LayerNormalization.hxx
Go to the documentation of this file.
1#ifndef TMVA_SOFIE_ROPERATOR_LAYERNORMALIZATION
2#define TMVA_SOFIE_ROPERATOR_LAYERNORMALIZATION
3
4#include "TMVA/RModel.hxx"
6
7#include <sstream>
8#include <string>
9
10namespace TMVA {
11namespace Experimental {
12namespace SOFIE {
13
14template <typename T>
16private:
20
// Tensor names. fNX, fNScale, fNB, fNY, fNMean and fNInvStdDev are set by the
// constructor from the caller-supplied names after UTILITY::Clean_name.
// fNB may be empty, in which case no bias is added by Generate().
// fNMean / fNInvStdDev are replaced by "Mean" + fNX / "InvStdDev" + fNX in
// Initialize() when they are empty.
21 std::string fNX;
22 std::string fNScale;
23 std::string fNB;
24 std::string fNY;
25 std::string fNMean;
26 std::string fNInvStdDev;
27
// Names of optional helper tensors; each stays empty unless Initialize() needs it:
// fNCastedX / fNNormalizedX when the stash type is 1 and X is not FLOAT,
// fNBroadcastedB when the bias holds fewer elements than X.
28 std::string fNCastedX;
29 std::string fNNormalizedX;
30 std::string fNBroadcastedB;
31
// Tensor shapes. fShapeX and fShapeB are read from the model in Initialize().
// NOTE(review): no assignment to fShapeScale, fShapeY, fShapeMean or
// fShapeInvStdDev is visible in this listing - confirm where they are filled.
32 std::vector<size_t> fShapeX;
33 std::vector<size_t> fShapeScale;
34 std::vector<size_t> fShapeB;
35 std::vector<size_t> fShapeY;
36 std::vector<size_t> fShapeMean;
37 std::vector<size_t> fShapeInvStdDev;
38
39 size_t fAxis; // first normalized axis, in [0, fSize); splits fShapeX into fAxesShape and fNormalizedShape
40 size_t fSize; // number of dimensions of X (fShapeX.size())
41 // size_t fAxisDim;
42 size_t fLength; // Length of the input X
43
// fNormalizedShape = fShapeX[fAxis, ..., fSize), fAxesShape = fShapeX[0, ..., fAxis).
44 std::vector<size_t> fNormalizedShape;
45 std::vector<size_t> fAxesShape;
48
// Element type name written into the generated code (e.g. as the type of `sum`).
49 std::string fType;
50
51public:
53
// Construct a LayerNormalization operator.
//
// axis          - stored in fAttrAxis; signed, while the resolved fAxis is unsigned.
//                 NOTE(review): the conversion to fAxis is not visible in this listing.
// epsilon       - stored in fAttrEpsilon; Generate() adds it to the variance inside the
//                 square root of the inverse standard deviation.
// stashType     - stored in fAttrStashType; Initialize() enables the cast-to-float path
//                 when it equals 1 and X is not a FLOAT tensor.
// nameX         - input tensor name.
// nameScale     - scale tensor name.
// nameB         - bias tensor name; when empty no bias is added.
// nameY         - output tensor name.
// nameMean      - mean tensor name; when empty Initialize() uses "Mean" + X name.
// nameInvStdDev - inverse standard deviation tensor name; when empty Initialize()
//                 uses "InvStdDev" + X name.
//
// Every tensor name is passed through UTILITY::Clean_name before being stored.
54 ROperator_LayerNormalization(int axis, float epsilon, size_t stashType, const std::string &nameX,
55 const std::string &nameScale, const std::string &nameB, const std::string &nameY,
56 const std::string &nameMean, const std::string &nameInvStdDev)
57 : fAttrAxis(axis), fAttrEpsilon(epsilon), fAttrStashType(stashType), fNX(UTILITY::Clean_name(nameX)),
58 fNScale(UTILITY::Clean_name(nameScale)), fNB(UTILITY::Clean_name(nameB)),
59 fNY(UTILITY::Clean_name(nameY)), fNMean(UTILITY::Clean_name(nameMean)), fNInvStdDev(UTILITY::Clean_name(nameInvStdDev))
60 {
61 }
62
63 std::vector<std::vector<size_t>> ShapeInference(std::vector<std::vector<size_t>> input) override { return input; }
64
65 std::vector<ETensorType> TypeInference(std::vector<ETensorType> input) override { return input; }
66
// Prepare the operator against the model: check that the input tensor exists,
// read its shape, split it at fAxis into the leading (fAxesShape) and normalized
// (fNormalizedShape) parts, pick default names for the mean / inverse standard
// deviation tensors and decide whether the cast and bias-broadcast helper
// tensors are needed.
//
// Throws std::runtime_error when the model has no tensor named fNX.
//
// NOTE(review): several statements are absent from this listing; the comments
// "Type of the output", "Axis in [0, size)", "Length of the axes", "Length of the
// normalized axis", "length of the input" and "Type of mean and std" have no
// statement after them here. The assignments to fAxis, fLength, fType and
// fNormalizedLength, and any registration of output or intermediate tensors with
// the model, must be confirmed against the real header.
67 void Initialize(RModel &model) override
68 {
69 if (!model.CheckIfTensorAlreadyExist(fNX)) {
70 throw std::runtime_error("TMVA::SOFIE - Tensor " + fNX + " not found.");
71 }
72 fShapeX = model.GetTensorShape(fNX);
75 // Type of the output
77 // Size of the input
78 fSize = fShapeX.size();
79 // Axis in [0, size)
81 // Shape of fShapeX[0, ..., fAxis)
// NOTE(review): relies on fAxis <= fSize; the statement that sets fAxis is not visible here.
82 fAxesShape = std::vector<size_t>(fShapeX.begin(), fShapeX.begin() + fAxis);
83 // Length of the axes
85 // Shape of fShapeX[fAxis, ..., fSize)
86 fNormalizedShape = std::vector<size_t>(fShapeX.begin() + fAxis, fShapeX.end());
87 // Length of the normalized axis
89 // length of the input
91 // Type of mean and std
93 // Mean
// An empty mean name gets a default derived from the input name.
94 if (fNMean.empty()) {
95 fNMean = "Mean" + fNX;
97 }
98 // Inverse Standard Deviation
// Same defaulting for the inverse standard deviation name.
99 if (fNInvStdDev.empty()) {
100 fNInvStdDev = "InvStdDev" + fNX;
102 }
103 // Cast X to float
// Only with stash type 1 and a non-FLOAT input; Generate() keys its cast branch
// on fNCastedX being non-empty.
104 if (fAttrStashType == 1 && model.GetTensorType(fNX) != ETensorType::FLOAT) {
105 fNCastedX = "Casted" + fNX;
107 fNNormalizedX = "Normalized" + fNX;
109 }
110 // Broadcast the bias
// A bias holding fewer elements than X gets a broadcast copy, which
// GenerateInitCode() fills and Generate() adds instead of the raw bias.
111 if (!fNB.empty()) {
112 fShapeB = model.GetTensorShape(fNB);
113 size_t lengthB = ConvertShapeToLength(fShapeB);
114 if (lengthB < fLength) {
115 fNBroadcastedB = "Broadcasted" + fNB;
117 }
118 }
119 }
120
121 std::string GenerateInitCode() override
122 {
123 std::stringstream out;
124 if (!fNBroadcastedB.empty()) {
125 out << SP << "// Broadcasting the bias of LayerNormlization op\n";
126 out << SP << "{\n";
127 out << SP << SP << "float* data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<float>(tensor_";
128 out << fNB << ", " << ConvertShapeToString(fShapeB) << ", " << ConvertShapeToString(fShapeX) << ");\n";
129 out << SP << "std::copy(data, data + " << fLength << ", tensor_" << fNBroadcastedB << ");\n";
130 out << SP << "delete[] data;\n";
131 out << SP << "}\n";
132 }
133 return out.str();
134 }
135
136 std::string Generate(std::string OpName) override
137 {
138 OpName = "op_" + OpName;
139 if (fShapeX.empty()) {
140 throw std::runtime_error("TMVA::SOFIE LayerNormalization operator " + OpName +
141 " called to generate without beging initialized first.");
142 }
143 if (fShapeX.size() > 5) {
144 throw std::runtime_error("TMVA::SOFIE LayerNormalization operator not "
145 "implemented for input tensor of size > 5.");
146 }
147
148 std::stringstream out;
149
150 out << SP << "// Operator " << OpName << "\n";
151
152 // Loop over all the normalized axes i.e. [axis, ..., size)
153 out << SP << "std::vector<size_t> " << OpName << "_InputShape ({";
154 for (size_t i = 0; i < fSize; i++) {
155 out << fShapeX[i];
156 if (i + 1 < fSize) {
157 out << ",";
158 }
159 }
160 out << "});\n";
161 std::string inputShape = OpName + "_InputShape";
162
164 std::string InputIndex = "axis_0 * " + std::to_string(strides[0]);
165 for (size_t i = 1; i < fSize; i++) {
166 InputIndex += " + axis_" + std::to_string(i) + " * " + std::to_string(strides[i]);
167 }
168
170 std::string axesIndex = "axis_" + std::to_string(0) + " * " + std::to_string(axesStrides[0]);
171 for (size_t i = 1; i < fAxis; i++) {
172 axesIndex += " + axis_" + std::to_string(i) + " * " + std::to_string(axesStrides[i]);
173 }
174
175 auto normalizedStrides = UTILITY::ComputeStrideFromShape(fNormalizedShape);
176 std::string normalizedIndex = "axis_" + std::to_string(fAxis) + " * " + std::to_string(normalizedStrides[0]);
177 for (size_t i = fAxis + 1; i < fSize; i++) {
178 normalizedIndex += " + axis_" + std::to_string(i) + " * " + std::to_string(normalizedStrides[i - fAxis]);
179 }
180
181 if (!fNCastedX.empty()) {
182 // Cast X to float
183 out << SP << "for (size_t i = 0; i < " << fLength << "; i++) {\n";
184 out << SP << SP << "tensor_" << fNCastedX << "[i] = " << "static_cast<float>(tensor_" << fNX;
185 out << "[i]);\n";
186 out << SP << "}\n";
187 }
188
189 out << SP << "// Compute the mean\n";
190 // Loop over the normalized dimensions
191 for (size_t i = 0; i < fAxis; i++) {
192 std::string iIdx = "axis_" + std::to_string(i);
193 out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape;
194 out << "[" << i << "]; " << iIdx << "++) {\n";
195 }
196 out << SP << SP << fType << " sum = 0.;\n";
197 // loop over all the dims in [0, fAxis)
198 for (size_t j = fAxis; j < fSize; j++) {
199 std::string jIdx = "axis_" + std::to_string(j);
200 out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape;
201 out << "[" << j << "]; " << jIdx << "++) {\n";
202 }
203 out << SP << SP << SP << "sum += tensor_" << fNX << "[" << InputIndex << "];\n";
204 for (size_t j = fAxis; j < fSize; j++) {
205 out << SP << SP << "}\n";
206 }
207 out << SP << SP << "tensor_" << fNMean << "[" << axesIndex << "] = sum / " << fType << "(";
208 out << fNormalizedLength << ");\n";
209 for (size_t i = fAxis; i < fSize; i++) {
210 out << SP << "}\n";
211 }
212
213 out << SP << "// Compute the inverse Standard Deviation\n";
214 // Loop over the normalized dimensions
215 for (size_t i = 0; i < fAxis; i++) {
216 std::string iIdx = "axis_" + std::to_string(i);
217 out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape;
218 out << "[" << i << "]; " << iIdx << "++){\n";
219 }
220 // Set sum = 0
221 out << SP << SP << fType << " sum = 0.;\n";
222 // loop over all the dims in [0, fAxis)
223 for (size_t j = fAxis; j < fSize; j++) {
224 std::string jIdx = "axis_" + std::to_string(j);
225 out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape;
226 out << "[" << j << "]; " << jIdx << "++){\n";
227 }
228 out << SP << SP << SP << "sum += std::pow(tensor_" << fNX << "[" << InputIndex << "] - tensor_";
229 out << fNMean << "[" << axesIndex << "], 2);\n";
230 for (size_t j = fAxis; j < fSize; j++) {
231 out << SP << SP << "}\n";
232 }
233 out << SP << SP << "tensor_" << fNInvStdDev << "[" << axesIndex << "] = 1 / std::sqrt(";
234 out << "sum / " << fType << "(" << fNormalizedLength << ") + " << fAttrEpsilon << ");\n";
235 for (size_t i = 0; i < fAxis; i++) {
236 out << SP << "}\n";
237 }
238
239 if (!fNCastedX.empty()) {
240 out << "// NormalizedX = InvStdDev * (CastedX - Mean)\n";
241 for (size_t i = 0; i < fAxis; i++) {
242 std::string iIdx = "axis_" + std::to_string(i);
243 out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape;
244 out << "[" << i << "]; " << iIdx << "++){\n";
245 }
246 for (size_t j = fAxis; j < fSize; j++) {
247 std::string jIdx = "axis_" + std::to_string(j);
248 out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape;
249 out << "[" << j << "]; " << jIdx << "++){\n";
250 }
251 out << SP << SP << SP << "tensor_" << fNNormalizedX << "[" << InputIndex << "] = tensor_";
252 out << fNInvStdDev << "[" << axesIndex << "] * (tensor_" << fNCastedX << "[" << InputIndex;
253 out << "] - tensor_" << fNMean << "[" << axesIndex << "])\n";
254 for (size_t j = fAxis; j < fSize; j++) {
255 out << SP << SP << "}\n";
256 }
257 for (size_t i = fAxis; i < fSize; i++) {
258 out << SP << "}\n";
259 }
260 out << "// Y = Scale o NormalizedX";
261 for (size_t i = 0; i < fAxis; i++) {
262 std::string iIdx = "axis_" + std::to_string(i);
263 out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape;
264 out << "[" << i << "]; " << iIdx << "++){\n";
265 }
266 for (size_t j = fAxis; j < fSize; j++) {
267 std::string jIdx = "axis_" + std::to_string(j);
268 out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape;
269 out << "[" << j << "]; " << jIdx << "++){\n";
270 }
271 out << SP << SP << SP << "tensor_" << fNY << "[" << InputIndex << "] = tensor_" << fNScale;
272 out << "[" << axesIndex << "] * static_cast<" << fType << ">(tensor_" << fNCastedX << "[" << InputIndex;
273 out << "]);\n";
274 for (size_t j = fAxis; j < fSize; j++) {
275 out << SP << SP << "}\n";
276 }
277 for (size_t i = fAxis; i < fSize; i++) {
278 out << SP << "}\n";
279 }
280 } else {
281 out << SP << "// Y = Scale o InvStdDev (X - Mean)\n";
282 for (size_t i = 0; i < fAxis; i++) {
283 std::string iIdx = "axis_" + std::to_string(i);
284 out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape;
285 out << "[" << i << "]; " << iIdx << "++){\n";
286 }
287 for (size_t j = fAxis; j < fSize; j++) {
288 std::string jIdx = "axis_" + std::to_string(j);
289 out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape;
290 out << "[" << j << "]; " << jIdx << "++){\n";
291 }
292 out << SP << SP << SP << "tensor_" << fNY << "[" << InputIndex << "] = tensor_" << fNScale;
293 out << "[" << normalizedIndex << "] * tensor_" << fNInvStdDev << "[" << axesIndex;
294 out << "] * (tensor_" << fNX << "[" << InputIndex << "] - tensor_" << fNMean << "[";
295 out << axesIndex << "]);\n";
296 for (size_t j = fAxis; j < fSize; j++) {
297 out << SP << SP << "}\n";
298 }
299 for (size_t i = fAxis; i < fSize; i++) {
300 out << SP << "}\n";
301 }
302 }
303
304 if (!fNB.empty()) {
305 std::string Bias = "tensor_" + (fNBroadcastedB.empty() ? fNB : fNBroadcastedB);
306 out << SP << "// Add the bias to Y\n";
307 out << SP << "int " << OpName << "_n = " << fLength << ";\n";
308 out << SP << "float " << OpName << "_alpha = 1.;\n";
309 out << SP << "int " << OpName << "_inc = 1;\n";
310 out << SP << "BLAS::saxpy_(&" << OpName << "_n, &" << OpName << "_alpha, " << Bias << ", &";
311 out << OpName << "_inc, " << "tensor_" << fNY << ", &" << OpName << "_inc);\n";
312 }
313
314 return out.str();
315 }
316
317 std::vector<std::string> GetBlasRoutines() override { return { std::string("Axpy") }; }
318
319 std::vector<std::string> GetStdLibs() override { return { std::string("cmath") }; }
320};
321
322} // namespace SOFIE
323} // namespace Experimental
324} // namespace TMVA
325
326#endif
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
const ETensorType & GetTensorType(std::string name)
Definition RModel.cxx:76
void AddIntermediateTensor(std::string tensor_name, ETensorType type, std::vector< std::size_t > shape)
Definition RModel.cxx:156
bool CheckIfTensorAlreadyExist(std::string tensor_name)
Definition RModel.cxx:97
const std::vector< size_t > & GetTensorShape(std::string name)
Definition RModel.cxx:55
ROperator_LayerNormalization(int axis, float epsilon, size_t stashType, const std::string &nameX, const std::string &nameScale, const std::string &nameB, const std::string &nameY, const std::string &nameMean, const std::string &nameInvStdDev)
std::vector< std::vector< size_t > > ShapeInference(std::vector< std::vector< size_t > > input) override
std::vector< ETensorType > TypeInference(std::vector< ETensorType > input) override
const std::string SP
space used to correctly indent the generated C++ code
Definition ROperator.hxx:41
std::vector< size_t > ComputeStrideFromShape(const std::vector< size_t > &shape)
compute stride of a tensor given its shape (assume layout is row-major)
std::string ConvertShapeToString(std::vector< size_t > shape)
std::string ConvertTypeToString(ETensorType type)
ETensorType ConvertStringToType(std::string type)
std::size_t ConvertShapeToLength(std::vector< size_t > shape)
create variable transformations
double epsilon
Definition triangle.c:618