1#ifndef TMVA_SOFIE_ROPERATOR_LAYERNORMALIZATION
2#define TMVA_SOFIE_ROPERATOR_LAYERNORMALIZATION
57 const std::string &nameScale,
const std::string &nameB,
const std::string &nameY,
58 const std::string &nameMean,
const std::string &nameInvStdDev)
65 fInputTensorNames.emplace_back(fNB);
70 fOutputTensorNames.emplace_back(fNMean);
73 fOutputTensorNames.emplace_back(fNInvStdDev);
77 std::vector<std::vector<size_t>>
ShapeInference(std::vector<std::vector<size_t>> input)
override {
return input; }
79 std::vector<ETensorType>
TypeInference(std::vector<ETensorType> input)
override {
return input; }
82 if (!model.CheckIfTensorAlreadyExist(
fNX)) {
83 throw std::runtime_error(
"TMVA::SOFIE - LayerNormalization - Tensor " +
fNX +
" not found.");
85 bool isDynamic = model.IsDynamicTensor(
fNX);
111 model.AddIntermediateTensor(
fNMean, type, std::vector<Dim>(1,
Dim{
fAxesLength,std::size_t(-1)}));
113 model.AddIntermediateTensor(
fNMean, type, std::vector<size_t>(1,std::stoi(
fAxesLength)));
132 if (dimScale <
fSize) {
133 for (
size_t i = 0; i <
fSize-dimScale; i++)
137 for (
size_t i = 0; i <
fSize; i++) {
146 for (
size_t i = 0; i <
fSize-dimB; i++)
149 for (
size_t i = 0; i <
fSize; i++) {
155 model.AddIntermediateTensor(
fNY, model.GetTensorType(
fNX),
fShapeY);
156 if (model.Verbose()){
162 model.AddNeededStdLib(
"cmath");
167 std::stringstream out;
169 out <<
SP <<
"// Broadcasting the bias of LayerNormalization op\n";
171 out <<
SP <<
SP <<
"float* data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tensor_";
174 out <<
SP <<
"delete[] data;\n";
182 opName =
"op_" + opName;
184 throw std::runtime_error(
"TMVA::SOFIE LayerNormalization operator " + opName +
185 " called to generate without being initialized first.");
188 std::stringstream out;
190 out <<
"//---- Layer Normalization operator " << opName <<
"\n";
193 std::vector<std::string> inputShape(
fSize);
195 for (
size_t i = 0; i <
fSize; i++) {
196 inputShape[i] =
fShapeX[i].GetVal();
200 std::string inputIndex =
"axis_0 * " + strides[0].GetVal();
201 for (
size_t i = 1; i <
fSize; i++) {
202 inputIndex +=
" + axis_" + std::to_string(i);
203 if (i <
fSize-1) inputIndex +=
" * " + strides[i].GetVal();
206 std::string scaleIndex;
207 for (
size_t i = 0; i <
fSize; i++) {
209 if (!scaleIndex.empty()) scaleIndex +=
" + ";
210 scaleIndex +=
"axis_" + std::to_string(i);
211 if ( scaleStrides[i].dim != 1) scaleIndex +=
" * " + scaleStrides[i].GetVal();
214 if (scaleIndex.empty()) scaleIndex =
"0";
217 std::string biasIndex;
218 for (
size_t i = 0; i <
fSize; i++) {
220 if (!biasIndex.empty()) biasIndex +=
" + ";
221 biasIndex +=
"axis_" + std::to_string(i);
222 if ( biasStrides[i].dim != 1) biasIndex +=
" * " + biasStrides[i].GetVal();
225 if (biasIndex.empty()) biasIndex =
"0";
228 std::string axesIndex =
"axis_" + std::to_string(0) +
" * " + axesStrides[0].GetVal();
229 for (
size_t i = 1; i <
fAxis; i++) {
230 axesIndex +=
" + axis_" + std::to_string(i) +
" * " + axesStrides[i].GetVal();
236 out <<
SP <<
"// Compute the mean\n";
239 for (
size_t i = 0; i <
fAxis; i++) {
240 std::string iIdx =
"axis_" + std::to_string(i);
241 out <<
SP <<
"for (size_t " << iIdx <<
" = 0; " << iIdx <<
" < " << inputShape[i]
242 <<
"; " << iIdx <<
"++) {\n";
244 out <<
SP <<
SP <<
fType <<
" mean = 0.;\n";
247 std::string jIdx =
"axis_" + std::to_string(j);
248 out <<
SP <<
SP <<
"for (size_t " << jIdx <<
" = 0; " << jIdx <<
" < " << inputShape[j]
249 <<
"; " << jIdx <<
"++) {\n";
251 out <<
SP <<
SP <<
SP <<
"mean += tensor_" <<
fNX <<
"[" << inputIndex <<
"];\n";
253 out <<
SP <<
SP <<
"}\n";
258 out <<
SP <<
"// Compute the inverse Standard Deviation\n";
261 out <<
SP <<
SP <<
fType <<
" sum = 0.;\n";
264 std::string jIdx =
"axis_" + std::to_string(j);
265 out <<
SP <<
SP <<
"for (size_t " << jIdx <<
" = 0; " << jIdx <<
" < " << inputShape[j]
266 <<
"; " << jIdx <<
"++){\n";
268 out <<
SP <<
SP <<
SP <<
"float tmp = tensor_" <<
fNX <<
"[" << inputIndex <<
"] - mean;\n";
269 out <<
SP <<
SP <<
SP <<
"sum += tmp*tmp;\n";
271 out <<
SP <<
SP <<
"}\n";
273 out <<
SP <<
SP <<
fType <<
" invStdDev = 1 / std::sqrt(";
279 out <<
SP <<
SP <<
"tensor_" <<
fNMean <<
"[" << axesIndex <<
"] = mean;\n";
281 out <<
SP <<
SP <<
"tensor_" <<
fNInvStdDev <<
"[" << axesIndex <<
"] = invStdDev;\n";
285 out <<
SP <<
"// Y = Scale o InvStdDev (X - Mean)\n";
288 std::string jIdx =
"axis_" + std::to_string(j);
289 out <<
SP <<
SP <<
"for (size_t " << jIdx <<
" = 0; " << jIdx <<
" < " << inputShape[j] <<
"; " << jIdx
292 out <<
SP <<
SP <<
SP <<
"tensor_" <<
fNY <<
"[" << inputIndex <<
"] = tensor_" <<
fNScale;
293 out <<
"[" << scaleIndex <<
"] * invStdDev * (tensor_" <<
fNX <<
"[" << inputIndex <<
"] - mean)";
298 out <<
" + tensor_" <<
fNB <<
"[" << biasIndex <<
"]";
303 out <<
SP <<
SP <<
"}\n";
306 for (
size_t i = 0; i <
fAxis; i++) {
313 std::vector<std::string>
GetBlasRoutines()
override {
return { std::string(
"Axpy") }; }
315 std::vector<std::string>
GetStdLibs()
override {
return { std::string(
"cmath") }; }
std::vector< std::string > GetBlasRoutines() override
std::vector< Dim > fShapeX
ROperator_LayerNormalization()
std::vector< Dim > fShapeInvStdDev
ROperator_LayerNormalization(int axis, float epsilon, size_t stashType, const std::string &nameX, const std::string &nameScale, const std::string &nameB, const std::string &nameY, const std::string &nameMean, const std::string &nameInvStdDev)
std::vector< Dim > fShapeB
std::string fNormalizedLength
std::string Generate(std::string opName) override
std::vector< std::string > GetStdLibs() override
std::string GenerateInitCode() override
std::vector< Dim > fNormalizedShape
std::vector< Dim > fShapeScale
void Initialize(RModel &model) override
std::vector< Dim > fShapeY
std::vector< std::vector< size_t > > ShapeInference(std::vector< std::vector< size_t > > input) override
std::string fNBroadcastedB
std::vector< ETensorType > TypeInference(std::vector< ETensorType > input) override
std::vector< Dim > fShapeMean
std::string fNNormalizedX
std::vector< Dim > fAxesShape
std::vector< std::string_view > fInputTensorNames
const std::string SP
Whitespace string used to correctly indent the generated C++ code.
std::vector< std::string_view > fOutputTensorNames
std::vector< size_t > ComputeStrideFromShape(const std::vector< size_t > &shape)
Compute the strides of a tensor given its shape (assuming a row-major memory layout).
std::string ConvertDimShapeToString(const std::vector< Dim > &shape)
std::string ConvertTypeToString(ETensorType type)
std::string ConvertDimShapeToLength(const std::vector< Dim > &shape)
create variable transformations