Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RSofieReader.hxx
Go to the documentation of this file.
1/**********************************************************************************
2 * Project: ROOT - A ROOT-integrated toolkit for multivariate data analysis *
3 * Package: TMVA * *
4 * *
5 * Description: *
6 * *
7 * Authors: *
8 * Lorenzo Moneta *
9 * *
10 * Copyright (c) 2022: *
11 * CERN, Switzerland *
12 * *
13 **********************************************************************************/
14
15
16#ifndef TMVA_RSOFIEREADER
17#define TMVA_RSOFIEREADER
18
19
20#include <string>
21#include <vector>
22#include <memory> // std::unique_ptr
23#include <sstream> // std::stringstream
24#include <iostream>
25#include "TROOT.h"
26#include "TSystem.h"
27#include "TError.h"
28#include "TInterpreter.h"
29#include "TUUID.h"
30#include "TMVA/RTensor.hxx"
31#include "Math/Util.h"
32
33namespace TMVA {
34namespace Experimental {
35
36
37
38
39/// TMVA::RSofieReader class for reading external Machine Learning models
40/// in ONNX files, Keras .h5 or .keras files or PyTorch .pt files
41/// and performing the inference using SOFIE
42/// It is recommended to use ONNX if possible since there is a larger support for
43/// model operators.
44
46
47
48public:
49 /// Dummy constructor which needs model loading afterwards
51 /// Create TMVA model from ONNX file
52 /// print level can be 0 (minimal) 1 with info , 2 with all ONNX parsing info
53 RSofieReader(const std::string &path, std::vector<std::vector<size_t>> inputShapes = {}, int verbose = 0)
54 {
55 Load(path, inputShapes, verbose);
56 }
57
58 void Load(const std::string &path, std::vector<std::vector<size_t>> inputShapes = {}, int verbose = 0)
59 {
60
61 enum EModelType {kONNX, kKeras, kPt, kROOT, kNotDef}; // type of model
63
64 size_t pos2 = std::string::npos;
65 if ( (pos2 = path.find(".onnx")) != std::string::npos) {
66 if (verbose) std::cout << "input model type is ONNX" << std::endl;
67 type = kONNX;
68 } else if ( (pos2 = path.find(".h5")) != std::string::npos || (pos2 = path.find(".keras")) != std::string::npos) {
69 if (verbose) std::cout << "input model type is Keras" << std::endl;
70 type = kKeras;
71 } else if ( (pos2 = path.find(".pt")) != std::string::npos) {
72 if (verbose) std::cout << "input model type is PyTorch" << std::endl;
73 type = kPt;
74 } else if ( (pos2 = path.find(".root")) != std::string::npos) {
75 if (verbose) std::cout << "input model type is ROOT" << std::endl;
76 type = kROOT;
77 }
78
79 if (type == kNotDef) {
80 throw std::runtime_error("Input file is not an ONNX or Keras or PyTorch file");
81 }
82 auto pos1 = path.rfind("/");
83 if (pos1 == std::string::npos)
84 pos1 = 0;
85 else
86 pos1 += 1;
87 std::string modelName = path.substr(pos1,pos2-pos1);
88 std::string fileType = path.substr(pos2+1, path.length()-pos2-1);
89 if (verbose) std::cout << "Parsing SOFIE model " << modelName << " of type " << fileType << std::endl;
90
91 // append a suffix to headerfile
92 std::string modelHeader = modelName + "_fromRSofieR.hxx";
93 std::string modelWeights = modelName + "_fromRSofieR.dat";
94
95 // create code for parsing model and generate C++ code for inference
96 // make it in a separate scope to avoid polluting global interpreter space
97 std::string parserCode;
98 std::string parserPythonCode; // for Python parsers
99 if (type == kONNX) {
100 // check first if we can load the SOFIE parser library
101 if (gSystem->Load("libROOTTMVASofieParser") < 0) {
102 throw std::runtime_error("RSofieReader: cannot use SOFIE with ONNX since libROOTTMVASofieParser is missing");
103 }
104 gInterpreter->Declare("#include \"TMVA/RModelParser_ONNX.hxx\"");
105 parserCode += "{\nTMVA::Experimental::SOFIE::RModelParser_ONNX parser ; \n";
106 if (verbose == 2)
107 parserCode += "TMVA::Experimental::SOFIE::RModel model = parser.Parse(\"" + path + "\",true); \n";
108 else
109 parserCode += "TMVA::Experimental::SOFIE::RModel model = parser.Parse(\"" + path + "\"); \n";
110 }
111 else if (type == kKeras) {
112 // use Keras Python parser
113 parserPythonCode += "\"\"\"\n";
114 parserPythonCode += "import ROOT\n";
115
116 // assume batch size is first entry in first input otherwise set to 1
117 std::string batch_size = "1"; // need to fix parser with parm batch sizes
118 if (!inputShapes.empty() && ! inputShapes[0].empty())
119 batch_size = std::to_string(inputShapes[0][0]);
120 parserPythonCode += "model = ROOT.TMVA.Experimental.SOFIE.PyKeras.Parse('" + path + "'," + batch_size + ")\n";
121 }
122 else if (type == kPt) {
123 // use PyTorch direct parser
124 if (gSystem->Load("libROOTTMVASofiePyParsers") < 0) {
125 throw std::runtime_error("RSofieReader: cannot use SOFIE with PyTorch since libROOTTMVASofiePyParsers is missing");
126 }
127 if (inputShapes.size() == 0) {
128 throw std::runtime_error("RSofieReader: cannot use SOFIE with PyTorch since the input tensor shape is missing and is needed by the PyTorch parser");
129 }
130 std::string inputShapesStr = "{";
131 for (unsigned int i = 0; i < inputShapes.size(); i++) {
132 inputShapesStr += "{ ";
133 for (unsigned int j = 0; j < inputShapes[i].size(); j++) {
135 if (j < inputShapes[i].size()-1) inputShapesStr += ", ";
136 }
137 inputShapesStr += "}";
138 if (i < inputShapes.size()-1) inputShapesStr += ", ";
139 }
140 inputShapesStr += "}";
141 parserCode += "{\nTMVA::Experimental::SOFIE::RModel model = TMVA::Experimental::SOFIE::PyTorch::Parse(\"" + path + "\", "
142 + inputShapesStr + "); \n";
143 }
144 else if (type == kROOT) {
145 // use parser from ROOT
146 parserCode += "{\nauto fileRead = TFile::Open(\"" + path + "\",\"READ\");\n";
147 parserCode += "TMVA::Experimental::SOFIE::RModel * modelPtr;\n";
148 parserCode += "auto keyList = fileRead->GetListOfKeys(); TString name;\n";
149 parserCode += "for (const auto&& k : *keyList) { \n";
150 parserCode += " TString cname = ((TKey*)k)->GetClassName(); if (cname==\"TMVA::Experimental::SOFIE::RModel\") name = k->GetName(); }\n";
151 parserCode += "fileRead->GetObject(name,modelPtr); fileRead->Close(); delete fileRead;\n";
152 parserCode += "TMVA::Experimental::SOFIE::RModel & model = *modelPtr;\n";
153 }
154
155 // add custom operators if needed
156 if (fCustomOperators.size() > 0) {
157 if (!parserPythonCode.empty())
158 throw std::runtime_error("Cannot use Custom operator with a Python parser (e.g. from a Keras model)");
159
160 for (auto & op : fCustomOperators) {
161 parserCode += "{ auto p = new TMVA::Experimental::SOFIE::ROperator_Custom<float>(\""
162 + op.fOpName + "\"," + op.fInputNames + "," + op.fOutputNames + "," + op.fOutputShapes + ",\"" + op.fFileName + "\");\n";
163 parserCode += "std::unique_ptr<TMVA::Experimental::SOFIE::ROperator> op(p);\n";
164 parserCode += "model.AddOperator(std::move(op));\n}\n";
165 }
166 }
167
168 int batchSize = 1;
169 if (inputShapes.size() > 0 && inputShapes[0].size() > 0) {
170 batchSize = inputShapes[0][0];
171 if (batchSize < 1) batchSize = 1;
172 }
173 if (verbose) std::cout << "generating the code with batch size = " << batchSize << " ...\n";
174
175 if (parserPythonCode.empty()) {
176 parserCode += "model.Generate(TMVA::Experimental::SOFIE::Options::kDefault,"
177 + ROOT::Math::Util::ToString(batchSize) + ", 0, " + std::to_string(verbose) + ");\n";
178
179 parserCode += "model.OutputGenerated(\"" + modelHeader + "\");\n";
180 if (verbose) {
181 parserCode += "model.PrintRequiredInputTensors();\n";
182 parserCode += "model.PrintIntermediateTensors();\n";
183 parserCode += "model.PrintOutputTensors();\n";
184 if (verbose > 1)
185 parserCode += "model.PrintGenerated(); \n";
186 }
187
188 // need information on number of inputs (assume output is 1)
189 parserCode += "int nInputs = model.GetInputTensorNames().size();\n";
190
191 //end of parsing C++ code
192 parserCode += "return nInputs;\n}\n";
193 } else {
194 // Python case
195 parserPythonCode += "model.Generate(ROOT.TMVA.Experimental.SOFIE.Options.kDefault,"
196 + ROOT::Math::Util::ToString(batchSize) + ", 0, " + std::to_string(verbose) + ")\n";
197
198 parserPythonCode += "model.OutputGenerated('" + modelHeader + "');\n";
199 if (verbose) {
200 parserPythonCode += "model.PrintRequiredInputTensors()\n";
201 parserPythonCode += "model.PrintIntermediateTensors()\n";
202 parserPythonCode += "model.PrintOutputTensors()\n";
203 if (verbose > 1)
204 parserPythonCode += "model.PrintGenerated()\n";
205 }
206 // end of Python parsing code
207 parserPythonCode += "\"\"\"";
208 }
209 // executing parsing and generating code
210 int iret = -1;
211 if (parserPythonCode.empty()) {
212 if (verbose) {
213 std::cout << "...ParserCode being executed...:\n";
214 std::cout << parserCode << std::endl;
215 }
216 iret = gROOT->ProcessLine(parserCode.c_str());
217 fNInputs = iret;
218 } else {
219 if (verbose) {
220 std::cout << "executing python3 -c ......" << std::endl;
221 std::cout << parserPythonCode << std::endl;
222 }
223 iret = gSystem->Exec(TString("python3 -c ") + TString(parserPythonCode.c_str()));
224 fNInputs = 1;
225 // need number of inputs from input shapes
226 if (!inputShapes.empty()) fNInputs = inputShapes.size();
227 }
228
229 if (iret < 0) {
230 std::string msg = "RSofieReader: error processing the parser code: \n" + parserCode;
231 throw std::runtime_error(msg);
232 } else if (verbose) {
233 std::cout << "Model Header file is generated!" << std::endl;
234 }
235 if (fNInputs > 3) {
236 throw std::runtime_error("RSofieReader does not yet support model with > 3 inputs");
237 }
238
239 // compile now the generated code and create Session class
240 if (verbose) std::cout << "compile generated code from file " <<modelHeader << std::endl;
241 if (gSystem->AccessPathName(modelHeader.c_str())) {
242 std::string msg = "RSofieReader: input header file " + modelHeader + " is not existing";
243 throw std::runtime_error(msg);
244 }
245 if (verbose) std::cout << "Creating Inference function for model " << modelName << std::endl;
246 std::string declCode;
247 declCode += "#pragma cling optimize(2)\n";
248 declCode += "#include \"" + modelHeader + "\"\n";
249 // create global session instance: use UUID to have an unique name
250 std::string sessionClassName = "TMVA_SOFIE_" + modelName + "::Session";
251 TUUID uuid;
252 std::string uidName = uuid.AsString();
253 uidName.erase(std::remove_if(uidName.begin(), uidName.end(),
254 []( char const& c ) -> bool { return !std::isalnum(c); } ), uidName.end());
255
256 std::string sessionName = "session_" + uidName;
257 declCode += sessionClassName + " " + sessionName + "(\"" + modelWeights + "\");";
258
259 if (verbose) std::cout << "//global session declaration\n" << declCode << std::endl;
260
261 // need to load the ROOTTMVASOFIE library for some symbols used in generated code
262 iret = gSystem->Load("libROOTTMVASofie");
263 if (iret < 0)
264 throw std::runtime_error("Error loading libROOTTMVASofie library");
265
266 bool ret = gInterpreter->Declare(declCode.c_str());
267 if (!ret) {
268 std::string msg = "RSofieReader: error compiling inference code and creating session class\n" + declCode;
269 throw std::runtime_error(msg);
270 }
271
272 fSessionPtr = (void *) gInterpreter->Calc(sessionName.c_str());
273
274 // define a function to be called for inference
275 std::stringstream ifuncCode;
276 std::string funcName = "SofieInference_" + uidName;
277 ifuncCode << "std::vector<float> " + funcName + "( void * ptr";
278 for (int i = 0; i < fNInputs; i++)
279 ifuncCode << ", float * data" << i;
280 ifuncCode << ") {\n";
281 ifuncCode << " " << sessionClassName << " * s = " << "(" << sessionClassName << "*) (ptr);\n";
282 ifuncCode << " return s->infer(";
283 for (int i = 0; i < fNInputs; i++) {
284 if (i>0) ifuncCode << ",";
285 ifuncCode << "data" << i;
286 }
287 ifuncCode << ");\n";
288 ifuncCode << "}\n";
289
290 if (verbose) std::cout << "//Inference function code using global session instance\n"
291 << ifuncCode.str() << std::endl;
292
293 ret = gInterpreter->Declare(ifuncCode.str().c_str());
294 if (!ret) {
295 std::string msg = "RSofieReader: error compiling inference function\n" + ifuncCode.str();
296 throw std::runtime_error(msg);
297 }
298 fFuncPtr = (void *) gInterpreter->Calc(funcName.c_str());
299 //fFuncPtr = reinterpret_cast<std::vector<float> (*)(void *, const float *)>(fptr);
300 fInitialized = true;
301 }
302
303 // Add custom operator
304 void AddCustomOperator(const std::string &opName, const std::string &inputNames, const std::string & outputNames,
305 const std::string & outputShapes, const std::string & fileName) {
306 if (fInitialized) std::cout << "WARNING: Model is already loaded and initialised. It must be done after adding the custom operators" << std::endl;
308 }
309
310 // implementations for different outputs
311 std::vector<float> DoCompute(const std::vector<float> & x1) {
312 if (fNInputs != 1) {
313 std::string msg = "Wrong number of inputs - model requires " + std::to_string(fNInputs);
314 throw std::runtime_error(msg);
315 }
316 auto fptr = reinterpret_cast<std::vector<float> (*)(void *, const float *)>(fFuncPtr);
317 return fptr(fSessionPtr, x1.data());
318 }
319 std::vector<float> DoCompute(const std::vector<float> & x1, const std::vector<float> & x2) {
320 if (fNInputs != 2) {
321 std::string msg = "Wrong number of inputs - model requires " + std::to_string(fNInputs);
322 throw std::runtime_error(msg);
323 }
324 auto fptr = reinterpret_cast<std::vector<float> (*)(void *, const float *, const float *)>(fFuncPtr);
325 return fptr(fSessionPtr, x1.data(),x2.data());
326 }
327 std::vector<float> DoCompute(const std::vector<float> & x1, const std::vector<float> & x2, const std::vector<float> & x3) {
328 if (fNInputs != 3) {
329 std::string msg = "Wrong number of inputs - model requires " + std::to_string(fNInputs);
330 throw std::runtime_error(msg);
331 }
332 auto fptr = reinterpret_cast<std::vector<float> (*)(void *, const float *, const float *, const float *)>(fFuncPtr);
333 return fptr(fSessionPtr, x1.data(),x2.data(),x3.data());
334 }
335
336 /// Compute model prediction on vector
337 template<typename... T>
338 std::vector<float> Compute(T... x)
339 {
340 if(!fInitialized) {
341 return std::vector<float>();
342 }
343
344 // Take lock to protect model evaluation
346
347 // Evaluate TMVA model (need to add support for multiple outputs)
348 return DoCompute(x...);
349
350 }
351 std::vector<float> Compute(const std::vector<float> &x) {
352 if(!fInitialized) {
353 return std::vector<float>();
354 }
355
356 // Take lock to protect model evaluation
358
359 // Evaluate TMVA model (need to add support for multiple outputs)
360 return DoCompute(x);
361 }
362 /// Compute model prediction on input RTensor
363 /// The shape of the input tensor should be {nevents, nfeatures}
364 /// and the return shape will be {nevents, noutputs}
365 /// support for now only a single input
367 {
368 if(!fInitialized) {
369 return RTensor<float>({0});
370 }
371 const auto nrows = x.GetShape()[0];
372 const auto rowsize = x.GetStrides()[0];
373 auto fptr = reinterpret_cast<std::vector<float> (*)(void *, const float *)>(fFuncPtr);
374 auto result = fptr(fSessionPtr, x.GetData());
375
376 RTensor<float> y({nrows, result.size()}, MemoryLayout::ColumnMajor);
377 std::copy(result.begin(),result.end(), y.GetData());
378 //const bool layout = x.GetMemoryLayout() == MemoryLayout::ColumnMajor ? false : true;
379 // assume column major layout
380 for (size_t i = 1; i < nrows; i++) {
381 result = fptr(fSessionPtr, x.GetData() + i*rowsize);
382 std::copy(result.begin(),result.end(), y.GetData() + i*result.size());
383 }
384 return y;
385 }
386
387private:
388
389 bool fInitialized = false;
390 int fNInputs = 0;
391 void * fSessionPtr = nullptr;
392 void * fFuncPtr = nullptr;
393
394 // data to insert custom operators
396 std::string fFileName; // code implementing the custom operator
397 std::string fOpName; // operator name
398 std::string fInputNames; // input tensor names (convert as string as {"n1", "n2"})
399 std::string fOutputNames; // output tensor names converted as trind
400 std::string fOutputShapes; // output shapes
401 };
402 std::vector<CustomOperatorData> fCustomOperators;
403
404};
405
406} // namespace Experimental
407} // namespace TMVA
408
409#endif // TMVA_RSOFIEREADER
#define c(i)
Definition RSha256.hxx:101
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char x2
Option_t Option_t TPoint TPoint const char x1
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
#define gInterpreter
#define gROOT
Definition TROOT.h:414
R__EXTERN TSystem * gSystem
Definition TSystem.h:582
#define R__WRITE_LOCKGUARD(mutex)
const_iterator begin() const
const_iterator end() const
TMVA::RSofieReader class for reading external Machine Learning models in ONNX files,...
RSofieReader(const std::string &path, std::vector< std::vector< size_t > > inputShapes={}, int verbose=0)
Create TMVA model from ONNX file print level can be 0 (minimal) 1 with info , 2 with all ONNX parsing...
RTensor< float > Compute(RTensor< float > &x)
Compute model prediction on input RTensor The shape of the input tensor should be {nevents,...
std::vector< float > Compute(const std::vector< float > &x)
std::vector< float > Compute(T... x)
Compute model prediction on vector.
void Load(const std::string &path, std::vector< std::vector< size_t > > inputShapes={}, int verbose=0)
std::vector< float > DoCompute(const std::vector< float > &x1, const std::vector< float > &x2, const std::vector< float > &x3)
std::vector< CustomOperatorData > fCustomOperators
std::vector< float > DoCompute(const std::vector< float > &x1)
void AddCustomOperator(const std::string &opName, const std::string &inputNames, const std::string &outputNames, const std::string &outputShapes, const std::string &fileName)
std::vector< float > DoCompute(const std::vector< float > &x1, const std::vector< float > &x2)
RSofieReader()
Dummy constructor which needs model loading afterwards.
Basic string class.
Definition TString.h:138
virtual Int_t Exec(const char *shellcmd)
Execute a command.
Definition TSystem.cxx:651
virtual int Load(const char *module, const char *entry="", Bool_t system=kFALSE)
Load a shared library.
Definition TSystem.cxx:1868
virtual Bool_t AccessPathName(const char *path, EAccessMode mode=kFileExists)
Returns FALSE if one can access a file using the specified access mode.
Definition TSystem.cxx:1307
This class defines a UUID (Universally Unique IDentifier), also known as GUIDs (Globally Unique IDent...
Definition TUUID.h:42
const char * AsString() const
Return UUID as string. Copy string immediately since it will be reused.
Definition TUUID.cxx:570
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
std::string ToString(const T &val)
Utility function for conversion to strings.
Definition Util.h:64
R__EXTERN TVirtualRWMutex * gCoreMutex
create variable transformations