Logo ROOT  
Reference Guide
 
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
Loading...
Searching...
No Matches
RFieldUtils.cxx
Go to the documentation of this file.
1/// \file RFieldUtils.cxx
2/// \ingroup NTuple ROOT7
3/// \author Jonas Hahnfeld <jonas.hahnfeld@cern.ch>
4/// \date 2024-11-19
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
9
10#include <ROOT/RField.hxx>
11#include <ROOT/RLogger.hxx>
12#include <ROOT/RNTupleUtil.hxx>
13
14#include <TClass.h>
15#include <TClassEdit.h>
16#include <TDictAttributeMap.h>
17
18#include <algorithm>
19#include <charconv>
20#include <limits>
21#include <string>
22#include <string_view>
23#include <system_error>
24#include <unordered_map>
25#include <utility>
26#include <vector>
27
28namespace {
29
/// Translation table from ROOT typedef names and unqualified standard type spellings to the spelling used for
/// RNTuple type names. Keys are complete type names; a name that is not listed has no translation.
const std::unordered_map<std::string_view, std::string_view> typeTranslationMap{
   {"Bool_t", "bool"},
   {"Float_t", "float"},
   {"Double_t", "double"},
   {"string", "std::string"},

   // 8-bit types
   {"byte", "std::byte"},
   {"Char_t", "char"},
   {"int8_t", "std::int8_t"},
   {"UChar_t", "unsigned char"},
   {"uint8_t", "std::uint8_t"},

   // 16-bit types
   {"Short_t", "short"},
   {"int16_t", "std::int16_t"},
   {"UShort_t", "unsigned short"},
   {"uint16_t", "std::uint16_t"},

   // 32-bit types
   {"Int_t", "int"},
   {"int32_t", "std::int32_t"},
   {"UInt_t", "unsigned int"},
   {"unsigned", "unsigned int"},
   {"uint32_t", "std::uint32_t"},

   // Long_t and ULong_t follow the platform's size of long and unsigned long: They are 64 bit on 64-bit Linux and
   // macOS, but 32 bit on 32-bit platforms and Windows (regardless of pointer size).
   {"Long_t", "long"},
   {"ULong_t", "unsigned long"},

   // 64-bit types
   {"Long64_t", "long long"},
   {"int64_t", "std::int64_t"},
   {"ULong64_t", "unsigned long long"},
   {"uint64_t", "std::uint64_t"}};
62
63// Recursively normalizes a template argument using the regular type name normalizer F as a helper.
64template <typename F>
65std::string GetNormalizedTemplateArg(const std::string &arg, F fnTypeNormalizer)
66{
67 R__ASSERT(!arg.empty());
68
69 if (std::isdigit(arg[0]) || arg[0] == '-') {
70 // Integer template argument
72 }
73
74 std::string qualifier;
75 // Type name template argument; template arguments must keep their CV qualifier
76 if (arg.substr(0, 6) == "const " || (arg.length() > 14 && arg.substr(9, 6) == "const "))
77 qualifier += "const ";
78 if (arg.substr(0, 9) == "volatile " || (arg.length() > 14 && arg.substr(6, 9) == "volatile "))
79 qualifier += "volatile ";
80 return qualifier + fnTypeNormalizer(arg);
81}
82
83using AnglePos = std::pair<std::string::size_type, std::string::size_type>;
84std::vector<AnglePos> FindTemplateAngleBrackets(const std::string &typeName)
85{
86 std::vector<AnglePos> result;
87 std::string::size_type currentPos = 0;
88 while (currentPos < typeName.size()) {
89 const auto posOpen = typeName.find('<', currentPos);
90 if (posOpen == std::string::npos) {
91 // If there are no more templates, the function is done.
92 break;
93 }
94
95 auto posClose = posOpen + 1;
96 int level = 1;
97 while (posClose < typeName.size()) {
98 const auto c = typeName[posClose];
99 if (c == '<') {
100 level++;
101 } else if (c == '>') {
102 if (level == 1) {
103 break;
104 }
105 level--;
106 }
107 posClose++;
108 }
109 // We should have found a closing angle bracket at the right level.
110 R__ASSERT(posClose < typeName.size());
111 result.emplace_back(posOpen, posClose);
112
113 // If we are not at the end yet, the following two characeters should be :: for nested types.
114 if (posClose < typeName.size() - 1) {
115 R__ASSERT(typeName.substr(posClose + 1, 2) == "::");
116 }
117 currentPos = posClose + 1;
118 }
119
120 return result;
121}
122
123} // namespace
124
/// Computes the canonical RNTuple spelling of a type name.
///
/// The steps visible in this function are: clean the name with TClassEdit::CleanType (mode 1); drop one leading
/// "struct ", "enum " or "::"; from the first '<' onwards, remove every blank that directly precedes a '>'; add
/// the "std::" prefix to a fixed list of standard library templates and spell "ROOT::RVec<" as
/// "ROOT::VecOps::RVec<"; replace names found in typeTranslationMap by their translation.
///
/// \param typeName The type name to canonicalize.
/// \return The canonical type name.
125std::string ROOT::Internal::GetCanonicalTypePrefix(const std::string &typeName)
126{
127 std::string canonicalType{TClassEdit::CleanType(typeName.c_str(), /*mode=*/1)};
// At most one of these prefixes is removed (the branches are mutually exclusive).
128 if (canonicalType.substr(0, 7) == "struct ") {
129 canonicalType.erase(0, 7);
130 } else if (canonicalType.substr(0, 5) == "enum ") {
131 canonicalType.erase(0, 5);
132 } else if (canonicalType.substr(0, 2) == "::") {
133 canonicalType.erase(0, 2);
134 }
135
136 // TClassEdit::CleanType inserts blanks between closing angle brackets, as they were required before C++11. We want
137 // to remove them for RNTuple.
138 auto angle = canonicalType.find('<');
139 if (angle != std::string::npos) {
// In-place compaction: `src` reads every character from the first '<' onwards, `dst` is the write position.
// Characters are copied forward except for a blank that is directly followed by '>'; the leftover tail is erased.
140 auto dst = canonicalType.begin() + angle;
141 auto end = canonicalType.end();
142 for (auto src = dst; src != end; ++src) {
143 if (*src == ' ') {
144 auto next = src + 1;
145 if (next != end && *next == '>') {
146 // Skip this space before a closing angle bracket.
147 continue;
148 }
149 }
150 *(dst++) = *src;
151 }
152 canonicalType.erase(dst, end);
153 }
154
// Only the very beginning of the name is tested, i.e. the prefix is added to the outermost template only.
155 if (canonicalType.substr(0, 6) == "array<") {
156 canonicalType = "std::" + canonicalType;
157 } else if (canonicalType.substr(0, 7) == "atomic<") {
158 canonicalType = "std::" + canonicalType;
159 } else if (canonicalType.substr(0, 7) == "bitset<") {
160 canonicalType = "std::" + canonicalType;
161 } else if (canonicalType.substr(0, 4) == "map<") {
162 canonicalType = "std::" + canonicalType;
163 } else if (canonicalType.substr(0, 9) == "multimap<") {
164 canonicalType = "std::" + canonicalType;
165 } else if (canonicalType.substr(0, 9) == "multiset<") {
166 canonicalType = "std::" + canonicalType;
167 }
// This `if` starts a second, separate chain. A name rewritten by the chain above now begins with "std::" and
// therefore cannot match any of the prefixes tested below, so at most one rewrite is applied overall.
168 if (canonicalType.substr(0, 5) == "pair<") {
169 canonicalType = "std::" + canonicalType;
170 } else if (canonicalType.substr(0, 4) == "set<") {
171 canonicalType = "std::" + canonicalType;
172 } else if (canonicalType.substr(0, 6) == "tuple<") {
173 canonicalType = "std::" + canonicalType;
174 } else if (canonicalType.substr(0, 11) == "unique_ptr<") {
175 canonicalType = "std::" + canonicalType;
176 } else if (canonicalType.substr(0, 14) == "unordered_map<") {
177 canonicalType = "std::" + canonicalType;
178 } else if (canonicalType.substr(0, 19) == "unordered_multimap<") {
179 canonicalType = "std::" + canonicalType;
180 } else if (canonicalType.substr(0, 19) == "unordered_multiset<") {
181 canonicalType = "std::" + canonicalType;
182 } else if (canonicalType.substr(0, 14) == "unordered_set<") {
183 canonicalType = "std::" + canonicalType;
184 } else if (canonicalType.substr(0, 8) == "variant<") {
185 canonicalType = "std::" + canonicalType;
186 } else if (canonicalType.substr(0, 7) == "vector<") {
187 canonicalType = "std::" + canonicalType;
188 } else if (canonicalType.substr(0, 11) == "ROOT::RVec<") {
// Replace the 11-character short spelling by the fully qualified one, keeping the template arguments.
189 canonicalType = "ROOT::VecOps::RVec<" + canonicalType.substr(11);
190 }
191
// The whole name must match a key of the table; names containing template arguments are not translated here.
192 if (auto it = typeTranslationMap.find(canonicalType); it != typeTranslationMap.end()) {
193 canonicalType = it->second;
194 }
195
196 // Map fundamental integer types to stdint integer types (e.g. int --> std::int32_t)
// NOTE(review): every branch of the chain below is empty in this listing, and the embedded source numbering skips
// exactly one line per branch (198, 200, 203, ...). The statements that assign the stdint name appear to be
// missing; restore them from the upstream file before relying on this code.
197 if (canonicalType == "signed char") {
199 } else if (canonicalType == "unsigned char") {
201 } else if (canonicalType == "short" || canonicalType == "short int" || canonicalType == "signed short" ||
202 canonicalType == "signed short int") {
204 } else if (canonicalType == "unsigned short" || canonicalType == "unsigned short int") {
206 } else if (canonicalType == "int" || canonicalType == "signed" || canonicalType == "signed int") {
208 } else if (canonicalType == "unsigned" || canonicalType == "unsigned int") {
210 } else if (canonicalType == "long" || canonicalType == "long int" || canonicalType == "signed long" ||
211 canonicalType == "signed long int") {
213 } else if (canonicalType == "unsigned long" || canonicalType == "unsigned long int") {
215 } else if (canonicalType == "long long" || canonicalType == "long long int" || canonicalType == "signed long long" ||
216 canonicalType == "signed long long int") {
218 } else if (canonicalType == "unsigned long long" || canonicalType == "unsigned long long int") {
220 }
221
222 return canonicalType;
223}
224
226{
228 // RNTuple resolves Double32_t for the normalized type name but keeps Double32_t for the type alias
229 // (also in template parameters)
230 if (canonicalTypePrefix == "Double32_t")
231 return "double";
232
233 if (canonicalTypePrefix.find('<') == std::string::npos) {
234 // If there are no templates, the function is done.
235 return canonicalTypePrefix;
236 }
237
239 R__ASSERT(!angleBrackets.empty());
240
241 std::string normName;
242 std::string::size_type currentPos = 0;
243 for (std::size_t i = 0; i < angleBrackets.size(); i++) {
244 const auto [posOpen, posClose] = angleBrackets[i];
245 // Append the type prefix until the open angle bracket.
247
248 const auto argList = canonicalTypePrefix.substr(posOpen + 1, posClose - posOpen - 1);
249 const auto templateArgs = TokenizeTypeList(argList);
250 R__ASSERT(!templateArgs.empty());
251
252 for (const auto &a : templateArgs) {
253 normName += GetNormalizedTemplateArg(a, GetRenormalizedTypeName) + ",";
254 }
255
256 normName[normName.size() - 1] = '>';
257 currentPos = posClose + 1;
258 }
259
260 // Append the rest of the type from the last closing angle bracket.
261 const auto lastClosePos = angleBrackets.back().second;
263
264 return normName;
265}
266
268{
272 std::string canonicalTypePrefix;
275
276 if (canonicalTypePrefix.find('<') == std::string::npos) {
277 // If there are no templates, the function is done.
278 return canonicalTypePrefix;
279 }
280
282 R__ASSERT(!angleBrackets.empty());
283
284 // For user-defined class types, we will need to get the default-initialized template arguments.
285 const bool isUserClass =
286 (canonicalTypePrefix.substr(0, 5) != "std::") && (canonicalTypePrefix.substr(0, 19) != "ROOT::VecOps::RVec<");
287
288 std::string normName;
289 std::string::size_type currentPos = 0;
290 for (std::size_t i = 0; i < angleBrackets.size(); i++) {
291 const auto [posOpen, posClose] = angleBrackets[i];
292 // Append the type prefix until the open angle bracket.
294
295 const auto argList = canonicalTypePrefix.substr(posOpen + 1, posClose - posOpen - 1);
296 const auto templateArgs = TokenizeTypeList(argList);
297 R__ASSERT(!templateArgs.empty());
298
299 for (const auto &a : templateArgs) {
301 }
302
303 // For user-defined classes, append default-initialized template arguments.
304 if (isUserClass) {
305 const auto cl = TClass::GetClass(canonicalTypePrefix.substr(0, posClose + 1).c_str());
306 if (cl) {
307 const std::string expandedName = cl->GetName();
309 // We can have fewer pairs than angleBrackets, for example in case of type aliases.
311
313 const auto expandedArgList =
317
318 for (std::size_t j = templateArgs.size(); j < expandedTemplateArgs.size(); ++j) {
320 }
321 }
322 }
323
324 normName[normName.size() - 1] = '>';
325 currentPos = posClose + 1;
326 }
327
328 // Append the rest of the type from the last closing angle bracket.
329 const auto lastClosePos = angleBrackets.back().second;
331
332 return normName;
333}
334
335std::string ROOT::Internal::GetNormalizedInteger(long long val)
336{
337 return std::to_string(val);
338}
339
340std::string ROOT::Internal::GetNormalizedInteger(unsigned long long val)
341{
342 if (val > std::numeric_limits<std::int64_t>::max())
343 return std::to_string(val) + "u";
344 return std::to_string(val);
345}
346
354
355long long ROOT::Internal::ParseIntTypeToken(const std::string &intToken)
356{
357 std::size_t nChars = 0;
358 long long res = std::stoll(intToken, &nChars);
359 if (nChars == intToken.size())
360 return res;
361
362 assert(nChars < intToken.size());
363 if (nChars == 0) {
364 throw RException(R__FAIL("invalid integer type token: " + intToken));
365 }
366
367 auto suffix = intToken.substr(nChars);
368 std::transform(suffix.begin(), suffix.end(), suffix.begin(), ::toupper);
369 if (suffix == "L" || suffix == "LL")
370 return res;
371 if (res >= 0 && (suffix == "U" || suffix == "UL" || suffix == "ULL"))
372 return res;
373
374 throw RException(R__FAIL("invalid integer type token: " + intToken));
375}
376
377unsigned long long ROOT::Internal::ParseUIntTypeToken(const std::string &uintToken)
378{
379 std::size_t nChars = 0;
380 unsigned long long res = std::stoull(uintToken, &nChars);
381 if (nChars == uintToken.size())
382 return res;
383
384 assert(nChars < uintToken.size());
385 if (nChars == 0) {
386 throw RException(R__FAIL("invalid integer type token: " + uintToken));
387 }
388
389 auto suffix = uintToken.substr(nChars);
390 std::transform(suffix.begin(), suffix.end(), suffix.begin(), ::toupper);
391 if (suffix == "U" || suffix == "L" || suffix == "LL" || suffix == "UL" || suffix == "ULL")
392 return res;
393
394 throw RException(R__FAIL("invalid integer type token: " + uintToken));
395}
396
398{
399 auto am = cl->GetAttributeMap();
400 if (!am || !am->HasKey("rntuple.streamerMode"))
401 return ERNTupleSerializationMode::kUnset;
402
403 std::string value = am->GetPropertyAsString("rntuple.streamerMode");
404 std::transform(value.begin(), value.end(), value.begin(), ::toupper);
405 if (value == "TRUE") {
406 return ERNTupleSerializationMode::kForceStreamerMode;
407 } else if (value == "FALSE") {
408 return ERNTupleSerializationMode::kForceNativeMode;
409 } else {
410 R__LOG_WARNING(ROOT::Internal::NTupleLog()) << "invalid setting for 'rntuple.streamerMode' class attribute: "
411 << am->GetPropertyAsString("rntuple.streamerMode");
412 return ERNTupleSerializationMode::kUnset;
413 }
414}
415
416std::tuple<std::string, std::vector<std::size_t>> ROOT::Internal::ParseArrayType(const std::string &typeName)
417{
418 std::vector<std::size_t> sizeVec;
419
420 // Only parse outer array definition, i.e. the right `]` should be at the end of the type name
421 std::string prefix{typeName};
422 while (prefix.back() == ']') {
423 auto posRBrace = prefix.size() - 1;
424 auto posLBrace = prefix.rfind('[', posRBrace);
425 if (posLBrace == std::string_view::npos) {
426 throw RException(R__FAIL(std::string("invalid array type: ") + typeName));
427 }
428
429 const std::size_t size = ParseUIntTypeToken(prefix.substr(posLBrace + 1, posRBrace - posLBrace - 1));
430 if (size == 0) {
431 throw RException(R__FAIL(std::string("invalid array size: ") + typeName));
432 }
433
434 sizeVec.insert(sizeVec.begin(), size);
435 prefix.resize(posLBrace);
436 }
437 return std::make_tuple(prefix, sizeVec);
438}
439
440std::vector<std::string> ROOT::Internal::TokenizeTypeList(std::string_view templateType)
441{
442 std::vector<std::string> result;
443 if (templateType.empty())
444 return result;
445
446 const char *eol = templateType.data() + templateType.length();
447 const char *typeBegin = templateType.data();
448 const char *typeCursor = templateType.data();
449 unsigned int nestingLevel = 0;
450 while (typeCursor != eol) {
451 switch (*typeCursor) {
452 case '<': ++nestingLevel; break;
453 case '>': --nestingLevel; break;
454 case ',':
455 if (nestingLevel == 0) {
456 result.push_back(std::string(typeBegin, typeCursor - typeBegin));
457 typeBegin = typeCursor + 1;
458 }
459 break;
460 }
461 typeCursor++;
462 }
463 result.push_back(std::string(typeBegin, typeCursor - typeBegin));
464 return result;
465}
466
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:299
#define R__LOG_WARNING(...)
Definition RLogger.hxx:358
#define c(i)
Definition RSha256.hxx:101
#define a(i)
Definition RSha256.hxx:99
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t TPoint TPoint angle
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t src
Base class for all ROOT issued exceptions.
Definition RError.hxx:79
Classes with dictionaries that can be inspected by TClass.
Definition RField.hxx:286
const_iterator begin() const
const_iterator end() const
TClass instances represent classes, structs and namespaces in the ROOT type system.
Definition TClass.h:84
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition TClass.cxx:3069
TDictAttributeMap * GetAttributeMap() const
ERNTupleSerializationMode
Possible settings for the "rntuple.streamerMode" class attribute in the dictionary.
std::tuple< std::string, std::vector< std::size_t > > ParseArrayType(const std::string &typeName)
Parse a type name of the form T[n][m]... and return the base type T and a vector that contains,...
ROOT::RLogChannel & NTupleLog()
Log channel for RNTuple diagnostics.
unsigned long long ParseUIntTypeToken(const std::string &uintToken)
std::string GetNormalizedInteger(const std::string &intTemplateArg)
Appends 'll' or 'ull' where necessary and strips the suffix if not needed.
ERNTupleSerializationMode GetRNTupleSerializationMode(TClass *cl)
std::string GetCanonicalTypePrefix(const std::string &typeName)
Applies RNTuple specific type name normalization rules (see specs) that help the string parsing in RF...
std::string GetNormalizedUnresolvedTypeName(const std::string &origName)
Applies all RNTuple type normalization rules except typedef resolution.
std::string GetRenormalizedDemangledTypeName(const std::type_info &ti)
Given a type info ask ROOT meta to demangle it, then renormalize the resulting type name for RNTuple.
std::string GetRenormalizedTypeName(const std::string &metaNormalizedName)
Given a type name normalized by ROOT meta, renormalize it for RNTuple. E.g., insert std::prefix.
std::vector< std::string > TokenizeTypeList(std::string_view templateType)
Used in RFieldBase::Create() in order to get the comma-separated list of template types E....
long long ParseIntTypeToken(const std::string &intToken)
std::string GetDemangledTypeName(const std::type_info &t)
std::string CleanType(const char *typeDesc, int mode=0, const char **tail=nullptr)
Cleanup type description, redundant blanks removed and redundant tail ignored return *tail = pointer ...
@ kDropComparator
Definition TClassEdit.h:83
@ kDropStlDefault
Definition TClassEdit.h:82