Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RFieldUtils.cxx
Go to the documentation of this file.
1/// \file RFieldUtils.cxx
2/// \ingroup NTuple ROOT7
3/// \author Jonas Hahnfeld <jonas.hahnfeld@cern.ch>
4/// \date 2024-11-19
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
9
10#include <ROOT/RField.hxx>
11#include <ROOT/RLogger.hxx>
12#include <ROOT/RNTupleUtil.hxx>
13
14#include <TClass.h>
15#include <TClassEdit.h>
16#include <TDictAttributeMap.h>
17
18#include <algorithm>
19#include <charconv>
20#include <limits>
21#include <string>
22#include <string_view>
23#include <system_error>
24#include <unordered_map>
25#include <utility>
26#include <vector>
27
28namespace {
29
// Whole-name translation table: maps an alternative spelling of a type (key) to the spelling used after
// normalization (value). Lookups are exact matches on the complete type name.
const std::unordered_map<std::string_view, std::string_view> typeTranslationMap{
   // `X_t` aliases that translate to a fundamental type
   {"Bool_t", "bool"},
   {"Float_t", "float"},
   {"Double_t", "double"},
   {"Char_t", "char"},
   {"UChar_t", "unsigned char"},
   {"Short_t", "short"},
   {"UShort_t", "unsigned short"},
   {"Int_t", "int"},
   {"UInt_t", "unsigned int"},
   {"Long64_t", "long long"},
   {"ULong64_t", "unsigned long long"},

   // Long_t and ULong_t follow the platform's size of long and unsigned long: They are 64 bit on 64-bit Linux and
   // macOS, but 32 bit on 32-bit platforms and Windows (regardless of pointer size).
   {"Long_t", "long"},
   {"ULong_t", "unsigned long"},

   // Names that only gain their std:: qualification
   {"string", "std::string"},
   {"byte", "std::byte"},
   {"int8_t", "std::int8_t"},
   {"uint8_t", "std::uint8_t"},
   {"int16_t", "std::int16_t"},
   {"uint16_t", "std::uint16_t"},
   {"int32_t", "std::int32_t"},
   {"uint32_t", "std::uint32_t"},
   {"int64_t", "std::int64_t"},
   {"uint64_t", "std::uint64_t"},

   // Shorthand spelling of unsigned int
   {"unsigned", "unsigned int"},
};
62
// Parameters, as used below:
//   arg              - one template argument as text; asserted to be non-empty.
//   fnTypeNormalizer - callable invoked as fnTypeNormalizer(arg); its result is appended to the qualifier prefix.
// For a type-name argument the result is the detected "const " / "volatile " prefix (always in that order)
// followed by whatever fnTypeNormalizer returns.
63// Recursively normalizes a template argument using the regular type name normalizer F as a helper.
64template <typename F>
65std::string GetNormalizedTemplateArg(const std::string &arg, F fnTypeNormalizer)
66{
67 R__ASSERT(!arg.empty());
68
// A leading digit or minus sign marks the argument as an integer rather than a type name.
69 if (std::isdigit(arg[0]) || arg[0] == '-') {
70 // Integer template argument
// NOTE(review): the statement of this branch is not visible in this listing (the numbering jumps from 70 to
// 72); presumably it returns the normalized integer -- confirm against the real source.
72 }
73
74 std::string qualifier;
75 // Type name template argument; template arguments must keep their CV qualifier
// "volatile " is 9 characters long, so a "const " that follows it starts at offset 9. The length check keeps
// the substr() start offset inside the string.
76 if (arg.substr(0, 6) == "const " || (arg.length() > 14 && arg.substr(9, 6) == "const "))
77 qualifier += "const ";
// "const " is 6 characters long, so a "volatile " that follows it starts at offset 6.
78 if (arg.substr(0, 9) == "volatile " || (arg.length() > 14 && arg.substr(6, 9) == "volatile "))
79 qualifier += "volatile ";
80 return qualifier + fnTypeNormalizer(arg);
81}
82
83std::pair<std::string, std::string> SplitTypePrefixFromTemplateArgs(const std::string &typeName)
84{
85 auto idxOpen = typeName.find_first_of("<");
86 if (idxOpen == std::string::npos)
87 return {typeName, ""};
88
89 R__ASSERT(idxOpen > 0);
90 R__ASSERT(typeName.back() == '>');
91 R__ASSERT((typeName.size() - 1) > idxOpen);
92
93 return {typeName.substr(0, idxOpen), typeName.substr(idxOpen + 1, typeName.size() - idxOpen - 2)};
94}
95
96} // namespace
97
// Normalizes the leading part of a type name and returns the result. Steps, in order:
//   1. clean the name with TClassEdit::CleanType;
//   2. drop at most one leading "struct ", "enum " or "::";
//   3. from the first '<' onwards, remove every blank that directly precedes a '>';
//   4. prepend "std::" when the name starts with one of a fixed list of standard-library templates, or rewrite a
//      leading "ROOT::RVec<" to "ROOT::VecOps::RVec<";
//   5. replace the name if it matches a key of typeTranslationMap exactly;
//   6. run the fundamental-integer chain at the end (branch bodies not visible here, see the note there).
98std::string ROOT::Experimental::Internal::GetCanonicalTypePrefix(const std::string &typeName)
99{
100 std::string canonicalType{TClassEdit::CleanType(typeName.c_str(), /*mode=*/1)};
// Only one of these prefixes is removed, because the checks form a single if / else-if chain.
101 if (canonicalType.substr(0, 7) == "struct ") {
102 canonicalType.erase(0, 7);
103 } else if (canonicalType.substr(0, 5) == "enum ") {
104 canonicalType.erase(0, 5);
105 } else if (canonicalType.substr(0, 2) == "::") {
106 canonicalType.erase(0, 2);
107 }
108
109 // TClassEdit::CleanType inserts blanks between closing angle brackets, as they were required before C++11. We want
110 // to remove them for RNTuple.
111 auto angle = canonicalType.find('<');
112 if (angle != std::string::npos) {
// In-place compaction: src reads every character, dst marks where the next kept character is written. The
// leftover tail between dst and end is erased afterwards.
113 auto dst = canonicalType.begin() + angle;
114 auto end = canonicalType.end();
115 for (auto src = dst; src != end; ++src) {
116 if (*src == ' ') {
117 auto next = src + 1;
118 if (next != end && *next == '>') {
119 // Skip this space before a closing angle bracket.
120 continue;
121 }
122 }
123 *(dst++) = *src;
124 }
125 canonicalType.erase(dst, end);
126 }
127
// The template-name checks below are written as two separate if-chains (a fresh `if` starts at "pair<"). Once
// the first chain has added "std::", no prefix test of the second chain can match, so at most one rewrite
// happens.
128 if (canonicalType.substr(0, 6) == "array<") {
129 canonicalType = "std::" + canonicalType;
130 } else if (canonicalType.substr(0, 7) == "atomic<") {
131 canonicalType = "std::" + canonicalType;
132 } else if (canonicalType.substr(0, 7) == "bitset<") {
133 canonicalType = "std::" + canonicalType;
134 } else if (canonicalType.substr(0, 4) == "map<") {
135 canonicalType = "std::" + canonicalType;
136 } else if (canonicalType.substr(0, 9) == "multimap<") {
137 canonicalType = "std::" + canonicalType;
138 } else if (canonicalType.substr(0, 9) == "multiset<") {
139 canonicalType = "std::" + canonicalType;
140 }
141 if (canonicalType.substr(0, 5) == "pair<") {
142 canonicalType = "std::" + canonicalType;
143 } else if (canonicalType.substr(0, 4) == "set<") {
144 canonicalType = "std::" + canonicalType;
145 } else if (canonicalType.substr(0, 6) == "tuple<") {
146 canonicalType = "std::" + canonicalType;
147 } else if (canonicalType.substr(0, 11) == "unique_ptr<") {
148 canonicalType = "std::" + canonicalType;
149 } else if (canonicalType.substr(0, 14) == "unordered_map<") {
150 canonicalType = "std::" + canonicalType;
151 } else if (canonicalType.substr(0, 19) == "unordered_multimap<") {
152 canonicalType = "std::" + canonicalType;
153 } else if (canonicalType.substr(0, 19) == "unordered_multiset<") {
154 canonicalType = "std::" + canonicalType;
155 } else if (canonicalType.substr(0, 14) == "unordered_set<") {
156 canonicalType = "std::" + canonicalType;
157 } else if (canonicalType.substr(0, 8) == "variant<") {
158 canonicalType = "std::" + canonicalType;
159 } else if (canonicalType.substr(0, 7) == "vector<") {
160 canonicalType = "std::" + canonicalType;
161 } else if (canonicalType.substr(0, 11) == "ROOT::RVec<") {
// Keep everything after the 11-character "ROOT::RVec<" and put the fully qualified spelling in front.
162 canonicalType = "ROOT::VecOps::RVec<" + canonicalType.substr(11);
163 }
164
// The map lookup matches the complete name only, so template instances are not affected by it.
165 if (auto it = typeTranslationMap.find(canonicalType); it != typeTranslationMap.end()) {
166 canonicalType = it->second;
167 }
168
169 // Map fundamental integer types to stdint integer types (e.g. int --> std::int32_t)
// NOTE(review): the body of every branch in this chain is not visible in this listing (the numbering skips
// 171, 173, 176, ...); presumably each one assigns the matching fixed-width name -- confirm against the real
// source.
170 if (canonicalType == "signed char") {
172 } else if (canonicalType == "unsigned char") {
174 } else if (canonicalType == "short" || canonicalType == "short int" || canonicalType == "signed short" ||
175 canonicalType == "signed short int") {
177 } else if (canonicalType == "unsigned short" || canonicalType == "unsigned short int") {
179 } else if (canonicalType == "int" || canonicalType == "signed" || canonicalType == "signed int") {
181 } else if (canonicalType == "unsigned" || canonicalType == "unsigned int") {
183 } else if (canonicalType == "long" || canonicalType == "long int" || canonicalType == "signed long" ||
184 canonicalType == "signed long int") {
186 } else if (canonicalType == "unsigned long" || canonicalType == "unsigned long int") {
188 } else if (canonicalType == "long long" || canonicalType == "long long int" || canonicalType == "signed long long" ||
189 canonicalType == "signed long long int") {
191 } else if (canonicalType == "unsigned long long" || canonicalType == "unsigned long long int") {
193 }
194
195 return canonicalType;
196}
197
199{
// NOTE(review): the signature of this definition and the declaration of normName are not visible in this
// listing (the numbering skips 198 and 200); the comments below describe only the statements shown.
201 // RNTuple resolves Double32_t for the normalized type name but keeps Double32_t for the type alias
202 // (also in template parameters)
203 if (normName == "Double32_t")
204 return "double";
205
// Names without a template argument list are returned as their prefix unchanged.
206 const auto [typePrefix, argList] = SplitTypePrefixFromTemplateArgs(normName);
207 if (argList.empty())
208 return typePrefix;
209
210 auto templateArgs = TokenizeTypeList(argList);
211 R__ASSERT(!templateArgs.empty());
212
// Rebuild the name as prefix + '<' + arguments.
213 normName = typePrefix + "<";
214 for (const auto &a : templateArgs) {
// NOTE(review): the loop body is not visible in this listing (line 215 is missing); presumably it appends each
// normalized argument followed by a separator -- confirm.
216 }
// Overwrites the last character of the assembled string with the closing bracket.
217 normName[normName.size() - 1] = '>';
218
219 return normName;
220}
221
223{
// NOTE(review): the signature and several statements of this definition are not visible in this listing (the
// numbering skips 222, 224-225, 227, 229 and 245-246), including the declarations of splitname, modType and
// expandedTemplateArgs. The comments below describe only the statements shown.
226 std::string normName{origName};
228 splitname.ShortType(normName, modType);
230
// Names without a template argument list are returned as they are at this point.
231 const auto [typePrefix, argList] = SplitTypePrefixFromTemplateArgs(normName);
232 if (argList.empty())
233 return normName;
234
235 auto templateArgs = TokenizeTypeList(argList);
236 R__ASSERT(!templateArgs.empty());
237
238 // Get default-initialized template arguments; we only need to do this for user-defined class types
239 auto expandedName = normName;
// Names starting with "std::" or "ROOT::VecOps::RVec<" skip the TClass lookup. For all others the class name
// reported by TClass replaces expandedName when a TClass is found.
240 if ((expandedName.substr(0, 5) != "std::") && (expandedName.substr(0, 19) != "ROOT::VecOps::RVec<")) {
241 auto cl = TClass::GetClass(origName.c_str());
242 if (cl)
243 expandedName = cl->GetName();
244 }
247
// Rebuild the name as prefix + '<' + arguments.
248 normName = typePrefix + "<";
249 for (const auto &a : templateArgs) {
// NOTE(review): loop body not visible in this listing (line 250 is missing).
251 }
// Visits the arguments that exist in expandedTemplateArgs beyond those written explicitly in templateArgs.
252 for (std::size_t i = templateArgs.size(); i < expandedTemplateArgs.size(); ++i) {
// NOTE(review): loop body not visible in this listing (line 253 is missing).
254 }
// Overwrites the last character of the assembled string with the closing bracket.
255 normName[normName.size() - 1] = '>';
256
257 return normName;
258}
259
261{
// Returns the text produced by std::to_string(val); no suffix is added.
// NOTE(review): the signature of this definition is not visible in this listing (line 260 is missing).
262 return std::to_string(val);
263}
264
265std::string ROOT::Experimental::Internal::GetNormalizedInteger(unsigned long long val)
266{
267 if (val > std::numeric_limits<std::int64_t>::max())
268 return std::to_string(val) + "u";
269 return std::to_string(val);
270}
271
279
281{
// Converts intToken to a long long. The token may carry an integer-literal suffix, which is matched
// case-insensitively: "L" and "LL" are always accepted, "U", "UL" and "ULL" only for non-negative values.
// Any other trailing text raises an RException.
// NOTE(review): std::stoll itself throws std::invalid_argument when no conversion is possible and
// std::out_of_range on overflow, so those cases surface as standard exceptions and the nChars == 0 check below
// is probably never reached -- confirm whether callers expect RException there.
282 std::size_t nChars = 0;
283 long long res = std::stoll(intToken, &nChars);
// The whole token was consumed: a plain number without suffix.
284 if (nChars == intToken.size())
285 return res;
286
287 assert(nChars < intToken.size());
288 if (nChars == 0) {
289 throw RException(R__FAIL("invalid integer type token: " + intToken));
290 }
291
// Everything std::stoll did not consume is treated as the suffix and upper-cased before comparison.
292 auto suffix = intToken.substr(nChars);
293 std::transform(suffix.begin(), suffix.end(), suffix.begin(), ::toupper);
294 if (suffix == "L" || suffix == "LL")
295 return res;
// An unsigned suffix on a negative value is rejected by falling through to the throw below.
296 if (res >= 0 && (suffix == "U" || suffix == "UL" || suffix == "ULL"))
297 return res;
298
299 throw RException(R__FAIL("invalid integer type token: " + intToken));
300}
301
303{
// Converts uintToken to an unsigned long long. An optional suffix out of "U", "L", "LL", "UL", "ULL" is
// accepted, matched case-insensitively; any other trailing text raises an RException.
// NOTE(review): std::stoull throws std::invalid_argument / std::out_of_range on its own, so the nChars == 0
// check below is probably never reached. It also accepts a leading '-' and wraps the value, which this function
// does not reject -- confirm both against the intended contract.
304 std::size_t nChars = 0;
305 unsigned long long res = std::stoull(uintToken, &nChars);
// The whole token was consumed: a plain number without suffix.
306 if (nChars == uintToken.size())
307 return res;
308
309 assert(nChars < uintToken.size());
310 if (nChars == 0) {
311 throw RException(R__FAIL("invalid integer type token: " + uintToken));
312 }
313
// Everything std::stoull did not consume is treated as the suffix and upper-cased before comparison.
314 auto suffix = uintToken.substr(nChars);
315 std::transform(suffix.begin(), suffix.end(), suffix.begin(), ::toupper);
316 if (suffix == "U" || suffix == "L" || suffix == "LL" || suffix == "UL" || suffix == "ULL")
317 return res;
318
319 throw RException(R__FAIL("invalid integer type token: " + uintToken));
320}
321
324{
// Reads the "rntuple.streamerMode" class attribute of cl and maps it to an ERNTupleSerializationMode:
//   no attribute map or no such key -> kUnset
//   "true"  (any letter case)       -> kForceStreamerMode
//   "false" (any letter case)       -> kForceNativeMode
//   anything else                   -> kUnset, after logging a warning
// cl is dereferenced without a null check.
325 auto am = cl->GetAttributeMap();
326 if (!am || !am->HasKey("rntuple.streamerMode"))
327 return ERNTupleSerializationMode::kUnset;
328
// Upper-case a copy of the value so the comparison below ignores letter case.
329 std::string value = am->GetPropertyAsString("rntuple.streamerMode");
330 std::transform(value.begin(), value.end(), value.begin(), ::toupper);
331 if (value == "TRUE") {
332 return ERNTupleSerializationMode::kForceStreamerMode;
333 } else if (value == "FALSE") {
334 return ERNTupleSerializationMode::kForceNativeMode;
335 } else {
// The property is fetched again for the message, so the warning shows the original spelling rather than the
// upper-cased copy.
336 R__LOG_WARNING(ROOT::Internal::NTupleLog()) << "invalid setting for 'rntuple.streamerMode' class attribute: "
337 << am->GetPropertyAsString("rntuple.streamerMode");
338 return ERNTupleSerializationMode::kUnset;
339 }
340}
341
// Strips trailing "[n]" groups from typeName. Returns the remaining text together with the parsed sizes in the
// order in which they are written in typeName. Throws RException if a trailing ']' has no matching '[' or if a
// size is 0; errors raised by ParseUIntTypeToken propagate as well.
// NOTE(review): the line carrying the function name and parameter list is not visible in this listing (line 343
// is missing).
342std::tuple<std::string, std::vector<std::size_t>>
344{
345 std::vector<std::size_t> sizeVec;
346
347 // Only parse outer array definition, i.e. the right `]` should be at the end of the type name
348 std::string prefix{typeName};
// NOTE(review): prefix.back() is evaluated without an emptiness check, which is undefined for an empty string
// (an empty typeName, or a name such as "[3]" after the first iteration) -- confirm that callers exclude this.
349 while (prefix.back() == ']') {
350 auto posRBrace = prefix.size() - 1;
// Searches backwards from the closing bracket for the nearest '['.
351 auto posLBrace = prefix.find_last_of('[', posRBrace);
352 if (posLBrace == std::string_view::npos) {
353 throw RException(R__FAIL(std::string("invalid array type: ") + typeName));
354 }
355
// The text strictly between the brackets is parsed as the array size.
356 const std::size_t size = ParseUIntTypeToken(prefix.substr(posLBrace + 1, posRBrace - posLBrace - 1));
357 if (size == 0) {
358 throw RException(R__FAIL(std::string("invalid array size: ") + typeName));
359 }
360
// Groups are peeled off from right to left, so inserting at the front restores the written order.
361 sizeVec.insert(sizeVec.begin(), size);
// Drop the "[n]" group that was just parsed.
362 prefix.resize(posLBrace);
363 }
364 return std::make_tuple(prefix, sizeVec);
365}
366
367std::vector<std::string> ROOT::Experimental::Internal::TokenizeTypeList(std::string_view templateType)
368{
369 std::vector<std::string> result;
370 if (templateType.empty())
371 return result;
372
373 const char *eol = templateType.data() + templateType.length();
374 const char *typeBegin = templateType.data();
375 const char *typeCursor = templateType.data();
376 unsigned int nestingLevel = 0;
377 while (typeCursor != eol) {
378 switch (*typeCursor) {
379 case '<': ++nestingLevel; break;
380 case '>': --nestingLevel; break;
381 case ',':
382 if (nestingLevel == 0) {
383 result.push_back(std::string(typeBegin, typeCursor - typeBegin));
384 typeBegin = typeCursor + 1;
385 }
386 break;
387 }
388 typeCursor++;
389 }
390 result.push_back(std::string(typeBegin, typeCursor - typeBegin));
391 return result;
392}
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:299
#define R__LOG_WARNING(...)
Definition RLogger.hxx:358
#define a(i)
Definition RSha256.hxx:99
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t TPoint TPoint angle
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t src
static std::string TypeName()
Definition RField.hxx:289
Base class for all ROOT issued exceptions.
Definition RError.hxx:79
const_iterator begin() const
const_iterator end() const
TClass instances represent classes, structs and namespaces in the ROOT type system.
Definition TClass.h:84
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition TClass.cxx:3069
TDictAttributeMap * GetAttributeMap() const
std::vector< std::string > TokenizeTypeList(std::string_view templateType)
Used in RFieldBase::Create() in order to get the comma-separated list of template types E....
std::string GetCanonicalTypePrefix(const std::string &typeName)
Applies RNTuple specific type name normalization rules (see specs) that help the string parsing in RF...
ERNTupleSerializationMode
Possible settings for the "rntuple.streamerMode" class attribute in the dictionary.
std::string GetNormalizedInteger(const std::string &intTemplateArg)
Appends 'll' or 'ull' to the integer template argument where necessary and strips the suffix if not needed.
std::tuple< std::string, std::vector< std::size_t > > ParseArrayType(const std::string &typeName)
Parse a type name of the form T[n][m]... and return the base type T and a vector that contains,...
ERNTupleSerializationMode GetRNTupleSerializationMode(TClass *cl)
std::string GetRenormalizedTypeName(const std::string &metaNormalizedName)
Given a type name normalized by ROOT meta, renormalize it for RNTuple. E.g., insert the std:: prefix.
std::string GetNormalizedUnresolvedTypeName(const std::string &origName)
Applies all RNTuple type normalization rules except typedef resolution.
unsigned long long ParseUIntTypeToken(const std::string &uintToken)
long long ParseIntTypeToken(const std::string &intToken)
ROOT::RLogChannel & NTupleLog()
Log channel for RNTuple diagnostics.
std::string CleanType(const char *typeDesc, int mode=0, const char **tail=nullptr)
Cleanup type description, redundant blanks removed and redundant tail ignored return *tail = pointer ...
@ kDropComparator
Definition TClassEdit.h:83
@ kDropStlDefault
Definition TClassEdit.h:82