Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RDFUtils.cxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 03/2017
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#include "RConfigure.h" // R__USE_IMT
12#include "ROOT/RDataSource.hxx"
15#include "ROOT/RDF/RSample.hxx"
17#include "ROOT/RDF/Utils.hxx"
18#include "ROOT/RLogger.hxx"
19#include "RtypesCore.h"
20#include "TBranch.h"
21#include "TBranchElement.h"
22#include "TClass.h"
23#include "TClassEdit.h"
24#include "TClassRef.h"
25#include "TError.h" // Info
26#include "TInterpreter.h"
27#include "TLeaf.h"
28#include "TROOT.h" // IsImplicitMTEnabled, GetThreadPoolSize
29#include "TTree.h"
30
31#include <fstream>
32#include <mutex>
33#include <nlohmann/json.hpp> // nlohmann::json::parse
34#include <stdexcept>
35#include <string>
36#include <cstring>
37#include <typeinfo>
38#include <cstdint>
39
40using namespace ROOT::Detail::RDF;
41using namespace ROOT::RDF;
42
44{
45 static RLogChannel c("ROOT.RDF");
46 return c;
47}
48
49// A static function, not in an anonymous namespace, because the function name is included in the user-visible message.
50static void WarnHist()
51{
52 R__LOG_WARNING(RDFLogChannel()) << "Filling RHist is experimental and still under development.";
53}
54
56{
57 static std::once_flag once;
58 std::call_once(once, ::WarnHist);
59}
60
namespace {
/// Copyable, non-owning handle to a std::type_info, so that type_info objects can be used as container keys.
using TypeInfoRef = std::reference_wrapper<const std::type_info>;

/// Hash functor for TypeInfoRef keys: forwards to std::type_info::hash_code.
struct TypeInfoRefHash {
   std::size_t operator()(TypeInfoRef ref) const
   {
      const std::type_info &info = ref;
      return info.hash_code();
   }
};

/// Equality functor for TypeInfoRef keys: compares the referenced std::type_info objects.
struct TypeInfoRefEqualComp {
   bool operator()(TypeInfoRef lhs, TypeInfoRef rhs) const
   {
      const std::type_info &lhsInfo = lhs;
      const std::type_info &rhsInfo = rhs;
      return lhsInfo == rhsInfo;
   }
};
} // namespace
71
72namespace ROOT {
73namespace Internal {
74namespace RDF {
75
/// Give read/write access to a single process-wide counter, initialised to 1 on first use.
/// The returned reference always designates the same function-local static object.
unsigned int &NThreadPerTH3()
{
   static unsigned int sThreadsPerClone{1};
   return sThreadsPerClone;
}
81
82/// Return the type_info associated to a name. If the association fails, an
83/// exception is thrown.
84/// References and pointers are not supported since those cannot be stored in
85/// columns.
86const std::type_info &TypeName2TypeID(const std::string &name)
87{
88 // This map includes all relevant C++ fundamental types found at
89 // https://en.cppreference.com/w/cpp/language/types.html and the associated
90 // ROOT portable types when available.
91 const static std::unordered_map<std::string, TypeInfoRef> typeName2TypeIDMap{
92 // Integral types
93 // Standard integer types
94 {"short", typeid(short)},
95 {"short int", typeid(short int)},
96 {"signed short", typeid(signed short)},
97 {"signed short int", typeid(signed short int)},
98 {"unsigned short", typeid(unsigned short)},
99 {"unsigned short int", typeid(unsigned short int)},
100 {"int", typeid(int)},
101 {"signed", typeid(signed)},
102 {"signed int", typeid(signed int)},
103 {"unsigned", typeid(unsigned)},
104 {"unsigned int", typeid(unsigned int)},
105 {"long", typeid(long)},
106 {"long int", typeid(long int)},
107 {"signed long", typeid(signed long)},
108 {"signed long int", typeid(signed long int)},
109 {"unsigned long", typeid(unsigned long)},
110 {"unsigned long int", typeid(unsigned long int)},
111 {"long long", typeid(long long)},
112 {"long long int", typeid(long long int)},
113 {"signed long long", typeid(signed long long)},
114 {"signed long long int", typeid(signed long long int)},
115 {"unsigned long long", typeid(unsigned long long)},
116 {"unsigned long long int", typeid(unsigned long long int)},
117 {"std::size_t", typeid(std::size_t)},
118 // Extended standard integer types
119#ifdef INT8_MAX
120 {"std::int8_t", typeid(std::int8_t)},
121#endif
122#ifdef INT16_MAX
123 {"std::int16_t", typeid(std::int16_t)},
124#endif
125#ifdef INT32_MAX
126 {"std::int32_t", typeid(std::int32_t)},
127#endif
128#ifdef INT64_MAX
129 {"std::int64_t", typeid(std::int64_t)},
130#endif
131#ifdef UINT8_MAX
132 {"std::uint8_t", typeid(std::uint8_t)},
133#endif
134#ifdef UINT16_MAX
135 {"std::uint16_t", typeid(std::uint16_t)},
136#endif
137#ifdef UINT32_MAX
138 {"std::uint32_t", typeid(std::uint32_t)},
139#endif
140#ifdef UINT64_MAX
141 {"std::uint64_t", typeid(std::uint64_t)},
142#endif
143 // ROOT integer types
144 {"Int_t", typeid(Int_t)},
145 {"UInt_t", typeid(UInt_t)},
146 {"Short_t", typeid(Short_t)},
147 {"UShort_t", typeid(UShort_t)},
148 {"Long_t", typeid(Long_t)},
149 {"ULong_t", typeid(ULong_t)},
150 {"Long64_t", typeid(Long64_t)},
151 {"ULong64_t", typeid(ULong64_t)},
152 // Boolean type
153 {"bool", typeid(bool)},
154 {"Bool_t", typeid(bool)},
155 // Character types
156 {"char", typeid(char)},
157 {"Char_t", typeid(char)},
158 {"signed char", typeid(signed char)},
159 {"unsigned char", typeid(unsigned char)},
160 {"UChar_t", typeid(unsigned char)},
161 {"char16_t", typeid(char16_t)},
162 {"char32_t", typeid(char32_t)},
163 // Floating-point types
164 // Standard floating-point types
165 {"float", typeid(float)},
166 {"double", typeid(double)},
167 {"long double", typeid(long double)},
168 // ROOT floating-point types
169 {"Float_t", typeid(float)},
170 {"Double_t", typeid(double)}};
171
172 if (auto it = typeName2TypeIDMap.find(name); it != typeName2TypeIDMap.end())
173 return it->second.get();
174
175 if (auto c = TClass::GetClass(name.c_str()); c && c->GetTypeInfo()) {
176 return *c->GetTypeInfo();
177 }
178
179 // When the type_info cannot be retrieved with TClass, it might be that the interpreter still knows about it. This
180 // happens for example when a class has been declared to the interpreter in the same program where this
181 // RDataFrame is running, but has no dictionary. We attempt to retrieve the type_info via the interpreter before
182 // giving up.
183 std::unique_ptr<TInterpreterValue> v = gInterpreter->MakeInterpreterValue();
184 if (gInterpreter->Evaluate(("typeid(" + name + ')').c_str(), *v)) {
185 auto *typeIdAsVoidPtr = v->GetAsPointer();
186 const std::type_info *ti = reinterpret_cast<const std::type_info *>(typeIdAsVoidPtr);
187 if (ti)
188 return *ti;
189 }
190
191 throw std::runtime_error("Cannot extract type_info of type " + name + ".");
192}
193
194/// Returns the name of a type starting from its type_info
195/// An empty string is returned in case of failure
196/// References and pointers are not supported since those cannot be stored in
197/// columns.
198/// Note that this function will take a lock and may be a potential source of
199/// contention in multithreaded execution.
200std::string TypeID2TypeName(const std::type_info &id)
201{
202 const static std::unordered_map<TypeInfoRef, std::string, TypeInfoRefHash, TypeInfoRefEqualComp> typeID2TypeNameMap{
203 {typeid(char), "char"},
204 {typeid(unsigned char), "unsigned char"},
205 {typeid(signed char), "signed char"},
206 {typeid(int), "int"},
207 {typeid(unsigned int), "unsigned int"},
208 {typeid(short), "short"},
209 {typeid(unsigned short), "unsigned short"},
210 {typeid(long), "long"},
211 {typeid(unsigned long), "unsigned long"},
212 {typeid(double), "double"},
213 {typeid(float), "float"},
214 {typeid(Long64_t), "Long64_t"},
215 {typeid(ULong64_t), "ULong64_t"},
216 {typeid(bool), "bool"}};
217
218 if (auto it = typeID2TypeNameMap.find(id); it != typeID2TypeNameMap.end())
219 return it->second;
220
221 if (auto c = TClass::GetClass(id)) {
222 return c->GetName();
223 }
224
225 return "";
226}
227
228char TypeID2ROOTTypeName(const std::type_info &tid)
229{
230 const static std::unordered_map<TypeInfoRef, char, TypeInfoRefHash, TypeInfoRefEqualComp> typeID2ROOTTypeNameMap{
231 {typeid(char), 'B'}, {typeid(Char_t), 'B'}, {typeid(unsigned char), 'b'}, {typeid(UChar_t), 'b'},
232 {typeid(int), 'I'}, {typeid(Int_t), 'I'}, {typeid(unsigned int), 'i'}, {typeid(UInt_t), 'i'},
233 {typeid(short), 'S'}, {typeid(Short_t), 'S'}, {typeid(unsigned short), 's'}, {typeid(UShort_t), 's'},
234 {typeid(long), 'G'}, {typeid(Long_t), 'G'}, {typeid(unsigned long), 'g'}, {typeid(ULong_t), 'g'},
235 {typeid(long long), 'L'}, {typeid(Long64_t), 'L'}, {typeid(unsigned long long), 'l'}, {typeid(ULong64_t), 'l'},
236 {typeid(float), 'F'}, {typeid(Float_t), 'F'}, {typeid(Double_t), 'D'}, {typeid(double), 'D'},
237 {typeid(bool), 'O'}, {typeid(Bool_t), 'O'}};
238
239 if (auto it = typeID2ROOTTypeNameMap.find(tid); it != typeID2ROOTTypeNameMap.end())
240 return it->second;
241
242 return ' ';
243}
244
/// Build the fully qualified name of the RVec instantiation holding elements of type `valueType`,
/// i.e. "ROOT::VecOps::RVec<valueType>". The value type is inserted verbatim.
std::string ComposeRVecTypeName(const std::string &valueType)
{
   std::string rvecName{"ROOT::VecOps::RVec<"};
   rvecName += valueType;
   rvecName += ">";
   return rvecName;
}
249
250std::string GetLeafTypeName(TLeaf *leaf, const std::string &colName)
251{
252 const char *colTypeCStr = leaf->GetTypeName();
253 std::string colType = colTypeCStr == nullptr ? "" : colTypeCStr;
254 if (colType.empty())
255 throw std::runtime_error("Could not deduce type of leaf " + colName);
256 if (leaf->GetLeafCount() != nullptr && leaf->GetLenStatic() == 1) {
257 // this is a variable-sized array
259 } else if (leaf->GetLeafCount() == nullptr && leaf->GetLenStatic() > 1) {
260 // this is a fixed-sized array (we do not differentiate between variable- and fixed-sized arrays)
262 } else if (leaf->GetLeafCount() != nullptr && leaf->GetLenStatic() > 1) {
263 // This case is encountered when a branch is a collection (e.g. std::vector) of a user-defined class which has
264 // a data member that is a fixed-size array. Here, 'leaf' is said data member, and the user could read it
265 // partially as std::vector<std::array<T, N>>. We expose it as ROOT::RVec<std::array<T, N>> for consistency with
266 // other collection types.
267 // WARNING: Currently this considers only the possibility of a 1-dim array, as TLeaf does not expose information
268 // to get all dimension lengths of a multi-dim array in a straightforward way (e.g. with one API call).
269 auto valueType = colType;
270 colType = "ROOT::VecOps::RVec<std::array<" + valueType + ", " + std::to_string(leaf->GetLenStatic()) + ">>";
271 }
272
273 return colType;
274}
275
276/// Return the typename of object colName stored in t, if any. Return an empty string if colName is not in t.
277/// Supported cases:
278/// - leaves corresponding to single values, variable- and fixed-length arrays, with following syntax:
279/// - "leafname", as long as TTree::GetLeaf resolves it
280/// - "b1.b2...leafname", as long as TTree::GetLeaf("b1.b2....", "leafname") resolves it
281/// - TBranchElements, as long as TTree::GetBranch resolves their names
282std::string GetBranchOrLeafTypeName(TTree &t, const std::string &colName)
283{
284 // look for TLeaf either with GetLeaf(colName) or with GetLeaf(branchName, leafName) (splitting on last dot)
285 auto *leaf = t.GetLeaf(colName.c_str());
286 if (!leaf)
287 leaf = t.FindLeaf(colName.c_str()); // try harder
288 if (!leaf) {
289 // try splitting branchname and leafname
290 const auto dotPos = colName.find_last_of('.');
291 const auto hasDot = dotPos != std::string::npos;
292 if (hasDot) {
293 const auto branchName = colName.substr(0, dotPos);
294 const auto leafName = colName.substr(dotPos + 1);
295 leaf = t.GetLeaf(branchName.c_str(), leafName.c_str());
296 }
297 }
298 if (leaf)
299 return GetLeafTypeName(leaf, std::string(leaf->GetFullName()));
300
301 // we could not find a leaf named colName, so we look for a branch called like this
302 auto branch = t.GetBranch(colName.c_str());
303 if (!branch)
304 branch = t.FindBranch(colName.c_str()); // try harder
305 if (branch) {
306 static const TClassRef tbranchelement("TBranchElement");
307 if (branch->InheritsFrom(tbranchelement)) {
308 auto be = static_cast<TBranchElement *>(branch);
309 if (auto currentClass = be->GetCurrentClass())
310 return currentClass->GetName();
311 else {
312 // Here we have a special case for getting right the type of data members
313 // of classes sorted in TClonesArrays: ROOT-9674
314 auto mother = be->GetMother();
315 if (mother && mother->InheritsFrom(tbranchelement) && mother != be) {
316 auto beMom = static_cast<TBranchElement *>(mother);
317 auto beMomClass = beMom->GetClass();
318 if (beMomClass && 0 == std::strcmp("TClonesArray", beMomClass->GetName()))
319 return be->GetTypeName();
320 }
321 return be->GetClassName();
322 }
323 } else if (branch->IsA() == TBranch::Class() && branch->GetListOfLeaves()->GetEntriesUnsafe() == 1) {
324 // normal branch (not a TBranchElement): if it has only one leaf, we pick the type of the leaf:
325 // RDF and TTreeReader allow referring to branch.leaf as just branch if branch has only one leaf
326 leaf = static_cast<TLeaf *>(branch->GetListOfLeaves()->UncheckedAt(0));
327 return GetLeafTypeName(leaf, std::string(leaf->GetFullName()));
328 }
329 }
330
331 // we could not find a branch or a leaf called colName
332 return std::string();
333}
334
335/// Return a string containing the type of the given branch. Works both with real TTree branches and with temporary
336/// column created by Define. Throws if type name deduction fails.
337/// Note that for fixed- or variable-sized c-style arrays the returned type name will be RVec<T>.
338/// vector2RVec specifies whether typename 'std::vector<T>' should be converted to 'RVec<T>' or returned as is
339std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *tree, RDataSource *ds, RDefineBase *define,
340 bool vector2RVec)
341{
342 std::string colType;
343
344 // must check defines first: we want Redefines to have precedence over everything else
345 if (define) {
346 colType = define->GetTypeName();
347 } else if (ds && ds->HasColumn(colName)) {
349 } else if (tree) {
352 std::vector<std::string> split;
353 int dummy;
354 TClassEdit::GetSplit(colType.c_str(), split, dummy);
355 auto &valueType = split[1];
357 }
358 }
359
360 if (colType.empty())
361 throw std::runtime_error("Column \"" + colName +
362 "\" is not in a dataset and is not a custom column been defined.");
363
364 return colType;
365}
366
/// Convert a fundamental type name (e.g. "Float_t") to its single-character ROOT type code (e.g. 'F') -- see the
/// TBranch documentation.
/// \return the type code, or a space ' ' if the name is not in the table.
char TypeName2ROOTTypeName(const std::string &b)
{
   // One row per type code: the C++ spellings followed by the ROOT typedef.
   const static std::unordered_map<std::string, char> typeName2ROOTTypeNameMap{
      {"char", 'B'}, {"Char_t", 'B'},
      {"unsigned char", 'b'}, {"UChar_t", 'b'},
      {"int", 'I'}, {"Int_t", 'I'},
      {"unsigned", 'i'}, {"unsigned int", 'i'}, {"UInt_t", 'i'},
      {"short", 'S'}, {"short int", 'S'}, {"Short_t", 'S'},
      {"unsigned short", 's'}, {"unsigned short int", 's'}, {"UShort_t", 's'},
      {"long", 'G'}, {"long int", 'G'}, {"Long_t", 'G'},
      {"unsigned long", 'g'}, {"unsigned long int", 'g'}, {"ULong_t", 'g'},
      {"double", 'D'}, {"Double_t", 'D'},
      {"float", 'F'}, {"Float_t", 'F'},
      {"long long", 'L'}, {"long long int", 'L'}, {"Long64_t", 'L'},
      {"unsigned long long", 'l'}, {"unsigned long long int", 'l'}, {"ULong64_t", 'l'},
      {"bool", 'O'}, {"Bool_t", 'O'}};

   const auto match = typeName2ROOTTypeNameMap.find(b);
   return match == typeName2ROOTTypeNameMap.end() ? ' ' : match->second;
}
410
411unsigned int GetNSlots()
412{
413 unsigned int nSlots = 1;
414#ifdef R__USE_IMT
417#endif // R__USE_IMT
418 return nSlots;
419}
420
421/// Replace occurrences of '.' with '_' in each string passed as argument.
422/// An Info message is printed when this happens. Dots at the end of the string are not replaced.
423/// An exception is thrown in case the resulting set of strings would contain duplicates.
424std::vector<std::string> ReplaceDotWithUnderscore(const std::vector<std::string> &columnNames)
425{
427 for (auto &col : newColNames) {
428 const auto dotPos = col.find('.');
429 if (dotPos != std::string::npos && dotPos != col.size() - 1 && dotPos != 0u) {
430 auto oldName = col;
431 std::replace(col.begin(), col.end(), '.', '_');
432 if (std::find(columnNames.begin(), columnNames.end(), col) != columnNames.end())
433 throw std::runtime_error("Column " + oldName + " would be written as " + col +
434 " but this column already exists. Please use Alias to select a new name for " +
435 oldName);
436 Info("Snapshot", "Column %s will be saved as %s", oldName.c_str(), col.c_str());
437 }
438 }
439
440 return newColNames;
441}
442
443void InterpreterDeclare(const std::string &code)
444{
445 R__LOG_DEBUG(10, RDFLogChannel()) << "Declaring the following code to cling:\n\n" << code << '\n';
446
447 if (!gInterpreter->Declare(code.c_str())) {
448 const auto msg =
449 "\nRDataFrame: An error occurred during just-in-time compilation. The lines above might indicate the cause of "
450 "the crash\n All RDF objects that have not run an event loop yet should be considered in an invalid state.\n";
451 throw std::runtime_error(msg);
452 }
453}
454
455void InterpreterCalc(const std::string &code, const std::string &context)
456{
457 if (code.empty())
458 return;
459
460 R__LOG_DEBUG(10, RDFLogChannel()) << "Jitting and executing the following code:\n\n" << code << '\n';
461
462 TInterpreter::EErrorCode errorCode(TInterpreter::kNoError); // storage for cling errors
463
464 auto callCalc = [&errorCode, &context](const std::string &codeSlice) {
465 gInterpreter->Calc(codeSlice.c_str(), &errorCode);
467 std::string msg = "\nAn error occurred during just-in-time compilation";
468 if (!context.empty())
469 msg += " in " + context;
470 msg +=
471 ". The lines above might indicate the cause of the crash\nAll RDF objects that have not run their event "
472 "loop yet should be considered in an invalid state.\n";
473 throw std::runtime_error(msg);
474 }
475 };
476
477 // Call Calc every 1000 newlines in order to avoid jitting a very large function body, which is slow:
478 // see https://github.com/root-project/root/issues/9312 and https://github.com/root-project/root/issues/7604
479 std::size_t substr_start = 0;
480 std::size_t substr_end = 0;
481 while (substr_end != std::string::npos && substr_start != code.size() - 1) {
482 for (std::size_t i = 0u; i < 1000u && substr_end != std::string::npos; ++i) {
483 substr_end = code.find('\n', substr_end + 1);
484 }
485 const std::string subs = code.substr(substr_start, substr_end - substr_start);
487
488 callCalc(subs);
489 }
490}
491
/// Tell whether colName follows the naming pattern of internal columns: it starts with "rdf" or "tdf", ends with
/// an underscore, and is longer than the three-character prefix alone.
bool IsInternalColumn(std::string_view colName)
{
   // The prefix takes three characters, and at least one more is required after it.
   if (colName.size() <= 3)
      return false;

   const char first = colName.front();
   if (first != 'r' && first != 't')
      return false;

   if (colName[1] != 'd' || colName[2] != 'f')
      return false;

   return colName.back() == '_';
}
500
501unsigned int GetColumnWidth(const std::vector<std::string>& names, const unsigned int minColumnSpace)
502{
503 auto columnWidth = 0u;
504 for (const auto& name : names) {
505 const auto length = name.length();
506 if (length > columnWidth)
508 }
510 return columnWidth;
511}
512
513void CheckReaderTypeMatches(const std::type_info &colType, const std::type_info &requestedType,
514 const std::string &colName)
515{
516 // We want to explicitly support the reading of bools as unsigned char, as
517 // this is quite common to circumvent the std::vector<bool> specialization.
518 const bool explicitlySupported = (colType == typeid(bool) && requestedType == typeid(unsigned char)) ? true : false;
519
520 // Here we compare names and not typeinfos since they may come from two different contexts: a compiled
521 // and a jitted one.
522 const auto diffTypes = (0 != std::strcmp(colType.name(), requestedType.name()));
523 auto inheritedType = [&]() {
525 return colTClass && colTClass->InheritsFrom(TClass::GetClass(requestedType));
526 };
527
529 const auto tName = TypeID2TypeName(requestedType);
530 const auto colTypeName = TypeID2TypeName(colType);
531 std::string errMsg = "RDataFrame: type mismatch: column \"" + colName + "\" is being used as ";
532 if (tName.empty()) {
533 errMsg += requestedType.name();
534 errMsg += " (extracted from type info)";
535 } else {
536 errMsg += tName;
537 }
538 errMsg += " but the Define or Vary node advertises it as ";
539 if (colTypeName.empty()) {
540 auto &id = colType;
541 errMsg += id.name();
542 errMsg += " (extracted from type info)";
543 } else {
545 }
546 throw std::runtime_error(errMsg);
547 }
548}
549
/// Return true if `vec` contains an element equal to `str`, false otherwise (including when `vec` is empty).
bool IsStrInVec(const std::string &str, const std::vector<std::string> &vec)
{
   for (const auto &candidate : vec) {
      if (candidate == str)
         return true;
   }
   return false;
}
554
555auto RStringCache::Insert(const std::string &string) -> decltype(fStrings)::const_iterator
556{
557 {
558 std::shared_lock l{fMutex};
559 if (auto it = fStrings.find(string); it != fStrings.end())
560 return it;
561 }
562
563 // TODO: Would be nicer to use a lock upgrade strategy a-la TVirtualRWMutex
564 // but that is unfortunately not usable outside the already available ROOT mutexes
565 std::unique_lock l{fMutex};
566 if (auto it = fStrings.find(string); it != fStrings.end())
567 return it;
568
569 return fStrings.insert(string).first;
570}
571
573{
574 const nlohmann::ordered_json fullData = nlohmann::ordered_json::parse(std::ifstream(jsonFile));
575 if (!fullData.contains("samples") || fullData["samples"].empty()) {
576 throw std::runtime_error(
577 R"(The input specification does not contain any samples. Please provide the samples in the specification like:
578{
579 "samples": {
580 "sampleA": {
581 "trees": ["tree1", "tree2"],
582 "files": ["file1.root", "file2.root"],
583 "metadata": {"lumi": 1.0, }
584 },
585 "sampleB": {
586 "trees": ["tree3", "tree4"],
587 "files": ["file3.root", "file4.root"],
588 "metadata": {"lumi": 0.5, }
589 },
590 ...
591 },
592})");
593 }
594
596 for (const auto &keyValue : fullData["samples"].items()) {
597 const std::string &sampleName = keyValue.key();
598 const auto &sample = keyValue.value();
599 // TODO: if requested in https://github.com/root-project/root/issues/11624
600 // allow union-like types for trees and files, see: https://github.com/nlohmann/json/discussions/3815
601 if (!sample.contains("trees")) {
602 throw std::runtime_error("A list of tree names must be provided for sample " + sampleName + ".");
603 }
604 std::vector<std::string> trees = sample["trees"];
605 if (!sample.contains("files")) {
606 throw std::runtime_error("A list of files must be provided for sample " + sampleName + ".");
607 }
608 std::vector<std::string> files = sample["files"];
609 if (!sample.contains("metadata")) {
611 } else {
613 for (const auto &metadata : sample["metadata"].items()) {
614 const auto &val = metadata.value();
615 if (val.is_string())
616 m.Add(metadata.key(), val.get<std::string>());
617 else if (val.is_number_integer())
618 m.Add(metadata.key(), val.get<int>());
619 else if (val.is_number_float())
620 m.Add(metadata.key(), val.get<double>());
621 else
622 throw std::logic_error("The metadata keys can only be of type [string|int|double].");
623 }
625 }
626 }
627 if (fullData.contains("friends")) {
628 for (const auto &friends : fullData["friends"].items()) {
629 std::string alias = friends.key();
630 std::vector<std::string> trees = friends.value()["trees"];
631 std::vector<std::string> files = friends.value()["files"];
632 if (files.size() != trees.size() && trees.size() > 1)
633 throw std::runtime_error("Mismatch between trees and files in a friend.");
634 spec.WithGlobalFriends(trees, files, alias);
635 }
636 }
637
638 if (fullData.contains("range")) {
639 std::vector<int> range = fullData["range"];
640
641 if (range.size() == 1)
642 spec.WithGlobalRange({range[0]});
643 else if (range.size() == 2)
644 spec.WithGlobalRange({range[0], range[1]});
645 }
646 return spec;
647};
648
649} // end NS RDF
650} // end NS Internal
651} // end NS ROOT
652
653std::string
655{
656 return df.GetTypeNameWithOpts(colName, vector2RVec);
657}
658
660{
661 return df.GetTopLevelFieldNames();
662}
663
665{
666 return df.GetColumnNamesNoDuplicates();
667}
668
674
676{
677 return ds.DescribeDataset();
678}
679
681 const ROOT::RDF::RDataSource &ds, unsigned int slot,
682 const std::unordered_map<std::string, ROOT::RDF::Experimental::RSample *> &sampleMap)
683{
684 return ds.CreateSampleInfo(slot, sampleMap);
685}
686
691
696
697std::unique_ptr<ROOT::Detail::RDF::RColumnReaderBase>
699 const std::type_info &tid, TTreeReader *treeReader)
700{
701 return ds.CreateColumnReader(slot, col, tid, treeReader);
702}
703
705{
706 return std::move(spec.fSamples);
707}
static void WarnHist()
Definition RDFUtils.cxx:50
#define R__LOG_WARNING(...)
Definition RLogger.hxx:357
#define R__LOG_DEBUG(DEBUGLEVEL,...)
Definition RLogger.hxx:359
#define b(i)
Definition RSha256.hxx:100
#define c(i)
Definition RSha256.hxx:101
Basic types used by ROOT and required by TInterpreter.
bool Bool_t
Boolean (0=false, 1=true) (bool)
Definition RtypesCore.h:77
unsigned short UShort_t
Unsigned Short integer 2 bytes (unsigned short)
Definition RtypesCore.h:54
int Int_t
Signed integer 4 bytes (int)
Definition RtypesCore.h:59
unsigned char UChar_t
Unsigned Character 1 byte (unsigned char)
Definition RtypesCore.h:52
char Char_t
Character 1 byte (char)
Definition RtypesCore.h:51
unsigned long ULong_t
Unsigned long integer 4 bytes (unsigned long). Size depends on architecture.
Definition RtypesCore.h:69
long Long_t
Signed long integer 4 bytes (long). Size depends on architecture.
Definition RtypesCore.h:68
unsigned int UInt_t
Unsigned integer 4 bytes (unsigned int)
Definition RtypesCore.h:60
float Float_t
Float 4 bytes (float)
Definition RtypesCore.h:71
short Short_t
Signed Short integer 2 bytes (short)
Definition RtypesCore.h:53
double Double_t
Double 8 bytes.
Definition RtypesCore.h:73
long long Long64_t
Portable signed long integer 8 bytes.
Definition RtypesCore.h:83
unsigned long long ULong64_t
Portable unsigned long integer 8 bytes.
Definition RtypesCore.h:84
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
void Info(const char *location, const char *msgfmt,...)
Use this function for informational messages.
Definition TError.cxx:241
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
char name[80]
Definition TGX11.cxx:148
#define gInterpreter
TRObject operator()(const T1 &t1) const
std::string GetTypeName() const
The head node of a RDF computation graph.
auto Insert(const std::string &string) -> decltype(fStrings)::const_iterator
Inserts the input string in the cache and returns an iterator to the cached string.
Definition RDFUtils.cxx:555
The dataset specification for RDataFrame.
Class behaving as a heterogeneous dictionary to store the metadata of a dataset.
Definition RMetaData.hxx:57
Class representing a sample which is a grouping of trees and their fileglobs, and,...
Definition RSample.hxx:39
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
This type represents a sample identifier, to be used in conjunction with RDataFrame features such as ...
A log configuration for a channel, e.g.
Definition RLogger.hxx:97
const_iterator begin() const
const_iterator end() const
A Branch for the case of an object.
static TClass * Class()
TClassRef is used to implement a permanent reference to a TClass object.
Definition TClassRef.h:29
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition TClass.cxx:2994
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
Definition TLeaf.h:57
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
Definition TTreeReader.h:46
A TTree represents a columnar dataset.
Definition TTree.h:89
virtual TBranch * FindBranch(const char *name)
Return the branch that correspond to the path 'branchname', which can include the name of the tree or...
Definition TTree.cxx:4890
virtual TBranch * GetBranch(const char *name)
Return pointer to the branch with the given name in this tree or its friends.
Definition TTree.cxx:5430
virtual TLeaf * GetLeaf(const char *branchname, const char *leafname)
Return pointer to the 1st Leaf named name in any Branch of this Tree or any branch in the list of fri...
Definition TTree.cxx:6306
virtual TLeaf * FindLeaf(const char *name)
Find first leaf containing searchname.
Definition TTree.cxx:4965
ROOT::RLogChannel & RDFLogChannel()
Definition RDFUtils.cxx:43
void RunFinalChecks(const ROOT::RDF::RDataSource &ds, bool nodesLeftNotRun)
Definition RDFUtils.cxx:687
std::vector< std::string > ReplaceDotWithUnderscore(const std::vector< std::string > &columnNames)
Replace occurrences of '.' with '_' in each string passed as argument.
Definition RDFUtils.cxx:424
const std::type_info & TypeName2TypeID(const std::string &name)
Return the type_info associated to a name.
Definition RDFUtils.cxx:86
ROOT::RDF::RSampleInfo CreateSampleInfo(const ROOT::RDF::RDataSource &ds, unsigned int slot, const std::unordered_map< std::string, ROOT::RDF::Experimental::RSample * > &sampleMap)
Definition RDFUtils.cxx:680
ROOT::RDF::Experimental::RDatasetSpec RetrieveSpecFromJson(const std::string &jsonFile)
Function to retrieve RDatasetSpec from JSON file provided.
Definition RDFUtils.cxx:572
unsigned int GetNSlots()
Definition RDFUtils.cxx:411
std::string ComposeRVecTypeName(const std::string &valueType)
Definition RDFUtils.cxx:245
void CallInitializeWithOpts(ROOT::RDF::RDataSource &ds, const std::set< std::string > &suppressErrorsForMissingColumns)
Definition RDFUtils.cxx:669
std::string GetLeafTypeName(TLeaf *leaf, const std::string &colName)
Definition RDFUtils.cxx:250
const std::vector< std::string > & GetTopLevelFieldNames(const ROOT::RDF::RDataSource &ds)
Definition RDFUtils.cxx:659
char TypeName2ROOTTypeName(const std::string &b)
Convert type name (e.g. "Float_t") to ROOT type code (e.g. 'F').
Definition RDFUtils.cxx:369
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure...
Definition RDFUtils.cxx:200
bool IsStrInVec(const std::string &str, const std::vector< std::string > &vec)
Definition RDFUtils.cxx:550
unsigned int GetColumnWidth(const std::vector< std::string > &names, const unsigned int minColumnSpace=8u)
Get optimal column width for printing a table given the names and the desired minimal space between c...
Definition RDFUtils.cxx:501
std::string GetBranchOrLeafTypeName(TTree &t, const std::string &colName)
Return the typename of object colName stored in t, if any.
Definition RDFUtils.cxx:282
std::string DescribeDataset(ROOT::RDF::RDataSource &ds)
Definition RDFUtils.cxx:675
std::unique_ptr< ROOT::Detail::RDF::RColumnReaderBase > CreateColumnReader(ROOT::RDF::RDataSource &ds, unsigned int slot, std::string_view col, const std::type_info &tid, TTreeReader *treeReader)
Definition RDFUtils.cxx:698
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *, RDataSource *, RDefineBase *, bool vector2RVec=true)
Return a string containing the type of the given branch.
Definition RDFUtils.cxx:339
void InterpreterCalc(const std::string &code, const std::string &context="")
Jit code in the interpreter with TInterpreter::Calc, throw in case of errors.
Definition RDFUtils.cxx:455
void CheckReaderTypeMatches(const std::type_info &colType, const std::type_info &requestedType, const std::string &colName)
Definition RDFUtils.cxx:513
bool IsInternalColumn(std::string_view colName)
Whether custom column with name colName is an "internal" column such as rdfentry_ or rdfslot_.
Definition RDFUtils.cxx:492
std::vector< ROOT::RDF::Experimental::RSample > MoveOutSamples(ROOT::RDF::Experimental::RDatasetSpec &spec)
Definition RDFUtils.cxx:704
void ProcessMT(ROOT::RDF::RDataSource &ds, ROOT::Detail::RDF::RLoopManager &lm)
Definition RDFUtils.cxx:692
void WarnHist()
Warn once about experimental filling of RHist.
Definition RDFUtils.cxx:55
std::string GetTypeNameWithOpts(const ROOT::RDF::RDataSource &ds, std::string_view colName, bool vector2RVec)
Definition RDFUtils.cxx:654
void InterpreterDeclare(const std::string &code)
Declare code in the interpreter via the TInterpreter::Declare method, throw in case of errors.
Definition RDFUtils.cxx:443
const std::vector< std::string > & GetColumnNamesNoDuplicates(const ROOT::RDF::RDataSource &ds)
Definition RDFUtils.cxx:664
unsigned int & NThreadPerTH3()
Obtain or set the number of threads that will share a clone of a thread-safe 3D histogram.
Definition RDFUtils.cxx:76
char TypeID2ROOTTypeName(const std::type_info &tid)
Definition RDFUtils.cxx:228
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition TROOT.cxx:669
UInt_t GetThreadPoolSize()
Returns the size of ROOT's thread pool.
Definition TROOT.cxx:676
@ kSTLvector
Definition ESTLType.h:30
ROOT::ESTLType IsSTLCont(std::string_view type)
type : type name: vector<list<classA,allocator>,allocator> result: 0 : not stl container code of cont...
int GetSplit(const char *type, std::vector< std::string > &output, int &nestedLoc, EModType mode=TClassEdit::kNone)
Stores in output (after emptying it) the split type.
TMarker m
Definition textangle.C:8
TLine l
Definition textangle.C:4