11#include "RConfigure.h"
33#include <nlohmann/json.hpp>
57 static std::once_flag once;
62using TypeInfoRef = std::reference_wrapper<const std::type_info>;
63struct TypeInfoRefHash {
64 std::size_t
operator()(TypeInfoRef
id)
const {
return id.get().hash_code(); }
67struct TypeInfoRefEqualComp {
68 bool operator()(TypeInfoRef left, TypeInfoRef right)
const {
return left.get() == right.get(); }
78 static unsigned int nThread = 1;
91 const static std::unordered_map<std::string, TypeInfoRef> typeName2TypeIDMap{
94 {
"short",
typeid(short)},
95 {
"short int",
typeid(
short int)},
96 {
"signed short",
typeid(
signed short)},
97 {
"signed short int",
typeid(
signed short int)},
98 {
"unsigned short",
typeid(
unsigned short)},
99 {
"unsigned short int",
typeid(
unsigned short int)},
100 {
"int",
typeid(
int)},
101 {
"signed",
typeid(signed)},
102 {
"signed int",
typeid(
signed int)},
103 {
"unsigned",
typeid(unsigned)},
104 {
"unsigned int",
typeid(
unsigned int)},
105 {
"long",
typeid(long)},
106 {
"long int",
typeid(
long int)},
107 {
"signed long",
typeid(
signed long)},
108 {
"signed long int",
typeid(
signed long int)},
109 {
"unsigned long",
typeid(
unsigned long)},
110 {
"unsigned long int",
typeid(
unsigned long int)},
111 {
"long long",
typeid(
long long)},
112 {
"long long int",
typeid(
long long int)},
113 {
"signed long long",
typeid(
signed long long)},
114 {
"signed long long int",
typeid(
signed long long int)},
115 {
"unsigned long long",
typeid(
unsigned long long)},
116 {
"unsigned long long int",
typeid(
unsigned long long int)},
117 {
"std::size_t",
typeid(std::size_t)},
120 {
"std::int8_t",
typeid(std::int8_t)},
123 {
"std::int16_t",
typeid(std::int16_t)},
126 {
"std::int32_t",
typeid(std::int32_t)},
129 {
"std::int64_t",
typeid(std::int64_t)},
132 {
"std::uint8_t",
typeid(std::uint8_t)},
135 {
"std::uint16_t",
typeid(std::uint16_t)},
138 {
"std::uint32_t",
typeid(std::uint32_t)},
141 {
"std::uint64_t",
typeid(std::uint64_t)},
144 {
"Int_t",
typeid(
Int_t)},
145 {
"UInt_t",
typeid(UInt_t)},
148 {
"Long_t",
typeid(
Long_t)},
153 {
"bool",
typeid(
bool)},
154 {
"Bool_t",
typeid(
bool)},
156 {
"char",
typeid(char)},
157 {
"Char_t",
typeid(char)},
158 {
"signed char",
typeid(
signed char)},
159 {
"unsigned char",
typeid(
unsigned char)},
160 {
"UChar_t",
typeid(
unsigned char)},
161 {
"char16_t",
typeid(char16_t)},
162 {
"char32_t",
typeid(char32_t)},
165 {
"float",
typeid(float)},
166 {
"double",
typeid(
double)},
167 {
"long double",
typeid(
long double)},
169 {
"Float_t",
typeid(float)},
170 {
"Double_t",
typeid(
double)}};
172 if (
auto it = typeName2TypeIDMap.find(
name); it != typeName2TypeIDMap.end())
173 return it->second.get();
176 if (!
c->GetTypeInfo()) {
177 throw std::runtime_error(
"Cannot extract type_info of type " +
name +
".");
179 return *
c->GetTypeInfo();
182 throw std::runtime_error(
"Cannot extract type_info of type " +
name +
".");
193 const static std::unordered_map<TypeInfoRef, std::string, TypeInfoRefHash, TypeInfoRefEqualComp> typeID2TypeNameMap{
194 {
typeid(char),
"char"},
195 {
typeid(
unsigned char),
"unsigned char"},
196 {
typeid(
signed char),
"signed char"},
197 {
typeid(
int),
"int"},
198 {
typeid(
unsigned int),
"unsigned int"},
199 {
typeid(short),
"short"},
200 {
typeid(
unsigned short),
"unsigned short"},
201 {
typeid(long),
"long"},
202 {
typeid(
unsigned long),
"unsigned long"},
203 {
typeid(
double),
"double"},
204 {
typeid(float),
"float"},
207 {
typeid(
bool),
"bool"}};
209 if (
auto it = typeID2TypeNameMap.find(
id); it != typeID2TypeNameMap.end())
221 const static std::unordered_map<TypeInfoRef, char, TypeInfoRefHash, TypeInfoRefEqualComp> typeID2ROOTTypeNameMap{
222 {
typeid(char),
'B'}, {
typeid(
Char_t),
'B'}, {
typeid(
unsigned char),
'b'}, {
typeid(
UChar_t),
'b'},
223 {
typeid(
int),
'I'}, {
typeid(
Int_t),
'I'}, {
typeid(
unsigned int),
'i'}, {
typeid(UInt_t),
'i'},
224 {
typeid(short),
'S'}, {
typeid(
Short_t),
'S'}, {
typeid(
unsigned short),
's'}, {
typeid(
UShort_t),
's'},
225 {
typeid(long),
'G'}, {
typeid(
Long_t),
'G'}, {
typeid(
unsigned long),
'g'}, {
typeid(
ULong_t),
'g'},
226 {
typeid(
long long),
'L'}, {
typeid(
Long64_t),
'L'}, {
typeid(
unsigned long long),
'l'}, {
typeid(
ULong64_t),
'l'},
230 if (
auto it = typeID2ROOTTypeNameMap.find(tid); it != typeID2ROOTTypeNameMap.end())
238 return "ROOT::VecOps::RVec<" + valueType +
">";
244 std::string colType = colTypeCStr ==
nullptr ?
"" : colTypeCStr;
246 throw std::runtime_error(
"Could not deduce type of leaf " + colName);
260 auto valueType = colType;
261 colType =
"ROOT::VecOps::RVec<std::array<" + valueType +
", " + std::to_string(leaf->
GetLenStatic()) +
">>";
276 auto *leaf = t.
GetLeaf(colName.c_str());
281 const auto dotPos = colName.find_last_of(
'.');
282 const auto hasDot = dotPos != std::string::npos;
284 const auto branchName = colName.substr(0, dotPos);
285 const auto leafName = colName.substr(dotPos + 1);
293 auto branch = t.
GetBranch(colName.c_str());
297 static const TClassRef tbranchelement(
"TBranchElement");
298 if (branch->InheritsFrom(tbranchelement)) {
300 if (
auto currentClass = be->GetCurrentClass())
301 return currentClass->
GetName();
305 auto mother = be->GetMother();
306 if (mother && mother->InheritsFrom(tbranchelement) && mother != be) {
308 auto beMomClass = beMom->
GetClass();
309 if (beMomClass && 0 == std::strcmp(
"TClonesArray", beMomClass->GetName()))
310 return be->GetTypeName();
312 return be->GetClassName();
314 }
else if (branch->IsA() ==
TBranch::Class() && branch->GetListOfLeaves()->GetEntriesUnsafe() == 1) {
317 leaf =
static_cast<TLeaf *
>(branch->GetListOfLeaves()->UncheckedAt(0));
323 return std::string();
338 }
else if (ds && ds->
HasColumn(colName)) {
343 std::vector<std::string> split;
346 auto &valueType = split[1];
352 throw std::runtime_error(
"Column \"" + colName +
353 "\" is not in a dataset and is not a custom column been defined.");
362 const static std::unordered_map<std::string, char> typeName2ROOTTypeNameMap{{
"char",
'B'},
364 {
"unsigned char",
'b'},
369 {
"unsigned int",
'i'},
374 {
"unsigned short",
's'},
375 {
"unsigned short int",
's'},
380 {
"unsigned long",
'g'},
381 {
"unsigned long int",
'g'},
388 {
"long long int",
'L'},
390 {
"unsigned long long",
'l'},
391 {
"unsigned long long int",
'l'},
396 if (
auto it = typeName2ROOTTypeNameMap.find(
b); it != typeName2ROOTTypeNameMap.end())
404 unsigned int nSlots = 1;
417 auto newColNames = columnNames;
418 for (
auto &col : newColNames) {
419 const auto dotPos = col.find(
'.');
420 if (dotPos != std::string::npos && dotPos != col.size() - 1 && dotPos != 0u) {
422 std::replace(col.begin(), col.end(),
'.',
'_');
423 if (std::find(columnNames.begin(), columnNames.end(), col) != columnNames.end())
424 throw std::runtime_error(
"Column " + oldName +
" would be written as " + col +
425 " but this column already exists. Please use Alias to select a new name for " +
427 Info(
"Snapshot",
"Column %s will be saved as %s", oldName.c_str(), col.c_str());
440 "\nRDataFrame: An error occurred during just-in-time compilation. The lines above might indicate the cause of "
441 "the crash\n All RDF objects that have not run an event loop yet should be considered in an invalid state.\n";
442 throw std::runtime_error(msg);
455 auto callCalc = [&errorCode, &context](
const std::string &codeSlice) {
458 std::string msg =
"\nAn error occurred during just-in-time compilation";
459 if (!context.empty())
460 msg +=
" in " + context;
462 ". The lines above might indicate the cause of the crash\nAll RDF objects that have not run their event "
463 "loop yet should be considered in an invalid state.\n";
464 throw std::runtime_error(msg);
470 std::size_t substr_start = 0;
471 std::size_t substr_end = 0;
472 while (substr_end != std::string::npos && substr_start != code.size() - 1) {
473 for (std::size_t i = 0u; i < 1000u && substr_end != std::string::npos; ++i) {
474 substr_end = code.find(
'\n', substr_end + 1);
476 const std::string subs = code.substr(substr_start, substr_end - substr_start);
477 substr_start = substr_end;
485 const auto str = colName.data();
486 const auto goodPrefix = colName.size() > 3 &&
487 (
'r' == str[0] ||
't' == str[0]) &&
488 0 == strncmp(
"df", str + 1, 2);
489 return goodPrefix &&
'_' == colName.back();
492unsigned int GetColumnWidth(
const std::vector<std::string>& names,
const unsigned int minColumnSpace)
494 auto columnWidth = 0u;
495 for (
const auto&
name : names) {
496 const auto length =
name.length();
497 if (length > columnWidth)
498 columnWidth = length;
500 columnWidth = (columnWidth / minColumnSpace + 1) * minColumnSpace;
505 const std::string &colName)
509 const bool explicitlySupported = (colType ==
typeid(
bool) && requestedType ==
typeid(
unsigned char)) ?
true :
false;
513 const auto diffTypes = (0 != std::strcmp(colType.name(), requestedType.name()));
514 auto inheritedType = [&]() {
516 return colTClass && colTClass->InheritsFrom(
TClass::GetClass(requestedType));
519 if (!explicitlySupported && diffTypes && !inheritedType()) {
522 std::string errMsg =
"RDataFrame: type mismatch: column \"" + colName +
"\" is being used as ";
524 errMsg += requestedType.name();
525 errMsg +=
" (extracted from type info)";
529 errMsg +=
" but the Define or Vary node advertises it as ";
530 if (colTypeName.empty()) {
533 errMsg +=
" (extracted from type info)";
535 errMsg += colTypeName;
537 throw std::runtime_error(errMsg);
543 return std::find(
vec.cbegin(),
vec.cend(), str) !=
vec.cend();
560 return fStrings.insert(
string).first;
565 const nlohmann::ordered_json fullData = nlohmann::ordered_json::parse(std::ifstream(jsonFile));
566 if (!fullData.contains(
"samples") || fullData[
"samples"].empty()) {
567 throw std::runtime_error(
568 R
"(The input specification does not contain any samples. Please provide the samples in the specification like:
572 "trees": ["tree1", "tree2"],
573 "files": ["file1.root", "file2.root"],
574 "metadata": {"lumi": 1.0, }
577 "trees": ["tree3", "tree4"],
578 "files": ["file3.root", "file4.root"],
579 "metadata": {"lumi": 0.5, }
587 for (
const auto &keyValue : fullData[
"samples"].items()) {
588 const std::string &sampleName = keyValue.key();
589 const auto &sample = keyValue.value();
592 if (!sample.contains(
"trees")) {
593 throw std::runtime_error(
"A list of tree names must be provided for sample " + sampleName +
".");
595 std::vector<std::string> trees = sample[
"trees"];
596 if (!sample.contains(
"files")) {
597 throw std::runtime_error(
"A list of files must be provided for sample " + sampleName +
".");
599 std::vector<std::string> files = sample[
"files"];
600 if (!sample.contains(
"metadata")) {
604 for (
const auto &metadata : sample[
"metadata"].items()) {
605 const auto &val = metadata.value();
607 m.Add(metadata.key(), val.get<std::string>());
608 else if (val.is_number_integer())
609 m.Add(metadata.key(), val.get<
int>());
610 else if (val.is_number_float())
611 m.Add(metadata.key(), val.get<
double>());
613 throw std::logic_error(
"The metadata keys can only be of type [string|int|double].");
618 if (fullData.contains(
"friends")) {
619 for (
const auto &friends : fullData[
"friends"].items()) {
620 std::string alias = friends.key();
621 std::vector<std::string> trees = friends.value()[
"trees"];
622 std::vector<std::string> files = friends.value()[
"files"];
623 if (files.size() != trees.size() && trees.size() > 1)
624 throw std::runtime_error(
"Mismatch between trees and files in a friend.");
629 if (fullData.contains(
"range")) {
630 std::vector<int> range = fullData[
"range"];
632 if (range.size() == 1)
634 else if (range.size() == 2)
647 return df.GetTypeNameWithOpts(colName, vector2RVec);
652 return df.GetTopLevelFieldNames();
657 return df.GetColumnNamesNoDuplicates();
661 const std::set<std::string> &suppressErrorsForMissingColumns)
673 const std::unordered_map<std::string, ROOT::RDF::Experimental::RSample *> &sampleMap)
688std::unique_ptr<ROOT::Detail::RDF::RColumnReaderBase>
690 const std::type_info &tid,
TTreeReader *treeReader)
#define R__LOG_WARNING(...)
#define R__LOG_DEBUG(DEBUGLEVEL,...)
Basic types used by ROOT and required by TInterpreter.
unsigned short UShort_t
Unsigned Short integer 2 bytes (unsigned short).
int Int_t
Signed integer 4 bytes (int).
unsigned char UChar_t
Unsigned Character 1 byte (unsigned char).
char Char_t
Character 1 byte (char).
unsigned long ULong_t
Unsigned long integer 4 bytes (unsigned long). Size depends on architecture.
long Long_t
Signed long integer 4 bytes (long). Size depends on architecture.
bool Bool_t
Boolean (0=false, 1=true) (bool).
short Short_t
Signed Short integer 2 bytes (short).
double Double_t
Double 8 bytes.
void Info(const char *location, const char *msgfmt,...)
Use this function for informational messages.
TRObject operator()(const T1 &t1) const
std::string GetTypeName() const
The head node of a RDF computation graph.
auto Insert(const std::string &string) -> decltype(fStrings)::const_iterator
Inserts the input string in the cache and returns an iterator to the cached string.
std::unordered_set< std::string > fStrings
The dataset specification for RDataFrame.
RDatasetSpec & WithGlobalFriends(const std::string &treeName, const std::string &fileNameGlob, const std::string &alias="")
Add friend tree to RDatasetSpec object.
RDatasetSpec & AddSample(RSample sample)
Add sample (RSample class object) to the RDatasetSpec object.
RDatasetSpec & WithGlobalRange(const RDatasetSpec::REntryRange &entryRange={})
Create an RDatasetSpec object for a given range of entries.
std::vector< RSample > fSamples
List of samples.
Class representing a sample which is a grouping of trees and their fileglobs, and,...
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
virtual bool HasColumn(std::string_view colName) const =0
Checks if the dataset has a certain column.
virtual void ProcessMT(ROOT::Detail::RDF::RLoopManager &)
virtual std::unique_ptr< ROOT::Detail::RDF::RColumnReaderBase > CreateColumnReader(unsigned int slot, std::string_view col, const std::type_info &tid, TTreeReader *)
Creates a column reader for the requested column.
virtual std::string DescribeDataset()
virtual void InitializeWithOpts(const std::set< std::string > &)
virtual ROOT::RDF::RSampleInfo CreateSampleInfo(unsigned int, const std::unordered_map< std::string, ROOT::RDF::Experimental::RSample * > &) const
virtual void RunFinalChecks(bool) const
This type represents a sample identifier, to be used in conjunction with RDataFrame features such as ...
A log configuration for a channel, e.g.
A Branch for the case of an object.
virtual TClass * GetClass() const
TClassRef is used to implement a permanent reference to a TClass object.
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
virtual const char * GetTypeName() const
virtual TLeaf * GetLeafCount() const
If this leaf stores a variable-sized array or a multi-dimensional array whose last dimension has vari...
virtual Int_t GetLenStatic() const
Return the fixed length of this leaf.
const char * GetName() const override
Returns name of object.
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
A TTree represents a columnar dataset.
virtual TBranch * FindBranch(const char *name)
Return the branch that corresponds to the path 'branchname', which can include the name of the tree or...
virtual TBranch * GetBranch(const char *name)
Return pointer to the branch with the given name in this tree or its friends.
virtual TLeaf * GetLeaf(const char *branchname, const char *leafname)
Return pointer to the 1st Leaf named name in any Branch of this Tree or any branch in the list of fri...
virtual TLeaf * FindLeaf(const char *name)
Find first leaf containing searchname.
ROOT::RLogChannel & RDFLogChannel()
void RunFinalChecks(const ROOT::RDF::RDataSource &ds, bool nodesLeftNotRun)
std::vector< std::string > ReplaceDotWithUnderscore(const std::vector< std::string > &columnNames)
Replace occurrences of '.' with '_' in each string passed as argument.
const std::type_info & TypeName2TypeID(const std::string &name)
Return the type_info associated to a name.
ROOT::RDF::RSampleInfo CreateSampleInfo(const ROOT::RDF::RDataSource &ds, unsigned int slot, const std::unordered_map< std::string, ROOT::RDF::Experimental::RSample * > &sampleMap)
ROOT::RDF::Experimental::RDatasetSpec RetrieveSpecFromJson(const std::string &jsonFile)
Function to retrieve RDatasetSpec from JSON file provided.
std::string ComposeRVecTypeName(const std::string &valueType)
void CallInitializeWithOpts(ROOT::RDF::RDataSource &ds, const std::set< std::string > &suppressErrorsForMissingColumns)
std::string GetLeafTypeName(TLeaf *leaf, const std::string &colName)
const std::vector< std::string > & GetTopLevelFieldNames(const ROOT::RDF::RDataSource &ds)
char TypeName2ROOTTypeName(const std::string &b)
Convert type name (e.g. "unsigned int") to the corresponding ROOT type code character (e.g. 'i').
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure...
bool IsStrInVec(const std::string &str, const std::vector< std::string > &vec)
unsigned int GetColumnWidth(const std::vector< std::string > &names, const unsigned int minColumnSpace=8u)
Get optimal column width for printing a table given the names and the desired minimal space between c...
std::string GetBranchOrLeafTypeName(TTree &t, const std::string &colName)
Return the typename of object colName stored in t, if any.
std::string DescribeDataset(ROOT::RDF::RDataSource &ds)
std::unique_ptr< ROOT::Detail::RDF::RColumnReaderBase > CreateColumnReader(ROOT::RDF::RDataSource &ds, unsigned int slot, std::string_view col, const std::type_info &tid, TTreeReader *treeReader)
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *, RDataSource *, RDefineBase *, bool vector2RVec=true)
Return a string containing the type of the given branch.
void InterpreterCalc(const std::string &code, const std::string &context="")
Jit code in the interpreter with TInterpreter::Calc, throw in case of errors.
void CheckReaderTypeMatches(const std::type_info &colType, const std::type_info &requestedType, const std::string &colName)
bool IsInternalColumn(std::string_view colName)
Whether custom column with name colName is an "internal" column such as rdfentry_ or rdfslot_.
std::vector< ROOT::RDF::Experimental::RSample > MoveOutSamples(ROOT::RDF::Experimental::RDatasetSpec &spec)
void ProcessMT(ROOT::RDF::RDataSource &ds, ROOT::Detail::RDF::RLoopManager &lm)
void WarnHist()
Warn once about experimental filling of RHist.
std::string GetTypeNameWithOpts(const ROOT::RDF::RDataSource &ds, std::string_view colName, bool vector2RVec)
void InterpreterDeclare(const std::string &code)
Declare code in the interpreter via the TInterpreter::Declare method, throw in case of errors.
const std::vector< std::string > & GetColumnNamesNoDuplicates(const ROOT::RDF::RDataSource &ds)
unsigned int & NThreadPerTH3()
Obtain or set the number of threads that will share a clone of a thread-safe 3D histogram.
char TypeID2ROOTTypeName(const std::type_info &tid)
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
UInt_t GetThreadPoolSize()
Returns the size of ROOT's thread pool.
ROOT::ESTLType IsSTLCont(std::string_view type)
type : type name: vector<list<classA,allocator>,allocator> result: 0 : not stl container code of cont...
int GetSplit(const char *type, std::vector< std::string > &output, int &nestedLoc, EModType mode=TClassEdit::kNone)
Stores in output (after emptying it) the split type.