41static bool ClustersAreSortedAndContiguous(
const std::vector<std::vector<EntryCluster>> &cls)
44 for (
const auto &fcl : cls) {
45 for (
const auto &
c : fcl) {
46 if (last_end !=
c.start)
61static std::vector<std::vector<EntryCluster>>
62ConvertToElistClusters(std::vector<std::vector<EntryCluster>> &&clusters,
TEntryList &entryList,
63 const std::vector<std::string> &treeNames,
const std::vector<std::string> &fileNames,
64 const std::vector<Long64_t> &entriesPerFile)
67 R__ASSERT(ClustersAreSortedAndContiguous(clusters));
69 const bool listHasGlobalEntryNumbers = entryList.
GetLists() ==
nullptr;
70 const auto nFiles = clusters.size();
72 std::unique_ptr<TChain> chain;
77 if (listHasGlobalEntryNumbers) {
85 for (
auto i = 0u; i < nFiles; ++i)
86 chain->Add((fileNames[i] +
"/" + treeNames[i]).c_str(), entriesPerFile[i]);
90 Long64_t localEntry = elist.GetEntryAndTree(elEntry, treenum);
91 if (localEntry == -1ll)
93 return localEntry + ch->GetTreeOffset()[treenum];
102 std::vector<std::vector<EntryCluster>> elistClusters;
104 for (
auto fileN = 0u; fileN < nFiles; ++fileN) {
105 std::vector<EntryCluster> elistClustersForFile;
106 for (
const auto &
c : clusters[fileN]) {
107 if (entry >=
c.end || entry == -1ll)
110 const Long64_t elistRangeStart = elistEntry;
112 while (entry <
c.end && entry != -1ll)
113 entry = Next(elistEntry, entryList, chain.get());
114 elistClustersForFile.emplace_back(EntryCluster{elistRangeStart, elistEntry});
116 elistClusters.emplace_back(std::move(elistClustersForFile));
119 R__ASSERT(elistClusters.size() == clusters.size());
120 R__ASSERT(ClustersAreSortedAndContiguous(elistClusters));
123 return elistClusters;
127using ClustersAndEntries = std::pair<std::vector<std::vector<EntryCluster>>, std::vector<Long64_t>>;
131static ClustersAndEntries
132MakeClusters(
const std::vector<std::string> &treeNames,
const std::vector<std::string> &fileNames)
137 const auto nFileNames = fileNames.size();
138 std::vector<std::vector<EntryCluster>> clustersPerFile;
139 std::vector<Long64_t> entriesPerFile;
140 entriesPerFile.reserve(nFileNames);
142 for (
auto i = 0u; i < nFileNames; ++i) {
143 const auto &fileName = fileNames[i];
144 const auto &treeName = treeNames[i];
146 std::unique_ptr<TFile>
f(
TFile::Open(fileName.c_str()));
147 if (!
f ||
f->IsZombie()) {
148 const auto msg =
"TTreeProcessorMT::Process: an error occurred while opening file \"" + fileName +
"\"";
149 throw std::runtime_error(msg);
151 auto *t =
f->Get<
TTree>(treeName.c_str());
154 const auto msg =
"TTreeProcessorMT::Process: an error occurred while getting tree \"" + treeName +
155 "\" from file \"" + fileName +
"\"";
156 throw std::runtime_error(msg);
159 auto clusterIter = t->GetClusterIterator(0);
161 const Long64_t entries = t->GetEntries();
163 std::vector<EntryCluster> clusters;
164 while ((start = clusterIter()) < entries) {
165 end = clusterIter.GetNextEntry();
167 clusters.emplace_back(EntryCluster{start + offset, end + offset});
170 clustersPerFile.emplace_back(std::move(clusters));
171 entriesPerFile.emplace_back(entries);
186 std::vector<std::vector<EntryCluster>> eventRangesPerFile(clustersPerFile.size());
187 auto clustersPerFileIt = clustersPerFile.begin();
188 auto eventRangesPerFileIt = eventRangesPerFile.begin();
189 for (; clustersPerFileIt != clustersPerFile.end(); clustersPerFileIt++, eventRangesPerFileIt++) {
190 const auto clustersInThisFileSize = clustersPerFileIt->size();
191 const auto nFolds = clustersInThisFileSize / maxTasksPerFile;
196 clustersPerFileIt->begin(), clustersPerFileIt->end(),
197 [&eventRangesPerFileIt](
const EntryCluster &clust) { eventRangesPerFileIt->emplace_back(clust); });
202 auto nReminderClusters = clustersInThisFileSize % maxTasksPerFile;
203 const auto clustersInThisFile = *clustersPerFileIt;
204 for (
auto i = 0ULL; i < clustersInThisFileSize; ++i) {
205 const auto start = clustersInThisFile[i].start;
210 if (nReminderClusters > 0) {
214 const auto end = clustersInThisFile[i].end;
215 eventRangesPerFileIt->emplace_back(EntryCluster({start, end}));
219 return std::make_pair(std::move(eventRangesPerFile), std::move(entriesPerFile));
224static std::vector<std::vector<Long64_t>>
225GetFriendEntries(
const std::vector<std::pair<std::string, std::string>> &friendNames,
226 const std::vector<std::vector<std::string>> &friendFileNames)
228 std::vector<std::vector<Long64_t>> friendEntries;
229 const auto nFriends = friendNames.size();
230 for (
auto i = 0u; i < nFriends; ++i) {
231 std::vector<Long64_t> nEntries;
232 const auto &thisFriendName = friendNames[i].first;
233 const auto &thisFriendFiles = friendFileNames[i];
234 for (
const auto &fname : thisFriendFiles) {
237 f->GetObject(thisFriendName.c_str(), t);
240 friendEntries.emplace_back(std::move(nEntries));
243 return friendEntries;
248static std::vector<std::string> GetTreeFullPaths(
const TTree &
tree)
252 auto &chain =
static_cast<const TChain &
>(
tree);
254 if (!files || files->GetEntries() == 0) {
255 throw std::runtime_error(
"TTreeProcessorMT: input TChain does not contain any file");
257 std::vector<std::string> treeNames;
259 treeNames.emplace_back(
f->GetName());
265 if (
auto motherDir =
tree.GetDirectory()) {
271 if (motherDir->InheritsFrom(
"TFile")) {
272 return {
tree.GetName()};
274 std::string fullPath = motherDir->GetPath();
275 fullPath = fullPath.substr(fullPath.find(
":/") + 1);
277 fullPath +=
tree.GetName();
282 return {
tree.GetName()};
301 const FriendInfo &friendInfo,
const std::vector<Long64_t> &nEntries,
302 const std::vector<std::vector<Long64_t>> &friendEntries)
304 const std::vector<NameAlias> &friendNames = friendInfo.
fFriendNames;
305 const std::vector<std::vector<std::string>> &friendFileNames = friendInfo.
fFriendFileNames;
308 const auto nFiles = fileNames.size();
309 for (
auto i = 0u; i < nFiles; ++i) {
310 fChain->Add((fileNames[i] +
"/" + treeNames[i]).c_str(), nEntries[i]);
315 const auto nFriends = friendNames.size();
316 for (
auto i = 0u; i < nFriends; ++i) {
317 const auto &friendName = friendNames[i];
318 const auto &
name = friendName.first;
319 const auto &alias = friendName.second;
322 auto frChain = std::make_unique<TChain>(
name.c_str());
323 const auto nFileNames = friendFileNames[i].size();
324 for (
auto j = 0u; j < nFileNames; ++j)
325 frChain->Add(friendFileNames[i][j].c_str(), friendEntries[i][j]);
328 fChain->AddFriend(frChain.get(), alias.c_str());
329 fFriends.emplace_back(std::move(frChain));
335std::unique_ptr<TTreeReader>
337 const std::vector<std::string> &fileNames,
const FriendInfo &friendInfo,
338 const TEntryList &entryList,
const std::vector<Long64_t> &nEntries,
339 const std::vector<std::vector<Long64_t>> &friendEntries)
341 const bool hasEntryList = entryList.
GetN() > 0;
342 const bool usingLocalEntries = friendInfo.
fFriendNames.empty() && !hasEntryList;
343 const bool needNewChain =
344 fChain ==
nullptr || (usingLocalEntries && (fileNames[0] !=
fChain->GetListOfFiles()->At(0)->GetTitle() ||
345 treeNames[0] !=
fChain->GetListOfFiles()->At(0)->GetName()));
347 MakeChain(treeNames, fileNames, friendInfo, nEntries, friendEntries);
358 auto reader = std::make_unique<TTreeReader>(
fChain.get(),
fEntryList.get());
359 reader->SetEntriesRange(start, end);
373 std::vector<Internal::NameAlias> friendNames;
374 std::vector<std::vector<std::string>> friendFileNames;
376 const auto friends =
tree.GetListOfFriends();
380 for (
auto fr : *friends) {
384 friendFileNames.emplace_back();
385 auto &fileNames = friendFileNames.back();
388 const auto alias_c =
tree.GetFriendAlias(frTree);
389 const std::string alias = alias_c !=
nullptr ? alias_c :
"";
400 const auto chainFiles =
static_cast<TChain*
>(frTree)->GetListOfFiles();
401 const auto realName = chainFiles->First()->
GetName();
402 friendNames.emplace_back(std::make_pair(realName, alias));
404 for (
auto f : *chainFiles) {
405 fileNames.emplace_back(
f->GetTitle());
409 const auto realName = GetTreeFullPaths(*frTree)[0];
410 friendNames.emplace_back(std::make_pair(realName, alias));
413 const auto f = frTree->GetCurrentFile();
415 throw std::runtime_error(
"Friend trees with no associated file are not supported.");
416 fileNames.emplace_back(
f->GetName());
427 std::vector<std::string> treeNames;
430 throw std::runtime_error(
"Empty list of files and no tree name provided");
434 std::string treeName;
436 TIter next(
f->GetListOfKeys());
437 while (
auto *key =
static_cast<TKey *
>(next())) {
438 const char *className = key->GetClassName();
439 if (strcmp(className,
"TTree") == 0) {
440 treeName = key->GetName();
444 if (treeName.empty())
445 throw std::runtime_error(
"Cannot find any tree in file " + fname);
446 treeNames.emplace_back(std::move(treeName));
460 : fFileNames({std::string(filename)}),
461 fTreeNames(treename.empty() ? FindTreeNames() : std::vector<std::string>{std::string(treename)}), fFriendInfo(),
470 throw std::runtime_error(
"The provided list of file names is empty");
472 std::vector<std::string> strings;
473 strings.reserve(views.size());
474 for (
const auto &
v : views)
475 strings.emplace_back(
v);
493 fTreeNames(treename.empty() ? FindTreeNames()
494 : std::vector<std::string>(fFileNames.size(), std::string(treename))),
495 fFriendInfo(), fPool(nThreads)
502 std::vector<std::string> filenames;
509 throw std::runtime_error(
"The provided chain of files is empty");
510 filenames.reserve(nFiles);
511 for (
auto f : *filelist)
512 filenames.emplace_back(
f->GetTitle());
516 const auto msg =
"The specified TTree is not linked to any file, in-memory-only trees are not supported.";
517 throw std::runtime_error(msg);
520 filenames.emplace_back(
f->GetName());
534 fFriendInfo(GetFriendInfo(
tree)), fPool(nThreads)
575 const bool hasFriends = !friendNames.empty();
577 const bool shouldRetrieveAllClusters = hasFriends || hasEntryList;
578 ClustersAndEntries clusterAndEntries{};
579 if (shouldRetrieveAllClusters) {
582 clusterAndEntries.first = ConvertToElistClusters(std::move(clusterAndEntries.first),
fEntryList,
fTreeNames,
586 const auto &clusters = clusterAndEntries.first;
587 const auto &entries = clusterAndEntries.second;
590 const auto friendEntries =
591 hasFriends ? GetFriendEntries(friendNames, friendFileNames) : std::vector<std::vector<Long64_t>>{};
595 auto processFile = [&](std::size_t fileIdx) {
597 const auto &theseFiles = shouldRetrieveAllClusters ?
fFileNames : std::vector<std::string>({
fFileNames[fileIdx]});
599 const auto &theseTrees = shouldRetrieveAllClusters ?
fTreeNames : std::vector<std::string>({
fTreeNames[fileIdx]});
601 const auto theseClustersAndEntries =
602 shouldRetrieveAllClusters ? ClustersAndEntries{} : MakeClusters(theseTrees, theseFiles);
605 const auto &thisFileClusters = shouldRetrieveAllClusters ? clusters[fileIdx] : theseClustersAndEntries.first[0];
608 const auto &theseEntries =
609 shouldRetrieveAllClusters ? entries : std::vector<Long64_t>({theseClustersAndEntries.second[0]});
611 auto processCluster = [&](
const EntryCluster &
c) {
613 theseEntries, friendEntries);
620 std::vector<std::size_t> fileIdxs(
fFileNames.size());
621 std::iota(fileIdxs.begin(), fileIdxs.end(), 0u);
std::vector< std::string > GetFilesFromTree(TTree &tree)
std::vector< std::string > CheckAndConvert(const std::vector< std::string_view > &views)
std::unique_ptr< TChain > fChain
Chain on which to operate.
std::unique_ptr< TTreeReader > GetTreeReader(Long64_t start, Long64_t end, const std::vector< std::string > &treeName, const std::vector< std::string > &fileNames, const FriendInfo &friendInfo, const TEntryList &entryList, const std::vector< Long64_t > &nEntries, const std::vector< std::vector< Long64_t > > &friendEntries)
Get a TTreeReader for the current tree of this view.
std::vector< std::unique_ptr< TChain > > fFriends
Friends of the tree/chain, if present.
std::unique_ptr< TEntryList > fEntryList
TEntryList for fChain, if present.
void MakeChain(const std::vector< std::string > &treeName, const std::vector< std::string > &fileNames, const FriendInfo &friendInfo, const std::vector< Long64_t > &nEntries, const std::vector< std::vector< Long64_t > > &friendEntries)
Construct fChain, also adding friends if needed and injecting knowledge of offsets if available.
void Foreach(F func, unsigned nTimes, unsigned nChunks=0)
Execute func (with no arguments) nTimes in parallel.
A class to process the entries of a TTree in parallel.
const std::vector< std::string > fTreeNames
TTree names (always same size and ordering as fFileNames)
static unsigned int GetMaxTasksPerFilePerWorker()
Returns the maximum number of tasks created per file, per worker.
std::vector< std::string > FindTreeNames()
Retrieve the names of the TTrees in each of the input files, throw if a TTree cannot be found.
const std::vector< std::string > fFileNames
Names of the files.
static void SetMaxTasksPerFilePerWorker(unsigned int m)
Sets the maximum number of tasks created per file, per worker.
static unsigned int fgMaxTasksPerFilePerWorker
ROOT::TThreadExecutor fPool
Thread pool for processing.
TEntryList fEntryList
User-defined selection of entry numbers to be processed, empty if none was provided.
ROOT::TThreadedObject< ROOT::Internal::TTreeView > fTreeView
Thread-local TreeViews.
TTreeProcessorMT(std::string_view filename, std::string_view treename="", UInt_t nThreads=0u)
Constructor based on a file name.
void Process(std::function< void(TTreeReader &)> func)
Process the entries of a TTree in parallel.
const Internal::FriendInfo fFriendInfo
A chain is a collection of files containing TTree objects.
TObjArray * GetListOfFiles() const
Small helper to keep current directory context.
A List of entry numbers in a TTree or TChain.
virtual TList * GetLists() const
virtual Long64_t GetEntry(Int_t index)
Return the number of the entry #index of this TEntryList in the TTree or TChain. See also Next().
virtual Long64_t GetN() const
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
A TFriendElement TF describes a TTree object TF in a file.
Book space in a file, create I/O buffers, to fill them, (un)compress them.
virtual const char * GetName() const
Returns name of object.
Int_t GetEntries() const
Return the number of objects in array (i.e. number of non-empty slots).
Mother of all ROOT objects.
@ kCanDelete
if object in a list can be deleted
@ kMustCleanup
if object destructor must call RecursiveRemove()
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
A TTree represents a columnar dataset.
virtual Long64_t GetEntries() const
basic_string_view< char > string_view
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
UInt_t GetThreadPoolSize()
Returns the size of ROOT's thread pool.
void EnableThreadSafety()
Enables the global mutex to make ROOT thread safe/aware.
std::vector< std::vector< std::string > > fFriendFileNames
Names of the files where each friend is stored.
std::vector< Internal::NameAlias > fFriendNames
Pairs of names and aliases of friend trees/chains.