41static bool ClustersAreSortedAndContiguous(
const std::vector<std::vector<EntryCluster>> &cls)
44 for (
const auto &fcl : cls) {
45 for (
const auto &
c : fcl) {
46 if (last_end !=
c.start)
61static std::vector<std::vector<EntryCluster>>
62ConvertToElistClusters(std::vector<std::vector<EntryCluster>> &&clusters,
TEntryList &entryList,
63 const std::vector<std::string> &treeNames,
const std::vector<std::string> &fileNames,
64 const std::vector<Long64_t> &entriesPerFile)
67 R__ASSERT(ClustersAreSortedAndContiguous(clusters));
69 const bool listHasGlobalEntryNumbers = entryList.
GetLists() ==
nullptr;
70 const auto nFiles = clusters.size();
72 std::unique_ptr<TChain> chain;
77 if (listHasGlobalEntryNumbers) {
85 for (
auto i = 0u; i < nFiles; ++i)
86 chain->
Add((fileNames[i] +
"?query#" + treeNames[i]).c_str(), entriesPerFile[i]);
90 Long64_t localEntry = elist.GetEntryAndTree(elEntry, treenum);
91 if (localEntry == -1ll)
93 return localEntry + ch->GetTreeOffset()[treenum];
102 std::vector<std::vector<EntryCluster>> elistClusters;
104 for (
auto fileN = 0u; fileN < nFiles; ++fileN) {
105 std::vector<EntryCluster> elistClustersForFile;
106 for (
const auto &
c : clusters[fileN]) {
107 if (entry >=
c.end || entry == -1ll)
110 const Long64_t elistRangeStart = elistEntry;
112 while (entry <
c.end && entry != -1ll)
113 entry = Next(elistEntry, entryList, chain.get());
114 elistClustersForFile.emplace_back(EntryCluster{elistRangeStart, elistEntry});
116 elistClusters.emplace_back(std::move(elistClustersForFile));
119 R__ASSERT(elistClusters.size() == clusters.size());
120 R__ASSERT(ClustersAreSortedAndContiguous(elistClusters));
123 return elistClusters;
// Result type of MakeClusters: `first` holds, for each input file, its list of entry clusters;
// `second` holds the number of entries of each input file.
127using ClustersAndEntries = std::pair<std::vector<std::vector<EntryCluster>>, std::vector<Long64_t>>;
131static ClustersAndEntries
132MakeClusters(
const std::vector<std::string> &treeNames,
const std::vector<std::string> &fileNames,
const unsigned int maxTasksPerFile)
137 const auto nFileNames = fileNames.size();
138 std::vector<std::vector<EntryCluster>> clustersPerFile;
139 std::vector<Long64_t> entriesPerFile;
140 entriesPerFile.reserve(nFileNames);
142 for (
auto i = 0u; i < nFileNames; ++i) {
143 const auto &fileName = fileNames[i];
144 const auto &treeName = treeNames[i];
146 std::unique_ptr<TFile>
f(
TFile::Open(fileName.c_str()));
147 if (!
f ||
f->IsZombie()) {
148 const auto msg =
"TTreeProcessorMT::Process: an error occurred while opening file \"" + fileName +
"\"";
149 throw std::runtime_error(msg);
151 auto *t =
f->Get<
TTree>(treeName.c_str());
154 const auto msg =
"TTreeProcessorMT::Process: an error occurred while getting tree \"" + treeName +
155 "\" from file \"" + fileName +
"\"";
156 throw std::runtime_error(msg);
159 auto clusterIter = t->GetClusterIterator(0);
161 const Long64_t entries = t->GetEntries();
163 std::vector<EntryCluster> clusters;
164 while ((start = clusterIter()) < entries) {
165 end = clusterIter.GetNextEntry();
167 clusters.emplace_back(EntryCluster{start + offset, end + offset});
170 clustersPerFile.emplace_back(std::move(clusters));
171 entriesPerFile.emplace_back(entries);
189 std::vector<std::vector<EntryCluster>> eventRangesPerFile(clustersPerFile.size());
190 auto clustersPerFileIt = clustersPerFile.begin();
191 auto eventRangesPerFileIt = eventRangesPerFile.begin();
192 for (; clustersPerFileIt != clustersPerFile.end(); clustersPerFileIt++, eventRangesPerFileIt++) {
193 const auto clustersInThisFileSize = clustersPerFileIt->size();
194 const auto nFolds = clustersInThisFileSize / maxTasksPerFile;
198 *eventRangesPerFileIt = std::move(*clustersPerFileIt);
203 auto nReminderClusters = clustersInThisFileSize % maxTasksPerFile;
204 const auto &clustersInThisFile = *clustersPerFileIt;
205 for (
auto i = 0ULL; i < clustersInThisFileSize; ++i) {
206 const auto start = clustersInThisFile[i].start;
211 if (nReminderClusters > 0) {
215 const auto end = clustersInThisFile[i].end;
216 eventRangesPerFileIt->emplace_back(EntryCluster({start, end}));
220 return std::make_pair(std::move(eventRangesPerFile), std::move(entriesPerFile));
225static std::vector<std::vector<Long64_t>>
226GetFriendEntries(
const std::vector<std::pair<std::string, std::string>> &friendNames,
227 const std::vector<std::vector<std::string>> &friendFileNames)
229 std::vector<std::vector<Long64_t>> friendEntries;
230 const auto nFriends = friendNames.size();
231 for (
auto i = 0u; i < nFriends; ++i) {
232 std::vector<Long64_t> nEntries;
233 const auto &thisFriendName = friendNames[i].first;
234 const auto &thisFriendFiles = friendFileNames[i];
235 for (
const auto &fname : thisFriendFiles) {
238 f->GetObject(thisFriendName.c_str(), t);
241 friendEntries.emplace_back(std::move(nEntries));
244 return friendEntries;
249static std::vector<std::string> GetTreeFullPaths(
const TTree &
tree)
252 if (
tree.IsA() == TChain::Class()) {
253 auto &chain =
static_cast<const TChain &
>(
tree);
255 if (!files || files->GetEntries() == 0) {
256 throw std::runtime_error(
"TTreeProcessorMT: input TChain does not contain any file");
258 std::vector<std::string> treeNames;
260 treeNames.emplace_back(
f->GetName());
266 if (
auto motherDir =
tree.GetDirectory()) {
272 if (motherDir->InheritsFrom(
"TFile")) {
273 return {
tree.GetName()};
275 std::string fullPath = motherDir->GetPath();
276 fullPath = fullPath.substr(fullPath.find(
":/") + 1);
278 fullPath +=
tree.GetName();
283 return {
tree.GetName()};
304 const FriendInfo &friendInfo,
const std::vector<Long64_t> &nEntries,
305 const std::vector<std::vector<Long64_t>> &friendEntries)
307 const std::vector<NameAlias> &friendNames = friendInfo.
fFriendNames;
308 const std::vector<std::vector<std::string>> &friendFileNames = friendInfo.
fFriendFileNames;
311 const auto nFiles = fileNames.size();
312 for (
auto i = 0u; i < nFiles; ++i) {
313 fChain->Add((fileNames[i] +
"?query#" + treeNames[i]).c_str(), nEntries[i]);
318 const auto nFriends = friendNames.size();
319 for (
auto i = 0u; i < nFriends; ++i) {
320 const auto &friendName = friendNames[i];
321 const auto &
name = friendName.first;
322 const auto &alias = friendName.second;
325 auto frChain = std::make_unique<TChain>(
name.c_str());
326 const auto nFileNames = friendFileNames[i].size();
327 for (
auto j = 0u; j < nFileNames; ++j)
328 frChain->Add(friendFileNames[i][j].c_str(), friendEntries[i][j]);
331 fChain->AddFriend(frChain.get(), alias.c_str());
332 fFriends.emplace_back(std::move(frChain));
338std::unique_ptr<TTreeReader>
340 const std::vector<std::string> &fileNames,
const FriendInfo &friendInfo,
341 const TEntryList &entryList,
const std::vector<Long64_t> &nEntries,
342 const std::vector<std::vector<Long64_t>> &friendEntries)
344 const bool hasEntryList = entryList.
GetN() > 0;
345 const bool usingLocalEntries = friendInfo.
fFriendNames.empty() && !hasEntryList;
346 const bool needNewChain =
347 fChain ==
nullptr || (usingLocalEntries && (fileNames[0] !=
fChain->GetListOfFiles()->At(0)->GetTitle() ||
348 treeNames[0] !=
fChain->GetListOfFiles()->At(0)->GetName()));
350 MakeChain(treeNames, fileNames, friendInfo, nEntries, friendEntries);
361 auto reader = std::make_unique<TTreeReader>(
fChain.get(),
fEntryList.get());
362 reader->SetEntriesRange(start, end);
376 std::vector<Internal::NameAlias> friendNames;
377 std::vector<std::vector<std::string>> friendFileNames;
385 const auto friends =
tree.GetListOfFriends();
389 for (
auto fr : *friends) {
391 const bool isChain = frTree->IsA() == TChain::Class();
393 friendFileNames.emplace_back();
394 auto &fileNames = friendFileNames.back();
397 const auto alias_c =
tree.GetFriendAlias(frTree);
398 const std::string alias = alias_c !=
nullptr ? alias_c :
"";
409 const auto chainFiles =
static_cast<TChain*
>(frTree)->GetListOfFiles();
410 const auto realName = chainFiles->First()->
GetName();
411 friendNames.emplace_back(std::make_pair(realName, alias));
413 for (
auto f : *chainFiles) {
414 fileNames.emplace_back(
f->GetTitle());
418 const auto realName = GetTreeFullPaths(*frTree)[0];
419 friendNames.emplace_back(std::make_pair(realName, alias));
422 const auto f = frTree->GetCurrentFile();
424 throw std::runtime_error(
"Friend trees with no associated file are not supported.");
425 fileNames.emplace_back(
f->GetName());
436 std::vector<std::string> treeNames;
439 throw std::runtime_error(
"Empty list of files and no tree name provided");
443 std::string treeName;
445 TIter next(
f->GetListOfKeys());
446 while (
auto *key =
static_cast<TKey *
>(next())) {
447 const char *className = key->GetClassName();
448 if (strcmp(className,
"TTree") == 0) {
449 treeName = key->GetName();
453 if (treeName.empty())
454 throw std::runtime_error(
"Cannot find any tree in file " + fname);
455 treeNames.emplace_back(std::move(treeName));
469 : fFileNames({std::string(filename)}),
470 fTreeNames(treename.empty() ? FindTreeNames() : std::vector<std::string>{std::string(treename)}), fFriendInfo(),
479 throw std::runtime_error(
"The provided list of file names is empty");
481 std::vector<std::string> strings;
482 strings.reserve(views.size());
483 for (
const auto &
v : views)
484 strings.emplace_back(
v);
502 fTreeNames(treename.empty() ? FindTreeNames()
503 : std::vector<std::string>(fFileNames.size(), std::string(treename))),
504 fFriendInfo(), fPool(nThreads)
511 std::vector<std::string> filenames;
513 const bool isChain =
tree.IsA() == TChain::Class();
518 throw std::runtime_error(
"The provided chain of files is empty");
519 filenames.reserve(nFiles);
520 for (
auto f : *filelist)
521 filenames.emplace_back(
f->GetTitle());
525 const auto msg =
"The specified TTree is not linked to any file, in-memory-only trees are not supported.";
526 throw std::runtime_error(msg);
529 filenames.emplace_back(
f->GetName());
543 fFriendInfo(GetFriendInfo(
tree)), fPool(nThreads)
580 const unsigned int maxTasksPerFile =
588 const bool hasFriends = !friendNames.empty();
590 const bool shouldRetrieveAllClusters = hasFriends || hasEntryList;
591 ClustersAndEntries clusterAndEntries{};
592 if (shouldRetrieveAllClusters) {
595 clusterAndEntries.first = ConvertToElistClusters(std::move(clusterAndEntries.first),
fEntryList,
fTreeNames,
599 const auto &clusters = clusterAndEntries.first;
600 const auto &entries = clusterAndEntries.second;
603 const auto friendEntries =
604 hasFriends ? GetFriendEntries(friendNames, friendFileNames) : std::vector<std::vector<Long64_t>>{};
608 auto processFile = [&](std::size_t fileIdx) {
610 const auto &theseFiles = shouldRetrieveAllClusters ?
fFileNames : std::vector<std::string>({
fFileNames[fileIdx]});
612 const auto &theseTrees = shouldRetrieveAllClusters ?
fTreeNames : std::vector<std::string>({
fTreeNames[fileIdx]});
614 const auto theseClustersAndEntries =
615 shouldRetrieveAllClusters ? ClustersAndEntries{} : MakeClusters(theseTrees, theseFiles, maxTasksPerFile);
618 const auto &thisFileClusters = shouldRetrieveAllClusters ? clusters[fileIdx] : theseClustersAndEntries.first[0];
621 const auto &theseEntries =
622 shouldRetrieveAllClusters ? entries : std::vector<Long64_t>({theseClustersAndEntries.second[0]});
624 auto processCluster = [&](
const EntryCluster &
c) {
626 theseEntries, friendEntries);
633 std::vector<std::size_t> fileIdxs(
fFileNames.size());
634 std::iota(fileIdxs.begin(), fileIdxs.end(), 0u);
std::vector< std::string > GetFilesFromTree(TTree &tree)
std::vector< std::string > CheckAndConvert(const std::vector< std::string_view > &views)
std::unique_ptr< TChain > fChain
Chain on which to operate.
std::unique_ptr< TTreeReader > GetTreeReader(Long64_t start, Long64_t end, const std::vector< std::string > &treeName, const std::vector< std::string > &fileNames, const FriendInfo &friendInfo, const TEntryList &entryList, const std::vector< Long64_t > &nEntries, const std::vector< std::vector< Long64_t > > &friendEntries)
Get a TTreeReader for the current tree of this view.
std::vector< std::unique_ptr< TChain > > fFriends
Friends of the tree/chain, if present.
std::unique_ptr< TEntryList > fEntryList
TEntryList for fChain, if present.
void MakeChain(const std::vector< std::string > &treeName, const std::vector< std::string > &fileNames, const FriendInfo &friendInfo, const std::vector< Long64_t > &nEntries, const std::vector< std::vector< Long64_t > > &friendEntries)
Construct fChain, also adding friends if needed and injecting knowledge of offsets if available.
unsigned GetPoolSize() const
Returns the number of worker threads in the task arena.
void Foreach(F func, unsigned nTimes, unsigned nChunks=0)
Execute a function without arguments several times in parallel, dividing the execution in nChunks.
A class to process the entries of a TTree in parallel.
const std::vector< std::string > fTreeNames
TTree names (always same size and ordering as fFileNames)
static unsigned int GetMaxTasksPerFilePerWorker()
This function is deprecated in favor of GetTasksPerWorkerHint().
std::vector< std::string > FindTreeNames()
Retrieve the names of the TTrees in each of the input files, throw if a TTree cannot be found.
const std::vector< std::string > fFileNames
Names of the files.
static unsigned int fgTasksPerWorkerHint
static void SetMaxTasksPerFilePerWorker(unsigned int m)
This function is deprecated in favor of SetTasksPerWorkerHint().
Internal::FriendInfo GetFriendInfo(TTree &tree)
Get and store the names, aliases and file names of the friends of the tree.
static unsigned int fgMaxTasksPerFilePerWorker
ROOT::TThreadExecutor fPool
Thread pool for processing.
TEntryList fEntryList
User-defined selection of entry numbers to be processed, empty if none was provided.
static void SetTasksPerWorkerHint(unsigned int m)
Set the hint for the desired number of tasks created per worker.
ROOT::TThreadedObject< ROOT::Internal::TTreeView > fTreeView
Thread-local TreeViews.
TTreeProcessorMT(std::string_view filename, std::string_view treename="", UInt_t nThreads=0u)
Constructor based on a file name.
void Process(std::function< void(TTreeReader &)> func)
Process the entries of a TTree in parallel.
const Internal::FriendInfo fFriendInfo
static unsigned int GetTasksPerWorkerHint()
Retrieve the current value for the desired number of tasks per worker.
A chain is a collection of files containing TTree objects.
TObjArray * GetListOfFiles() const
Small helper to keep current directory context.
A List of entry numbers in a TTree or TChain.
virtual TList * GetLists() const
virtual Long64_t GetEntry(Int_t index)
Return the number of the entry #index of this TEntryList in the TTree or TChain. See also Next().
virtual Long64_t GetN() const
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
A TFriendElement TF describes a TTree object TF in a file.
Book space in a file, create I/O buffers, to fill them, (un)compress them.
virtual void Add(TObject *obj)
virtual const char * GetName() const
Returns name of object.
Int_t GetEntries() const
Return the number of objects in array (i.e. number of non-empty slots).
Mother of all ROOT objects.
kCanDelete
If object in a list can be deleted.
kMustCleanup
If object destructor must call RecursiveRemove().
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree, TChain or TNtuple.
A TTree represents a columnar dataset.
virtual Long64_t GetEntries() const
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow forward declaring tbb::task_arena without forward declaring tbb::interface7.
void EnableThreadSafety()
Enables the global mutex to make ROOT thread safe/aware.
std::vector< std::vector< std::string > > fFriendFileNames
Names of the files where each friend is stored.
std::vector< Internal::NameAlias > fFriendNames
Pairs of names and aliases of friend trees/chains.