47void TTreeView::MakeChain(
const std::vector<std::string> &treeNames,
const std::vector<std::string> &fileNames,
48 const FriendInfo &friendInfo,
const std::vector<Long64_t> &nEntries,
49 const std::vector<std::vector<Long64_t>> &friendEntries)
51 const std::vector<NameAlias> &friendNames = friendInfo.
fFriendNames;
52 const std::vector<std::vector<std::string>> &friendFileNames = friendInfo.
fFriendFileNames;
55 const auto nFiles = fileNames.size();
56 for (
auto i = 0u; i < nFiles; ++i) {
57 fChain->Add((fileNames[i] +
"/" + treeNames[i]).c_str(), nEntries[i]);
62 const auto nFriends = friendNames.size();
63 for (
auto i = 0u; i < nFriends; ++i) {
64 const auto &friendName = friendNames[i];
65 const auto &
name = friendName.first;
66 const auto &alias = friendName.second;
69 auto frChain = std::make_unique<TChain>(
name.c_str());
70 const auto nFileNames = friendFileNames[i].size();
71 for (
auto j = 0u; j < nFileNames; ++j)
72 frChain->Add(friendFileNames[i][j].c_str(), friendEntries[i][j]);
75 fChain->AddFriend(frChain.get(), alias.c_str());
76 fFriends.emplace_back(std::move(frChain));
86 std::vector<TEntryList*> globalEntryLists;
87 auto innerLists = globalList.
GetLists();
89 if (globalList.
GetN()) {
90 globalEntryLists.emplace_back(&globalList);
93 for (
auto lp : *innerLists) {
94 auto lpAsTEntryList =
static_cast<TEntryList *
>(lp);
95 if (lpAsTEntryList->GetN()) {
96 globalEntryLists.emplace_back(lpAsTEntryList);
101 auto localList = std::make_unique<TEntryList>();
103 for (
auto gl : globalEntryLists) {
107 auto tmp_list =
new TEntryList(gl->GetName(), gl->GetTitle(), gl->GetFileName(), gl->GetTreeName());
112 }
else if (entry >= start) {
113 tmp_list->Enter(entry);
115 }
while ((entry = gl->Next()) >= 0);
117 if (tmp_list->GetN() > 0) {
118 localList->Add(tmp_list);
124 auto reader = std::make_unique<TTreeReader>(
fChain.get(), localList.get());
125 return std::make_pair(std::move(reader), std::move(localList));
130 auto reader = std::make_unique<TTreeReader>(
fChain.get());
131 reader->SetEntriesRange(start, end);
140 const std::vector<Long64_t> &nEntries,
const std::vector<std::vector<Long64_t>> &friendEntries)
142 const bool usingLocalEntries = friendInfo.
fFriendNames.empty() && entryList.
GetN() == 0;
143 if (
fChain ==
nullptr || (usingLocalEntries && fileNames[0] !=
fChain->GetListOfFiles()->At(0)->GetTitle()))
144 MakeChain(treeNames, fileNames, friendInfo, nEntries, friendEntries);
146 std::unique_ptr<TTreeReader> reader;
147 std::unique_ptr<TEntryList> localList;
148 if (entryList.
GetN() > 0) {
155 return std::make_pair(std::move(reader), std::move(localList));
163MakeClusters(
const std::vector<std::string> &treeNames,
const std::vector<std::string> &fileNames)
168 const auto nFileNames = fileNames.size();
169 std::vector<std::vector<EntryCluster>> clustersPerFile;
170 std::vector<Long64_t> entriesPerFile;
171 entriesPerFile.reserve(nFileNames);
173 for (
auto i = 0u; i < nFileNames; ++i) {
174 const auto &fileName = fileNames[i];
175 const auto &treeName = treeNames[i];
177 std::unique_ptr<TFile>
f(
TFile::Open(fileName.c_str()));
178 if (!
f ||
f->IsZombie()) {
179 Error(
"TTreeProcessorMT::Process",
"An error occurred while opening file %s: skipping it.", fileName.c_str());
180 clustersPerFile.emplace_back(std::vector<EntryCluster>());
181 entriesPerFile.emplace_back(0ULL);
185 f->GetObject(treeName.c_str(), t);
188 Error(
"TTreeProcessorMT::Process",
"An error occurred while getting tree %s from file %s: skipping this file.",
189 treeName.c_str(), fileName.c_str());
190 clustersPerFile.emplace_back(std::vector<EntryCluster>());
191 entriesPerFile.emplace_back(0ULL);
199 std::vector<EntryCluster> clusters;
200 while ((start = clusterIter()) < entries) {
201 end = clusterIter.GetNextEntry();
203 clusters.emplace_back(EntryCluster{start + offset, end + offset});
206 clustersPerFile.emplace_back(std::move(clusters));
207 entriesPerFile.emplace_back(entries);
222 std::vector<std::vector<EntryCluster>> eventRangesPerFile(clustersPerFile.size());
223 auto clustersPerFileIt = clustersPerFile.begin();
224 auto eventRangesPerFileIt = eventRangesPerFile.begin();
225 for (; clustersPerFileIt != clustersPerFile.end(); clustersPerFileIt++, eventRangesPerFileIt++) {
226 const auto clustersInThisFileSize = clustersPerFileIt->size();
227 const auto nFolds = clustersInThisFileSize / maxTasksPerFile;
232 clustersPerFileIt->begin(), clustersPerFileIt->end(),
233 [&eventRangesPerFileIt](
const EntryCluster &clust) { eventRangesPerFileIt->emplace_back(clust); });
238 auto nReminderClusters = clustersInThisFileSize % maxTasksPerFile;
239 const auto clustersInThisFile = *clustersPerFileIt;
240 for (
auto i = 0ULL; i < clustersInThisFileSize; ++i) {
241 const auto start = clustersInThisFile[i].start;
246 if (nReminderClusters > 0) {
250 const auto end = clustersInThisFile[i].end;
251 eventRangesPerFileIt->emplace_back(EntryCluster({start, end}));
255 return std::make_pair(std::move(eventRangesPerFile), std::move(entriesPerFile));
260static std::vector<std::vector<Long64_t>>
262 const std::vector<std::vector<std::string>> &friendFileNames)
264 std::vector<std::vector<Long64_t>> friendEntries;
265 const auto nFriends = friendNames.size();
266 for (
auto i = 0u; i < nFriends; ++i) {
267 std::vector<Long64_t> nEntries;
268 const auto &thisFriendName = friendNames[i].first;
269 const auto &thisFriendFiles = friendFileNames[i];
270 for (
const auto &fname : thisFriendFiles) {
273 f->GetObject(thisFriendName.c_str(), t);
276 friendEntries.emplace_back(std::move(nEntries));
279 return friendEntries;
288 auto &chain =
static_cast<const TChain &
>(
tree);
290 if (!files || files->GetEntries() == 0) {
291 throw std::runtime_error(
"TTreeProcessorMT: input TChain does not contain any file");
293 std::vector<std::string> treeNames;
295 treeNames.emplace_back(
f->GetName());
301 if (
auto motherDir =
tree.GetDirectory()) {
307 if (motherDir->InheritsFrom(
"TFile")) {
308 return {
tree.GetName()};
310 std::string fullPath(motherDir->GetPath());
312 fullPath +=
tree.GetName();
317 return {
tree.GetName()};
330 std::vector<Internal::NameAlias> friendNames;
331 std::vector<std::vector<std::string>> friendFileNames;
333 const auto friends =
tree.GetListOfFriends();
337 for (
auto fr : *friends) {
341 const auto realName = frTree->
GetName();
342 const auto alias =
tree.GetFriendAlias(frTree);
344 friendNames.emplace_back(std::make_pair(realName, std::string(alias)));
346 friendNames.emplace_back(std::make_pair(realName,
""));
350 friendFileNames.emplace_back();
351 auto &fileNames = friendFileNames.back();
354 const auto frChain =
static_cast<TChain *
>(frTree);
355 for (
auto f : *(frChain->GetListOfFiles())) {
356 fileNames.emplace_back(
f->GetTitle());
359 const auto f = frTree->GetCurrentFile();
361 throw std::runtime_error(
"Friend trees with no associated file are not supported.");
362 fileNames.emplace_back(
f->GetName());
373 std::vector<std::string> treeNames;
376 throw std::runtime_error(
"Empty list of files and no tree name provided");
380 std::string treeName;
382 TIter next(
f->GetListOfKeys());
383 while (
auto *key =
static_cast<TKey *
>(next())) {
384 const char *className = key->GetClassName();
385 if (strcmp(className,
"TTree") == 0) {
386 treeName = key->GetName();
390 if (treeName.empty())
391 throw std::runtime_error(
"Cannot find any tree in file " + fname);
392 treeNames.emplace_back(std::move(treeName));
404 : fFileNames({std::string(filename)}),
405 fTreeNames(treename.empty() ? FindTreeNames() : std::vector<std::string>{std::string(treename)}), fFriendInfo()
412 throw std::runtime_error(
"The provided list of file names is empty");
414 std::vector<std::string> strings;
415 strings.reserve(views.size());
416 for (
const auto &
v : views)
417 strings.emplace_back(
v);
432 fTreeNames(treename.empty() ? FindTreeNames()
433 : std::vector<std::string>(fFileNames.size(), std::string(treename))),
440 std::vector<std::string> filenames;
447 throw std::runtime_error(
"The provided chain of files is empty");
448 filenames.reserve(nFiles);
449 for (
auto f : *filelist)
450 filenames.emplace_back(
f->GetTitle());
454 const auto msg =
"The specified TTree is not linked to any file, in-memory-only trees are not supported.";
455 throw std::runtime_error(msg);
458 filenames.emplace_back(
f->GetName());
470 fFriendInfo(GetFriendInfo(
tree))
503 const bool hasFriends = !friendNames.empty();
505 const bool shouldRetrieveAllClusters = hasFriends || hasEntryList;
506 const auto clustersAndEntries =
508 const auto &clusters = clustersAndEntries.first;
509 const auto &entries = clustersAndEntries.second;
512 const auto friendEntries =
517 using Internal::EntryCluster;
518 auto processFile = [&](std::size_t fileIdx) {
520 const auto &theseFiles = shouldRetrieveAllClusters ?
fFileNames : std::vector<std::string>({
fFileNames[fileIdx]});
522 const auto &theseTrees = shouldRetrieveAllClusters ?
fTreeNames : std::vector<std::string>({
fTreeNames[fileIdx]});
524 const auto theseClustersAndEntries =
528 const auto &thisFileClusters = shouldRetrieveAllClusters ? clusters[fileIdx] : theseClustersAndEntries.first[0];
531 const auto &theseEntries =
532 shouldRetrieveAllClusters ? entries : std::vector<Long64_t>({theseClustersAndEntries.second[0]});
534 auto processCluster = [&](
const Internal::EntryCluster &
c) {
535 std::unique_ptr<TTreeReader> reader;
536 std::unique_ptr<TEntryList> elist;
542 pool.
Foreach(processCluster, thisFileClusters);
545 std::vector<std::size_t> fileIdxs(
fFileNames.size());
546 std::iota(fileIdxs.begin(), fileIdxs.end(), 0u);
551 pool.
Foreach(processFile, fileIdxs);
void Error(const char *location, const char *msgfmt,...)
std::vector< std::string > GetFilesFromTree(TTree &tree)
std::vector< std::string > CheckAndConvert(const std::vector< std::string_view > &views)
std::pair< std::unique_ptr< TTreeReader >, std::unique_ptr< TEntryList > > TreeReaderEntryListPair
std::unique_ptr< TChain > fChain
Chain on which to operate.
std::vector< std::unique_ptr< TChain > > fFriends
Friends of the tree/chain.
void MakeChain(const std::vector< std::string > &treeName, const std::vector< std::string > &fileNames, const FriendInfo &friendInfo, const std::vector< Long64_t > &nEntries, const std::vector< std::vector< Long64_t > > &friendEntries)
Construct fChain, also adding friends if needed and injecting knowledge of offsets if available.
std::unique_ptr< TTreeReader > MakeReader(Long64_t start, Long64_t end)
TreeReaderEntryListPair MakeReaderWithEntryList(TEntryList &globalList, Long64_t start, Long64_t end)
TreeReaderEntryListPair GetTreeReader(Long64_t start, Long64_t end, const std::vector< std::string > &treeName, const std::vector< std::string > &fileNames, const FriendInfo &friendInfo, TEntryList entryList, const std::vector< Long64_t > &nEntries, const std::vector< std::vector< Long64_t > > &friendEntries)
Get a TTreeReader for the current tree of this view.
This class provides a simple interface to execute the same task multiple times in parallel,...
void Foreach(F func, unsigned nTimes, unsigned nChunks=0)
Execute func (with no arguments) nTimes in parallel.
A class to process the entries of a TTree in parallel.
const std::vector< std::string > fTreeNames
TTree names (always same size and ordering as fFileNames)
static unsigned int GetMaxTasksPerFilePerWorker()
Returns the maximum number of tasks created per file, per worker.
std::vector< std::string > FindTreeNames()
Retrieve the names of the TTrees in each of the input files, throw if a TTree cannot be found.
const std::vector< std::string > fFileNames
Names of the files.
static void SetMaxTasksPerFilePerWorker(unsigned int m)
Sets the maximum number of tasks created per file, per worker.
static unsigned int fgMaxTasksPerFilePerWorker
TTreeProcessorMT(std::string_view filename, std::string_view treename="")
Constructor based on a file name.
const TEntryList fEntryList
User-defined selection of entry numbers to be processed, empty if none was provided.
ROOT::TThreadedObject< ROOT::Internal::TTreeView > fTreeView
Thread-local TreeViews (transient member, not persisted).
void Process(std::function< void(TTreeReader &)> func)
Process the entries of a TTree in parallel.
const Internal::FriendInfo fFriendInfo
A chain is a collection of files containing TTree objects.
TObjArray * GetListOfFiles() const
Small helper to keep current directory context.
A List of entry numbers in a TTree or TChain.
virtual TList * GetLists() const
virtual Long64_t GetN() const
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
A TFriendElement TF describes a TTree object TF in a file.
Book space in a file, create I/O buffers, to fill them, (un)compress them.
virtual const char * GetName() const
Returns name of object.
Int_t GetEntries() const
Return the number of objects in array (i.e. number of non-empty slots).
Mother of all ROOT objects.
@ kMustCleanup
if object destructor must call RecursiveRemove()
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
A TTree represents a columnar dataset.
virtual TClusterIterator GetClusterIterator(Long64_t firstentry)
Return an iterator over the cluster of baskets starting at firstentry.
virtual Long64_t GetEntries() const
basic_string_view< char > string_view
static std::vector< std::string > GetTreeFullPaths(const TTree &tree)
Return the full path of the TTree or the trees in the TChain.
std::pair< std::vector< std::vector< EntryCluster > >, std::vector< Long64_t > > ClustersAndEntries
Return a vector of cluster boundaries for the given tree and files.
static std::vector< std::vector< Long64_t > > GetFriendEntries(const std::vector< std::pair< std::string, std::string > > &friendNames, const std::vector< std::vector< std::string > > &friendFileNames)
Return a vector containing the number of entries of each file of each friend TChain.
static ClustersAndEntries MakeClusters(const std::vector< std::string > &treeNames, const std::vector< std::string > &fileNames)
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
UInt_t GetImplicitMTPoolSize()
Returns the size of the pool used for implicit multi-threading.
std::vector< std::vector< std::string > > fFriendFileNames
Names of the files where each friend is stored.
std::vector< Internal::NameAlias > fFriendNames
Pairs of names and aliases of friend trees/chains.