41using ClustersAndEntries = std::pair<std::vector<std::vector<EntryCluster>>, std::vector<Long64_t>>;
47 const auto nFileNames = fileNames.size();
48 std::vector<std::vector<EntryCluster>> clustersPerFile;
49 std::vector<Long64_t> entriesPerFile; entriesPerFile.reserve(nFileNames);
51 for (
const auto &fileName : fileNames) {
52 auto fileNameC = fileName.c_str();
54 if (!
f ||
f->IsZombie()) {
55 Error(
"TTreeProcessorMT::Process",
56 "An error occurred while opening file %s: skipping it.",
58 clustersPerFile.emplace_back(std::vector<EntryCluster>());
59 entriesPerFile.emplace_back(0ULL);
63 f->GetObject(treeName.c_str(), t);
66 Error(
"TTreeProcessorMT::Process",
67 "An error occurred while getting tree %s from file %s: skipping this file.",
68 treeName.c_str(), fileNameC);
69 clustersPerFile.emplace_back(std::vector<EntryCluster>());
70 entriesPerFile.emplace_back(0ULL);
74 auto clusterIter = t->GetClusterIterator(0);
76 const Long64_t entries = t->GetEntries();
78 std::vector<EntryCluster> clusters;
79 while ((start = clusterIter()) < entries) {
80 end = clusterIter.GetNextEntry();
82 clusters.emplace_back(
EntryCluster{start + offset, end + offset});
85 clustersPerFile.emplace_back(std::move(clusters));
86 entriesPerFile.emplace_back(entries);
101 std::vector<std::vector<EntryCluster>> eventRangesPerFile(clustersPerFile.size());
102 auto clustersPerFileIt = clustersPerFile.begin();
103 auto eventRangesPerFileIt = eventRangesPerFile.begin();
104 for (; clustersPerFileIt != clustersPerFile.end(); clustersPerFileIt++, eventRangesPerFileIt++) {
105 const auto clustersInThisFileSize = clustersPerFileIt->size();
106 const auto nFolds = clustersInThisFileSize / maxTasksPerFile;
110 std::for_each(clustersPerFileIt->begin(), clustersPerFileIt->end(),
111 [&eventRangesPerFileIt](
const EntryCluster &clust) { eventRangesPerFileIt->emplace_back(clust); });
116 auto nReminderClusters = clustersInThisFileSize % maxTasksPerFile;
117 const auto clustersInThisFile = *clustersPerFileIt;
118 for(
auto i = 0ULL; i < (clustersInThisFileSize-1); ++i) {
119 const auto start = clustersInThisFile[i].start;
124 if (nReminderClusters > 0) {
128 const auto end = clustersInThisFile[i].end;
129 eventRangesPerFileIt->emplace_back(
EntryCluster({start, end}));
133 return std::make_pair(std::move(eventRangesPerFile), std::move(entriesPerFile));
138static std::vector<std::vector<Long64_t>>
GetFriendEntries(
const std::vector<std::pair<std::string, std::string>> &friendNames,
139 const std::vector<std::vector<std::string>> &friendFileNames)
141 std::vector<std::vector<Long64_t>> friendEntries;
142 const auto nFriends = friendNames.size();
143 for (
auto i = 0u; i < nFriends; ++i) {
144 std::vector<Long64_t> nEntries;
145 const auto &thisFriendName = friendNames[i].first;
146 const auto &thisFriendFiles = friendFileNames[i];
147 for (
const auto &fname : thisFriendFiles) {
150 f->GetObject(thisFriendName.c_str(), t);
151 nEntries.emplace_back(t->GetEntries());
153 friendEntries.emplace_back(std::move(nEntries));
156 return friendEntries;
164 if (0 == strcmp(
"TChain",
tree.ClassName())) {
165 auto &chain =
dynamic_cast<const TChain&
>(
tree);
167 if (files && 0 != files->GetEntries()) {
173 if (
auto motherDir =
tree.GetDirectory()) {
174 std::string fullPath(motherDir->GetPath());
176 fullPath +=
tree.GetName();
181 return tree.GetName();
194 std::vector<Internal::NameAlias> friendNames;
195 std::vector<std::vector<std::string>> friendFileNames;
197 const auto friends =
tree.GetListOfFriends();
201 for (
auto fr : *friends) {
205 const auto realName = frTree->
GetName();
206 const auto alias =
tree.GetFriendAlias(frTree);
208 friendNames.emplace_back(std::make_pair(realName, std::string(alias)));
210 friendNames.emplace_back(std::make_pair(realName,
""));
214 friendFileNames.emplace_back();
215 auto &fileNames = friendFileNames.back();
218 const auto frChain =
static_cast<TChain *
>(frTree);
219 for (
auto f : *(frChain->GetListOfFiles())) {
220 fileNames.emplace_back(
f->GetTitle());
223 const auto f = frTree->GetCurrentFile();
225 throw std::runtime_error(
"Friend trees with no associated file are not supported.");
226 fileNames.emplace_back(
f->GetName());
237 std::string treeName;
240 throw std::runtime_error(
"Empty list of files and no tree name provided");
244 TIter next(
f->GetListOfKeys());
245 while (
TKey *key = (
TKey *)next()) {
246 const char *className = key->GetClassName();
247 if (strcmp(className,
"TTree") == 0) {
248 treeName = key->GetName();
252 if (treeName.empty())
253 throw std::runtime_error(
"Cannot find any tree in file " +
fFileNames[0]);
265 : fFileNames({std::string(filename)}), fTreeName(treename.empty() ? FindTreeName() : treename), fFriendInfo() {}
270 throw std::runtime_error(
"The provided list of file names is empty");
272 std::vector<std::string> strings;
273 strings.reserve(views.size());
274 for (
const auto &
v : views)
275 strings.emplace_back(
v);
286 : fFileNames(
CheckAndConvert(filenames)), fTreeName(treename.empty() ? FindTreeName() : treename), fFriendInfo() {}
290 std::vector<std::string> filenames;
297 throw std::runtime_error(
"The provided chain of files is empty");
298 filenames.reserve(nFiles);
299 for (
auto f : *filelist)
300 filenames.emplace_back(
f->GetTitle());
302 TFile *
f =
tree.GetCurrentFile();
304 const auto msg =
"The specified TTree is not linked to any file, in-memory-only trees are not supported.";
305 throw std::runtime_error(msg);
308 filenames.emplace_back(
f->GetName());
320 fFriendInfo(GetFriendInfo(
tree)) {}
351 const bool hasFriends = !friendNames.empty();
353 const bool shouldRetrieveAllClusters = hasFriends || hasEntryList;
354 const auto clustersAndEntries =
356 const auto &clusters = clustersAndEntries.first;
357 const auto &entries = clustersAndEntries.second;
360 const auto friendEntries =
366 auto processFile = [&](std::size_t fileIdx) {
368 const auto &theseFiles = shouldRetrieveAllClusters ?
fFileNames : std::vector<std::string>({
fFileNames[fileIdx]});
370 const auto theseClustersAndEntries =
374 const auto &thisFileClusters = shouldRetrieveAllClusters ? clusters[fileIdx] : theseClustersAndEntries.first[0];
377 const auto &theseEntries =
378 shouldRetrieveAllClusters ? entries : std::vector<Long64_t>({theseClustersAndEntries.second[0]});
381 std::unique_ptr<TTreeReader>
reader;
382 std::unique_ptr<TEntryList> elist;
388 pool.
Foreach(processCluster, thisFileClusters);
391 std::vector<std::size_t> fileIdxs(
fFileNames.size());
392 std::iota(fileIdxs.begin(), fileIdxs.end(), 0u);
397 pool.
Foreach(processFile, fileIdxs);
void Error(const char *location, const char *msgfmt,...)
std::vector< std::string > GetFilesFromTree(TTree &tree)
std::vector< std::string > CheckAndConvert(const std::vector< std::string_view > &views)
This class provides a simple interface to execute the same task multiple times in parallel,...
void Foreach(F func, unsigned nTimes, unsigned nChunks=0)
Execute func (with no arguments) nTimes in parallel.
A class to process the entries of a TTree in parallel.
static unsigned int GetMaxTasksPerFilePerWorker()
Returns the maximum number of tasks created per file, per worker.
const std::string fTreeName
Name of the tree.
const std::vector< std::string > fFileNames
Names of the files.
static void SetMaxTasksPerFilePerWorker(unsigned int m)
Sets the maximum number of tasks created per file, per worker.
static unsigned int fgMaxTasksPerFilePerWorker
TTreeProcessorMT(std::string_view filename, std::string_view treename="")
Constructor based on a file name.
const TEntryList fEntryList
User-defined selection of entry numbers to be processed, empty if none was provided.
void Process(std::function< void(TTreeReader &)> func)
Process the entries of a TTree in parallel.
ROOT::TThreadedObject< ROOT::Internal::TTreeView > treeView
! Thread-local TreeViews
const Internal::FriendInfo fFriendInfo
std::string FindTreeName()
Retrieve the name of the first TTree in the first input file, else throw.
A chain is a collection of files containing TTree objects.
TObjArray * GetListOfFiles() const
A List of entry numbers in a TTree or TChain.
virtual Long64_t GetN() const
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseGeneralPurpose, Int_t netopt=0)
Create / open a file.
A TFriendElement TF describes a TTree object TF in a file.
Book space in a file, create I/O buffers, fill them, and (un)compress them.
virtual const char * GetName() const
Returns name of object.
Int_t GetEntries() const
Return the number of objects in array (i.e.
TObject * At(Int_t idx) const
virtual const char * GetName() const
Returns name of object.
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
std::pair< std::vector< std::vector< EntryCluster > >, std::vector< Long64_t > > ClustersAndEntries
Return a vector of cluster boundaries for the given tree and files.
static std::vector< std::vector< Long64_t > > GetFriendEntries(const std::vector< std::pair< std::string, std::string > > &friendNames, const std::vector< std::vector< std::string > > &friendFileNames)
Return a vector containing the number of entries of each file of each friend TChain.
static ClustersAndEntries MakeClusters(const std::string &treeName, const std::vector< std::string > &fileNames)
static std::string GetTreeFullPath(const TTree &tree)
Return the full path of the tree.
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
Namespace for new ROOT classes and functions.
UInt_t GetImplicitMTPoolSize()
Returns the size of the pool used for implicit multi-threading.
basic_string_view< char > string_view
std::vector< std::vector< std::string > > fFriendFileNames
Names of the files where each friend is stored.
std::vector< Internal::NameAlias > fFriendNames
Pairs of names and aliases of friend trees/chains.