41static bool ClustersAreSortedAndContiguous(
const std::vector<std::vector<EntryCluster>> &cls)
44 for (
const auto &fcl : cls) {
45 for (
const auto &
c : fcl) {
46 if (last_end !=
c.start)
61static std::vector<std::vector<EntryCluster>>
62ConvertToElistClusters(std::vector<std::vector<EntryCluster>> &&clusters,
TEntryList &entryList,
63 const std::vector<std::string> &treeNames,
const std::vector<std::string> &fileNames,
64 const std::vector<Long64_t> &entriesPerFile)
67 R__ASSERT(ClustersAreSortedAndContiguous(clusters));
69 const bool listHasGlobalEntryNumbers = entryList.
GetLists() ==
nullptr;
70 const auto nFiles = clusters.size();
72 std::unique_ptr<TChain> chain;
77 if (listHasGlobalEntryNumbers) {
85 for (
auto i = 0u; i < nFiles; ++i)
86 chain->Add((fileNames[i] +
"?#" + treeNames[i]).c_str(), entriesPerFile[i]);
90 Long64_t localEntry = elist.GetEntryAndTree(elEntry, treenum);
91 if (localEntry == -1ll)
93 return localEntry + ch->GetTreeOffset()[treenum];
102 std::vector<std::vector<EntryCluster>> elistClusters;
104 for (
auto fileN = 0u; fileN < nFiles; ++fileN) {
105 std::vector<EntryCluster> elistClustersForFile;
106 for (
const auto &
c : clusters[fileN]) {
107 if (entry >=
c.end || entry == -1ll)
110 const Long64_t elistRangeStart = elistEntry;
112 while (entry <
c.end && entry != -1ll)
113 entry = Next(elistEntry, entryList, chain.get());
114 elistClustersForFile.emplace_back(EntryCluster{elistRangeStart, elistEntry});
116 elistClusters.emplace_back(std::move(elistClustersForFile));
119 R__ASSERT(elistClusters.size() == clusters.size());
120 R__ASSERT(ClustersAreSortedAndContiguous(elistClusters));
123 return elistClusters;
127using ClustersAndEntries = std::pair<std::vector<std::vector<EntryCluster>>, std::vector<Long64_t>>;
131static ClustersAndEntries MakeClusters(
const std::vector<std::string> &treeNames,
132 const std::vector<std::string> &fileNames,
const unsigned int maxTasksPerFile)
137 const auto nFileNames = fileNames.size();
138 std::vector<std::vector<EntryCluster>> clustersPerFile;
139 std::vector<Long64_t> entriesPerFile;
140 entriesPerFile.reserve(nFileNames);
142 for (
auto i = 0u; i < nFileNames; ++i) {
143 const auto &fileName = fileNames[i];
144 const auto &treeName = treeNames[i];
147 fileName.c_str(),
"READ_WITHOUT_GLOBALREGISTRATION"));
148 if (!
f ||
f->IsZombie()) {
149 const auto msg =
"TTreeProcessorMT::Process: an error occurred while opening file \"" + fileName +
"\"";
150 throw std::runtime_error(msg);
152 auto *t =
f->Get<
TTree>(treeName.c_str());
155 const auto msg =
"TTreeProcessorMT::Process: an error occurred while getting tree \"" + treeName +
156 "\" from file \"" + fileName +
"\"";
157 throw std::runtime_error(msg);
163 auto clusterIter = t->GetClusterIterator(0);
165 const Long64_t entries = t->GetEntries();
167 std::vector<EntryCluster> clusters;
168 while ((start = clusterIter()) < entries) {
169 end = clusterIter.GetNextEntry();
171 clusters.emplace_back(EntryCluster{start +
offset, end +
offset});
174 clustersPerFile.emplace_back(std::move(clusters));
175 entriesPerFile.emplace_back(entries);
193 std::vector<std::vector<EntryCluster>> eventRangesPerFile(clustersPerFile.size());
194 auto clustersPerFileIt = clustersPerFile.begin();
195 auto eventRangesPerFileIt = eventRangesPerFile.begin();
196 for (; clustersPerFileIt != clustersPerFile.end(); clustersPerFileIt++, eventRangesPerFileIt++) {
197 const auto clustersInThisFileSize = clustersPerFileIt->size();
198 const auto nFolds = clustersInThisFileSize / maxTasksPerFile;
202 *eventRangesPerFileIt = std::move(*clustersPerFileIt);
207 auto nReminderClusters = clustersInThisFileSize % maxTasksPerFile;
208 const auto &clustersInThisFile = *clustersPerFileIt;
209 for (
auto i = 0ULL; i < clustersInThisFileSize; ++i) {
210 const auto start = clustersInThisFile[i].start;
215 if (nReminderClusters > 0) {
219 const auto end = clustersInThisFile[i].end;
220 eventRangesPerFileIt->emplace_back(EntryCluster({start, end}));
224 return std::make_pair(std::move(eventRangesPerFile), std::move(entriesPerFile));
236 std::vector<std::vector<Long64_t>> friendEntries;
237 const auto nFriends = friendNames.size();
238 for (
auto i = 0u; i < nFriends; ++i) {
239 std::vector<Long64_t> nEntries;
240 const auto &thisFriendName = friendNames[i].first;
241 const auto &thisFriendFiles = friendFileNames[i];
242 const auto &thisFriendChainSubNames = friendChainSubNames[i];
247 if (!thisFriendChainSubNames.empty()) {
249 for (
auto fileidx = 0u; fileidx < thisFriendFiles.size(); ++fileidx) {
250 std::unique_ptr<TFile> curfile(
251 TFile::Open(thisFriendFiles[fileidx].c_str(),
"READ_WITHOUT_GLOBALREGISTRATION"));
252 if (!curfile || curfile->IsZombie())
253 throw std::runtime_error(
"TTreeProcessorMT::GetFriendEntries: Could not open file \"" +
254 thisFriendFiles[fileidx] +
"\"");
257 TTree *curtree = curfile->Get<
TTree>(thisFriendChainSubNames[fileidx].c_str());
259 throw std::runtime_error(
"TTreeProcessorMT::GetFriendEntries: Could not retrieve TTree \"" +
260 thisFriendChainSubNames[fileidx] +
"\" from file \"" +
261 thisFriendFiles[fileidx] +
"\"");
268 for (
const auto &fname : thisFriendFiles) {
269 std::unique_ptr<TFile>
f(
TFile::Open(fname.c_str(),
"READ_WITHOUT_GLOBALREGISTRATION"));
270 if (!
f ||
f->IsZombie())
271 throw std::runtime_error(
"TTreeProcessorMT::GetFriendEntries: Could not open file \"" + fname +
"\"");
274 throw std::runtime_error(
"TTreeProcessorMT::GetFriendEntries: Could not retrieve TTree \"" +
275 thisFriendName +
"\" from file \"" + fname +
"\"");
280 friendEntries.emplace_back(std::move(nEntries));
283 return friendEntries;
303 const std::vector<std::vector<Long64_t>> &friendEntries)
311 const auto nFiles = fileNames.size();
312 for (
auto i = 0u; i < nFiles; ++i) {
313 fChain->Add((fileNames[i] +
"?#" + treeNames[i]).c_str(), nEntries[i]);
319 const auto nFriends = friendNames.size();
320 for (
auto i = 0u; i < nFriends; ++i) {
321 const auto &thisFriendNameAlias = friendNames[i];
322 const auto &thisFriendName = thisFriendNameAlias.first;
323 const auto &thisFriendAlias = thisFriendNameAlias.second;
324 const auto &thisFriendFiles = friendFileNames[i];
325 const auto &thisFriendChainSubNames = friendChainSubNames[i];
326 const auto &thisFriendEntries = friendEntries[i];
330 const auto nFileNames = friendFileNames[i].size();
331 if (thisFriendChainSubNames.empty()) {
334 for (
auto j = 0u; j < nFileNames; ++j) {
335 frChain->Add(thisFriendFiles[j].c_str(), thisFriendEntries[j]);
340 for (
auto j = 0u; j < nFileNames; ++j) {
341 frChain->Add((thisFriendFiles[j] +
"?#" + thisFriendChainSubNames[j]).c_str(), thisFriendEntries[j]);
346 fChain->AddFriend(frChain.get(), thisFriendAlias.c_str());
347 fFriends.emplace_back(std::move(frChain));
353std::unique_ptr<TTreeReader>
356 const TEntryList &entryList,
const std::vector<Long64_t> &nEntries,
357 const std::vector<std::vector<Long64_t>> &friendEntries)
359 const bool hasEntryList = entryList.
GetN() > 0;
360 const bool usingLocalEntries = friendInfo.
fFriendNames.empty() && !hasEntryList;
361 const bool needNewChain =
362 fChain ==
nullptr || (usingLocalEntries && (fileNames[0] !=
fChain->GetListOfFiles()->At(0)->GetTitle() ||
363 treeNames[0] !=
fChain->GetListOfFiles()->At(0)->GetName()));
365 MakeChain(treeNames, fileNames, friendInfo, nEntries, friendEntries);
376 auto reader = std::make_unique<TTreeReader>(
fChain.get(),
fEntryList.get());
377 reader->SetEntriesRange(start, end);
395std::vector<std::string> TTreeProcessorMT::FindTreeNames()
397 std::vector<std::string> treeNames;
400 throw std::runtime_error(
"Empty list of files and no tree name provided");
404 std::string treeName;
406 TIter next(
f->GetListOfKeys());
407 while (
auto *key =
static_cast<TKey *
>(next())) {
408 const char *className = key->GetClassName();
409 if (strcmp(className,
"TTree") == 0) {
410 treeName = key->GetName();
414 if (treeName.empty())
415 throw std::runtime_error(
"Cannot find any tree in file " + fname);
416 treeNames.emplace_back(std::move(treeName));
430 : fFileNames({std::string(
filename)}),
431 fTreeNames(treename.empty() ? FindTreeNames() : std::vector<std::string>{std::string(treename)}), fFriendInfo(),
440 throw std::runtime_error(
"The provided list of file names is empty");
442 std::vector<std::string> strings;
443 strings.reserve(views.size());
444 for (
const auto &
v : views)
445 strings.emplace_back(
v);
463 fTreeNames(treename.empty() ? FindTreeNames()
464 : std::vector<std::string>(fFileNames.
size(), std::string(treename))),
465 fFriendInfo(), fPool(nThreads)
511 const unsigned int maxTasksPerFile =
521 const bool shouldRetrieveAllClusters = hasFriends || hasEntryList;
522 ClustersAndEntries allClusterAndEntries{};
523 auto &allClusters = allClusterAndEntries.first;
524 const auto &allEntries = allClusterAndEntries.second;
525 if (shouldRetrieveAllClusters) {
532 auto processFileUsingGlobalClusters = [&](std::size_t fileIdx) {
533 auto processCluster = [&](
const EntryCluster &
c) {
542 auto processFileRetrievingClusters = [&](std::size_t fileIdx) {
544 const auto &treeNames = std::vector<std::string>({
fTreeNames[fileIdx]});
545 const auto &fileNames = std::vector<std::string>({
fFileNames[fileIdx]});
546 const auto clustersAndEntries = MakeClusters(treeNames, fileNames, maxTasksPerFile);
547 const auto &clusters = clustersAndEntries.first[0];
548 const auto &entries = clustersAndEntries.second[0];
549 auto processCluster = [&](
const EntryCluster &
c) {
551 std::vector<std::vector<Long64_t>>{});
557 std::vector<std::size_t> fileIdxs(fFileNames.size());
558 std::iota(fileIdxs.begin(), fileIdxs.end(), 0u);
560 if (shouldRetrieveAllClusters)
561 fPool.Foreach(processFileUsingGlobalClusters, fileIdxs);
563 fPool.Foreach(processFileRetrievingClusters, fileIdxs);
566 for (
unsigned int islot = 0; islot < fTreeView.GetNSlots(); ++islot) {
568 if (view !=
nullptr) {
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char filename
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
std::vector< std::string > CheckAndConvert(const std::vector< std::string_view > &views)
std::unique_ptr< TChain > fChain
Chain on which to operate.
std::vector< std::unique_ptr< TChain > > fFriends
Friends of the tree/chain, if present.
std::unique_ptr< TEntryList > fEntryList
TEntryList for fChain, if present.
void Reset()
Clear the resources.
void MakeChain(const std::vector< std::string > &treeName, const std::vector< std::string > &fileNames, const TreeUtils::RFriendInfo &friendInfo, const std::vector< Long64_t > &nEntries, const std::vector< std::vector< Long64_t > > &friendEntries)
Construct fChain, also adding friends if needed and injecting knowledge of offsets if available.
std::unique_ptr< TTreeReader > GetTreeReader(Long64_t start, Long64_t end, const std::vector< std::string > &treeName, const std::vector< std::string > &fileNames, const TreeUtils::RFriendInfo &friendInfo, const TEntryList &entryList, const std::vector< Long64_t > &nEntries, const std::vector< std::vector< Long64_t > > &friendEntries)
Get a TTreeReader for the current tree of this view.
ROOT::Internal::TreeUtils::RNoCleanupNotifier fNoCleanupNotifier
void RegisterChain(TChain &c)
unsigned GetPoolSize() const
Returns the number of worker threads in the task arena.
void Foreach(F func, unsigned nTimes, unsigned nChunks=0)
Execute a function without arguments several times in parallel, dividing the execution in nChunks.
A class to process the entries of a TTree in parallel.
const std::vector< std::string > fTreeNames
TTree names (always same size and ordering as fFileNames)
const std::vector< std::string > fFileNames
Names of the files.
static unsigned int fgTasksPerWorkerHint
const Internal::TreeUtils::RFriendInfo fFriendInfo
ROOT::TThreadExecutor fPool
! Thread pool for processing.
TEntryList fEntryList
User-defined selection of entry numbers to be processed, empty if none was provided.
static void SetTasksPerWorkerHint(unsigned int m)
Set the hint for the desired number of tasks created per worker.
ROOT::TThreadedObject< ROOT::Internal::TTreeView > fTreeView
Thread-local TreeViews.
TTreeProcessorMT(std::string_view filename, std::string_view treename="", UInt_t nThreads=0u)
Constructor based on a file name.
void Process(std::function< void(TTreeReader &)> func)
Process the entries of a TTree in parallel.
static unsigned int GetTasksPerWorkerHint()
Retrieve the current value for the desired number of tasks per worker.
A chain is a collection of files containing TTree objects.
@ kWithoutGlobalRegistration
TDirectory::TContext keeps track of and restores the current directory.
A List of entry numbers in a TTree or TChain.
virtual TList * GetLists() const
virtual Long64_t GetEntry(Int_t index)
Return the number of the entry #index of this TEntryList in the TTree or TChain. See also Next().
virtual Long64_t GetN() const
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Book space in a file, create I/O buffers, fill them, and (un)compress them.
@ kCanDelete
if object in a list can be deleted
@ kMustCleanup
if object destructor must call RecursiveRemove()
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
A TTree represents a columnar dataset.
virtual Long64_t GetEntries() const
Different standalone functions to work with trees and tuples, not required to be a member of any cla...
std::vector< std::string > GetTreeFullPaths(const TTree &tree)
Retrieve the full path(s) to a TTree or the trees in a TChain.
RFriendInfo GetFriendInfo(const TTree &tree)
Get and store the names, aliases and file names of the direct friends of the tree.
std::vector< std::string > GetFileNamesFromTree(const TTree &tree)
Get and store the file names associated with the input tree.
RVec< PromoteType< T > > ceil(const RVec< T > &v)
basic_string_view< char > string_view
void ClearMustCleanupBits(TObjArray &arr)
Reset the kMustCleanup bit of a TObjArray of TBranch objects (e.g.
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
This file contains a specialised ROOT message handler to test for diagnostics in unit tests.
void EnableThreadSafety()
Enables the global mutex to make ROOT thread safe/aware.
Information about friend trees of a certain TTree or TChain object.
std::vector< NameAlias > fFriendNames
Pairs of names and aliases of friend trees/chains.
std::vector< std::vector< std::string > > fFriendFileNames
Names of the files where each friend is stored.
std::vector< std::vector< std::string > > fFriendChainSubNames
Names of the subtrees of a friend TChain.