34using EntryRange = std::pair<Long64_t, Long64_t>;
37static bool ClustersAreSortedAndContiguous(
const std::vector<std::vector<EntryRange>> &cls)
40 for (
const auto &fcl : cls) {
41 for (
const auto &
c : fcl) {
42 if (last_end !=
c.first)
57static std::vector<std::vector<EntryRange>>
58ConvertToElistClusters(std::vector<std::vector<EntryRange>> &&clusters,
TEntryList &entryList,
59 const std::vector<std::string> &treeNames,
const std::vector<std::string> &fileNames,
60 const std::vector<Long64_t> &entriesPerFile)
63 R__ASSERT(ClustersAreSortedAndContiguous(clusters));
65 const bool listHasGlobalEntryNumbers = entryList.
GetLists() ==
nullptr;
66 const auto nFiles = clusters.size();
68 std::unique_ptr<TChain> chain;
73 if (listHasGlobalEntryNumbers) {
81 for (
auto i = 0u; i < nFiles; ++i)
82 chain->Add((fileNames[i] +
"?#" + treeNames[i]).c_str(), entriesPerFile[i]);
86 Long64_t localEntry = elist.GetEntryAndTree(elEntry, treenum);
87 if (localEntry == -1ll)
89 return localEntry + ch->GetTreeOffset()[treenum];
98 std::vector<std::vector<EntryRange>> elistClusters;
100 for (
auto fileN = 0u; fileN < nFiles; ++fileN) {
101 std::vector<EntryRange> elistClustersForFile;
102 for (
const auto &
c : clusters[fileN]) {
103 if (entry >=
c.second || entry == -1ll)
106 const Long64_t elistRangeStart = elistEntry;
108 while (entry <
c.second && entry != -1ll)
109 entry = Next(elistEntry, entryList, chain.get());
110 elistClustersForFile.emplace_back(EntryRange{elistRangeStart, elistEntry});
112 elistClusters.emplace_back(std::move(elistClustersForFile));
115 R__ASSERT(elistClusters.size() == clusters.size());
116 R__ASSERT(ClustersAreSortedAndContiguous(elistClusters));
119 return elistClusters;
123using ClustersAndEntries = std::pair<std::vector<std::vector<EntryRange>>, std::vector<Long64_t>>;
127static ClustersAndEntries MakeClusters(
const std::vector<std::string> &treeNames,
128 const std::vector<std::string> &fileNames,
const unsigned int maxTasksPerFile,
129 const EntryRange &range = {0, std::numeric_limits<Long64_t>::max()})
134 const auto nFileNames = fileNames.size();
135 std::vector<std::vector<EntryRange>> clustersPerFile;
136 std::vector<Long64_t> entriesPerFile;
137 entriesPerFile.reserve(nFileNames);
139 bool rangeEndReached =
false;
140 for (
auto i = 0u; i < nFileNames && !rangeEndReached; ++i) {
141 const auto &fileName = fileNames[i];
142 const auto &treeName = treeNames[i];
145 fileName.c_str(),
"READ_WITHOUT_GLOBALREGISTRATION"));
146 if (!
f ||
f->IsZombie()) {
147 const auto msg =
"TTreeProcessorMT::Process: an error occurred while opening file \"" + fileName +
"\"";
148 throw std::runtime_error(msg);
150 auto *t =
f->Get<
TTree>(treeName.c_str());
153 const auto msg =
"TTreeProcessorMT::Process: an error occurred while getting tree \"" + treeName +
154 "\" from file \"" + fileName +
"\"";
155 throw std::runtime_error(msg);
161 auto clusterIter = t->GetClusterIterator(0);
162 Long64_t clusterStart = 0ll, clusterEnd = 0ll;
163 const Long64_t entries = t->GetEntries();
165 std::vector<EntryRange> entryRanges;
166 while ((clusterStart = clusterIter()) < entries && !rangeEndReached) {
167 clusterEnd = clusterIter.GetNextEntry();
177 const auto currentStart = std::max(clusterStart +
offset, range.first);
178 const auto currentEnd = std::min(clusterEnd +
offset, range.second);
181 if (currentStart < currentEnd)
182 entryRanges.emplace_back(EntryRange{currentStart, currentEnd});
183 if (currentEnd == range.second)
184 rangeEndReached =
true;
187 clustersPerFile.emplace_back(std::move(entryRanges));
189 entriesPerFile.emplace_back(entries);
192 throw std::logic_error(std::string(
"A range of entries was passed in the creation of the TTreeProcessorMT, ") +
193 "but the starting entry (" + range.first +
") is larger than the total number of " +
194 "entries (" +
offset +
") in the dataset.");
211 std::vector<std::vector<EntryRange>> eventRangesPerFile(clustersPerFile.size());
212 auto clustersPerFileIt = clustersPerFile.begin();
213 auto eventRangesPerFileIt = eventRangesPerFile.begin();
214 for (; clustersPerFileIt != clustersPerFile.end(); clustersPerFileIt++, eventRangesPerFileIt++) {
215 const auto clustersInThisFileSize = clustersPerFileIt->size();
216 const auto nFolds = clustersInThisFileSize / maxTasksPerFile;
220 *eventRangesPerFileIt = std::move(*clustersPerFileIt);
225 auto nReminderClusters = clustersInThisFileSize % maxTasksPerFile;
226 const auto &clustersInThisFile = *clustersPerFileIt;
227 for (
auto i = 0ULL; i < clustersInThisFileSize; ++i) {
228 const auto start = clustersInThisFile[i].first;
233 if (nReminderClusters > 0) {
237 const auto end = clustersInThisFile[i].second;
238 eventRangesPerFileIt->emplace_back(EntryRange({
start,
end}));
242 return std::make_pair(std::move(eventRangesPerFile), std::move(entriesPerFile));
268 const auto nFilesToProcess = nEntries.size();
269 for (
auto i = 0u; i < nFilesToProcess; ++i) {
270 fChain->Add((fileNames[i] +
"?#" + treeNames[i]).c_str(), nEntries[i]);
276 R__ASSERT(nFriends ==
fFriends.size() &&
"Created the wrong number of friends from the available information.");
277 for (std::size_t i = 0ul; i < nFriends; i++) {
278 const auto &thisFriendAlias = friendInfo.
fFriendNames[i].second;
285std::unique_ptr<TTreeReader>
288 const TEntryList &entryList,
const std::vector<Long64_t> &nEntries)
290 const bool hasEntryList = entryList.
GetN() > 0;
291 const bool usingLocalEntries = friendInfo.
fFriendNames.empty() && !hasEntryList;
292 const bool needNewChain =
293 fChain ==
nullptr || (usingLocalEntries && (fileNames[0] !=
fChain->GetListOfFiles()->At(0)->GetTitle() ||
294 treeNames[0] !=
fChain->GetListOfFiles()->At(0)->GetName()));
296 MakeChain(treeNames, fileNames, friendInfo, nEntries);
307 auto reader = std::make_unique<TTreeReader>(
fChain.get(),
fEntryList.get());
308 reader->SetEntriesRange(start, end);
328 std::vector<std::string> treeNames;
331 throw std::runtime_error(
"Empty list of files and no tree name provided");
335 std::string treeName;
337 TIter next(
f->GetListOfKeys());
338 while (
auto *key =
static_cast<TKey *
>(next())) {
339 const char *className = key->GetClassName();
340 if (strcmp(className,
"TTree") == 0) {
341 treeName = key->GetName();
345 if (treeName.empty())
346 throw std::runtime_error(
"Cannot find any tree in file " + fname);
347 treeNames.emplace_back(std::move(treeName));
362 const EntryRange &globalRange)
363 : fFileNames({std::string(
filename)}),
364 fTreeNames(treename.empty() ? FindTreeNames() : std::vector<std::string>{std::string(treename)}), fFriendInfo(),
365 fPool(nThreads), fGlobalRange(globalRange)
373 throw std::runtime_error(
"The provided list of file names is empty");
375 std::vector<std::string> strings;
376 strings.reserve(views.size());
377 for (
const auto &
v : views)
378 strings.emplace_back(
v);
395 UInt_t nThreads,
const EntryRange &globalRange)
397 fTreeNames(treename.empty() ? FindTreeNames()
398 : std::vector<std::string>(fFileNames.
size(), std::string(treename))),
399 fFriendInfo(), fPool(nThreads), fGlobalRange(globalRange)
411 : fFileNames(Internal::
TreeUtils::GetFileNamesFromTree(
tree)),
427 : fFileNames(Internal::
TreeUtils::GetFileNamesFromTree(
tree)),
431 fGlobalRange(globalRange)
455 const unsigned int maxTasksPerFile =
465 const bool shouldRetrieveAllClusters = hasFriends || hasEntryList ||
fGlobalRange.first > 0 ||
466 fGlobalRange.second != std::numeric_limits<Long64_t>::max();
467 ClustersAndEntries allClusterAndEntries{};
468 auto &allClusters = allClusterAndEntries.first;
469 const auto &allEntries = allClusterAndEntries.second;
470 if (shouldRetrieveAllClusters) {
477 auto processFileUsingGlobalClusters = [&](std::size_t fileIdx) {
478 auto processCluster = [&](
const EntryRange &
c) {
487 auto processFileRetrievingClusters = [&](std::size_t fileIdx) {
489 const auto &treeNames = std::vector<std::string>({
fTreeNames[fileIdx]});
490 const auto &fileNames = std::vector<std::string>({
fFileNames[fileIdx]});
491 const auto clustersAndEntries = MakeClusters(treeNames, fileNames, maxTasksPerFile);
492 const auto &clusters = clustersAndEntries.first[0];
493 const auto &entries = clustersAndEntries.second[0];
494 auto processCluster = [&](
const EntryRange &
c) {
501 const auto firstNonEmpty =
502 fGlobalRange.first > 0u ? std::distance(allClusters.begin(), std::find_if(allClusters.begin(), allClusters.end(),
503 [](
auto &
c) { return !c.empty(); }))
506 std::vector<std::size_t> fileIdxs(allEntries.empty() ? fFileNames.size() : allEntries.
size() - firstNonEmpty);
507 std::iota(fileIdxs.begin(), fileIdxs.end(), firstNonEmpty);
509 if (shouldRetrieveAllClusters)
510 fPool.Foreach(processFileUsingGlobalClusters, fileIdxs);
512 fPool.Foreach(processFileRetrievingClusters, fileIdxs);
515 for (
unsigned int islot = 0; islot < fTreeView.GetNSlots(); ++islot) {
517 if (view !=
nullptr) {
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char filename
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
std::vector< std::string > CheckAndConvert(const std::vector< std::string_view > &views)
std::unique_ptr< TChain > fChain
Chain on which to operate.
std::vector< std::unique_ptr< TChain > > fFriends
Friends of the tree/chain, if present.
std::unique_ptr< TEntryList > fEntryList
TEntryList for fChain, if present.
void Reset()
Clear the resources.
std::unique_ptr< TTreeReader > GetTreeReader(Long64_t start, Long64_t end, const std::vector< std::string > &treeName, const std::vector< std::string > &fileNames, const ROOT::TreeUtils::RFriendInfo &friendInfo, const TEntryList &entryList, const std::vector< Long64_t > &nEntries)
Get a TTreeReader for the current tree of this view.
void MakeChain(const std::vector< std::string > &treeName, const std::vector< std::string > &fileNames, const ROOT::TreeUtils::RFriendInfo &friendInfo, const std::vector< Long64_t > &nEntries)
Construct fChain, also adding friends if needed and injecting knowledge of offsets if available.
ROOT::Internal::TreeUtils::RNoCleanupNotifier fNoCleanupNotifier
void RegisterChain(TChain &c)
unsigned GetPoolSize() const
Returns the number of worker threads in the task arena.
void Foreach(F func, unsigned nTimes, unsigned nChunks=0)
Execute a function without arguments several times in parallel, dividing the execution in nChunks.
ROOT::TreeUtils::RFriendInfo fFriendInfo
const std::vector< std::string > fTreeNames
TTree names (always same size and ordering as fFileNames)
std::vector< std::string > FindTreeNames()
Retrieve the names of the TTrees in each of the input files, throw if a TTree cannot be found.
const std::vector< std::string > fFileNames
Names of the files.
static unsigned int fgTasksPerWorkerHint
ROOT::TThreadExecutor fPool
! Thread pool for processing.
TEntryList fEntryList
User-defined selection of entry numbers to be processed, empty if none was provided.
static void SetTasksPerWorkerHint(unsigned int m)
Set the hint for the desired number of tasks created per worker.
ROOT::TThreadedObject< ROOT::Internal::TTreeView > fTreeView
Thread-local TreeViews.
void Process(std::function< void(TTreeReader &)> func)
Process the entries of a TTree in parallel.
TTreeProcessorMT(std::string_view filename, std::string_view treename="", UInt_t nThreads=0u, const std::pair< Long64_t, Long64_t > &globalRange={0, std::numeric_limits< Long64_t >::max()})
static unsigned int GetTasksPerWorkerHint()
Retrieve the current value for the desired number of tasks per worker.
std::pair< Long64_t, Long64_t > fGlobalRange
A chain is a collection of files containing TTree objects.
TDirectory::TContext keeps track of and restores the current directory.
A List of entry numbers in a TTree or TChain.
virtual TList * GetLists() const
virtual Long64_t GetEntry(Long64_t index)
Return the number of the entry #index of this TEntryList in the TTree or TChain. See also Next().
virtual Long64_t GetN() const
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Book space in a file, create I/O buffers, fill them, and (un)compress them.
@ kCanDelete
if object in a list can be deleted
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
A TTree represents a columnar dataset.
Different standalone functions to work with trees and tuples, not required to be a member of any cla...
std::unique_ptr< TChain > MakeChainForMT(const std::string &name="", const std::string &title="")
Create a TChain object with options that avoid common causes of thread contention.
std::vector< std::unique_ptr< TChain > > MakeFriends(const ROOT::TreeUtils::RFriendInfo &finfo)
Create friends from the main TTree.
void ClearMustCleanupBits(TObjArray &arr)
Reset the kMustCleanup bit of a TObjArray of TBranch objects (e.g.
This file contains a specialised ROOT message handler to test for diagnostic in unit tests.
void EnableThreadSafety()
Enable support for multi-threading within the ROOT code in particular, enables the global mutex to ma...
Information about friend trees of a certain TTree or TChain object.
std::vector< std::pair< std::string, std::string > > fFriendNames
Pairs of names and aliases of each friend tree/chain.