Logo ROOT   6.12/07
Reference Guide
TTreeProcessorMT.cxx
Go to the documentation of this file.
1 // @(#)root/thread:$Id$
2 // Author: Enric Tejedor, CERN 12/09/2016
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2016, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 /** \class ROOT::TTreeProcessorMT
13  \ingroup Parallelism
14  \brief A class to process the entries of a TTree in parallel.
15 
16 By means of its Process method, ROOT::TTreeProcessorMT provides a way to process the
17 entries of a TTree in parallel. When invoking TTreeProcessorMT::Process, the user
18 passes a function whose only parameter is a TTreeReader. The function iterates
19 on a subrange of entries by using that TTreeReader.
20 
21 The implementation of ROOT::TTreeProcessorMT parallelizes the processing of the subranges,
22 each corresponding to a cluster in the TTree. This is possible thanks to the use
23 of a ROOT::TThreadedObject, so that each thread works with its own TFile and TTree
24 objects.
25 */
26 
#include "TROOT.h"
#include "ROOT/TThreadExecutor.hxx"

#include <stdexcept>
#include <string>
30 
31 using namespace ROOT;
32 
33 ////////////////////////////////////////////////////////////////////////
34 /// Constructor based on a file name.
35 /// \param[in] filename Name of the file containing the tree to process.
36 /// \param[in] treename Name of the tree to process. If not provided,
37 /// the implementation will automatically search for a
38 /// tree in the file.
39 TTreeProcessorMT::TTreeProcessorMT(std::string_view filename, std::string_view treename) : treeView(filename, treename) {}
40 
41 ////////////////////////////////////////////////////////////////////////
42 /// Constructor based on a collection of file names.
43 /// \param[in] filenames Collection of the names of the files containing the tree to process.
44 /// \param[in] treename Name of the tree to process. If not provided,
45 /// the implementation will automatically search for a
46 /// tree in the collection of files.
47 TTreeProcessorMT::TTreeProcessorMT(const std::vector<std::string_view> &filenames, std::string_view treename) : treeView(filenames, treename) {}
48 
49 ////////////////////////////////////////////////////////////////////////
50 /// Constructor based on a TTree.
51 /// \param[in] tree Tree or chain of files containing the tree to process.
53 
54 ////////////////////////////////////////////////////////////////////////
55 /// Constructor based on a TTree and a TEntryList.
56 /// \param[in] tree Tree or chain of files containing the tree to process.
57 /// \param[in] entries List of entry numbers to process.
58 TTreeProcessorMT::TTreeProcessorMT(TTree &tree, TEntryList &entries) : treeView(tree, entries) {}
59 
60 ////////////////////////////////////////////////////////////////////////
61 /// Divide input data in clusters, i.e. the workloads to distribute to tasks
62 std::vector<ROOT::Internal::TreeViewCluster> TTreeProcessorMT::MakeClusters()
63 {
65  std::vector<ROOT::Internal::TreeViewCluster> clusters;
66  const auto &fileNames = treeView->GetFileNames();
67  const auto nFileNames = fileNames.size();
68  const auto &treeName = treeView->GetTreeName();
69  Long64_t offset = 0;
70  for (auto i = 0u; i < nFileNames; ++i) { // TTreeViewCluster requires the index of the file the cluster belongs to
71  std::unique_ptr<TFile> f(TFile::Open(fileNames[i].c_str())); // need TFile::Open to load plugins if need be
72  TTree *t = nullptr; // not a leak, t will be deleted by f
73  f->GetObject(treeName.c_str(), t);
74  auto clusterIter = t->GetClusterIterator(0);
75  Long64_t start = 0, end = 0;
76  const Long64_t entries = t->GetEntries();
77  // Iterate over the clusters in the current file and generate a task for each of them
78  while ((start = clusterIter()) < entries) {
79  end = clusterIter.GetNextEntry();
80  // Add the current file's offset to start and end to make them (chain) global
81  clusters.emplace_back(ROOT::Internal::TreeViewCluster{start + offset, end + offset});
82  }
83  offset += entries;
84  }
85  return clusters;
86 }
87 
88 //////////////////////////////////////////////////////////////////////////////
89 /// Process the entries of a TTree in parallel. The user-provided function
90 /// receives a TTreeReader which can be used to iterate on a subrange of
91 /// entries
92 /// ~~~{.cpp}
93 /// TTreeProcessorMT::Process([](TTreeReader& readerSubRange) {
94 /// // Select branches to read
95 /// while (readerSubRange.next()) {
96 /// // Use content of current entry
97 /// }
98 /// });
99 /// ~~~
100 /// The user needs to be aware that each of the subranges can potentially
101 /// be processed in parallel. This means that the code of the user function
102 /// should be thread safe.
103 ///
104 /// \param[in] func User-defined function that processes a subrange of entries
106 {
107  // Enable this IMT use case (activate its locks)
109 
110  auto clusters = MakeClusters();
111 
112  auto mapFunction = [this, &func](const ROOT::Internal::TreeViewCluster &c) {
113  // This task will operate with the tree that contains startEntry
114  treeView->PushLoadedEntry(c.startEntry);
115 
116  auto readerAndEntryList = treeView->GetTreeReader(c.startEntry, c.endEntry);
117  auto &reader = std::get<0>(readerAndEntryList);
118  func(*reader);
119 
120  // In case of task interleaving, we need to load here the tree of the parent task
122  };
123 
124  // Assume number of threads has been initialized via ROOT::EnableImplicitMT
125  TThreadExecutor pool;
126  pool.Foreach(mapFunction, clusters);
127 }
void Foreach(F func, unsigned nTimes)
Execute func (with no arguments) nTimes in parallel.
const std::vector< std::string > & GetFileNames() const
Get the filenames for this view.
long long Long64_t
Definition: RtypesCore.h:69
TTreeReader is a simple, robust and fast interface to read values from a TTree, TChain or TNtuple...
Definition: TTreeReader.h:43
basic_string_view< char > string_view
Definition: RStringView.h:35
Namespace for new ROOT classes and functions.
Definition: StringConv.hxx:21
A cluster of entries as seen by TTreeView.
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=1, Int_t netopt=0)
Create / open a file.
Definition: TFile.cxx:3950
std::string GetTreeName() const
Get the name of the tree of this view.
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
Definition: RExports.h:146
This class provides a simple interface to execute the same task multiple times in parallel...
void RestoreLoadedEntry()
Restore the tree of the previous loaded entry, if any.
TreeReaderEntryListPair GetTreeReader(Long64_t start, Long64_t end)
void Process(std::function< void(TTreeReader &)> func)
Process the entries of a TTree in parallel.
std::vector< ROOT::Internal::TreeViewCluster > MakeClusters()
Divide input data in clusters, i.e. the workloads to distribute to tasks.
ROOT::TThreadedObject< ROOT::Internal::TTreeView > treeView
! Thread-local TreeViews
void PushLoadedEntry(Long64_t entry)
Push a new loaded entry to the stack.
Definition: tree.py:1
TTreeProcessorMT(std::string_view filename, std::string_view treename="")
Constructor based on a file name.
A List of entry numbers in a TTree or TChain.
Definition: TEntryList.h:25