Logo ROOT   6.14/05
Reference Guide
TTreeProcessorMT.hxx
Go to the documentation of this file.
1 // @(#)root/thread:$Id$
2 // Author: Enric Tejedor, CERN 12/09/2016
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2016, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 #ifndef ROOT_TTreeProcessorMT
13 #define ROOT_TTreeProcessorMT
14 
15 #include "TKey.h"
16 #include "TTree.h"
17 #include "TFile.h"
18 #include "TChain.h"
19 #include "TTreeReader.h"
20 #include "TError.h"
21 #include "TEntryList.h"
22 #include "TFriendElement.h"
23 #include "ROOT/RMakeUnique.hxx"
24 #include "ROOT/TThreadedObject.hxx"
25 
26 #include <string.h>
27 #include <functional>
28 #include <vector>
29 
30 
31 /** \class TTreeView
32  \brief A helper class that encapsulates a file and a tree.
33 
34 A helper class that encapsulates a TFile and a TTree, along with their names.
35 It is used together with TTreeProcessorMT and ROOT::TThreadedObject, so that
36 in the TTreeProcessorMT::Process method each thread can work on its own
37 <TFile,TTree> pair.
38 
39 This class can also be used with a collection of file names or a TChain, in case
40 the tree is stored in more than one file. A view will always contain only the
41 current (active) tree and file objects.
42 
43 A copy constructor is defined for TTreeView to work with ROOT::TThreadedObject.
44 The latter makes a copy of a model object every time a new thread accesses
45 the threaded object.
46 */
47 
48 namespace ROOT {
49  namespace Internal {
50 
// NOTE(review): this listing jumps from source line 52 to 55, so whatever the
// struct declares on lines 53-54 is not visible here — confirm the members
// against the original header before relying on this definition.
51  /// A cluster of entries
52  struct EntryCluster {
55  };
56 
// Result type of MakeClusters: a vector of EntryCluster paired with a vector of Long64_t.
57  using ClustersAndEntries = std::pair<std::vector<EntryCluster>, std::vector<Long64_t>>;
// Declaration only; the definition is not part of this header listing.
// Takes the tree name and the names of the files to inspect.
// NOTE(review): what the Long64_t vector holds cannot be told from this declaration —
// MakeChain's nEntries parameter has the same type, so presumably per-file entry counts; confirm.
58  ClustersAndEntries MakeClusters(const std::string &treename, const std::vector<std::string> &filenames);
59 
60  class TTreeView {
61  private:
62  using TreeReaderEntryListPair = std::pair<std::unique_ptr<TTreeReader>, std::unique_ptr<TEntryList>>;
63  using NameAlias = std::pair<std::string, std::string>;
64 
65  // NOTE: fFriends must come before fChain to be deleted after it, see ROOT-9281 for more details
66  std::vector<std::unique_ptr<TChain>> fFriends; ///< Friends of the tree/chain
67  std::unique_ptr<TChain> fChain; ///< Chain on which to operate
68  std::vector<std::string> fFileNames; ///< Names of the files
69  std::string fTreeName; ///< Name of the tree
70  TEntryList fEntryList; ///< User-defined selection of entry numbers to be processed, empty if none was provided
71  std::vector<Long64_t> fLoadedEntries; ///<! Per-task loaded entries (for task interleaving)
72  std::vector<NameAlias> fFriendNames; ///< <name,alias> pairs of the friends of the tree/chain
73  std::vector<std::vector<std::string>> fFriendFileNames; ///< Names of the files where friends are stored
74 
75  ////////////////////////////////////////////////////////////////////////////////
76  /// If not treeName was provided to the ctor, use the name of the first TTree in the first file, else throw.
78  {
79  // If the tree name is empty, look for a tree in the file
80  if (fTreeName.empty()) {
82  std::unique_ptr<TFile> f(TFile::Open(fFileNames[0].c_str()));
83  TIter next(f->GetListOfKeys());
84  while (TKey *key = (TKey *)next()) {
85  const char *className = key->GetClassName();
86  if (strcmp(className, "TTree") == 0) {
87  fTreeName = key->GetName();
88  break;
89  }
90  }
91  if (fTreeName.empty()) {
92  auto msg = "Cannot find any tree in file " + fFileNames[0];
93  throw std::runtime_error(msg);
94  }
95  }
96  }
97 
98  ////////////////////////////////////////////////////////////////////////////////
99  /// Construct fChain, also adding friends if needed and injecting knowledge of offsets if available.
/// \param[in] nEntries nEntries[i] is passed to TChain::Add together with the i-th name in fFileNames.
/// \param[in] friendEntries friendEntries[i][j] is passed to TChain::Add together with the j-th file
///            name of the i-th friend.
/// Both vectors are indexed without a bounds check, so they must be at least as long as
/// fFileNames / fFriendFileNames respectively.
100  void MakeChain(const std::vector<Long64_t> &nEntries, const std::vector<std::vector<Long64_t>> &friendEntries)
101  {
// Replace whatever chain was held before with a fresh one named after the tree
102  fChain.reset(new TChain(fTreeName.c_str()));
103  const auto nFiles = fFileNames.size();
104  for (auto i = 0u; i < nFiles; ++i) {
105  fChain->Add(fFileNames[i].c_str(), nEntries[i]);
106  }
// NOTE(review): the listing jumps from source line 106 to 108, so a statement may be
// missing here — confirm against the original header.
108 
// Drop the friend chains of any previous call before rebuilding them
109  fFriends.clear();
110  const auto nFriends = fFriendNames.size();
111  for (auto i = 0u; i < nFriends; ++i) {
112  const auto &friendName = fFriendNames[i];
113  const auto &name = friendName.first;
114  const auto &alias = friendName.second;
115 
116  // Build a friend chain
117  auto frChain = std::make_unique<TChain>(name.c_str());
118  const auto nFileNames = fFriendFileNames[i].size();
119  for (auto j = 0u; j < nFileNames; ++j)
120  frChain->Add(fFriendFileNames[i][j].c_str(), friendEntries[i][j]);
121 
122  // Make it friends with the main chain
123  fChain->AddFriend(frChain.get(), alias.c_str());
// fChain was only handed the raw pointer; ownership of the friend chain moves into fFriends
124  fFriends.emplace_back(std::move(frChain));
125  }
126  }
127 
128  ////////////////////////////////////////////////////////////////////////////////
129  /// Get and store the names, aliases and file names of the friends of the tree.
130  void StoreFriends(const TTree &tree, bool isTree)
131  {
132  auto friends = tree.GetListOfFriends();
133  if (!friends)
134  return;
135 
136  for (auto fr : *friends) {
137  auto frTree = static_cast<TFriendElement *>(fr)->GetTree();
138 
139  // Check if friend tree has an alias
140  auto realName = frTree->GetName();
141  auto alias = tree.GetFriendAlias(frTree);
142  if (alias) {
143  fFriendNames.emplace_back(std::make_pair(realName, std::string(alias)));
144  } else {
145  fFriendNames.emplace_back(std::make_pair(realName, ""));
146  }
147 
148  // Store the file names of the friend tree
149  fFriendFileNames.emplace_back();
150  auto &fileNames = fFriendFileNames.back();
151  if (isTree) {
152  auto f = frTree->GetCurrentFile();
153  fileNames.emplace_back(f->GetName());
154  } else {
155  auto frChain = static_cast<TChain *>(frTree);
156  for (auto f : *(frChain->GetListOfFiles())) {
157  fileNames.emplace_back(f->GetTitle());
158  }
159  }
160  }
161  }
162 
164  {
165  // TEntryList and SetEntriesRange do not work together (the former has precedence).
166  // We need to construct a TEntryList that contains only those entry numbers in our desired range.
167  auto elist = std::make_unique<TEntryList>();
168  Long64_t entry = fEntryList.GetEntry(0);
169  do {
170  if (entry >= start && entry < end) // TODO can quit this loop early when entry >= end
171  elist->Enter(entry);
172  } while ((entry = fEntryList.Next()) >= 0);
173 
174  auto reader = std::make_unique<TTreeReader>(fChain.get(), elist.get());
175  return std::make_pair(std::move(reader), std::move(elist));
176  }
177 
178  std::unique_ptr<TTreeReader> MakeReader(Long64_t start, Long64_t end)
179  {
180  auto reader = std::make_unique<TTreeReader>(fChain.get());
181  fChain->LoadTree(start - 1);
182  reader->SetEntriesRange(start, end);
183  return reader;
184  }
185 
186  public:
187  //////////////////////////////////////////////////////////////////////////
188  /// Constructor based on a file name.
189  /// \param[in] fn Name of the file containing the tree to process.
190  /// \param[in] tn Name of the tree to process. If not provided,
191  /// the implementation will automatically search for a
192  /// tree in the file.
194  {
195  fFileNames.emplace_back(fn);
196  GetTreeNameIfNeeded();
197  }
198 
199  //////////////////////////////////////////////////////////////////////////
200  /// Constructor based on a collection of file names.
201  /// \param[in] fns Collection of file names containing the tree to process.
202  /// \param[in] tn Name of the tree to process. If not provided,
203  /// the implementation will automatically search for a
204  /// tree in the collection of files.
205  TTreeView(const std::vector<std::string_view>& fns, std::string_view tn) : fTreeName(tn)
206  {
207  if (fns.size() > 0) {
208  for (auto& fn : fns)
209  fFileNames.emplace_back(fn);
210  }
211  else {
212  auto msg = "The provided list of file names is empty, cannot process tree " + fTreeName;
213  throw std::runtime_error(msg);
214  }
215  GetTreeNameIfNeeded();
216  }
217 
218  //////////////////////////////////////////////////////////////////////////
219  /// Constructor based on a TTree.
220  /// \param[in] tree Tree or chain of files containing the tree to process.
221  TTreeView(TTree& tree);
222 
223  //////////////////////////////////////////////////////////////////////////
224  /// Constructor based on a TTree and a TEntryList.
225  /// \param[in] tree Tree or chain of files containing the tree to process.
226  /// \param[in] entries List of entry numbers to process.
227  TTreeView(TTree& tree, TEntryList& entries) : TTreeView(tree)
228  {
229  Long64_t numEntries = entries.GetN();
230  for (Long64_t i = 0; i < numEntries; ++i) {
231  fEntryList.Enter(entries.GetEntry(i));
232  }
233  }
234 
235  //////////////////////////////////////////////////////////////////////////
236  /// Copy constructor.
237  /// \param[in] view Object to copy.
238  TTreeView(const TTreeView &view) : fTreeName(view.fTreeName), fEntryList(view.fEntryList)
239  {
240  for (auto& fn : view.fFileNames)
241  fFileNames.emplace_back(fn);
242 
243  for (auto &fn : view.fFriendNames)
244  fFriendNames.emplace_back(fn);
245 
246  for (auto &ffn : view.fFriendFileNames) {
247  fFriendFileNames.emplace_back();
248  auto &fileNames = fFriendFileNames.back();
249  for (auto &name : ffn) {
250  fileNames.emplace_back(name);
251  }
252  }
253  }
254 
255  //////////////////////////////////////////////////////////////////////////
256  /// Get a TTreeReader for the current tree of this view.
257  TreeReaderEntryListPair GetTreeReader(Long64_t start, Long64_t end, const std::vector<Long64_t> &nEntries,
258  const std::vector<std::vector<Long64_t>> &friendEntries)
259  {
260  if (fChain == nullptr)
261  MakeChain(nEntries, friendEntries);
262 
263  std::unique_ptr<TTreeReader> reader;
264  std::unique_ptr<TEntryList> elist;
265  if (fEntryList.GetN() > 0) {
266  std::tie(reader, elist) = MakeReaderWithEntryList(start, end);
267  } else {
268  reader = MakeReader(start, end);
269  }
270 
271  // we need to return the entry list too, as it needs to be in scope as long as the reader is
272  return std::make_pair(std::move(reader), std::move(elist));
273  }
274 
275  //////////////////////////////////////////////////////////////////////////
276  /// Get the filenames for this view.
277  const std::vector<std::string> &GetFileNames() const
278  {
279  return fFileNames;
280  }
281 
282  //////////////////////////////////////////////////////////////////////////
283  /// Get the name of the tree of this view.
284  std::string GetTreeName() const
285  {
286  return fTreeName;
287  }
288 
289  //////////////////////////////////////////////////////////////////////////
290  /// Push a new loaded entry to the stack.
291  void PushTaskFirstEntry(Long64_t entry) { fLoadedEntries.push_back(entry); }
292 
293  //////////////////////////////////////////////////////////////////////////
294  /// Restore the tree of the previous loaded entry, if any.
296  {
297  fLoadedEntries.pop_back();
298  if (fLoadedEntries.size() > 0) {
299  fChain->LoadTree(fLoadedEntries.back());
300  }
301  }
302 
303  const std::vector<NameAlias> &GetFriendNames() const
304  {
305  return fFriendNames;
306  }
307 
308  const std::vector<std::vector<std::string>> &GetFriendFileNames() const
309  {
310  return fFriendFileNames;
311  }
312  };
313  } // End of namespace Internal
314 
315 
317  private:
319 
320  public:
321  TTreeProcessorMT(std::string_view filename, std::string_view treename = "");
322  TTreeProcessorMT(const std::vector<std::string_view>& filenames, std::string_view treename = "");
325 
326  void Process(std::function<void(TTreeReader&)> func);
327 
328  };
329 
330 } // End of namespace ROOT
331 
332 #endif // defined TTreeProcessorMT
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
const std::vector< std::string > & GetFileNames() const
Get the filenames for this view.
virtual Long64_t Next()
Return the next non-zero entry index (next after fLastIndexQueried) this function is faster than GetE...
Definition: TEntryList.cxx:886
TTreeView(const std::vector< std::string_view > &fns, std::string_view tn)
Constructor based on a collection of file names.
long long Long64_t
Definition: RtypesCore.h:69
virtual Long64_t GetN() const
Definition: TEntryList.h:75
TTreeReader is a simple, robust and fast interface to read values from a TTree, TChain or TNtuple...
Definition: TTreeReader.h:43
Namespace for new ROOT classes and functions.
Definition: StringConv.hxx:21
std::pair< std::string, std::string > NameAlias
std::pair< std::unique_ptr< TTreeReader >, std::unique_ptr< TEntryList > > TreeReaderEntryListPair
virtual TList * GetListOfFriends() const
Definition: TTree.h:411
#define f(i)
Definition: RSha256.hxx:104
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=1, Int_t netopt=0)
Create / open a file.
Definition: TFile.cxx:3976
A cluster of entries.
TreeReaderEntryListPair MakeReaderWithEntryList(Long64_t start, Long64_t end)
std::string GetTreeName() const
Get the name of the tree of this view.
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
Definition: RExports.h:146
const std::vector< NameAlias > & GetFriendNames() const
virtual Long64_t LoadTree(Long64_t entry)
Find the tree which contains entry, and set it as the current tree.
Definition: TChain.cxx:1273
Book space in a file, create I/O buffers, to fill them, (un)compress them.
Definition: TKey.h:24
std::string fTreeName
Name of the tree.
std::vector< std::string > fFileNames
Names of the files.
TTreeView(const TTreeView &view)
Copy constructor.
void StoreFriends(const TTree &tree, bool isTree)
Get and store the names, aliases and file names of the friends of the tree.
void PopTaskFirstEntry()
Restore the tree of the previous loaded entry, if any.
TTreeView(TTree &tree, TEntryList &entries)
Constructor based on a TTree and a TEntryList.
TreeReaderEntryListPair GetTreeReader(Long64_t start, Long64_t end, const std::vector< Long64_t > &nEntries, const std::vector< std::vector< Long64_t >> &friendEntries)
Get a TTreeReader for the current tree of this view.
void MakeChain(const std::vector< Long64_t > &nEntries, const std::vector< std::vector< Long64_t >> &friendEntries)
Construct fChain, also adding friends if needed and injecting knowledge of offsets if available...
virtual Long64_t GetEntry(Int_t index)
Return the number of the entry #index of this TEntryList in the TTree or TChain See also Next()...
Definition: TEntryList.cxx:653
if object destructor must call RecursiveRemove()
Definition: TObject.h:60
std::unique_ptr< TChain > fChain
Chain on which to operate.
const std::vector< std::vector< std::string > > & GetFriendFileNames() const
void GetTreeNameIfNeeded()
If not treeName was provided to the ctor, use the name of the first TTree in the first file...
virtual TFriendElement * AddFriend(const char *chainname, const char *dummy="")
Add a TFriendElement to the list of friends of this chain.
Definition: TChain.cxx:644
void PushTaskFirstEntry(Long64_t entry)
Push a new loaded entry to the stack.
ClustersAndEntries MakeClusters(const std::string &treename, const std::vector< std::string > &filenames)
Return a vector of cluster boundaries for the given tree and files.
basic_string_view< char > string_view
Definition: RStringView.hxx:35
std::unique_ptr< TTreeReader > MakeReader(Long64_t start, Long64_t end)
ROOT::TThreadedObject< ROOT::Internal::TTreeView > treeView
! Thread-local TreeViews
std::pair< std::vector< EntryCluster >, std::vector< Long64_t > > ClustersAndEntries
virtual Bool_t Enter(Long64_t entry, TTree *tree=0)
Add entry #entry to the list.
Definition: TEntryList.cxx:558
std::vector< Long64_t > fLoadedEntries
! Per-task loaded entries (for task interleaving)
virtual TList * GetListOfKeys() const
A TFriendElement TF describes a TTree object TF in a file.
A chain is a collection of files containing TTree objects.
Definition: TChain.h:33
Definition: tree.py:1
A TTree object has a header with a name and a title.
Definition: TTree.h:70
#define gDirectory
Definition: TDirectory.h:213
void ResetBit(UInt_t f)
Definition: TObject.h:171
std::vector< NameAlias > fFriendNames
<name,alias> pairs of the friends of the tree/chain
A List of entry numbers in a TTree or TChain.
Definition: TEntryList.h:25
A class to process the entries of a TTree in parallel.
TEntryList fEntryList
User-defined selection of entry numbers to be processed, empty if none was provided.
std::vector< std::vector< std::string > > fFriendFileNames
Names of the files where friends are stored.
char name[80]
Definition: TGX11.cxx:109
virtual Int_t Add(TChain *chain)
Add all files referenced by the passed chain to this chain.
Definition: TChain.cxx:222
virtual const char * GetFriendAlias(TTree *) const
If the 'tree' is a friend, this method returns its alias name.
Definition: TTree.cxx:5751
TTreeView(std::string_view fn, std::string_view tn)
Constructor based on a file name.
std::vector< std::unique_ptr< TChain > > fFriends
Friends of the tree/chain.