Logo ROOT   6.10/09
Reference Guide
TTreeProcessorMT.hxx
Go to the documentation of this file.
1 // @(#)root/thread:$Id$
2 // Author: Enric Tejedor, CERN 12/09/2016
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2016, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 #ifndef ROOT_TTreeProcessorMT
13 #define ROOT_TTreeProcessorMT
14 
15 #include "TKey.h"
16 #include "TTree.h"
17 #include "TFile.h"
18 #include "TChain.h"
19 #include "TTreeReader.h"
20 #include "TError.h"
21 #include "TEntryList.h"
22 #include "ROOT/TThreadedObject.hxx"
23 
24 #include <string.h>
25 #include <functional>
26 #include <vector>
27 
28 
29 /** \class TTreeView
30  \brief A helper class that encapsulates a file and a tree.
31 
32 A helper class that encapsulates a TFile and a TTree, along with their names.
33 It is used together with TTreeProcessorMT and ROOT::TThreadedObject, so that
34 in the TTreeProcessorMT::Process method each thread can work on its own
35 <TFile,TTree> pair.
36 
37 This class can also be used with a collection of file names or a TChain, in case
38 the tree is stored in more than one file. A view will always contain only the
39 current (active) tree and file objects.
40 
41 A copy constructor is defined for TTreeView to work with ROOT::TThreadedObject.
42 The latter makes a copy of a model object every time a new thread accesses
43 the threaded object.
44 */
45 
46 namespace ROOT {
47  namespace Internal {
48  class TTreeView {
49  private:
50  std::vector<std::string> fFileNames; ///< Names of the files
51  std::string fTreeName; ///< Name of the tree
52  std::unique_ptr<TFile> fCurrentFile; ///<! Current file object of this view.
53  TTree *fCurrentTree; ///<! Current tree object of this view.
54  unsigned int fCurrentIdx; ///<! Index of the current file.
55  std::vector<TEntryList> fEntryLists; ///< Entry numbers to be processed per tree/file
56  TEntryList fCurrentEntryList; ///< Entry numbers for the current range being processed
57 
58  ////////////////////////////////////////////////////////////////////////////////
59  /// Initialize the file and the tree for this view, first looking for a tree in
60  /// the file if necessary.
61  void Init()
62  {
63  // Here we do not use a TContext since the TThreadedObject protects
64  // the copies done for every processing slots.
65  fCurrentFile.reset(TFile::Open(fFileNames[fCurrentIdx].data()));
66 
67  // If the tree name is empty, look for a tree in the file
68  if (fTreeName.empty()) {
69  TIter next(fCurrentFile->GetListOfKeys());
70  while (TKey *key = (TKey*)next()) {
71  const char *className = key->GetClassName();
72  if (strcmp(className, "TTree") == 0) {
73  fTreeName = key->GetName();
74  break;
75  }
76  }
77  if (fTreeName.empty()) {
78  auto msg = "Cannot find any tree in file " + fFileNames[fCurrentIdx];
79  throw std::runtime_error(msg);
80  }
81  }
82 
83  // We cannot use here the template method (TFile::GetObject) because the header will finish
84  // in the PCH and the specialization will be available. PyROOT will not be able to specialize
85  // the method for types other that TTree.
86  fCurrentTree = (TTree*)fCurrentFile->Get(fTreeName.data());
87 
88  // Do not remove this tree from list of cleanups (thread unsafe)
89  fCurrentTree->ResetBit(TObject::kMustCleanup);
90  }
91 
92  public:
93  //////////////////////////////////////////////////////////////////////////
94  /// Constructor based on a file name.
95  /// \param[in] fn Name of the file containing the tree to process.
96  /// \param[in] tn Name of the tree to process. If not provided,
97  /// the implementation will automatically search for a
98  /// tree in the file.
99  TTreeView(std::string_view fn, std::string_view tn) : fTreeName(tn), fCurrentIdx(0)
100  {
101  fFileNames.emplace_back(fn);
102  Init();
103  }
104 
105  //////////////////////////////////////////////////////////////////////////
106  /// Constructor based on a collection of file names.
107  /// \param[in] fns Collection of file names containing the tree to process.
108  /// \param[in] tn Name of the tree to process. If not provided,
109  /// the implementation will automatically search for a
110  /// tree in the collection of files.
111  TTreeView(const std::vector<std::string_view>& fns, std::string_view tn) : fTreeName(tn), fCurrentIdx(0)
112  {
113  if (fns.size() > 0) {
114  for (auto& fn : fns)
115  fFileNames.emplace_back(fn);
116  Init();
117  }
118  else {
119  auto msg = "The provided list of file names is empty, cannot process tree " + fTreeName;
120  throw std::runtime_error(msg);
121  }
122  }
123 
124  //////////////////////////////////////////////////////////////////////////
125  /// Constructor based on a TTree.
126  /// \param[in] tree Tree or chain of files containing the tree to process.
127  TTreeView(TTree& tree) : fTreeName(tree.GetName()), fCurrentIdx(0)
128  {
129  static const TClassRef clRefTChain("TChain");
130  if (clRefTChain == tree.IsA()) {
131  TObjArray* filelist = dynamic_cast<TChain&>(tree).GetListOfFiles();
132  if (filelist->GetEntries() > 0) {
133  for (auto f : *filelist)
134  fFileNames.emplace_back(f->GetTitle());
135  Init();
136  }
137  else {
138  auto msg = "The provided chain of files is empty, cannot process tree " + fTreeName;
139  throw std::runtime_error(msg);
140  }
141  }
142  else {
143  TFile *f = tree.GetCurrentFile();
144  if (f) {
145  fFileNames.emplace_back(f->GetName());
146  Init();
147  }
148  else {
149  auto msg = "The specified TTree is not linked to any file, in-memory-only trees are not supported. Cannot process tree " + fTreeName;
150  throw std::runtime_error(msg);
151  }
152  }
153  }
154 
155  //////////////////////////////////////////////////////////////////////////
156  /// Constructor based on a TTree and a TEntryList.
157  /// \param[in] tree Tree or chain of files containing the tree to process.
158  /// \param[in] entries List of entry numbers to process.
160  {
161  static const TClassRef clRefTChain("TChain");
162  if (clRefTChain == tree.IsA()) {
163  // We need to convert the global entry numbers to per-tree entry numbers.
164  // This will allow us to build a TEntryList for a given entry range of a tree of the chain.
165  size_t nTrees = fFileNames.size();
166  fEntryLists.resize(nTrees);
167 
168  TChain *chain = dynamic_cast<TChain*>(&tree);
169  Long64_t currListEntry = entries.GetEntry(0);
170  Long64_t currTreeOffset = 0;
171 
172  for (unsigned int treeNum = 0; treeNum < nTrees && currListEntry >= 0; ++treeNum) {
173  chain->LoadTree(currTreeOffset);
174  TTree *currTree = chain->GetTree();
175  Long64_t currTreeEntries = currTree->GetEntries();
176  Long64_t nextTreeOffset = currTreeOffset + currTreeEntries;
177 
178  while (currListEntry >= 0 && currListEntry < nextTreeOffset) {
179  fEntryLists[treeNum].Enter(currListEntry - currTreeOffset);
180  currListEntry = entries.Next();
181  }
182 
183  currTreeOffset = nextTreeOffset;
184  }
185  }
186  else {
187  fEntryLists.emplace_back(entries);
188  }
189  }
190 
191  //////////////////////////////////////////////////////////////////////////
192  /// Copy constructor.
193  /// \param[in] view Object to copy.
194  TTreeView(const TTreeView& view) : fTreeName(view.fTreeName), fCurrentIdx(view.fCurrentIdx)
195  {
196  for (auto& fn : view.fFileNames)
197  fFileNames.emplace_back(fn);
198 
199  for (auto& el : view.fEntryLists)
200  fEntryLists.emplace_back(el);
201 
202  Init();
203  }
204 
205  //////////////////////////////////////////////////////////////////////////
206  /// Get the cluster iterator for the current tree of this view, starting
207  /// from entry zero.
209  {
210  return fCurrentTree->GetClusterIterator(0);
211  }
212 
213  //////////////////////////////////////////////////////////////////////////
214  /// Get a TTreeReader for the current tree of this view.
215  std::unique_ptr<TTreeReader> GetTreeReader(Long64_t start, Long64_t end)
216  {
218  if (fEntryLists.size() > 0) {
219  // TEntryList and SetEntriesRange do not work together (the former has precedence).
220  // We need to construct a TEntryList that contains only those entry numbers
221  // in our desired range.
222  fCurrentEntryList.Reset();
223  if (fEntryLists[fCurrentIdx].GetN() > 0) {
224  Long64_t entry = fEntryLists[fCurrentIdx].GetEntry(0);
225  do {
226  if (entry >= start && entry < end) fCurrentEntryList.Enter(entry);
227  } while ((entry = fEntryLists[fCurrentIdx].Next()) >= 0);
228  }
229 
230  reader = new TTreeReader(fCurrentTree, &fCurrentEntryList);
231  }
232  else {
233  // If no TEntryList is involved we can safely set the range in the reader
234  reader = new TTreeReader(fCurrentTree);
235  reader->SetEntriesRange(start, end);
236  }
237 
238  return std::unique_ptr<TTreeReader>(reader);
239  }
240 
241  //////////////////////////////////////////////////////////////////////////
242  /// Get the number of entries of the current tree of this view.
244  {
245  return fCurrentTree->GetEntries();
246  }
247 
248  //////////////////////////////////////////////////////////////////////////
249  /// Get the number of files of this view.
250  size_t GetNumFiles() const
251  {
252  return fFileNames.size();
253  }
254 
255  //////////////////////////////////////////////////////////////////////////
256  /// Set the current file and tree of this view.
257  void SetCurrent(unsigned int i)
258  {
259  if (i != fCurrentIdx) {
260  fCurrentIdx = i;
261  // Here we need to restore the directory after opening the file.
263  TFile *f = TFile::Open(fFileNames[fCurrentIdx].data());
264  fCurrentTree = (TTree*)f->Get(fTreeName.data());
265  fCurrentTree->ResetBit(TObject::kMustCleanup);
266  fCurrentFile.reset(f);
267  }
268  }
269  };
270  } // End of namespace Internal
271 
272 
274  private:
275  ROOT::TThreadedObject<ROOT::Internal::TTreeView> treeView; ///<! Threaded object with <file,tree> per thread
276 
277  public:
278  TTreeProcessorMT(std::string_view filename, std::string_view treename = "");
279  TTreeProcessorMT(const std::vector<std::string_view>& filenames, std::string_view treename = "");
282 
283  void Process(std::function<void(TTreeReader&)> func);
284 
285  };
286 
287 } // End of namespace ROOT
288 
289 #endif // defined TTreeProcessorMT
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
std::string GetName(const std::string &scope_name)
Definition: Cppyy.cxx:145
An array of TObjects.
Definition: TObjArray.h:37
virtual Long64_t Next()
Return the next non-zero entry index (next after fLastIndexQueried) this function is faster than GetE...
Definition: TEntryList.cxx:890
TTreeView(const std::vector< std::string_view > &fns, std::string_view tn)
Constructor based on a collection of file names.
long long Long64_t
Definition: RtypesCore.h:69
TTreeReader is a simple, robust and fast interface to read values from a TTree, TChain or TNtuple...
Definition: TTreeReader.h:43
Namespace for new ROOT classes and functions.
Definition: StringConv.hxx:21
size_t GetNumFiles() const
Get the number of files of this view.
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:46
virtual TObject * Get(const char *namecycle)
Return pointer to object identified by namecycle.
std::unique_ptr< TTreeReader > GetTreeReader(Long64_t start, Long64_t end)
Get a TTreeReader for the current tree of this view.
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=1, Int_t netopt=0)
Create / open a file.
Definition: TFile.cxx:3909
Helper class to iterate over cluster of baskets.
Definition: TTree.h:241
void SetCurrent(unsigned int i)
Set the current file and tree of this view.
virtual TClusterIterator GetClusterIterator(Long64_t firstentry)
Return an iterator over the cluster of baskets starting at firstentry.
Definition: TTree.cxx:5152
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
Definition: RExports.h:146
virtual Long64_t LoadTree(Long64_t entry)
Find the tree which contains entry, and set it as the current tree.
Definition: TChain.cxx:1259
Book space in a file, create I/O buffers, to fill them, (un)compress them.
Definition: TKey.h:24
std::string fTreeName
Name of the tree.
void Init()
Initialize the file and the tree for this view, first looking for a tree in the file if necessary...
std::unique_ptr< TFile > fCurrentFile
! Current file object of this view.
TEntryList fCurrentEntryList
Entry numbers for the current range being processed.
std::vector< std::string > fFileNames
Names of the files.
TTreeView(const TTreeView &view)
Copy constructor.
std::vector< TEntryList > fEntryLists
Entry numbers to be processed per tree/file.
TTreeView(TTree &tree, TEntryList &entries)
Constructor based on a TTree and a TEntryList.
TFile * GetCurrentFile() const
Return pointer to the current file.
Definition: TTree.cxx:5163
virtual Long64_t GetEntry(Int_t index)
Return the number of the entry #index of this TEntryList in the TTree or TChain See also Next()...
Definition: TEntryList.cxx:657
TTreeView(TTree &tree)
Constructor based on a TTree.
if object destructor must call RecursiveRemove()
Definition: TObject.h:59
double f(double x)
TTree::TClusterIterator GetClusterIterator()
Get the cluster iterator for the current tree of this view, starting from entry zero.
Long64_t GetEntries() const
Get the number of entries of the current tree of this view.
please use SetEntriesRange()!")
Definition: TTreeReader.h:188
double func(double *x, double *p)
Definition: stressTF1.cxx:213
ROOT::TThreadedObject< ROOT::Internal::TTreeView > treeView
! Threaded object with <file,tree> per thread
virtual Bool_t Enter(Long64_t entry, TTree *tree=0)
Add entry #entry to the list.
Definition: TEntryList.cxx:562
virtual Long64_t GetEntries() const
Definition: TTree.h:381
virtual TTree * GetTree() const
Definition: TChain.h:115
TClassRef is used to implement a permanent reference to a TClass object.
Definition: TClassRef.h:29
TTree * fCurrentTree
! Current tree object of this view.
virtual void Reset()
Reset this list.
virtual TList * GetListOfKeys() const
A chain is a collection of files containing TTree objects.
Definition: TChain.h:33
Int_t GetEntries() const
Return the number of objects in array (i.e.
Definition: TObjArray.cxx:494
Definition: tree.py:1
A TTree object has a header with a name and a title.
Definition: TTree.h:78
#define gDirectory
Definition: TDirectory.h:211
void ResetBit(UInt_t f)
Definition: TObject.h:158
A List of entry numbers in a TTree or TChain.
Definition: TEntryList.h:25
A class to process the entries of a TTree in parallel.
TTreeView(std::string_view fn, std::string_view tn)
Constructor based on a file name.
unsigned int fCurrentIdx
! Index of the current file.