Logo ROOT  
Reference Guide
Loading...
Searching...
No Matches
RTTreeDS.hxx
Go to the documentation of this file.
1/**
2 \file ROOT/RTTreeDS.hxx
3 \author Vincenzo Eduardo Padulano
4 \date 2024-12
5*/
6
7/*************************************************************************
8 * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. *
9 * All rights reserved. *
10 * *
11 * For the licensing terms see $ROOTSYS/LICENSE. *
12 * For the list of contributors see $ROOTSYS/README/CREDITS. *
13 *************************************************************************/
14
15#ifndef ROOT_INTERNAL_RDF_RTTREEDS
16#define ROOT_INTERNAL_RDF_RTTREEDS
17
18#include "ROOT/RDataSource.hxx"
19
20#include <memory>
21#include <string>
22#include <vector>
23#include <stdexcept>
24#include <string_view>
25
26// Begin forward decls
27
28namespace ROOT {
29class RDataFrame;
30}
31
32namespace ROOT::Detail::RDF {
33class RLoopManager;
34}
35
36namespace ROOT::RDF {
37class RSampleInfo;
38}
39
40namespace ROOT::RDF::Experimental {
41class RSample;
42}
43
44namespace ROOT::TreeUtils {
45struct RFriendInfo;
46}
47
48namespace ROOT::Internal::TreeUtils {
49class RNoCleanupNotifier;
50}
51
52class TChain;
53class TDirectory;
54class TTree;
55class TTreeReader;
56
57// End forward decls
58
59namespace ROOT::Internal::RDF {
60
61class RTTreeDS final : public ROOT::RDF::RDataSource {
62 std::vector<std::string> fBranchNamesWithDuplicates{};
63 std::vector<std::string> fBranchNamesWithoutDuplicates{};
64 std::vector<std::string> fTopLevelBranchNames{};
65
66 std::shared_ptr<TTree> fTree;
67
68 std::unique_ptr<TTreeReader> fTreeReader;
69
70 std::vector<std::unique_ptr<TChain>> fFriends;
71
72 // Should be needed mostly for MT runs, but we keep it here to document and align the existing functionality
73 // from RLoopManager. See https://github.com/root-project/root/pull/10729
74 std::unique_ptr<ROOT::Internal::TreeUtils::RNoCleanupNotifier> fNoCleanupNotifier;
75
76 ROOT::RDF::RSampleInfo
77 CreateSampleInfo(unsigned int,
78 const std::unordered_map<std::string, ROOT::RDF::Experimental::RSample *> &sampleMap) const final;
79
80 void RunFinalChecks(bool nodesLeftNotRun) const final;
81
82 void Setup(std::shared_ptr<TTree> &&tree, const ROOT::TreeUtils::RFriendInfo *friendInfo = nullptr);
83
84 std::vector<std::pair<ULong64_t, ULong64_t>> GetTTreeEntryRange(TTree &tree);
85 std::vector<std::pair<ULong64_t, ULong64_t>> GetTChainEntryRange(TChain &chain);
86
87public:
88 RTTreeDS(std::shared_ptr<TTree> tree);
89 RTTreeDS(std::shared_ptr<TTree> tree, const ROOT::TreeUtils::RFriendInfo &friendInfo);
90 RTTreeDS(std::string_view treeName, TDirectory *dirPtr);
91 RTTreeDS(std::string_view treeName, std::string_view fileNameGlob);
92 RTTreeDS(std::string_view treeName, const std::vector<std::string> &fileNameGlobs);
93
94 // Rule of five
95 RTTreeDS(const RTTreeDS &) = delete;
96 RTTreeDS &operator=(const RTTreeDS &) = delete;
97 RTTreeDS(RTTreeDS &&) = delete;
98 RTTreeDS &operator=(RTTreeDS &&) = delete;
99 ~RTTreeDS() final; // Define destructor where data member types are defined
100
101 void Initialize() final;
102
103 void Finalize() final;
104
105 std::vector<std::pair<ULong64_t, ULong64_t>> GetEntryRanges() final;
106
107 const std::vector<std::string> &GetColumnNames() const final { return fBranchNamesWithDuplicates; }
108
109 bool HasColumn(std::string_view colName) const final
110 {
111 return std::find(fBranchNamesWithDuplicates.begin(), fBranchNamesWithDuplicates.end(), colName) !=
112 fBranchNamesWithDuplicates.end();
113 }
114
115 std::string GetTypeName(std::string_view colName) const final;
116
117 std::string GetTypeNameWithOpts(std::string_view colName, bool vector2RVec) const final;
118
119 bool SetEntry(unsigned int, ULong64_t entry) final;
120
121 Record_t GetColumnReadersImpl(std::string_view /* name */, const std::type_info & /* ti */) final
122 {
123 // This datasource uses the newer GetColumnReaders() API
124 return {};
125 }
126
127 std::unique_ptr<ROOT::Detail::RDF::RColumnReaderBase>
128 GetColumnReaders(unsigned int, std::string_view, const std::type_info &) final
129 {
130 // This data source creates column readers via CreateColumnReader
131 throw std::runtime_error("GetColumnReaders should not be called on this data source, something wrong happened!");
132 }
133
134 std::unique_ptr<ROOT::Detail::RDF::RColumnReaderBase> CreateColumnReader(unsigned int slot, std::string_view col,
135 const std::type_info &tid,
136 TTreeReader *treeReader) final;
137
138 std::string GetLabel() final { return "TTreeDS"; }
139
140 TTree *GetTree();
141
142 const std::vector<std::string> &GetTopLevelFieldNames() const final { return fTopLevelBranchNames; }
143
144 const std::vector<std::string> &GetColumnNamesNoDuplicates() const final { return fBranchNamesWithoutDuplicates; }
145
146 void InitializeWithOpts(const std::set<std::string> &suppressErrorsForMissingBranches) final;
147
148 std::string DescribeDataset() final;
149
150 std::string AsString() final { return "TTree data source"; }
151
152 std::size_t GetNFiles() const final;
153
154 void ProcessMT(ROOT::Detail::RDF::RLoopManager &lm) final;
155};
156
157ROOT::RDataFrame FromTTree(std::string_view treeName, std::string_view fileNameGlob);
158ROOT::RDataFrame FromTTree(std::string_view treeName, const std::vector<std::string> &fileNameGlobs);
159
160} // namespace ROOT::Internal::RDF
161
162#endif
unsigned long long ULong64_t
Portable unsigned long integer 8 bytes.
Definition RtypesCore.h:84
The head node of an RDF computation graph.
std::vector< std::pair< ULong64_t, ULong64_t > > GetTTreeEntryRange(TTree &tree)
Definition RTTreeDS.cxx:579
std::vector< std::string > fBranchNamesWithoutDuplicates
Definition RTTreeDS.hxx:63
RTTreeDS & operator=(RTTreeDS &&)=delete
std::size_t GetNFiles() const final
Returns the number of files from which the dataset is constructed.
Definition RTTreeDS.cxx:429
std::string DescribeDataset() final
Definition RTTreeDS.cxx:438
std::string GetLabel() final
Return a string representation of the datasource type.
Definition RTTreeDS.hxx:138
std::unique_ptr< ROOT::Internal::TreeUtils::RNoCleanupNotifier > fNoCleanupNotifier
Definition RTTreeDS.hxx:74
const std::vector< std::string > & GetColumnNamesNoDuplicates() const final
Definition RTTreeDS.hxx:144
const std::vector< std::string > & GetColumnNames() const final
Returns a reference to the collection of the dataset's column names.
Definition RTTreeDS.hxx:107
void Setup(std::shared_ptr< TTree > &&tree, const ROOT::TreeUtils::RFriendInfo *friendInfo=nullptr)
Definition RTTreeDS.cxx:262
std::unique_ptr< ROOT::Detail::RDF::RColumnReaderBase > CreateColumnReader(unsigned int slot, std::string_view col, const std::type_info &tid, TTreeReader *treeReader) final
Creates a column reader for the requested column.
Definition RTTreeDS.cxx:504
std::vector< std::unique_ptr< TChain > > fFriends
Definition RTTreeDS.hxx:70
void Initialize() final
Convenience method called before starting an event-loop.
Definition RTTreeDS.cxx:641
void RunFinalChecks(bool nodesLeftNotRun) const final
Definition RTTreeDS.cxx:661
const std::vector< std::string > & GetTopLevelFieldNames() const final
Definition RTTreeDS.hxx:142
RTTreeDS & operator=(const RTTreeDS &)=delete
bool HasColumn(std::string_view colName) const final
Checks if the dataset has a certain column.
Definition RTTreeDS.hxx:109
ROOT::RDF::RSampleInfo CreateSampleInfo(unsigned int, const std::unordered_map< std::string, ROOT::RDF::Experimental::RSample * > &sampleMap) const final
Definition RTTreeDS.cxx:368
std::unique_ptr< TTreeReader > fTreeReader
Definition RTTreeDS.hxx:68
std::unique_ptr< ROOT::Detail::RDF::RColumnReaderBase > GetColumnReaders(unsigned int, std::string_view, const std::type_info &) final
If the other GetColumnReaders overload returns an empty vector, this overload will be called instead.
Definition RTTreeDS.hxx:128
void Finalize() final
Convenience method called after concluding an event-loop.
Definition RTTreeDS.cxx:633
RTTreeDS(RTTreeDS &&)=delete
RTTreeDS(const RTTreeDS &)=delete
bool SetEntry(unsigned int, ULong64_t entry) final
Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot.
Definition RTTreeDS.cxx:545
void InitializeWithOpts(const std::set< std::string > &suppressErrorsForMissingBranches) final
Definition RTTreeDS.cxx:654
std::vector< std::string > fTopLevelBranchNames
Definition RTTreeDS.hxx:64
std::vector< std::pair< ULong64_t, ULong64_t > > GetTChainEntryRange(TChain &chain)
Definition RTTreeDS.cxx:589
Record_t GetColumnReadersImpl(std::string_view, const std::type_info &) final
type-erased vector of pointers to pointers to column values - one per slot
Definition RTTreeDS.hxx:121
std::vector< std::string > fBranchNamesWithDuplicates
Definition RTTreeDS.hxx:62
RTTreeDS(std::shared_ptr< TTree > tree)
Definition RTTreeDS.cxx:282
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges() final
Return ranges of entries to distribute to tasks.
Definition RTTreeDS.cxx:610
std::string AsString() final
Definition RTTreeDS.hxx:150
std::shared_ptr< TTree > fTree
Definition RTTreeDS.hxx:66
std::string GetTypeName(std::string_view colName) const final
Type of a column as a string, e.g.
Definition RTTreeDS.cxx:566
std::string GetTypeNameWithOpts(std::string_view colName, bool vector2RVec) const final
Definition RTTreeDS.cxx:553
Class representing a sample which is a grouping of trees and their fileglobs, and,...
Definition RSample.hxx:39
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
std::vector< void * > Record_t
This type represents a sample identifier, to be used in conjunction with RDataFrame features such as ...
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...
A chain is a collection of files containing TTree objects.
Definition TChain.h:33
Describe directory structure in memory.
Definition TDirectory.h:45
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
Definition TTreeReader.h:46
A TTree represents a columnar dataset.
Definition TTree.h:89
STL class.
STL class.
STL class.
Special implementation of ROOT::RRangeCast for TCollection, including a check that the cast target ty...
Definition TObject.h:395
ROOT::RDataFrame FromTTree(std::string_view treeName, std::string_view fileNameGlob)
Definition RTTreeDS.cxx:357
void ProcessMT(ROOT::RDF::RDataSource &ds, ROOT::Detail::RDF::RLoopManager &lm)
Definition RDFUtils.cxx:683
Namespace hosting functions and classes to retrieve tree information for internal use.
Definition RTTreeDS.hxx:48
Information about friend trees of a certain TTree or TChain object.