Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleIndex.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleIndex.hxx
2/// \ingroup NTuple ROOT7
3/// \author Florine de Geus <florine.de.geus@cern.ch>
4/// \date 2024-04-02
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RNTupleIndex
17#define ROOT7_RNTupleIndex
18
19#include <ROOT/RField.hxx>
20
21#include <memory>
22#include <string>
23#include <unordered_map>
24#include <vector>
25
26namespace ROOT {
27namespace Experimental {
28namespace Internal {
29// clang-format off
30/**
31\class ROOT::Experimental::Internal::RNTupleIndex
32\ingroup NTuple
33\brief Builds an index on one or several fields of an RNTuple so it can be joined onto other RNTuples.
34*/
35// clang-format on
37public:
38 using NTupleIndexValue_t = std::uint64_t;
39
40private:
41 /////////////////////////////////////////////////////////////////////////////
42 /// Container for the hashes of the indexed fields.
44 public:
45 std::vector<NTupleIndexValue_t> fFieldValues;
46 RIndexValue(const std::vector<NTupleIndexValue_t> &fieldValues)
47 {
48 fFieldValues.reserve(fieldValues.size());
49 fFieldValues = fieldValues;
50 }
51 inline bool operator==(const RIndexValue &other) const { return other.fFieldValues == fFieldValues; }
52 };
53
54 /////////////////////////////////////////////////////////////////////////////
55 /// Hash combining the individual index value hashes from RIndexValue. Uses the implementation from
56 /// `boost::hash_combine` (see
57 /// https://www.boost.org/doc/libs/1_55_0/doc/html/hash/reference.html#boost.hash_combine).
59 inline std::size_t operator()(const RIndexValue &indexValue) const
60 {
61 std::size_t combinedHash = 0;
62 for (const auto &fieldVal : indexValue.fFieldValues) {
63 combinedHash ^= fieldVal + 0x9e3779b9 + (fieldVal << 6) + (fieldVal >> 2);
64 }
65 return combinedHash;
66 }
67 };
68
69 /// The index itself. Maps field values (or combinations thereof in case the index is defined for multiple fields) to
70 /// their respective entry numbers.
71 std::unordered_map<RIndexValue, std::vector<NTupleSize_t>, RIndexValueHash> fIndex;
72
73 /// The page source belonging to the RNTuple for which to build the index.
74 std::unique_ptr<RPageSource> fPageSource;
75
76 /// The fields for which the index is built. Used to compute the hashes for each entry value.
77 std::vector<std::unique_ptr<RFieldBase>> fIndexFields;
78
79 /// Only built indexes can be queried.
80 bool fIsBuilt = false;
81
82 /////////////////////////////////////////////////////////////////////////////
83 /// \brief Create a new RNTupleIndex for the RNTuple represented by the provided page source.
84 ///
85 /// \param[in] fieldNames The names of the fields to index. Only integral-type fields can be specified as index
86 /// fields.
87 /// \param[in] pageSource The page source.
88 RNTupleIndex(const std::vector<std::string> &fieldNames, const RPageSource &pageSource);
89
90 /////////////////////////////////////////////////////////////////////////////
91 /// \brief Ensure the RNTupleIndex has been built.
92 ///
93 /// \throws RException If the index has not been built, and can therefore not be used yet.
94 void EnsureBuilt() const;
95
96public:
97 RNTupleIndex(const RNTupleIndex &other) = delete;
98 RNTupleIndex &operator=(const RNTupleIndex &other) = delete;
99 RNTupleIndex(RNTupleIndex &&other) = delete;
101 ~RNTupleIndex() = default;
102
103 /////////////////////////////////////////////////////////////////////////////
104 /// \brief Create an RNTupleIndex from an existing RNTuple.
105 ///
106 /// \param[in] fieldNames The names of the fields to index. Only integral-type fields can be specified as index
107 /// fields.
108 /// \param[in] pageSource The page source.
109 /// \param[in] deferBuild When set to `true`, an empty index will be created. A call to RNTupleIndex::Build is
110 /// required before the index can actually be used.
111 ///
112 /// \return A pointer to the newly-created index.
113 static std::unique_ptr<RNTupleIndex>
114 Create(const std::vector<std::string> &fieldNames, const RPageSource &pageSource, bool deferBuild = false);
115
116 /////////////////////////////////////////////////////////////////////////////
117 /// \brief Build the index.
118 ///
119 /// Only a built index can be queried (with RNTupleIndex::GetFirstEntryNumber or RNTupleIndex::GetAllEntryNumbers).
120 void Build();
121
122 /////////////////////////////////////////////////////////////////////////////
123 /// \brief Get the number of indexed values.
124 ///
125 /// \return The number of indexed values.
126 ///
127 /// \note This does not have to correspond to the number of entries in the original RNTuple. If the original RNTuple
128 /// contains duplicate index values, they are counted as one.
129 std::size_t GetSize() const
130 {
131 EnsureBuilt();
132 return fIndex.size();
133 }
134
135 /////////////////////////////////////////////////////////////////////////////
136 /// \brief Whether the index has been built (and is therefore ready to be used).
137 ///
138 /// \return `true` if the index has been built.
139 ///
140 /// Only built indexes can be queried.
141 bool IsBuilt() const { return fIsBuilt; }
142
143 /////////////////////////////////////////////////////////////////////////////
144 /// \brief Get the first entry number containing the given index value.
145 ///
146 /// \param[in] valuePtrs A vector of pointers to the index values to look up.
147 ///
148 /// \return The first entry number that corresponds to `valuePtrs`. When no such entry exists, `kInvalidNTupleIndex`
149 /// is returned.
150 ///
151 /// Note that in case multiple entries corresponding to the provided index value exist, the first occurrence is
152 /// returned. Use RNTupleIndex::GetAllEntryNumbers to get all entries.
153 NTupleSize_t GetFirstEntryNumber(const std::vector<void *> &valuePtrs) const;
154
155 /////////////////////////////////////////////////////////////////////////////
156 /// \brief Get the entry number containing the given index value.
157 ///
158 /// \sa GetFirstEntryNumber(std::vector<void *> valuePtrs)
159 template <typename... Ts>
161 {
162 if (sizeof...(Ts) != fIndexFields.size())
163 throw RException(R__FAIL("number of values must match number of indexed fields"));
164
165 std::vector<void *> valuePtrs;
166 valuePtrs.reserve(sizeof...(Ts));
167 ([&] { valuePtrs.push_back(&values); }(), ...);
168
169 return GetFirstEntryNumber(valuePtrs);
170 }
171
172 /////////////////////////////////////////////////////////////////////////////
173 /// \brief Get all entry numbers for the given index.
174 ///
175 /// \param[in] valuePtrs A vector of pointers to the index values to look up.
176 ///
177 /// \return The entry numbers that correspond to `valuePtrs`. When no such entry exists, an empty vector is
178 /// returned.
179 const std::vector<NTupleSize_t> *GetAllEntryNumbers(const std::vector<void *> &valuePtrs) const;
180
181 /////////////////////////////////////////////////////////////////////////////
182 /// \brief Get all entry numbers for the given index.
183 ///
184 /// \sa GetAllEntryNumbers(std::vector<void *> valuePtrs)
185 template <typename... Ts>
186 const std::vector<NTupleSize_t> *GetAllEntryNumbers(Ts... values) const
187 {
188 if (sizeof...(Ts) != fIndexFields.size())
189 throw RException(R__FAIL("number of values must match number of indexed fields"));
190
191 std::vector<void *> valuePtrs;
192 valuePtrs.reserve(sizeof...(Ts));
193 ([&] { valuePtrs.push_back(&values); }(), ...);
194
195 return GetAllEntryNumbers(valuePtrs);
196 }
197};
198} // namespace Internal
199} // namespace Experimental
200} // namespace ROOT
201
202#endif // ROOT7_RNTupleIndex
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:290
Container for the hashes of the indexed fields.
RIndexValue(const std::vector< NTupleIndexValue_t > &fieldValues)
bool operator==(const RIndexValue &other) const
Builds an index on one or several fields of an RNTuple so it can be joined onto other RNTuples.
std::unique_ptr< RPageSource > fPageSource
The page source belonging to the RNTuple for which to build the index.
RNTupleIndex(const RNTupleIndex &other)=delete
const std::vector< NTupleSize_t > * GetAllEntryNumbers(Ts... values) const
Get all entry numbers for the given index.
RNTupleIndex & operator=(RNTupleIndex &&other)=delete
NTupleSize_t GetFirstEntryNumber(Ts... values) const
Get the entry number containing the given index value.
static std::unique_ptr< RNTupleIndex > Create(const std::vector< std::string > &fieldNames, const RPageSource &pageSource, bool deferBuild=false)
Create an RNTupleIndex from an existing RNTuple.
const std::vector< NTupleSize_t > * GetAllEntryNumbers(const std::vector< void * > &valuePtrs) const
Get all entry numbers for the given index.
NTupleSize_t GetFirstEntryNumber(const std::vector< void * > &valuePtrs) const
Get the first entry number containing the given index value.
void EnsureBuilt() const
Ensure the RNTupleIndex has been built.
std::size_t GetSize() const
Get the number of indexed values.
bool IsBuilt() const
Whether the index has been built (and is therefore ready to be used).
bool fIsBuilt
Only built indexes can be queried.
RNTupleIndex & operator=(const RNTupleIndex &other)=delete
std::vector< std::unique_ptr< RFieldBase > > fIndexFields
The fields for which the index is built. Used to compute the hashes for each entry value.
RNTupleIndex(RNTupleIndex &&other)=delete
std::unordered_map< RIndexValue, std::vector< NTupleSize_t >, RIndexValueHash > fIndex
The index itself.
Abstract interface to read data from an ntuple.
Base class for all ROOT issued exceptions.
Definition RError.hxx:78
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Hash combining the individual index value hashes from RIndexValue.
std::size_t operator()(const RIndexValue &indexValue) const