Logo ROOT  
Reference Guide
Loading...
Searching...
No Matches
RNTupleJoinTable.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleJoinTable.hxx
2/// \author Florine de Geus <florine.de.geus@cern.ch>
3/// \date 2024-04-02
4/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
5/// is welcome!
6
7/*************************************************************************
8 * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. *
9 * All rights reserved. *
10 * *
11 * For the licensing terms see $ROOTSYS/LICENSE. *
12 * For the list of contributors see $ROOTSYS/README/CREDITS. *
13 *************************************************************************/
14
15#ifndef ROOT_RNTupleJoinTable
16#define ROOT_RNTupleJoinTable
17
18#include <ROOT/RField.hxx>
19
20#include <memory>
21#include <string>
22#include <unordered_map>
23#include <vector>
24
25namespace ROOT {
26namespace Experimental {
27namespace Internal {
28// clang-format off
29/**
30\class ROOT::Experimental::Internal::RNTupleJoinTable
31\ingroup NTuple
32\brief Builds a join table on one or several fields of an RNTuple so it can be joined onto other RNTuples.
33*/
34// clang-format on
36public:
37 using JoinValue_t = std::uint64_t;
38 using PartitionKey_t = std::uint64_t;
40
41private:
42 // clang-format off
43 /**
44 \class ROOT::Experimental::Internal::RNTupleJoinTable::REntryMapping
45 \ingroup NTuple
46 \brief Provides a mapping from one or several join field values to an entry index.
47 */
48 // clang-format on
50 private:
51 //////////////////////////////////////////////////////////////////////////
52 /// Container for the combined hashes of join field values.
54 std::vector<JoinValue_t> fJoinFieldValues;
55
/// Construct a combined join value from the per-field values; the vector is copied into `fJoinFieldValues`.
56 RCombinedJoinFieldValue(const std::vector<JoinValue_t> &joinFieldValues) : fJoinFieldValues(joinFieldValues) {}
57
/// Two combined join values are equal when their `fJoinFieldValues` vectors compare equal, i.e. they hold the
/// same number of values and the same value at every position (`std::vector::operator==`).
58 inline bool operator==(const RCombinedJoinFieldValue &other) const
59 {
60 return other.fJoinFieldValues == fJoinFieldValues;
61 }
62 };
63
64 /////////////////////////////////////////////////////////////////////////////
65 /// Hash combining the individual join field value hashes from RCombinedJoinFieldValue. Uses the implementation
66 /// from `boost::hash_combine`. See
67 /// https://www.boost.org/doc/libs/1_87_0/libs/container_hash/doc/html/hash.html#notes_hash_combine for more
68 /// background. In particular, it mentions: "Several improvements of the 64 bit function have been subsequently
69 /// proposed, by [David Stafford](https://zimbry.blogspot.com/2011/09/better-bit-mixing-improving-on.html), [Pelle
70 /// Evensen](https://mostlymangling.blogspot.com/2019/12/stronger-better-morer-moremur-better.html), and [Jon
71 /// Maiga](http://jonkagstrom.com/mx3/mx3_rev2.html). We currently use Jon Maiga’s function."
72 ///
73 /// \note
74 /// \parblock
75 /// Copyright 2005-2014 Daniel James.
76 /// Copyright 2021, 2022 Peter Dimov.
77 /// Distributed under the Boost Software License, Version 1.0.
78 /// https://www.boost.org/LICENSE_1_0.txt
79 ///
80 /// Based on Peter Dimov's proposal
81 /// http://www.open-std.org/JTC1/SC22/WG21/docs/papers/2005/n1756.pdf
82 /// issue 6.18.
83 ///
84 /// Boost Software License - Version 1.0 - August 17th, 2003
85 ///
86 /// Permission is hereby granted, free of charge, to any person or organization
87 /// obtaining a copy of the software and accompanying documentation covered by
88 /// this license (the "Software") to use, reproduce, display, distribute,
89 /// execute, and transmit the Software, and to prepare derivative works of the
90 /// Software, and to permit third-parties to whom the Software is furnished to
91 /// do so, all subject to the following:
92 ///
93 /// The copyright notices in the Software and this entire statement, including
94 /// the above license grant, this restriction and the following disclaimer,
95 /// must be included in all copies of the Software, in whole or in part, and
96 /// all derivative works of the Software, unless such copies or derivative
97 /// works are solely in the form of machine-executable object code generated by
98 /// a source language processor.
99 ///
100 /// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
101 /// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
102 /// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
103 /// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
104 /// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
105 /// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
106 /// DEALINGS IN THE SOFTWARE.
107 /// \endparblock
/// Fold all values in `joinFieldVal.fJoinFieldValues` into a single hash.
///
/// \param[in] joinFieldVal The combined join field value to hash.
///
/// \return The combined hash; 0 when `fJoinFieldValues` is empty (the loop body never runs).
109 inline std::size_t operator()(const RCombinedJoinFieldValue &joinFieldVal) const
110 {
111 std::size_t seed = 0;
112 for (const auto &fieldVal : joinFieldVal.fJoinFieldValues) {
// The running seed is part of the input to mix(), so each step depends on all previous values.
// `fieldVal` is a 64-bit JoinValue_t; where std::size_t is narrower, the sum is truncated when passed to mix().
// NOTE(review): boost::hash_combine assigns the mixed value (`seed = mix(...)`), whereas this XORs it into
// the seed -- confirm the difference is intended.
113 seed ^= mix(seed + 0x9e3779b9 + fieldVal);
114 }
115 return seed;
116 }
117
/// Bit mixer applied to each intermediate hash value: alternating xor-shift and multiply rounds.
/// The variant is selected at compile time by the `R__B64` macro.
///
/// \param[in] init The value to mix (taken by value and modified locally).
///
/// \return The mixed value.
118 inline std::size_t mix(std::size_t init) const
119 {
// Variant with 32/32/28-bit shifts and a single multiplier used twice.
// NOTE(review): R__B64 is presumably defined by ROOT's configuration on 64-bit builds -- confirm.
120#ifdef R__B64
121 init ^= init >> 32;
122 init *= 0xe9846af9b1a615d;
123 init ^= init >> 32;
124 init *= 0xe9846af9b1a615d;
125 init ^= init >> 28;
// Fallback variant with 16/15/15-bit shifts and two distinct multipliers.
126#else
127 init ^= init >> 16;
128 init *= 0x21f0aaad;
129 init ^= init >> 15;
130 init *= 0x735a2d97;
131 init ^= init >> 15;
132#endif
133 return init;
134 }
135 };
136
137 /// The mapping itself. Maps field values (or combinations thereof in case the join key is composed of multiple
138 /// fields) to their respective entry numbers.
139 std::unordered_map<RCombinedJoinFieldValue, std::vector<ROOT::NTupleSize_t>, RCombinedJoinFieldValueHash>
141
142 /// Names of the join fields used for the mapping to their respective entry indexes.
143 std::vector<std::string> fJoinFieldNames;
144
145 /// The size (in bytes) for each join field, corresponding to `fJoinFieldNames`. This information is stored to be
146 /// able to properly cast incoming void pointers to the join field values in `GetEntryIndexes`.
147 std::vector<std::size_t> fJoinFieldValueSizes;
148
149 public:
150 //////////////////////////////////////////////////////////////////////////
151 /// \brief Get the entry indexes for this entry mapping.
152 const std::vector<ROOT::NTupleSize_t> *GetEntryIndexes(std::vector<void *> valuePtrs) const;
153
154 //////////////////////////////////////////////////////////////////////////
155 /// \brief Create a new entry mapping.
156 ///
157 /// \param[in] pageSource The page source of the RNTuple with the entries to map.
158 /// \param[in] joinFieldNames Names of the join fields to use in the mapping.
159 /// \param[in] entryOffset Offset to add to each entry index in the mapping. This can be used when the
160 /// RNTuple represented by the provided page source is part of a chain of RNTuples.
161 REntryMapping(ROOT::Internal::RPageSource &pageSource, const std::vector<std::string> &joinFieldNames,
162 ROOT::NTupleSize_t entryOffset = 0);
163 };
164 /// Names of the join fields used for the mapping to their respective entry indexes.
165 std::vector<std::string> fJoinFieldNames;
166
167 /// Partitions of one or multiple entry mappings.
168 std::unordered_map<PartitionKey_t, std::vector<std::unique_ptr<REntryMapping>>> fPartitions;
169
170 /////////////////////////////////////////////////////////////////////////////
171 /// \brief Create a new RNTupleJoinTable for the RNTuple represented by the provided page source.
172 ///
173 /// \param[in] joinFieldNames The names of the join fields to use for the join table. Only integral-type fields are
174 /// allowed.
// Declared in the private section; only copies the join field names, `fPartitions` is left default-constructed.
// NOTE(review): presumably invoked through the static Create() factory -- confirm in the implementation file.
175 RNTupleJoinTable(const std::vector<std::string> &joinFieldNames) : fJoinFieldNames(joinFieldNames) {}
176
177public:
178 RNTupleJoinTable(const RNTupleJoinTable &other) = delete;
182 ~RNTupleJoinTable() = default;
183
184 /////////////////////////////////////////////////////////////////////////////
185 /// \brief Create an RNTupleJoinTable from an existing RNTuple.
186 ///
187 /// \param[in] joinFieldNames The names of the join fields to use for the join table. Only integral-type fields are
188 /// allowed.
189 ///
190 /// \return A pointer to the newly-created join table.
191 static std::unique_ptr<RNTupleJoinTable> Create(const std::vector<std::string> &joinFieldNames);
192
193 /////////////////////////////////////////////////////////////////////////////
194 /// \brief Add an entry mapping to the join table.
195 ///
196 ///
197 /// \param[in] pageSource The page source of the RNTuple with the entries to map.
198 /// \param[in] partitionKey Which partition to add the mapping to. If not provided, it will be added to the default
199 /// partition.
200 /// \param[in] entryOffset Offset to add to each entry index in the mapping. This can be used when the
201 /// RNTuple represented by the provided page source is part of a chain of RNTuples.
202 ///
203 /// \return A reference to the updated join table.
205 ROOT::NTupleSize_t entryOffset = 0);
206
207 /////////////////////////////////////////////////////////////////////////////
208 /// \brief Get an entry index (if it exists) for the given join field value(s), from any partition.
209 ///
210 /// \param[in] valuePtrs A vector of pointers to the join field values to look up.
211 ///
212 /// \note If one or more corresponding entries exist for the given value(s), the first entry index found in the join
213 /// table is returned.
214 ///
215 /// \return An entry number that corresponds to `valuePtrs`. When there are no corresponding entries,
216 /// `kInvalidNTupleIndex` is returned.
217 ROOT::NTupleSize_t GetEntryIndex(const std::vector<void *> &valuePtrs) const;
218};
219} // namespace Internal
220} // namespace Experimental
221} // namespace ROOT
222
223#endif // ROOT_RNTupleJoinTable
REntryMapping(ROOT::Internal::RPageSource &pageSource, const std::vector< std::string > &joinFieldNames, ROOT::NTupleSize_t entryOffset=0)
Create a new entry mapping.
std::vector< std::size_t > fJoinFieldValueSizes
The size (in bytes) for each join field, corresponding to fJoinFieldNames.
std::vector< std::string > fJoinFieldNames
Names of the join fields used for the mapping to their respective entry indexes.
std::unordered_map< RCombinedJoinFieldValue, std::vector< ROOT::NTupleSize_t >, RCombinedJoinFieldValueHash > fMapping
The mapping itself.
const std::vector< ROOT::NTupleSize_t > * GetEntryIndexes(std::vector< void * > valuePtrs) const
Get the entry indexes for this entry mapping.
RNTupleJoinTable(RNTupleJoinTable &&other)=delete
RNTupleJoinTable & operator=(RNTupleJoinTable &&other)=delete
RNTupleJoinTable & operator=(const RNTupleJoinTable &other)=delete
static std::unique_ptr< RNTupleJoinTable > Create(const std::vector< std::string > &joinFieldNames)
Create an RNTupleJoinTable from an existing RNTuple.
std::vector< std::string > fJoinFieldNames
Names of the join fields used for the mapping to their respective entry indexes.
RNTupleJoinTable(const RNTupleJoinTable &other)=delete
ROOT::NTupleSize_t GetEntryIndex(const std::vector< void * > &valuePtrs) const
Get an entry index (if it exists) for the given join field value(s), from any partition.
RNTupleJoinTable & Add(ROOT::Internal::RPageSource &pageSource, PartitionKey_t partitionKey=kDefaultPartitionKey, ROOT::NTupleSize_t entryOffset=0)
Add an entry mapping to the join table.
RNTupleJoinTable(const std::vector< std::string > &joinFieldNames)
Create a new RNTupleJoinTable for the RNTuple represented by the provided page source.
static constexpr PartitionKey_t kDefaultPartitionKey
std::unordered_map< PartitionKey_t, std::vector< std::unique_ptr< REntryMapping > > > fPartitions
Partitions of one or multiple entry mappings.
Abstract interface to read data from an ntuple.
Namespace for ROOT features in testing.
Definition TROOT.h:100
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
Hash combining the individual join field value hashes from RCombinedJoinFieldValue.
std::size_t operator()(const RCombinedJoinFieldValue &joinFieldVal) const