Logo ROOT  
Reference Guide
Loading...
Searching...
No Matches
RNTupleJoinTable.cxx
Go to the documentation of this file.
1/// \file RNTupleJoinTable.cxx
2/// \author Florine de Geus <florine.de.geus@cern.ch>
3/// \date 2024-04-02
4/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
5/// is welcome!
6
7/*************************************************************************
8 * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. *
9 * All rights reserved. *
10 * *
11 * For the licensing terms see $ROOTSYS/LICENSE. *
12 * For the list of contributors see $ROOTSYS/README/CREDITS. *
13 *************************************************************************/
14
16
17namespace {
18ROOT::Experimental::Internal::RNTupleJoinTable::JoinValue_t CastValuePtr(void *valuePtr, std::size_t fieldValueSize)
19{
21
22 switch (fieldValueSize) {
23 case 1: value = *reinterpret_cast<std::uint8_t *>(valuePtr); break;
24 case 2: value = *reinterpret_cast<std::uint16_t *>(valuePtr); break;
25 case 4: value = *reinterpret_cast<std::uint32_t *>(valuePtr); break;
26 case 8: value = *reinterpret_cast<std::uint64_t *>(valuePtr); break;
27 default: throw ROOT::RException(R__FAIL("value size not supported"));
28 }
29
30 return value;
31}
32} // anonymous namespace
33
35 ROOT::Internal::RPageSource &pageSource, const std::vector<std::string> &joinFieldNames,
36 ROOT::NTupleSize_t entryOffset)
37 : fJoinFieldNames(joinFieldNames)
38{
39 static const std::unordered_set<std::string> allowedTypes = {"std::int8_t", "std::int16_t", "std::int32_t",
40 "std::int64_t", "std::uint8_t", "std::uint16_t",
41 "std::uint32_t", "std::uint64_t"};
42
43 pageSource.Attach();
44 auto desc = pageSource.GetSharedDescriptorGuard();
45
46 std::vector<std::unique_ptr<ROOT::RFieldBase>> fields;
47 std::vector<ROOT::RFieldBase::RValue> fieldValues;
48 fieldValues.reserve(fJoinFieldNames.size());
49
50 for (const auto &fieldName : fJoinFieldNames) {
51 auto fieldId = desc->FindFieldId(fieldName);
52 if (fieldId == ROOT::kInvalidDescriptorId)
53 throw RException(R__FAIL("could not find join field \"" + std::string(fieldName) + "\" in RNTuple \"" +
54 pageSource.GetNTupleName() + "\""));
55
56 const auto &fieldDesc = desc->GetFieldDescriptor(fieldId);
57
58 if (allowedTypes.find(fieldDesc.GetTypeName()) == allowedTypes.end()) {
59 throw RException(R__FAIL("cannot use field \"" + fieldName + "\" with type \"" + fieldDesc.GetTypeName() +
60 "\" in join table: only integral types are allowed"));
61 }
62
63 auto field = fieldDesc.CreateField(desc.GetRef());
65
66 fieldValues.emplace_back(field->CreateValue());
67 fJoinFieldValueSizes.emplace_back(field->GetValueSize());
68 fields.emplace_back(std::move(field));
69 }
70
71 std::vector<JoinValue_t> castJoinValues;
72 castJoinValues.reserve(fJoinFieldNames.size());
73
74 for (unsigned i = 0; i < pageSource.GetNEntries(); ++i) {
75 castJoinValues.clear();
76
77 for (auto &fieldValue : fieldValues) {
78 // TODO(fdegeus): use bulk reading
79 fieldValue.Read(i);
80
81 auto valuePtr = fieldValue.GetPtr<void>();
82 castJoinValues.push_back(CastValuePtr(valuePtr.get(), fieldValue.GetField().GetValueSize()));
83 }
84
85 fMapping[RCombinedJoinFieldValue(castJoinValues)].push_back(i + entryOffset);
86 }
87}
88
89const std::vector<ROOT::NTupleSize_t> *
90ROOT ::Experimental::Internal::RNTupleJoinTable::REntryMapping::GetEntryIndexes(std::vector<void *> valuePtrs) const
91{
92 if (valuePtrs.size() != fJoinFieldNames.size())
93 throw RException(R__FAIL("number of value pointers must match number of join fields"));
94
95 std::vector<JoinValue_t> castJoinValues;
96 castJoinValues.reserve(valuePtrs.size());
97
98 for (unsigned i = 0; i < valuePtrs.size(); ++i) {
99 castJoinValues.push_back(CastValuePtr(valuePtrs[i], fJoinFieldValueSizes[i]));
100 }
101
102 if (const auto &entries = fMapping.find(RCombinedJoinFieldValue(castJoinValues)); entries != fMapping.end()) {
103 return &entries->second;
104 }
105
106 return nullptr;
107}
108
109//------------------------------------------------------------------------------
110
111std::unique_ptr<ROOT::Experimental::Internal::RNTupleJoinTable>
112ROOT::Experimental::Internal::RNTupleJoinTable::Create(const std::vector<std::string> &fieldNames)
113{
114 return std::unique_ptr<RNTupleJoinTable>(new RNTupleJoinTable(fieldNames));
115}
116
119 PartitionKey_t partitionKey, ROOT::NTupleSize_t entryOffset)
120{
121 auto joinMapping = std::unique_ptr<REntryMapping>(new REntryMapping(pageSource, fJoinFieldNames, entryOffset));
122 fPartitions[partitionKey].emplace_back(std::move(joinMapping));
123
124 return *this;
125}
126
128ROOT::Experimental::Internal::RNTupleJoinTable::GetEntryIndex(const std::vector<void *> &valuePtrs) const
129{
130 for (const auto &partition : fPartitions) {
131 for (const auto &joinMapping : partition.second) {
132 auto entriesForMapping = joinMapping->GetEntryIndexes(valuePtrs);
133 if (entriesForMapping) {
134 return (*entriesForMapping)[0];
135 }
136 }
137 }
138
139 return kInvalidNTupleIndex;
140}
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:299
Provides a mapping from one or several join field values to an entry index.
REntryMapping(ROOT::Internal::RPageSource &pageSource, const std::vector< std::string > &joinFieldNames, ROOT::NTupleSize_t entryOffset=0)
Create a new entry mapping.
std::vector< std::size_t > fJoinFieldValueSizes
The size (in bytes) for each join field, corresponding to fJoinFieldNames.
std::vector< std::string > fJoinFieldNames
Names of the join fields used for the mapping to their respective entry indexes.
std::unordered_map< RCombinedJoinFieldValue, std::vector< ROOT::NTupleSize_t >, RCombinedJoinFieldValueHash > fMapping
The mapping itself.
Builds a join table on one or several fields of an RNTuple so it can be joined onto other RNTuples.
static std::unique_ptr< RNTupleJoinTable > Create(const std::vector< std::string > &joinFieldNames)
Create an RNTupleJoinTable from an existing RNTuple.
std::vector< std::string > fJoinFieldNames
Names of the join fields used for the mapping to their respective entry indexes.
ROOT::NTupleSize_t GetEntryIndex(const std::vector< void * > &valuePtrs) const
Get an entry index (if it exists) for the given join field value(s), from any partition.
RNTupleJoinTable & Add(ROOT::Internal::RPageSource &pageSource, PartitionKey_t partitionKey=kDefaultPartitionKey, ROOT::NTupleSize_t entryOffset=0)
Add an entry mapping to the join table.
RNTupleJoinTable(const std::vector< std::string > &joinFieldNames)
Create a new RNTupleJoinTable for the RNTuple represented by the provided page source.
std::unordered_map< PartitionKey_t, std::vector< std::unique_ptr< REntryMapping > > > fPartitions
Partitions of one or multiple entry mappings.
Abstract interface to read data from an ntuple.
void Attach(ROOT::Internal::RNTupleSerializer::EDescriptorDeserializeMode mode=ROOT::Internal::RNTupleSerializer::EDescriptorDeserializeMode::kForReading)
Open the physical storage container and deserialize header and footer.
const RSharedDescriptorGuard GetSharedDescriptorGuard() const
Takes the read lock for the descriptor.
ROOT::NTupleSize_t GetNEntries()
const std::string & GetNTupleName() const
Returns the NTuple name.
Base class for all ROOT issued exceptions.
Definition RError.hxx:78
for(Int_t i=0;i< n;i++)
Definition legend1.C:18
void CallConnectPageSourceOnField(RFieldBase &, ROOT::Internal::RPageSource &)
constexpr NTupleSize_t kInvalidNTupleIndex
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr DescriptorId_t kInvalidDescriptorId