Logo ROOT  
Reference Guide
Loading...
Searching...
No Matches
RColumnElement.cxx
Go to the documentation of this file.
1/// \file RColumnElement.cxx
2/// \author Jakob Blomer <jblomer@cern.ch>
3/// \date 2019-08-11
4
5/*************************************************************************
6 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
7 * All rights reserved. *
8 * *
9 * For the licensing terms see $ROOTSYS/LICENSE. *
10 * For the list of contributors see $ROOTSYS/README/CREDITS. *
11 *************************************************************************/
12
13#include "ROOT/RColumn.hxx"
15
16#include "RColumnElement.hxx"
17
18#include <algorithm>
19#include <bitset>
20#include <cassert>
21#include <cstdint>
22#include <memory>
23#include <utility>
24
27
29{
30 switch (type) {
31 case ENTupleColumnType::kIndex64: return std::make_pair(64, 64);
32 case ENTupleColumnType::kIndex32: return std::make_pair(32, 32);
33 case ENTupleColumnType::kSwitch: return std::make_pair(96, 96);
34 case ENTupleColumnType::kByte: return std::make_pair(8, 8);
35 case ENTupleColumnType::kChar: return std::make_pair(8, 8);
36 case ENTupleColumnType::kBit: return std::make_pair(1, 1);
37 case ENTupleColumnType::kReal64: return std::make_pair(64, 64);
38 case ENTupleColumnType::kReal32: return std::make_pair(32, 32);
39 case ENTupleColumnType::kReal16: return std::make_pair(16, 16);
40 case ENTupleColumnType::kInt64: return std::make_pair(64, 64);
41 case ENTupleColumnType::kUInt64: return std::make_pair(64, 64);
42 case ENTupleColumnType::kInt32: return std::make_pair(32, 32);
43 case ENTupleColumnType::kUInt32: return std::make_pair(32, 32);
44 case ENTupleColumnType::kInt16: return std::make_pair(16, 16);
45 case ENTupleColumnType::kUInt16: return std::make_pair(16, 16);
46 case ENTupleColumnType::kInt8: return std::make_pair(8, 8);
47 case ENTupleColumnType::kUInt8: return std::make_pair(8, 8);
48 case ENTupleColumnType::kSplitIndex64: return std::make_pair(64, 64);
49 case ENTupleColumnType::kSplitIndex32: return std::make_pair(32, 32);
50 case ENTupleColumnType::kSplitReal64: return std::make_pair(64, 64);
51 case ENTupleColumnType::kSplitReal32: return std::make_pair(32, 32);
52 case ENTupleColumnType::kSplitInt64: return std::make_pair(64, 64);
53 case ENTupleColumnType::kSplitUInt64: return std::make_pair(64, 64);
54 case ENTupleColumnType::kSplitInt32: return std::make_pair(32, 32);
55 case ENTupleColumnType::kSplitUInt32: return std::make_pair(32, 32);
56 case ENTupleColumnType::kSplitInt16: return std::make_pair(16, 16);
57 case ENTupleColumnType::kSplitUInt16: return std::make_pair(16, 16);
58 case ENTupleColumnType::kReal32Trunc: return std::make_pair(10, 31);
59 case ENTupleColumnType::kReal32Quant: return std::make_pair(1, 32);
60 default:
62 return std::make_pair(32, 32);
63 R__ASSERT(false);
64 }
65 // never here
66 return std::make_pair(0, 0);
67}
68
70{
71 switch (type) {
72 case ENTupleColumnType::kIndex64: return "Index64";
73 case ENTupleColumnType::kIndex32: return "Index32";
74 case ENTupleColumnType::kSwitch: return "Switch";
75 case ENTupleColumnType::kByte: return "Byte";
76 case ENTupleColumnType::kChar: return "Char";
77 case ENTupleColumnType::kBit: return "Bit";
78 case ENTupleColumnType::kReal64: return "Real64";
79 case ENTupleColumnType::kReal32: return "Real32";
80 case ENTupleColumnType::kReal16: return "Real16";
81 case ENTupleColumnType::kInt64: return "Int64";
82 case ENTupleColumnType::kUInt64: return "UInt64";
83 case ENTupleColumnType::kInt32: return "Int32";
84 case ENTupleColumnType::kUInt32: return "UInt32";
85 case ENTupleColumnType::kInt16: return "Int16";
86 case ENTupleColumnType::kUInt16: return "UInt16";
87 case ENTupleColumnType::kInt8: return "Int8";
88 case ENTupleColumnType::kUInt8: return "UInt8";
89 case ENTupleColumnType::kSplitIndex64: return "SplitIndex64";
90 case ENTupleColumnType::kSplitIndex32: return "SplitIndex32";
91 case ENTupleColumnType::kSplitReal64: return "SplitReal64";
92 case ENTupleColumnType::kSplitReal32: return "SplitReal32";
93 case ENTupleColumnType::kSplitInt64: return "SplitInt64";
94 case ENTupleColumnType::kSplitUInt64: return "SplitUInt64";
95 case ENTupleColumnType::kSplitInt32: return "SplitInt32";
96 case ENTupleColumnType::kSplitUInt32: return "SplitUInt32";
97 case ENTupleColumnType::kSplitInt16: return "SplitInt16";
98 case ENTupleColumnType::kSplitUInt16: return "SplitUInt16";
99 case ENTupleColumnType::kReal32Trunc: return "Real32Trunc";
100 case ENTupleColumnType::kReal32Quant: return "Real32Quant";
101 default:
103 return "TestFutureType";
104 return "UNKNOWN";
105 }
106}
107
108template <>
109std::unique_ptr<ROOT::Internal::RColumnElementBase>
111{
112 //clang-format off
113 switch (onDiskType) {
114 case ENTupleColumnType::kIndex64: return std::make_unique<RColumnElement<RColumnIndex, ENTupleColumnType::kIndex64>>();
115 case ENTupleColumnType::kIndex32: return std::make_unique<RColumnElement<RColumnIndex, ENTupleColumnType::kIndex32>>();
116 case ENTupleColumnType::kSwitch: return std::make_unique<RColumnElement<RColumnSwitch, ENTupleColumnType::kSwitch>>();
117 case ENTupleColumnType::kByte: return std::make_unique<RColumnElement<std::byte, ENTupleColumnType::kByte>>();
118 case ENTupleColumnType::kChar: return std::make_unique<RColumnElement<char, ENTupleColumnType::kChar>>();
119 case ENTupleColumnType::kBit: return std::make_unique<RColumnElement<bool, ENTupleColumnType::kBit>>();
120 case ENTupleColumnType::kReal64: return std::make_unique<RColumnElement<double, ENTupleColumnType::kReal64>>();
121 case ENTupleColumnType::kReal32: return std::make_unique<RColumnElement<float, ENTupleColumnType::kReal32>>();
122 // TODO: Change to std::float16_t in-memory type once available (from C++23).
123 case ENTupleColumnType::kReal16: return std::make_unique<RColumnElement<float, ENTupleColumnType::kReal16>>();
124 case ENTupleColumnType::kInt64: return std::make_unique<RColumnElement<std::int64_t, ENTupleColumnType::kInt64>>();
125 case ENTupleColumnType::kUInt64: return std::make_unique<RColumnElement<std::uint64_t, ENTupleColumnType::kUInt64>>();
126 case ENTupleColumnType::kInt32: return std::make_unique<RColumnElement<std::int32_t, ENTupleColumnType::kInt32>>();
127 case ENTupleColumnType::kUInt32: return std::make_unique<RColumnElement<std::uint32_t, ENTupleColumnType::kUInt32>>();
128 case ENTupleColumnType::kInt16: return std::make_unique<RColumnElement<std::int16_t, ENTupleColumnType::kInt16>>();
129 case ENTupleColumnType::kUInt16: return std::make_unique<RColumnElement<std::uint16_t, ENTupleColumnType::kUInt16>>();
130 case ENTupleColumnType::kInt8: return std::make_unique<RColumnElement<std::int8_t, ENTupleColumnType::kInt8>>();
131 case ENTupleColumnType::kUInt8: return std::make_unique<RColumnElement<std::uint8_t, ENTupleColumnType::kUInt8>>();
132 case ENTupleColumnType::kSplitIndex64: return std::make_unique<RColumnElement<RColumnIndex, ENTupleColumnType::kSplitIndex64>>();
133 case ENTupleColumnType::kSplitIndex32: return std::make_unique<RColumnElement<RColumnIndex, ENTupleColumnType::kSplitIndex32>>();
134 case ENTupleColumnType::kSplitReal64: return std::make_unique<RColumnElement<double, ENTupleColumnType::kSplitReal64>>();
135 case ENTupleColumnType::kSplitReal32: return std::make_unique<RColumnElement<float, ENTupleColumnType::kSplitReal32>>();
136 case ENTupleColumnType::kSplitInt64: return std::make_unique<RColumnElement<std::int64_t, ENTupleColumnType::kSplitInt64>>();
137 case ENTupleColumnType::kSplitUInt64: return std::make_unique<RColumnElement<std::uint64_t, ENTupleColumnType::kSplitUInt64>>();
138 case ENTupleColumnType::kSplitInt32: return std::make_unique<RColumnElement<std::int32_t, ENTupleColumnType::kSplitInt32>>();
139 case ENTupleColumnType::kSplitUInt32: return std::make_unique<RColumnElement<std::uint32_t, ENTupleColumnType::kSplitUInt32>>();
140 case ENTupleColumnType::kSplitInt16: return std::make_unique<RColumnElement<std::int16_t, ENTupleColumnType::kSplitInt16>>();
141 case ENTupleColumnType::kSplitUInt16: return std::make_unique<RColumnElement<std::uint16_t, ENTupleColumnType::kSplitUInt16>>();
142 case ENTupleColumnType::kReal32Trunc: return std::make_unique<RColumnElement<float, ENTupleColumnType::kReal32Trunc>>();
143 case ENTupleColumnType::kReal32Quant: return std::make_unique<RColumnElement<float, ENTupleColumnType::kReal32Quant>>();
144 default:
145 if (onDiskType == kTestFutureColumnType)
146 return std::make_unique<RColumnElement<Internal::RTestFutureColumn, kTestFutureColumnType>>();
147 R__ASSERT(false);
148 }
149 //clang-format on
150 // never here
151 return nullptr;
152}
153
154std::unique_ptr<ROOT::Internal::RColumnElementBase>
155ROOT::Internal::GenerateColumnElement(std::type_index inMemoryType, ENTupleColumnType onDiskType)
156{
157 if (inMemoryType == std::type_index(typeid(char))) {
158 return GenerateColumnElementInternal<char>(onDiskType);
159 } else if (inMemoryType == std::type_index(typeid(bool))) {
160 return GenerateColumnElementInternal<bool>(onDiskType);
161 } else if (inMemoryType == std::type_index(typeid(std::byte))) {
162 return GenerateColumnElementInternal<std::byte>(onDiskType);
163 } else if (inMemoryType == std::type_index(typeid(std::uint8_t))) {
164 return GenerateColumnElementInternal<std::uint8_t>(onDiskType);
165 } else if (inMemoryType == std::type_index(typeid(std::uint16_t))) {
166 return GenerateColumnElementInternal<std::uint16_t>(onDiskType);
167 } else if (inMemoryType == std::type_index(typeid(std::uint32_t))) {
168 return GenerateColumnElementInternal<std::uint32_t>(onDiskType);
169 } else if (inMemoryType == std::type_index(typeid(std::uint64_t))) {
170 return GenerateColumnElementInternal<std::uint64_t>(onDiskType);
171 } else if (inMemoryType == std::type_index(typeid(std::int8_t))) {
172 return GenerateColumnElementInternal<std::int8_t>(onDiskType);
173 } else if (inMemoryType == std::type_index(typeid(std::int16_t))) {
174 return GenerateColumnElementInternal<std::int16_t>(onDiskType);
175 } else if (inMemoryType == std::type_index(typeid(std::int32_t))) {
176 return GenerateColumnElementInternal<std::int32_t>(onDiskType);
177 } else if (inMemoryType == std::type_index(typeid(std::int64_t))) {
178 return GenerateColumnElementInternal<std::int64_t>(onDiskType);
179 } else if (inMemoryType == std::type_index(typeid(float))) {
180 return GenerateColumnElementInternal<float>(onDiskType);
181 } else if (inMemoryType == std::type_index(typeid(double))) {
182 return GenerateColumnElementInternal<double>(onDiskType);
183 } else if (inMemoryType == std::type_index(typeid(RColumnIndex))) {
184 return GenerateColumnElementInternal<RColumnIndex>(onDiskType);
185 } else if (inMemoryType == std::type_index(typeid(RColumnSwitch))) {
186 return GenerateColumnElementInternal<RColumnSwitch>(onDiskType);
187 } else if (inMemoryType == std::type_index(typeid(RTestFutureColumn))) {
188 return GenerateColumnElementInternal<RTestFutureColumn>(onDiskType);
189 } else {
190 R__ASSERT(!"Invalid memory type in GenerateColumnElement");
191 }
192 // never here
193 return nullptr;
194}
195
196std::unique_ptr<ROOT::Internal::RColumnElementBase>
201
202void ROOT::Internal::BitPacking::PackBits(void *dst, const void *src, std::size_t count, std::size_t sizeofSrc,
203 std::size_t nDstBits)
204{
205 assert(sizeofSrc <= sizeof(Word_t));
206 assert(0 < nDstBits && nDstBits <= sizeofSrc * 8);
207
208 const unsigned char *srcArray = reinterpret_cast<const unsigned char *>(src);
209 Word_t *dstArray = reinterpret_cast<Word_t *>(dst);
210 Word_t accum = 0;
211 std::size_t bitsUsed = 0;
212 std::size_t dstIdx = 0;
213 for (std::size_t i = 0; i < count; ++i) {
214 Word_t packedWord = 0;
215 memcpy(&packedWord, srcArray + i * sizeofSrc, sizeofSrc);
216 // truncate the LSB of the item
217 packedWord >>= sizeofSrc * 8 - nDstBits;
218
219 const std::size_t bitsRem = kBitsPerWord - bitsUsed;
220 if (bitsRem >= nDstBits) {
221 // append the entire item to the accumulator
222 accum |= (packedWord << bitsUsed);
223 bitsUsed += nDstBits;
224 } else {
225 // chop up the item into its `bitsRem` LSB bits + `nDstBits - bitsRem` MSB bits.
226 // The LSB bits will be saved in the current word and the MSB will be saved in the next one.
227 if (bitsRem > 0) {
228 Word_t packedWordLsb = packedWord;
229 packedWordLsb <<= (kBitsPerWord - bitsRem);
230 packedWordLsb >>= (kBitsPerWord - bitsRem);
231 accum |= (packedWordLsb << bitsUsed);
232 }
233
234 memcpy(&dstArray[dstIdx++], &accum, sizeof(accum));
235 accum = 0;
236 bitsUsed = 0;
237
238 if (bitsRem > 0) {
239 Word_t packedWordMsb = packedWord;
240 packedWordMsb >>= bitsRem;
241 accum |= packedWordMsb;
242 bitsUsed += nDstBits - bitsRem;
243 } else {
244 // we realigned to a word boundary: append the entire item
245 accum = packedWord;
246 bitsUsed += nDstBits;
247 }
248 }
249 }
250
251 if (bitsUsed)
252 memcpy(&dstArray[dstIdx++], &accum, (bitsUsed + 7) / 8);
253
254 [[maybe_unused]] auto expDstCount = (count * nDstBits + kBitsPerWord - 1) / kBitsPerWord;
255 assert(dstIdx == expDstCount);
256}
257
258void ROOT::Internal::BitPacking::UnpackBits(void *dst, const void *src, std::size_t count, std::size_t sizeofDst,
259 std::size_t nSrcBits)
260{
261 assert(sizeofDst <= sizeof(Word_t));
262 assert(0 < nSrcBits && nSrcBits <= sizeofDst * 8);
263
264 unsigned char *dstArray = reinterpret_cast<unsigned char *>(dst);
265 const Word_t *srcArray = reinterpret_cast<const Word_t *>(src);
266 const auto nWordsToLoad = (count * nSrcBits + kBitsPerWord - 1) / kBitsPerWord;
267
268 // bit offset of the next packed item inside the currently loaded word
269 int offInWord = 0;
270 std::size_t dstIdx = 0;
271 Word_t prevWordLsb = 0;
272 std::size_t remBytesToLoad = (count * nSrcBits + 7) / 8;
273 for (std::size_t i = 0; i < nWordsToLoad; ++i) {
274 assert(dstIdx < count);
275
276 // load the next word, containing some packed items
277 Word_t packedBytes = 0;
278 std::size_t bytesLoaded = std::min(remBytesToLoad, sizeof(Word_t));
279 memcpy(&packedBytes, &srcArray[i], bytesLoaded);
280
281 assert(remBytesToLoad >= bytesLoaded);
282 remBytesToLoad -= bytesLoaded;
283
284 // If `offInWord` is negative, it means that the last item was split
285 // across 2 words and we need to recombine it.
286 if (offInWord < 0) {
287 std::size_t nMsb = nSrcBits + offInWord;
288 std::uint32_t msb = packedBytes << (8 * sizeofDst - nMsb);
289 Word_t packedWord = msb | prevWordLsb;
290 prevWordLsb = 0;
291 memcpy(dstArray + dstIdx * sizeofDst, &packedWord, sizeofDst);
292 ++dstIdx;
293 offInWord = nMsb;
294 }
295
296 // isolate each item in the loaded word
297 while (dstIdx < count) {
298 // Check if we need to load a split item or a full one
299 if (offInWord > static_cast<int>(kBitsPerWord - nSrcBits)) {
300 // save the LSB of the next item, next `for` loop will merge them with the MSB in the next word.
301 assert(offInWord <= static_cast<int>(kBitsPerWord));
302 std::size_t nLsbNext = kBitsPerWord - offInWord;
303 if (nLsbNext)
304 prevWordLsb = (packedBytes >> offInWord) << (8 * sizeofDst - nSrcBits);
305 offInWord -= kBitsPerWord;
306 break;
307 }
308
309 Word_t packedWord = packedBytes;
310 assert(nSrcBits + offInWord <= kBitsPerWord);
311 packedWord >>= offInWord;
312 packedWord <<= 8 * sizeofDst - nSrcBits;
313 memcpy(dstArray + dstIdx * sizeofDst, &packedWord, sizeofDst);
314 ++dstIdx;
315 offInWord += nSrcBits;
316 }
317 }
318
319 assert(prevWordLsb == 0);
320 assert(dstIdx == count);
321}
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
The available trivial, native content types of a column.
static const char * GetColumnTypeName(ROOT::ENTupleColumnType type)
static std::unique_ptr< RColumnElementBase > Generate(ROOT::ENTupleColumnType type)
If CppT == void, use the default C++ type for the given column type.
static std::pair< std::uint16_t, std::uint16_t > GetValidBitRange(ROOT::ENTupleColumnType type)
Most types have a fixed on-disk bit width.
The in-memory representation of a 32bit or 64bit on-disk index column.
Holds the index and the tag of a kSwitch column.
void PackBits(void *dst, const void *src, std::size_t count, std::size_t sizeofSrc, std::size_t nDstBits)
Tightly packs count items of size sizeofSrc contained in src into dst using nDstBits per item.
void UnpackBits(void *dst, const void *src, std::size_t count, std::size_t sizeofDst, std::size_t nSrcBits)
Undoes the effect of PackBits.
constexpr std::size_t kBitsPerWord
constexpr ENTupleColumnType kTestFutureColumnType
std::unique_ptr< RColumnElementBase > GenerateColumnElement(std::type_index inMemoryType, ROOT::ENTupleColumnType onDiskType)
ENTupleColumnType
Every concrete RColumnElement type is identified by its on-disk type (column type) and the in-memory ...