Logo ROOT  
Reference Guide
Loading...
Searching...
No Matches
RNTupleInspector.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleInspector.hxx
2/// \author Florine de Geus <florine.de.geus@cern.ch>
3/// \date 2023-01-09
4/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
5/// is welcome!
6
7/*************************************************************************
8 * Copyright (C) 1995-2023, Rene Brun and Fons Rademakers. *
9 * All rights reserved. *
10 * *
11 * For the licensing terms see $ROOTSYS/LICENSE. *
12 * For the list of contributors see $ROOTSYS/README/CREDITS. *
13 *************************************************************************/
14
15#ifndef ROOT7_RNTupleInspector
16#define ROOT7_RNTupleInspector
17
18#include <ROOT/RError.hxx>
20
21#include <TFile.h>
22#include <TH1D.h>
23#include <THStack.h>
24
25#include <cstdlib>
26#include <iostream>
27#include <memory>
28#include <numeric>
29#include <optional>
30#include <regex>
31#include <vector>
32
33namespace ROOT {
34class RNTuple;
35
36namespace Internal {
37class RPageSource;
38} // namespace Internal
39
40namespace Experimental {
41
52
53// clang-format off
54/**
55\class ROOT::Experimental::RNTupleInspector
56\ingroup NTuple
57\brief Inspect on-disk and storage-related information of an RNTuple.
58
59The RNTupleInspector can be used for studying an RNTuple in terms of its storage efficiency. It provides information on
60the level of the RNTuple itself, on the (sub)field level and on the column level.
61
62Example usage:
63
64~~~ {.cpp}
65#include <ROOT/RNTuple.hxx>
66#include <ROOT/RNTupleInspector.hxx>
67
68#include <iostream>
69
70using ROOT::Experimental::RNTupleInspector;
71
72auto file = TFile::Open("data.rntuple");
73auto rntuple = std::unique_ptr<ROOT::RNTuple>(file->Get<ROOT::RNTuple>("NTupleName"));
74auto inspector = RNTupleInspector::Create(*rntuple);
75
76std::cout << "The compression factor is " << inspector->GetCompressionFactor()
77 << " using compression settings " << inspector->GetCompressionSettingsAsString()
78 << std::endl;
79~~~
80*/
81// clang-format on
83public:
84 /////////////////////////////////////////////////////////////////////////////
85 /// \brief Provides column-level storage information.
86 ///
87 /// The RColumnInspector class provides storage information for an individual column. This information is partly
88 /// collected during the construction of the RNTupleInspector object, and can partly be accessed using the
89 /// RColumnDescriptor that belongs to this column.
91 private:
93 const std::vector<std::uint64_t> fCompressedPageSizes = {};
94 std::uint32_t fElementSize = 0;
95 std::uint64_t fNElements = 0;
96
97 public:
98 RColumnInspector(const ROOT::RColumnDescriptor &colDesc, const std::vector<std::uint64_t> &compressedPageSizes,
99 std::uint32_t elemSize, std::uint64_t nElems)
100 : fColumnDescriptor(colDesc),
101 fCompressedPageSizes(compressedPageSizes),
102 fElementSize(elemSize),
103 fNElements(nElems){};
104 ~RColumnInspector() = default;
105
107 const std::vector<std::uint64_t> &GetCompressedPageSizes() const { return fCompressedPageSizes; }
108 std::uint64_t GetNPages() const { return fCompressedPageSizes.size(); }
109 std::uint64_t GetCompressedSize() const
110 {
111 return std::accumulate(fCompressedPageSizes.begin(), fCompressedPageSizes.end(),
112 static_cast<std::uint64_t>(0));
113 }
114 std::uint64_t GetUncompressedSize() const { return fElementSize * fNElements; }
115 std::uint64_t GetElementSize() const { return fElementSize; }
116 std::uint64_t GetNElements() const { return fNElements; }
118 };
119
120 /////////////////////////////////////////////////////////////////////////////
121 /// \brief Provides field-level storage information.
122 ///
123 /// The RFieldTreeInspector class provides storage information for a field **and** its subfields. This information is
124 /// partly collected during the construction of the RNTupleInspector object, and can partly be accessed using
125 /// the RFieldDescriptor that belongs to this field.
127 private:
129 std::uint64_t fCompressedSize = 0;
130 std::uint64_t fUncompressedSize = 0;
131
132 public:
133 RFieldTreeInspector(const ROOT::RFieldDescriptor &fieldDesc, std::uint64_t onDiskSize, std::uint64_t inMemSize)
134 : fRootFieldDescriptor(fieldDesc), fCompressedSize(onDiskSize), fUncompressedSize(inMemSize){};
136
138 std::uint64_t GetCompressedSize() const { return fCompressedSize; }
139 std::uint64_t GetUncompressedSize() const { return fUncompressedSize; }
140 };
141
142private:
143 std::unique_ptr<ROOT::Internal::RPageSource> fPageSource;
145 std::optional<std::uint32_t> fCompressionSettings; ///< The compression settings are unknown for an empty ntuple
146 std::uint64_t fCompressedSize = 0;
147 std::uint64_t fUncompressedSize = 0;
148
149 std::unordered_map<int, RColumnInspector> fColumnInfo;
150 std::unordered_map<int, RFieldTreeInspector> fFieldTreeInfo;
151
152 RNTupleInspector(std::unique_ptr<ROOT::Internal::RPageSource> pageSource);
153
154 /////////////////////////////////////////////////////////////////////////////
155 /// \brief Gather column-level and RNTuple-level information.
156 ///
157 /// \note This method is called when the RNTupleInspector is initially created. This means that anything unexpected
158 /// about the RNTuple itself (e.g. inconsistent compression settings across clusters) will be detected here.
159 /// Therefore, any related exceptions will be thrown on creation of the inspector.
160 void CollectColumnInfo();
161
162 /////////////////////////////////////////////////////////////////////////////
163 /// \brief Recursively gather field-level information.
164 ///
165 /// \param[in] fieldId The ID of the field from which to start the recursive traversal. Typically this is the "zero
166 /// ID", i.e. the logical parent of all top-level fields.
167 ///
168 /// \return The RFieldTreeInspector for the provided field ID.
169 ///
170 /// This method is called when the RNTupleInspector is initially created.
172
173public:
174 RNTupleInspector(const RNTupleInspector &other) = delete;
179
180 /////////////////////////////////////////////////////////////////////////////
181 /// \brief Create a new RNTupleInspector.
182 ///
183 /// \param[in] sourceNTuple A reference to the RNTuple to be inspected.
184 ///
185 /// \return A pointer to the newly created RNTupleInspector.
186 ///
187 /// \note When this factory method is called, all required static information is collected from the RNTuple's fields
188 /// and underlying columns at once. This means that when any inconsistencies are encountered (e.g.
189 /// inconsistent compression across clusters), it will throw an error here.
190 static std::unique_ptr<RNTupleInspector> Create(const RNTuple &sourceNTuple);
191
192 /////////////////////////////////////////////////////////////////////////////
193 /// \brief Create a new RNTupleInspector.
194 ///
195 /// \param[in] ntupleName The name of the RNTuple to be inspected.
196 /// \param[in] storage The path or URI to the RNTuple to be inspected.
197 ///
198 /// \see Create(const RNTuple &sourceNTuple)
199 static std::unique_ptr<RNTupleInspector> Create(std::string_view ntupleName, std::string_view storage);
200
201 /////////////////////////////////////////////////////////////////////////////
202 /// \brief Get the descriptor for the RNTuple being inspected.
203 ///
204 /// \return A static copy of the ROOT::RNTupleDescriptor belonging to the inspected RNTuple.
206
207 /////////////////////////////////////////////////////////////////////////////
208 /// \brief Get the compression settings of the RNTuple being inspected.
209 ///
210 /// \return The integer representation (\f$algorithm * 100 + level\f$, where \f$algorithm\f$ follows
211 /// ROOT::RCompressionSetting::EAlgorithm::EValues) of the compression settings used for the inspected RNTuple.
212 /// Empty for an empty ntuple.
213 ///
214 /// \note Here, we assume that the compression settings are consistent across all clusters and columns. If this is
215 /// not the case, an exception will be thrown when RNTupleInspector::Create is called.
216 std::optional<std::uint32_t> GetCompressionSettings() const { return fCompressionSettings; }
217
218 /////////////////////////////////////////////////////////////////////////////
219 /// \brief Get a string describing compression settings of the RNTuple being inspected.
220 ///
221 /// \return A string describing the compression used for the inspected RNTuple. The format of the string is
222 /// `"A (level L)"`, where `A` is the name of the compression algorithm and `L` the compression level.
223 ///
224 /// \note Here, we assume that the compression settings are consistent across all clusters and columns. If this is
225 /// not the case, an exception will be thrown when RNTupleInspector::Create is called.
226 std::string GetCompressionSettingsAsString() const;
227
228 /////////////////////////////////////////////////////////////////////////////
229 /// \brief Get the compressed, on-disk size of the RNTuple being inspected.
230 ///
231 /// \return The compressed size of the inspected RNTuple, in bytes, excluding the size of the header and footer.
232 std::uint64_t GetCompressedSize() const { return fCompressedSize; }
233
234 /////////////////////////////////////////////////////////////////////////////
235 /// \brief Get the uncompressed total size of the RNTuple being inspected.
236 ///
237 /// \return The uncompressed size of the inspected RNTuple, in bytes, excluding the size of the header and footer.
238 std::uint64_t GetUncompressedSize() const { return fUncompressedSize; }
239
240 /////////////////////////////////////////////////////////////////////////////
241 /// \brief Get the compression factor of the RNTuple being inspected.
242 ///
243 /// \return The compression factor of the inspected RNTuple.
244 ///
245 /// The compression factor shows how well the data present in the RNTuple is compressed by the compression settings
246 /// that were used. The compression factor is calculated as \f$size_{uncompressed} / size_{compressed}\f$.
247 float GetCompressionFactor() const { return (float)fUncompressedSize / (float)fCompressedSize; }
248
249 /////////////////////////////////////////////////////////////////////////////
250 /// \brief Get storage information for a given column.
251 ///
252 /// \param[in] physicalColumnId The physical ID of the column for which to get the information.
253 ///
254 /// \return The storage information for the provided column.
255 const RColumnInspector &GetColumnInspector(ROOT::DescriptorId_t physicalColumnId) const;
256
257 /////////////////////////////////////////////////////////////////////////////
258 /// \brief Get the number of columns of a given type present in the RNTuple.
259 ///
260 /// \param[in] colType The column type to count, as defined by ROOT::ENTupleColumnType.
261 ///
262 /// \return The number of columns present in the inspected RNTuple of the provided type.
263 size_t GetColumnCountByType(ROOT::ENTupleColumnType colType) const;
264
265 /////////////////////////////////////////////////////////////////////////////
266 /// \brief Get the IDs of all columns with the given type.
267 ///
268 /// \param[in] colType The column type to collect, as defined by ROOT::ENTupleColumnType.
269 ///
270 /// \return A vector containing the physical IDs of columns of the provided type.
271 std::vector<ROOT::DescriptorId_t> GetColumnsByType(ROOT::ENTupleColumnType colType);
272
273 /////////////////////////////////////////////////////////////////////////////
274 /// \brief Get the columns that make up the given field, including its subfields.
275 ///
276 /// \param [in] fieldId The ID of the field for which to collect the columns.
277 ///
278 /// \return A vector containing the IDs of all columns for the provided field ID.
279 std::vector<ROOT::DescriptorId_t> GetAllColumnsOfField(ROOT::DescriptorId_t fieldId) const;
280
281 /////////////////////////////////////////////////////////////////////////////
282 /// \brief Get all column types present in the RNTuple being inspected.
283 ///
284 /// \return A vector containing all column types present in the RNTuple.
285 std::vector<ROOT::ENTupleColumnType> GetColumnTypes();
286
287 /////////////////////////////////////////////////////////////////////////////
288 /// \brief Print storage information per column type.
289 ///
290 /// \param[in] format Whether to print the information as a (markdown-parseable) table or in CSV format.
291 /// \param[in] output Where to write the output to. Default is `stdout`.
292 ///
293 /// The output includes for each column type its count, the total number of elements, the compressed size and the
294 /// uncompressed size.
295 ///
296 /// **Example: printing the column type information of an RNTuple as a table**
297 /// ~~~ {.cpp}
298 /// #include <ROOT/RNTupleInspector.hxx>
299 /// using ROOT::Experimental::RNTupleInspector;
300 /// using ROOT::Experimental::ENTupleInspectorPrintFormat;
301 ///
302 /// auto inspector = RNTupleInspector::Create("myNTuple", "some/file.root");
303 /// inspector->PrintColumnTypeInfo();
304 /// ~~~
305 /// Output:
306 /// ~~~
307 /// column type | count | # elements | compressed bytes | uncompressed bytes
308 /// ----------------|---------|-----------------|-------------------|--------------------
309 /// SplitIndex64 | 2 | 150 | 72 | 1200
310 /// SplitReal32 | 4 | 300 | 189 | 1200
311 /// SplitUInt32 | 3 | 225 | 123 | 900
312 /// ~~~
313 ///
314 /// **Example: printing the column type information of an RNTuple in CSV format**
315 /// ~~~ {.cpp}
316 /// #include <ROOT/RNTupleInspector.hxx>
317 /// using ROOT::Experimental::RNTupleInspector;
318 /// using ROOT::Experimental::ENTupleInspectorPrintFormat;
319 ///
320 /// auto inspector = RNTupleInspector::Create("myNTuple", "some/file.root");
321 /// inspector->PrintColumnTypeInfo(ENTupleInspectorPrintFormat::kCSV);
322 /// ~~~
323 /// Output:
324 /// ~~~
325 /// columnType,count,nElements,compressedSize,uncompressedSize
326 /// SplitIndex64,2,150,72,1200
327 /// SplitReal32,4,300,189,1200
328 /// SplitUInt32,3,225,123,900
329 /// ~~~
331 std::ostream &output = std::cout);
332
333 /////////////////////////////////////////////////////////////////////////////
334 /// \brief Get a histogram showing information for each column type present.
335 ///
336 /// \param[in] histKind Which type of information should be returned.
337 /// \param[in] histName The name of the histogram. An empty string means a default name will be used.
338 /// \param[in] histTitle The title of the histogram. An empty string means a default title will be used.
339 ///
340 /// \return A pointer to a `TH1D` containing the specified kind of information.
341 ///
342 /// Get a histogram showing the count, number of elements, size on disk, or size in memory for each column
343 /// type present in the inspected RNTuple.
344 std::unique_ptr<TH1D> GetColumnTypeInfoAsHist(ENTupleInspectorHist histKind, std::string_view histName = "",
345 std::string_view histTitle = "");
346
347 /////////////////////////////////////////////////////////////////////////////
348 /// \brief Get a histogram containing the size distribution of the compressed pages for an individual column.
349 ///
350 /// \param[in] physicalColumnId The physical ID of the column for which to get the page size distribution.
351 /// \param[in] histName The name of the histogram. An empty string means a default name will be used.
352 /// \param[in] histTitle The title of the histogram. An empty string means a default title will be used.
353 /// \param[in] nBins The desired number of histogram bins.
354 ///
355 /// \return A pointer to a `TH1D` containing the page size distribution.
356 ///
357 /// The x-axis will range from the smallest page size, to the largest (inclusive).
358 std::unique_ptr<TH1D> GetPageSizeDistribution(ROOT::DescriptorId_t physicalColumnId, std::string histName = "",
359 std::string histTitle = "", size_t nBins = 64);
360
361 /////////////////////////////////////////////////////////////////////////////
362 /// \brief Get a histogram containing the size distribution of the compressed pages for all columns of a given type.
363 ///
364 /// \param[in] colType The column type for which to get the size distribution, as defined by ROOT::ENTupleColumnType.
365 /// \param[in] histName The name of the histogram. An empty string means a default name will be used.
366 /// \param[in] histTitle The title of the histogram. An empty string means a default title will be used.
367 /// \param[in] nBins The desired number of histogram bins.
368 ///
369 /// \return A pointer to a `TH1D` containing the page size distribution.
370 ///
371 /// The x-axis will range from the smallest page size, to the largest (inclusive).
372 std::unique_ptr<TH1D> GetPageSizeDistribution(ROOT::ENTupleColumnType colType, std::string histName = "",
373 std::string histTitle = "", size_t nBins = 64);
374
375 /////////////////////////////////////////////////////////////////////////////
376 /// \brief Get a histogram containing the size distribution of the compressed pages for a collection of columns.
377 ///
378 /// \param[in] colIds The physical IDs of the columns for which to get the page size distribution.
379 /// \param[in] histName The name of the histogram. An empty string means a default name will be used.
380 /// \param[in] histTitle The title of the histogram. An empty string means a default title will be used.
381 /// \param[in] nBins The desired number of histogram bins.
382 ///
383 /// \return A pointer to a `TH1D` containing the (cumulative) page size distribution.
384 ///
385 /// The x-axis will range from the smallest page size, to the largest (inclusive).
386 std::unique_ptr<TH1D> GetPageSizeDistribution(std::initializer_list<ROOT::DescriptorId_t> colIds,
387 std::string histName = "", std::string histTitle = "",
388 size_t nBins = 64);
389
390 /////////////////////////////////////////////////////////////////////////////
391 /// \brief Get a histogram containing the size distribution of the compressed pages for all columns of a given list
392 /// of types.
393 ///
394 /// \param[in] colTypes The column types for which to get the size distribution, as defined by
395 /// ROOT::ENTupleColumnType. The default is an empty list, which indicates that the distribution
396 /// for *all* physical columns will be returned.
397 /// \param[in] histName The name of the histogram. An empty string means a default name will be used. The name of
398 /// each histogram inside the `THStack` will be `histName + colType`.
399 /// \param[in] histTitle The title of the histogram. An empty string means a default title will be used.
400 /// \param[in] nBins The desired number of histogram bins.
401 ///
402 /// \return A pointer to a `THStack` with one histogram for each column type.
403 ///
404 /// The x-axis will range from the smallest page size, to the largest (inclusive).
405 ///
406 /// **Example: Drawing a non-stacked page size distribution with a legend**
407 /// ~~~ {.cpp}
408 /// auto canvas = std::make_unique<TCanvas>();
409 /// auto inspector = RNTupleInspector::Create("myNTuple", "ntuple.root");
410 ///
411 /// // We want to show the page size distributions of columns with type `kSplitReal32` and `kSplitReal64`.
412 /// auto hist = inspector->GetPageSizeDistribution(
413 /// {ROOT::ENTupleColumnType::kSplitReal32, ROOT::ENTupleColumnType::kSplitReal64});
414 /// // The "PLC" option automatically sets the line color for each histogram in the `THStack`.
415 /// // The "NOSTACK" option will draw the histograms on top of each other instead of stacked.
416 /// hist->DrawClone("PLC NOSTACK");
417 /// canvas->BuildLegend(0.7, 0.8, 0.89, 0.89);
418 /// canvas->DrawClone();
419 /// ~~~
420 std::unique_ptr<THStack> GetPageSizeDistribution(std::initializer_list<ROOT::ENTupleColumnType> colTypes = {},
421 std::string histName = "", std::string histTitle = "",
422 size_t nBins = 64);
423
424 /////////////////////////////////////////////////////////////////////////////
425 /// \brief Get storage information for a given (sub)field by ID.
426 ///
427 /// \param[in] fieldId The ID of the (sub)field for which to get the information.
428 ///
429 /// \return The storage information inspector for the provided (sub)field tree.
430 const RFieldTreeInspector &GetFieldTreeInspector(ROOT::DescriptorId_t fieldId) const;
431
432 /////////////////////////////////////////////////////////////////////////////
433 /// \brief Get a storage information inspector for a given (sub)field by name, including its subfields.
434 ///
435 /// \param[in] fieldName The name of the (sub)field for which to get the information.
436 ///
437 /// \return The storage information inspector for the provided (sub)field tree.
438 const RFieldTreeInspector &GetFieldTreeInspector(std::string_view fieldName) const;
439
440 /////////////////////////////////////////////////////////////////////////////
441 /// \brief Get the number of fields of a given type or class present in the RNTuple.
442 ///
443 /// \param[in] typeNamePattern The type or class name to count. May contain regular expression patterns for grouping
444 /// multiple kinds of types or classes.
445 /// \param[in] searchInSubfields If set to `false`, only top-level fields will be considered.
446 ///
447 /// \return The number of fields that match the provided type.
448 size_t GetFieldCountByType(const std::regex &typeNamePattern, bool searchInSubfields = true) const;
449
450 /////////////////////////////////////////////////////////////////////////////
451 /// \brief Get the number of fields of a given type or class present in the RNTuple.
452 ///
453 /// \see GetFieldCountByType(const std::regex &typeNamePattern, bool searchInSubfields) const
454 size_t GetFieldCountByType(std::string_view typeNamePattern, bool searchInSubfields = true) const
455 {
456 return GetFieldCountByType(std::regex{std::string(typeNamePattern)}, searchInSubfields);
457 }
458
459 /////////////////////////////////////////////////////////////////////////////
460 /// \brief Get the IDs of (sub-)fields whose name matches the given string.
461 ///
462 /// \param[in] fieldNamePattern The name of the field to get. Because field names are unique by design,
463 /// providing a single field name will return a vector containing just the ID of that field. However, regular
464 /// expression patterns are supported in order to get the IDs of all fields whose names follow a certain structure.
465 /// \param[in] searchInSubfields If set to `false`, only top-level fields will be considered.
466 ///
467 /// \return A vector containing the IDs of fields that match the provided name.
468 std::vector<ROOT::DescriptorId_t>
469 GetFieldsByName(const std::regex &fieldNamePattern, bool searchInSubfields = true) const;
470
471 /////////////////////////////////////////////////////////////////////////////
472 /// \brief Get the IDs of (sub-)fields whose name matches the given string.
473 ///
474 /// \see GetFieldsByName(const std::regex &fieldNamePattern, bool searchInSubfields) const
475 std::vector<ROOT::DescriptorId_t> GetFieldsByName(std::string_view fieldNamePattern, bool searchInSubfields = true)
476 {
477 return GetFieldsByName(std::regex{std::string(fieldNamePattern)}, searchInSubfields);
478 }
479 /////////////////////////////////////////////////////////////////////////////
480 /// \brief Print a .dot string that represents the tree of the (sub)fields of an RNTuple
481 ///
482 /// \param[in] fieldDescriptor The descriptor of the root field (this method works recursively)
483 ///
484
485 void PrintFieldTreeAsDot(const ROOT::RFieldDescriptor &fieldDescriptor, std::ostream &output = std::cout) const;
486
487 /////////////////////////////////////////////////////////////////////////////
488 /// \brief Print the tree of all the (sub)fields of an RNTuple
489 /// \param[in] output The stream to write the .dot representation to. Default is `std::cout`.
490 ///
491 /// \see PrintFieldTreeAsDot(const ROOT::RFieldDescriptor &fieldDescriptor, std::ostream &output=std::cout) const
492 void PrintFieldTreeAsDot(std::ostream &output = std::cout) const
493 {
494 PrintFieldTreeAsDot(GetDescriptor().GetFieldZero(), output);
495 }
496};
497} // namespace Experimental
498} // namespace ROOT
499
500#endif // ROOT7_RNTupleInspector
RColumnInspector(const ROOT::RColumnDescriptor &colDesc, const std::vector< std::uint64_t > &compressedPageSizes, std::uint32_t elemSize, std::uint64_t nElems)
const ROOT::RColumnDescriptor & GetDescriptor() const
const std::vector< std::uint64_t > fCompressedPageSizes
const std::vector< std::uint64_t > & GetCompressedPageSizes() const
RFieldTreeInspector(const ROOT::RFieldDescriptor &fieldDesc, std::uint64_t onDiskSize, std::uint64_t inMemSize)
float GetCompressionFactor() const
Get the compression factor of the RNTuple being inspected.
std::vector< ROOT::DescriptorId_t > GetFieldsByName(const std::regex &fieldNamePattern, bool searchInSubfields=true) const
Get the IDs of (sub-)fields whose name matches the given string.
RNTupleInspector & operator=(RNTupleInspector &&other)=delete
const RFieldTreeInspector & GetFieldTreeInspector(ROOT::DescriptorId_t fieldId) const
Get storage information for a given (sub)field by ID.
std::unique_ptr< TH1D > GetPageSizeDistribution(ROOT::DescriptorId_t physicalColumnId, std::string histName="", std::string histTitle="", size_t nBins=64)
Get a histogram containing the size distribution of the compressed pages for an individual column.
const ROOT::RNTupleDescriptor & GetDescriptor() const
Get the descriptor for the RNTuple being inspected.
RNTupleInspector(const RNTupleInspector &other)=delete
std::uint64_t GetCompressedSize() const
Get the compressed, on-disk size of the RNTuple being inspected.
size_t GetColumnCountByType(ROOT::ENTupleColumnType colType) const
Get the number of columns of a given type present in the RNTuple.
std::uint64_t GetUncompressedSize() const
Get the uncompressed total size of the RNTuple being inspected.
void PrintFieldTreeAsDot(std::ostream &output=std::cout) const
Print the tree of all the (sub)fields of an RNTuple.
RNTupleInspector(RNTupleInspector &&other)=delete
std::optional< std::uint32_t > fCompressionSettings
The compression settings are unknown for an empty ntuple.
std::vector< ROOT::ENTupleColumnType > GetColumnTypes()
Get all column types present in the RNTuple being inspected.
size_t GetFieldCountByType(const std::regex &typeNamePattern, bool searchInSubfields=true) const
Get the number of fields of a given type or class present in the RNTuple.
std::vector< ROOT::DescriptorId_t > GetColumnsByType(ROOT::ENTupleColumnType colType)
Get the IDs of all columns with the given type.
std::string GetCompressionSettingsAsString() const
Get a string describing compression settings of the RNTuple being inspected.
RFieldTreeInspector CollectFieldTreeInfo(ROOT::DescriptorId_t fieldId)
Recursively gather field-level information.
RNTupleInspector(std::unique_ptr< ROOT::Internal::RPageSource > pageSource)
size_t GetFieldCountByType(std::string_view typeNamePattern, bool searchInSubfields=true) const
Get the number of fields of a given type or class present in the RNTuple.
void PrintColumnTypeInfo(ENTupleInspectorPrintFormat format=ENTupleInspectorPrintFormat::kTable, std::ostream &output=std::cout)
Print storage information per column type.
std::optional< std::uint32_t > GetCompressionSettings() const
Get the compression settings of the RNTuple being inspected.
const RColumnInspector & GetColumnInspector(ROOT::DescriptorId_t physicalColumnId) const
Get storage information for a given column.
std::unique_ptr< ROOT::Internal::RPageSource > fPageSource
RNTupleInspector & operator=(const RNTupleInspector &other)=delete
static std::unique_ptr< RNTupleInspector > Create(const RNTuple &sourceNTuple)
Create a new RNTupleInspector.
std::unordered_map< int, RFieldTreeInspector > fFieldTreeInfo
void CollectColumnInfo()
Gather column-level and RNTuple-level information.
std::unordered_map< int, RColumnInspector > fColumnInfo
void PrintFieldTreeAsDot(const ROOT::RFieldDescriptor &fieldDescriptor, std::ostream &output=std::cout) const
Print a .dot string that represents the tree of the (sub)fields of an RNTuple.
std::vector< ROOT::DescriptorId_t > GetFieldsByName(std::string_view fieldNamePattern, bool searchInSubfields=true)
Get the IDs of (sub-)fields whose name matches the given string.
std::vector< ROOT::DescriptorId_t > GetAllColumnsOfField(ROOT::DescriptorId_t fieldId) const
Get the columns that make up the given field, including its subfields.
std::unique_ptr< TH1D > GetColumnTypeInfoAsHist(ENTupleInspectorHist histKind, std::string_view histName="", std::string_view histTitle="")
Get a histogram showing information for each column type present.
Abstract interface to read data from an ntuple.
Metadata stored for every column of an RNTuple.
Metadata stored for every field of an RNTuple.
The on-storage metadata of an RNTuple.
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:67
Namespace for ROOT features in testing.
Definition TROOT.h:100
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
ENTupleColumnType