Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleInspector.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleInspector.hxx
2/// \ingroup NTuple ROOT7
3/// \author Florine de Geus <florine.de.geus@cern.ch>
4/// \date 2023-01-09
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2023, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RNTupleInspector
17#define ROOT7_RNTupleInspector
18
19#include <ROOT/RError.hxx>
21
22#include <TFile.h>
23#include <TH1D.h>
24#include <THStack.h>
25
26#include <cstdlib>
27#include <iostream>
28#include <memory>
29#include <numeric>
30#include <optional>
31#include <regex>
32#include <vector>
33
34namespace ROOT {
35class RNTuple;
36
37namespace Internal {
38class RPageSource;
39} // namespace Internal
40
41namespace Experimental {
42
44 kTable,
45 kCSV
46};
53
54// clang-format off
55/**
56\class ROOT::Experimental::RNTupleInspector
57\ingroup NTuple
58\brief Inspect on-disk and storage-related information of an RNTuple.
59
60The RNTupleInspector can be used for studying an RNTuple in terms of its storage efficiency. It provides information on
61the level of the RNTuple itself, on the (sub)field level and on the column level.
62
63Example usage:
64
65~~~ {.cpp}
66#include <ROOT/RNTuple.hxx>
67#include <ROOT/RNTupleInspector.hxx>
68
69#include <iostream>
70
71using ROOT::Experimental::RNTupleInspector;
72
73auto file = TFile::Open("data.rntuple");
74auto rntuple = std::unique_ptr<ROOT::RNTuple>(file->Get<ROOT::RNTuple>("NTupleName"));
75auto inspector = RNTupleInspector::Create(*rntuple);
76
77std::cout << "The compression factor is " << inspector->GetCompressionFactor()
78 << " using compression settings " << inspector->GetCompressionSettingsAsString()
79 << std::endl;
80~~~
81*/
82// clang-format on
84public:
85 /////////////////////////////////////////////////////////////////////////////
86 /// \brief Provides column-level storage information.
87 ///
88 /// The RColumnInspector class provides storage information for an individual column. This information is partly
89 /// collected during the construction of the RNTupleInspector object, and can partly be accessed using the
90 /// RColumnDescriptor that belongs to this column.
92 private:
94 const std::vector<std::uint64_t> fCompressedPageSizes = {};
95 std::uint32_t fElementSize = 0;
96 std::uint64_t fNElements = 0;
97
98 public:
105 ~RColumnInspector() = default;
106
108 const std::vector<std::uint64_t> &GetCompressedPageSizes() const { return fCompressedPageSizes; }
109 std::uint64_t GetNPages() const { return fCompressedPageSizes.size(); }
110 std::uint64_t GetCompressedSize() const
111 {
112 constexpr std::uint64_t kZeroBytes = 0; // 64-bit seed, so the sum is carried out in std::uint64_t
113 return std::accumulate(fCompressedPageSizes.cbegin(), fCompressedPageSizes.cend(), kZeroBytes);
114 }
115 std::uint64_t GetUncompressedSize() const { return static_cast<std::uint64_t>(fElementSize) * fNElements; }
116 std::uint64_t GetElementSize() const { return fElementSize; }
117 std::uint64_t GetNElements() const { return fNElements; }
119 };
120
121 /////////////////////////////////////////////////////////////////////////////
122 /// \brief Provides field-level storage information.
123 ///
124 /// The RFieldTreeInspector class provides storage information for a field **and** its subfields. This information is
125 /// partly collected during the construction of the RNTupleInspector object, and can partly be accessed using
126 /// the RFieldDescriptor that belongs to this field.
142
143private:
144 std::unique_ptr<ROOT::Internal::RPageSource> fPageSource;
146 std::optional<std::uint32_t> fCompressionSettings; ///< The compression settings are unknown for an empty ntuple
147 std::uint64_t fCompressedSize = 0;
148 std::uint64_t fUncompressedSize = 0;
149
150 std::unordered_map<int, RColumnInspector> fColumnInfo;
151 std::unordered_map<int, RFieldTreeInspector> fFieldTreeInfo;
152
153 RNTupleInspector(std::unique_ptr<ROOT::Internal::RPageSource> pageSource);
154
155 /////////////////////////////////////////////////////////////////////////////
156 /// \brief Gather column-level and RNTuple-level information.
157 ///
158 /// \note This method is called when the RNTupleInspector is initially created. This means that anything unexpected
159 /// about the RNTuple itself (e.g. inconsistent compression settings across clusters) will be detected here.
160 /// Therefore, any related exceptions will be thrown on creation of the inspector.
161 void CollectColumnInfo();
162
163 /////////////////////////////////////////////////////////////////////////////
164 /// \brief Recursively gather field-level information.
165 ///
166 /// \param[in] fieldId The ID of the field from which to start the recursive traversal. Typically this is the "zero
167 /// ID", i.e. the logical parent of all top-level fields.
168 ///
169 /// \return The RFieldTreeInspector for the provided field ID.
170 ///
171 /// This method is called when the RNTupleInspector is initially created.
173
174public:
180
181 /////////////////////////////////////////////////////////////////////////////
182 /// \brief Create a new RNTupleInspector.
183 ///
184 /// \param[in] sourceNTuple A reference to the RNTuple to be inspected.
185 ///
186 /// \return A pointer to the newly created RNTupleInspector.
187 ///
188 /// \note When this factory method is called, all required static information from the RNTuple's fields and
189 /// underlying columns is collected at once. This means that when any inconsistencies are encountered (e.g.
190 /// inconsistent compression across clusters), it will throw an error here.
191 static std::unique_ptr<RNTupleInspector> Create(const RNTuple &sourceNTuple);
192
193 /////////////////////////////////////////////////////////////////////////////
194 /// \brief Create a new RNTupleInspector.
195 ///
196 /// \param[in] ntupleName The name of the RNTuple to be inspected.
197 /// \param[in] storage The path or URI to the RNTuple to be inspected.
198 ///
199 /// \see Create(const RNTuple &sourceNTuple)
200 static std::unique_ptr<RNTupleInspector> Create(std::string_view ntupleName, std::string_view storage);
201
202 /////////////////////////////////////////////////////////////////////////////
203 /// \brief Get the descriptor for the RNTuple being inspected.
204 ///
205 /// \return A static copy of the ROOT::RNTupleDescriptor belonging to the inspected RNTuple.
207
208 /////////////////////////////////////////////////////////////////////////////
209 /// \brief Get the compression settings of the RNTuple being inspected.
210 ///
211 /// \return The integer representation (\f$algorithm * 100 + level\f$, where \f$algorithm\f$ follows
212 /// ROOT::RCompressionSetting::EAlgorithm::EValues) of the compression settings used for the inspected RNTuple.
213 /// Empty for an empty ntuple.
214 ///
215 /// \note Here, we assume that the compression settings are consistent across all clusters and columns. If this is
216 /// not the case, an exception will be thrown when RNTupleInspector::Create is called.
217 std::optional<std::uint32_t> GetCompressionSettings() const { return fCompressionSettings; }
218
219 /////////////////////////////////////////////////////////////////////////////
220 /// \brief Get a string describing compression settings of the RNTuple being inspected.
221 ///
222 /// \return A string describing the compression used for the inspected RNTuple. The format of the string is
223 /// `"A (level L)"`, where `A` is the name of the compression algorithm and `L` the compression level.
224 ///
225 /// \note Here, we assume that the compression settings are consistent across all clusters and columns. If this is
226 /// not the case, an exception will be thrown when RNTupleInspector::Create is called.
227 std::string GetCompressionSettingsAsString() const;
228
229 /////////////////////////////////////////////////////////////////////////////
230 /// \brief Get the compressed, on-disk size of the RNTuple being inspected.
231 ///
232 /// \return The compressed size of the inspected RNTuple, in bytes, excluding the size of the header and footer.
233 std::uint64_t GetCompressedSize() const { return fCompressedSize; }
234
235 /////////////////////////////////////////////////////////////////////////////
236 /// \brief Get the uncompressed total size of the RNTuple being inspected.
237 ///
238 /// \return The uncompressed size of the inspected RNTuple, in bytes, excluding the size of the header and footer.
239 std::uint64_t GetUncompressedSize() const { return fUncompressedSize; }
240
241 /////////////////////////////////////////////////////////////////////////////
242 /// \brief Get the compression factor of the RNTuple being inspected.
243 ///
244 /// \return The compression factor of the inspected RNTuple.
245 ///
246 /// The compression factor shows how well the data present in the RNTuple is compressed by the compression settings
247 /// that were used. The compression factor is calculated as \f$size_{uncompressed} / size_{compressed}\f$.
248 float GetCompressionFactor() const { return static_cast<float>(fUncompressedSize) / static_cast<float>(fCompressedSize); }
249
250 /////////////////////////////////////////////////////////////////////////////
251 /// \brief Get storage information for a given column.
252 ///
253 /// \param[in] physicalColumnId The physical ID of the column for which to get the information.
254 ///
255 /// \return The storage information for the provided column.
256 const RColumnInspector &GetColumnInspector(ROOT::DescriptorId_t physicalColumnId) const;
257
258 /////////////////////////////////////////////////////////////////////////////
259 /// \brief Get the number of columns of a given type present in the RNTuple.
260 ///
261 /// \param[in] colType The column type to count, as defined by ROOT::ENTupleColumnType.
262 ///
263 /// \return The number of columns present in the inspected RNTuple of the provided type.
265
266 /////////////////////////////////////////////////////////////////////////////
267 /// \brief Get the IDs of all columns with the given type.
268 ///
269 /// \param[in] colType The column type to collect, as defined by ROOT::ENTupleColumnType.
270 ///
271 /// \return A vector containing the physical IDs of columns of the provided type.
272 std::vector<ROOT::DescriptorId_t> GetColumnsByType(ROOT::ENTupleColumnType colType);
273
274 /////////////////////////////////////////////////////////////////////////////
275 /// \brief Get the columns that make up the given field, including its subfields.
276 ///
277 /// \param [in] fieldId The ID of the field for which to collect the columns.
278 ///
279 /// \return A vector containing the IDs of all columns for the provided field ID.
280 std::vector<ROOT::DescriptorId_t> GetAllColumnsOfField(ROOT::DescriptorId_t fieldId) const;
281
282 /////////////////////////////////////////////////////////////////////////////
283 /// \brief Get all column types present in the RNTuple being inspected.
284 ///
285 /// \return A vector containing all column types present in the RNTuple.
286 std::vector<ROOT::ENTupleColumnType> GetColumnTypes();
287
288 /////////////////////////////////////////////////////////////////////////////
289 /// \brief Print storage information per column type.
290 ///
291 /// \param[in] format Whether to print the information as a (markdown-parseable) table or in CSV format.
292 /// \param[in] output Where to write the output to. Default is `stdout`.
293 ///
294 /// The output includes for each column type its count, the total number of elements, the compressed size and the
295 /// uncompressed size.
296 ///
297 /// **Example: printing the column type information of an RNTuple as a table**
298 /// ~~~ {.cpp}
299 /// #include <ROOT/RNTupleInspector.hxx>
300 /// using ROOT::Experimental::RNTupleInspector;
301 /// using ROOT::Experimental::ENTupleInspectorPrintFormat;
302 ///
303 /// auto inspector = RNTupleInspector::Create("myNTuple", "some/file.root");
304 /// inspector->PrintColumnTypeInfo();
305 /// ~~~
306 /// Output:
307 /// ~~~
308 /// column type | count | # elements | compressed bytes | uncompressed bytes
309 /// ----------------|---------|-----------------|-------------------|--------------------
310 /// SplitIndex64 | 2 | 150 | 72 | 1200
311 /// SplitReal32 | 4 | 300 | 189 | 1200
312 /// SplitUInt32 | 3 | 225 | 123 | 900
313 /// ~~~
314 ///
315 /// **Example: printing the column type information of an RNTuple in CSV format**
316 /// ~~~ {.cpp}
317 /// #include <ROOT/RNTupleInspector.hxx>
318 /// using ROOT::Experimental::RNTupleInspector;
319 /// using ROOT::Experimental::ENTupleInspectorPrintFormat;
320 ///
321 /// auto inspector = RNTupleInspector::Create("myNTuple", "some/file.root");
322 /// inspector->PrintColumnTypeInfo(ENTupleInspectorPrintFormat::kCSV);
323 /// ~~~
324 /// Output:
325 /// ~~~
326 /// columnType,count,nElements,compressedSize,uncompressedSize
327 /// SplitIndex64,2,150,72,1200
328 /// SplitReal32,4,300,189,1200
329 /// SplitUInt32,3,225,123,900
330 /// ~~~
332 std::ostream &output = std::cout);
333
334 /////////////////////////////////////////////////////////////////////////////
335 /// \brief Get a histogram showing information for each column type present.
336 ///
337 /// \param[in] histKind Which type of information should be returned.
338 /// \param[in] histName The name of the histogram. An empty string means a default name will be used.
339 /// \param[in] histTitle The title of the histogram. An empty string means a default title will be used.
340 ///
341 /// \return A pointer to a `TH1D` containing the specified kind of information.
342 ///
343 /// Get a histogram showing the count, number of elements, size on disk, or size in memory for each column
344 /// type present in the inspected RNTuple.
345 std::unique_ptr<TH1D> GetColumnTypeInfoAsHist(ENTupleInspectorHist histKind, std::string_view histName = "",
346 std::string_view histTitle = "");
347
348 /////////////////////////////////////////////////////////////////////////////
349 /// \brief Get a histogram containing the size distribution of the compressed pages for an individual column.
350 ///
351 /// \param[in] physicalColumnId The physical ID of the column for which to get the page size distribution.
352 /// \param[in] histName The name of the histogram. An empty string means a default name will be used.
353 /// \param[in] histTitle The title of the histogram. An empty string means a default title will be used.
354 /// \param[in] nBins The desired number of histogram bins.
355 ///
356 /// \return A pointer to a `TH1D` containing the page size distribution.
357 ///
358 /// The x-axis will range from the smallest page size, to the largest (inclusive).
359 std::unique_ptr<TH1D> GetPageSizeDistribution(ROOT::DescriptorId_t physicalColumnId, std::string histName = "",
360 std::string histTitle = "", size_t nBins = 64);
361
362 /////////////////////////////////////////////////////////////////////////////
363 /// \brief Get a histogram containing the size distribution of the compressed pages for all columns of a given type.
364 ///
365 /// \param[in] colType The column type for which to get the size distribution, as defined by ROOT::ENTupleColumnType.
366 /// \param[in] histName The name of the histogram. An empty string means a default name will be used.
367 /// \param[in] histTitle The title of the histogram. An empty string means a default title will be used.
368 /// \param[in] nBins The desired number of histogram bins.
369 ///
370 /// \return A pointer to a `TH1D` containing the page size distribution.
371 ///
372 /// The x-axis will range from the smallest page size, to the largest (inclusive).
373 std::unique_ptr<TH1D> GetPageSizeDistribution(ROOT::ENTupleColumnType colType, std::string histName = "",
374 std::string histTitle = "", size_t nBins = 64);
375
376 /////////////////////////////////////////////////////////////////////////////
377 /// \brief Get a histogram containing the size distribution of the compressed pages for a collection of columns.
378 ///
379 /// \param[in] colIds The physical IDs of the columns for which to get the page size distribution.
380 /// \param[in] histName The name of the histogram. An empty string means a default name will be used.
381 /// \param[in] histTitle The title of the histogram. An empty string means a default title will be used.
382 /// \param[in] nBins The desired number of histogram bins.
383 ///
384 /// \return A pointer to a `TH1D` containing the (cumulative) page size distribution.
385 ///
386 /// The x-axis will range from the smallest page size, to the largest (inclusive).
387 std::unique_ptr<TH1D> GetPageSizeDistribution(std::initializer_list<ROOT::DescriptorId_t> colIds,
388 std::string histName = "", std::string histTitle = "",
389 size_t nBins = 64);
390
391 /////////////////////////////////////////////////////////////////////////////
392 /// \brief Get a histogram containing the size distribution of the compressed pages for all columns of a given list
393 /// of types.
394 ///
395 /// \param[in] colTypes The column types for which to get the size distribution, as defined by
396 /// ROOT::ENTupleColumnType. The default is an empty vector, which indicates that the distribution
397 /// for *all* physical columns will be returned.
398 /// \param[in] histName The name of the histogram. An empty string means a default name will be used. The name of
399 /// each histogram inside the `THStack` will be `histName + colType`.
400 /// \param[in] histTitle The title of the histogram. An empty string means a default title will be used.
401 /// \param[in] nBins The desired number of histogram bins.
402 ///
403 /// \return A pointer to a `THStack` with one histogram for each column type.
404 ///
405 /// The x-axis will range from the smallest page size, to the largest (inclusive).
406 ///
407 /// **Example: Drawing a non-stacked page size distribution with a legend**
408 /// ~~~ {.cpp}
409 /// auto canvas = std::make_unique<TCanvas>();
410 /// auto inspector = RNTupleInspector::Create("myNTuple", "ntuple.root");
411 ///
412 /// // We want to show the page size distributions of columns with type `kSplitReal32` and `kSplitReal64`.
413 /// auto hist = inspector->GetPageSizeDistribution(
414 /// {ROOT::ENTupleColumnType::kSplitReal32, ROOT::ENTupleColumnType::kSplitReal64});
415 /// // The "PLC" option automatically sets the line color for each histogram in the `THStack`.
416 /// // The "NOSTACK" option will draw the histograms on top of each other instead of stacked.
417 /// hist->DrawClone("PLC NOSTACK");
418 /// canvas->BuildLegend(0.7, 0.8, 0.89, 0.89);
419 /// canvas->DrawClone();
420 /// ~~~
421 std::unique_ptr<THStack> GetPageSizeDistribution(std::initializer_list<ROOT::ENTupleColumnType> colTypes = {},
422 std::string histName = "", std::string histTitle = "",
423 size_t nBins = 64);
424
425 /////////////////////////////////////////////////////////////////////////////
426 /// \brief Get storage information for a given (sub)field by ID.
427 ///
428 /// \param[in] fieldId The ID of the (sub)field for which to get the information.
429 ///
430 /// \return The storage information inspector for the provided (sub)field tree.
431 const RFieldTreeInspector &GetFieldTreeInspector(ROOT::DescriptorId_t fieldId) const;
432
433 /////////////////////////////////////////////////////////////////////////////
434 /// \brief Get a storage information inspector for a given (sub)field by name, including its subfields.
435 ///
436 /// \param[in] fieldName The name of the (sub)field for which to get the information.
437 ///
438 /// \return The storage information inspector for the provided (sub)field tree.
439 const RFieldTreeInspector &GetFieldTreeInspector(std::string_view fieldName) const;
440
441 /////////////////////////////////////////////////////////////////////////////
442 /// \brief Get the number of fields of a given type or class present in the RNTuple.
443 ///
444 /// \param[in] typeNamePattern The type or class name to count. May contain regular expression patterns for grouping
445 /// multiple kinds of types or classes.
446 /// \param[in] searchInSubfields If set to `false`, only top-level fields will be considered.
447 ///
448 /// \return The number of fields that match the provided type.
449 size_t GetFieldCountByType(const std::regex &typeNamePattern, bool searchInSubfields = true) const;
450
451 /////////////////////////////////////////////////////////////////////////////
452 /// \brief Get the number of fields of a given type or class present in the RNTuple.
453 ///
454 /// \see GetFieldCountByType(const std::regex &typeNamePattern, bool searchInSubfields) const
455 size_t GetFieldCountByType(std::string_view typeNamePattern, bool searchInSubfields = true) const
456 { // Turn the pattern text into a std::regex and defer to the regex overload.
457 return GetFieldCountByType(std::regex{std::string(typeNamePattern)}, searchInSubfields);
458 }
459
460 /////////////////////////////////////////////////////////////////////////////
461 /// \brief Get the IDs of (sub-)fields whose name matches the given string.
462 ///
463 /// \param[in] fieldNamePattern The name of the field to get. Because field names are unique by design,
464 /// providing a single field name will return a vector containing just the ID of that field. However, regular
465 /// expression patterns are supported in order to get the IDs of all fields whose names follow a certain structure.
466 /// \param[in] searchInSubfields If set to `false`, only top-level fields will be considered.
467 ///
468 /// \return A vector containing the IDs of fields that match the provided name.
469 std::vector<ROOT::DescriptorId_t>
470 GetFieldsByName(const std::regex &fieldNamePattern, bool searchInSubfields = true) const;
471
472 /////////////////////////////////////////////////////////////////////////////
473 /// \brief Get the IDs of (sub-)fields whose name matches the given string.
474 ///
475 /// \see GetFieldsByName(const std::regex &fieldNamePattern, bool searchInSubfields) const
476 std::vector<ROOT::DescriptorId_t> GetFieldsByName(std::string_view fieldNamePattern, bool searchInSubfields = true) const
477 { // Const-qualified like the regex overload it forwards to, so it can be called on a const inspector.
478 return GetFieldsByName(std::regex{std::string(fieldNamePattern)}, searchInSubfields);
479 }
480 /////////////////////////////////////////////////////////////////////////////
481 /// \brief Print a .dot string that represents the tree of the (sub)fields of an RNTuple
482 ///
483 /// \param[in] fieldDescriptor The descriptor of the root field (this method works recursively)
484 /// \param[in] output Where to write the output to. Default is `stdout`.
485
486 void PrintFieldTreeAsDot(const ROOT::RFieldDescriptor &fieldDescriptor, std::ostream &output = std::cout) const;
487
488 /////////////////////////////////////////////////////////////////////////////
489 /// \brief Print the tree of all the (sub)fields of an RNTuple
490 /// \param[in] output Where to write the output to. Default is `stdout`.
491 ///
492 /// \see PrintFieldTreeAsDot(const ROOT::RFieldDescriptor &fieldDescriptor, std::ostream &output=std::cout) const
493 void PrintFieldTreeAsDot(std::ostream &output = std::cout) const
494 { // Start at the zero field of this inspector's descriptor and hand off to the per-field overload.
495 PrintFieldTreeAsDot(GetDescriptor().GetFieldZero(), output);
496 }
497};
498} // namespace Experimental
499} // namespace ROOT
500
501#endif // ROOT7_RNTupleInspector
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t format
The available trivial, native content types of a column.
Provides column-level storage information.
RColumnInspector(const ROOT::RColumnDescriptor &colDesc, const std::vector< std::uint64_t > &compressedPageSizes, std::uint32_t elemSize, std::uint64_t nElems)
const ROOT::RColumnDescriptor & GetDescriptor() const
const std::vector< std::uint64_t > fCompressedPageSizes
const std::vector< std::uint64_t > & GetCompressedPageSizes() const
RFieldTreeInspector(const ROOT::RFieldDescriptor &fieldDesc, std::uint64_t onDiskSize, std::uint64_t inMemSize)
Inspect on-disk and storage-related information of an RNTuple.
float GetCompressionFactor() const
Get the compression factor of the RNTuple being inspected.
std::vector< ROOT::DescriptorId_t > GetFieldsByName(const std::regex &fieldNamePattern, bool searchInSubfields=true) const
Get the IDs of (sub-)fields whose name matches the given string.
RNTupleInspector & operator=(RNTupleInspector &&other)=delete
const RFieldTreeInspector & GetFieldTreeInspector(ROOT::DescriptorId_t fieldId) const
Get storage information for a given (sub)field by ID.
std::unique_ptr< TH1D > GetPageSizeDistribution(ROOT::DescriptorId_t physicalColumnId, std::string histName="", std::string histTitle="", size_t nBins=64)
Get a histogram containing the size distribution of the compressed pages for an individual column.
const ROOT::RNTupleDescriptor & GetDescriptor() const
Get the descriptor for the RNTuple being inspected.
RNTupleInspector(const RNTupleInspector &other)=delete
std::uint64_t GetCompressedSize() const
Get the compressed, on-disk size of the RNTuple being inspected.
size_t GetColumnCountByType(ROOT::ENTupleColumnType colType) const
Get the number of columns of a given type present in the RNTuple.
std::uint64_t GetUncompressedSize() const
Get the uncompressed total size of the RNTuple being inspected.
void PrintFieldTreeAsDot(std::ostream &output=std::cout) const
Print the tree of all the (sub)fields of an RNTuple.
RNTupleInspector(RNTupleInspector &&other)=delete
std::optional< std::uint32_t > fCompressionSettings
The compression settings are unknown for an empty ntuple.
std::vector< ROOT::ENTupleColumnType > GetColumnTypes()
Get all column types present in the RNTuple being inspected.
size_t GetFieldCountByType(const std::regex &typeNamePattern, bool searchInSubfields=true) const
Get the number of fields of a given type or class present in the RNTuple.
std::vector< ROOT::DescriptorId_t > GetColumnsByType(ROOT::ENTupleColumnType colType)
Get the IDs of all columns with the given type.
std::string GetCompressionSettingsAsString() const
Get a string describing compression settings of the RNTuple being inspected.
RFieldTreeInspector CollectFieldTreeInfo(ROOT::DescriptorId_t fieldId)
Recursively gather field-level information.
RNTupleInspector(std::unique_ptr< ROOT::Internal::RPageSource > pageSource)
size_t GetFieldCountByType(std::string_view typeNamePattern, bool searchInSubfields=true) const
Get the number of fields of a given type or class present in the RNTuple.
void PrintColumnTypeInfo(ENTupleInspectorPrintFormat format=ENTupleInspectorPrintFormat::kTable, std::ostream &output=std::cout)
Print storage information per column type.
std::optional< std::uint32_t > GetCompressionSettings() const
Get the compression settings of the RNTuple being inspected.
const RColumnInspector & GetColumnInspector(ROOT::DescriptorId_t physicalColumnId) const
Get storage information for a given column.
std::unique_ptr< ROOT::Internal::RPageSource > fPageSource
RNTupleInspector & operator=(const RNTupleInspector &other)=delete
static std::unique_ptr< RNTupleInspector > Create(const RNTuple &sourceNTuple)
Create a new RNTupleInspector.
std::unordered_map< int, RFieldTreeInspector > fFieldTreeInfo
void CollectColumnInfo()
Gather column-level and RNTuple-level information.
std::unordered_map< int, RColumnInspector > fColumnInfo
void PrintFieldTreeAsDot(const ROOT::RFieldDescriptor &fieldDescriptor, std::ostream &output=std::cout) const
Print a .dot string that represents the tree of the (sub)fields of an RNTuple.
std::vector< ROOT::DescriptorId_t > GetFieldsByName(std::string_view fieldNamePattern, bool searchInSubfields=true)
Get the IDs of (sub-)fields whose name matches the given string.
std::vector< ROOT::DescriptorId_t > GetAllColumnsOfField(ROOT::DescriptorId_t fieldId) const
Get the columns that make up the given field, including its subfields.
std::unique_ptr< TH1D > GetColumnTypeInfoAsHist(ENTupleInspectorHist histKind, std::string_view histName="", std::string_view histTitle="")
Get a histogram showing information for each column type present.
Metadata stored for every column of an RNTuple.
Metadata stored for every field of an RNTuple.
The on-storage metadata of an RNTuple.
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:67
const_iterator begin() const
const_iterator end() const
Namespace for new ROOT classes and functions.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
static void output()