Logo ROOT  
Reference Guide
 
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
Loading...
Searching...
No Matches
RNTupleInspector.cxx
Go to the documentation of this file.
1/// \file RNTupleInspector.cxx
2/// \ingroup NTuple ROOT7
3/// \author Florine de Geus <florine.willemijn.de.geus@cern.ch>
4/// \date 2023-01-09
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2023, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#include <ROOT/RError.hxx>
17#include <ROOT/RNTuple.hxx>
20#include <ROOT/RError.hxx>
21
22#include <TFile.h>
23
24#include <algorithm>
25#include <cstring>
26#include <deque>
27#include <exception>
28#include <iomanip>
29#include <iostream>
30
32 std::unique_ptr<ROOT::Experimental::Detail::RPageSource> pageSource)
33 : fPageSource(std::move(pageSource))
34{
35 fPageSource->Attach();
36 auto descriptorGuard = fPageSource->GetSharedDescriptorGuard();
37 fDescriptor = descriptorGuard->Clone();
38}
39
41{
42 fCompressedSize = 0;
43 fUncompressedSize = 0;
44
45 for (const auto &colDesc : fDescriptor->GetColumnIterable()) {
46 auto colId = colDesc.GetPhysicalId();
47
48 // We generate the default memory representation for the given column type in order
49 // to report the size _in memory_ of column elements.
50 auto colType = colDesc.GetModel().GetType();
51 std::uint32_t elemSize = ROOT::Experimental::Detail::RColumnElementBase::Generate(colType)->GetSize();
52 std::uint64_t nElems = 0;
53 std::uint64_t compressedSize = 0;
54
55 for (const auto &clusterDescriptor : fDescriptor->GetClusterIterable()) {
56 if (!clusterDescriptor.ContainsColumn(colId)) {
57 continue;
58 }
59
60 auto columnRange = clusterDescriptor.GetColumnRange(colId);
61 nElems += columnRange.fNElements;
62
63 if (fCompressionSettings == -1) {
64 fCompressionSettings = columnRange.fCompressionSettings;
65 } else if (fCompressionSettings != columnRange.fCompressionSettings) {
66 // Note that currently all clusters and columns are compressed with the same settings and it is not yet
67 // possible to do otherwise. This measn that currently, this exception should never be thrown, but this
68 // could change in the future.
69 throw RException(R__FAIL("compression setting mismatch between column ranges (" +
70 std::to_string(fCompressionSettings) + " vs " +
71 std::to_string(columnRange.fCompressionSettings) + ")"));
72 }
73
74 const auto &pageRange = clusterDescriptor.GetPageRange(colId);
75
76 for (const auto &page : pageRange.fPageInfos) {
77 compressedSize += page.fLocator.fBytesOnStorage;
78 fCompressedSize += page.fLocator.fBytesOnStorage;
79 fUncompressedSize += page.fNElements * elemSize;
80 }
81 }
82
83 fColumnInfo.emplace(colId, RColumnInfo(colDesc, compressedSize, elemSize, nElems));
84 }
85}
86
89{
90 std::uint64_t compressedSize = 0;
91 std::uint64_t uncompressedSize = 0;
92
93 for (const auto &colDescriptor : fDescriptor->GetColumnIterable(fieldId)) {
94 auto colInfo = GetColumnInfo(colDescriptor.GetPhysicalId());
95 compressedSize += colInfo.GetCompressedSize();
96 uncompressedSize += colInfo.GetUncompressedSize();
97 }
98
99 for (const auto &subFieldDescriptor : fDescriptor->GetFieldIterable(fieldId)) {
100 DescriptorId_t subFieldId = subFieldDescriptor.GetId();
101
102 auto subFieldInfo = CollectFieldTreeInfo(subFieldId);
103
104 compressedSize += subFieldInfo.GetCompressedSize();
105 uncompressedSize += subFieldInfo.GetUncompressedSize();
106 }
107
108 auto fieldInfo = RFieldTreeInfo(fDescriptor->GetFieldDescriptor(fieldId), compressedSize, uncompressedSize);
109 fFieldTreeInfo.emplace(fieldId, fieldInfo);
110 return fieldInfo;
111}
112
113std::vector<ROOT::Experimental::DescriptorId_t>
115{
116 std::vector<DescriptorId_t> colIds;
117 std::deque<DescriptorId_t> fieldIdQueue{fieldId};
118
119 while (!fieldIdQueue.empty()) {
120 auto currId = fieldIdQueue.front();
121 fieldIdQueue.pop_front();
122
123 for (const auto &col : fDescriptor->GetColumnIterable(currId)) {
124 if (col.IsAliasColumn()) {
125 continue;
126 }
127
128 colIds.emplace_back(col.GetPhysicalId());
129 }
130
131 for (const auto &fld : fDescriptor->GetFieldIterable(currId)) {
132 fieldIdQueue.push_back(fld.GetId());
133 }
134 }
135
136 return colIds;
137}
138
139std::unique_ptr<ROOT::Experimental::RNTupleInspector>
140ROOT::Experimental::RNTupleInspector::Create(std::unique_ptr<ROOT::Experimental::Detail::RPageSource> pageSource)
141{
142 auto inspector = std::unique_ptr<RNTupleInspector>(new RNTupleInspector(std::move(pageSource)));
143
144 inspector->CollectColumnInfo();
145 inspector->CollectFieldTreeInfo(inspector->GetDescriptor()->GetFieldZeroId());
146
147 return inspector;
148}
149
150std::unique_ptr<ROOT::Experimental::RNTupleInspector>
152{
153 if (!sourceNTuple) {
154 throw RException(R__FAIL("provided RNTuple is null"));
155 }
156
157 std::unique_ptr<ROOT::Experimental::Detail::RPageSource> pageSource = sourceNTuple->MakePageSource();
158
159 return ROOT::Experimental::RNTupleInspector::Create(std::move(pageSource));
160}
161
162std::unique_ptr<ROOT::Experimental::RNTupleInspector>
163ROOT::Experimental::RNTupleInspector::Create(std::string_view ntupleName, std::string_view sourceFileName)
164{
165 auto sourceFile = std::unique_ptr<TFile>(TFile::Open(std::string(sourceFileName).c_str()));
166 if (!sourceFile || sourceFile->IsZombie()) {
167 throw RException(R__FAIL("cannot open source file " + std::string(sourceFileName)));
168 }
169 auto ntuple = std::unique_ptr<ROOT::Experimental::RNTuple>(
170 sourceFile->Get<ROOT::Experimental::RNTuple>(std::string(ntupleName).c_str()));
171 if (!ntuple) {
172 throw RException(
173 R__FAIL("cannot read RNTuple " + std::string(ntupleName) + " from " + std::string(sourceFileName)));
174 }
175
176 auto inspector = std::unique_ptr<RNTupleInspector>(new RNTupleInspector(ntuple->MakePageSource()));
177 inspector->fSourceFile = std::move(sourceFile);
178
179 inspector->CollectColumnInfo();
180 inspector->CollectFieldTreeInfo(inspector->GetDescriptor()->GetFieldZeroId());
181
182 return inspector;
183}
184
186{
187 int algorithm = fCompressionSettings / 100;
188 int level = fCompressionSettings - (algorithm * 100);
189
191 " (level " + std::to_string(level) + ")";
192}
193
194//------------------------------------------------------------------------------
195
198{
199 if (physicalColumnId > fDescriptor->GetNPhysicalColumns()) {
200 throw RException(R__FAIL("No column with physical ID " + std::to_string(physicalColumnId) + " present"));
201 }
202
203 return fColumnInfo.at(physicalColumnId);
204}
205
207{
208 size_t typeCount = 0;
209
210 for (auto &[colId, colInfo] : fColumnInfo) {
211 if (colInfo.GetType() == colType) {
212 ++typeCount;
213 }
214 }
215
216 return typeCount;
217}
218
219const std::vector<ROOT::Experimental::DescriptorId_t>
221{
222 std::vector<DescriptorId_t> colIds;
223
224 for (const auto &[colId, colInfo] : fColumnInfo) {
225 if (colInfo.GetType() == colType)
226 colIds.emplace_back(colId);
227 }
228
229 return colIds;
230}
231
233{
234 struct ColumnTypeInfo {
235 std::uint32_t count;
236 std::uint64_t nElems, compressedSize, uncompressedSize;
237
238 void operator+=(const RColumnInfo &colInfo)
239 {
240 this->count++;
241 this->nElems += colInfo.GetNElements();
242 this->compressedSize += colInfo.GetCompressedSize();
243 this->uncompressedSize += colInfo.GetUncompressedSize();
244 }
245 };
246
247 std::map<EColumnType, ColumnTypeInfo> colTypeInfo;
248
249 for (const auto &[colId, colInfo] : fColumnInfo) {
250 colTypeInfo[colInfo.GetType()] += colInfo;
251 }
252
253 switch (format) {
255 output << " column type | count | # elements | compressed bytes | uncompressed bytes\n"
256 << "----------------|---------|-----------------|-------------------|--------------------" << std::endl;
257 for (const auto &[colType, typeInfo] : colTypeInfo) {
258 output << std::setw(15) << Detail::RColumnElementBase::GetTypeName(colType) << " |" << std::setw(8)
259 << typeInfo.count << " |" << std::setw(16) << typeInfo.nElems << " |" << std::setw(18)
260 << typeInfo.compressedSize << " |" << std::setw(18) << typeInfo.uncompressedSize << " " << std::endl;
261 }
262 break;
264 output << "columnType,count,nElements,compressedSize,uncompressedSize" << std::endl;
265 for (const auto &[colType, typeInfo] : colTypeInfo) {
266 output << Detail::RColumnElementBase::GetTypeName(colType) << "," << typeInfo.count << "," << typeInfo.nElems
267 << "," << typeInfo.compressedSize << "," << typeInfo.uncompressedSize << std::endl;
268 }
269 break;
270 default: throw RException(R__FAIL("Invalid print format"));
271 }
272}
273
274std::unique_ptr<TH1D>
276 std::string_view histName, std::string_view histTitle)
277{
278 if (histName == "") {
279 switch (histKind) {
280 case ENTupleInspectorHist::kCount: histName = "colTypeCountHist"; break;
281 case ENTupleInspectorHist::kNElems: histName = "colTypeElemCountHist"; break;
282 case ENTupleInspectorHist::kCompressedSize: histName = "colTypeCompSizeHist"; break;
283 case ENTupleInspectorHist::kUncompressedSize: histName = "colTypeUncompSizeHist"; break;
284 default: throw RException(R__FAIL("Unknown histogram type"));
285 }
286 }
287
288 if (histTitle == "") {
289 switch (histKind) {
290 case ENTupleInspectorHist::kCount: histTitle = "Column count by type"; break;
291 case ENTupleInspectorHist::kNElems: histTitle = "Number of elements by column type"; break;
292 case ENTupleInspectorHist::kCompressedSize: histTitle = "Compressed size by column type"; break;
293 case ENTupleInspectorHist::kUncompressedSize: histTitle = "Uncompressed size by column type"; break;
294 default: throw RException(R__FAIL("Unknown histogram type"));
295 }
296 }
297
298 auto hist = std::make_unique<TH1D>(std::string(histName).c_str(), std::string(histTitle).c_str(), 1, 0, 1);
299
300 double data;
301 for (const auto &[colId, colInfo] : fColumnInfo) {
302 switch (histKind) {
303 case ENTupleInspectorHist::kCount: data = 1.; break;
304 case ENTupleInspectorHist::kNElems: data = colInfo.GetNElements(); break;
305 case ENTupleInspectorHist::kCompressedSize: data = colInfo.GetCompressedSize(); break;
306 case ENTupleInspectorHist::kUncompressedSize: data = colInfo.GetUncompressedSize(); break;
307 default: throw RException(R__FAIL("Unknown histogram type"));
308 }
309
310 hist->AddBinContent(hist->GetXaxis()->FindBin(Detail::RColumnElementBase::GetTypeName(colInfo.GetType()).c_str()),
311 data);
312 }
313
314 return hist;
315}
316
317//------------------------------------------------------------------------------
318
321{
322 if (fieldId >= fDescriptor->GetNFields()) {
323 throw RException(R__FAIL("No field with ID " + std::to_string(fieldId) + " present"));
324 }
325
326 return fFieldTreeInfo.at(fieldId);
327}
328
331{
332 DescriptorId_t fieldId = fDescriptor->FindFieldId(fieldName);
333
334 if (fieldId == kInvalidDescriptorId) {
335 throw RException(R__FAIL("Could not find field `" + std::string(fieldName) + "`"));
336 }
337
338 return GetFieldTreeInfo(fieldId);
339}
340
341size_t ROOT::Experimental::RNTupleInspector::GetFieldCountByType(const std::regex &typeNamePattern,
342 bool includeSubFields) const
343{
344 size_t typeCount = 0;
345
346 for (auto &[fldId, fldInfo] : fFieldTreeInfo) {
347 if (!includeSubFields && fldInfo.GetDescriptor().GetParentId() != fDescriptor->GetFieldZeroId()) {
348 continue;
349 }
350
351 if (std::regex_match(fldInfo.GetDescriptor().GetTypeName(), typeNamePattern)) {
352 typeCount++;
353 }
354 }
355
356 return typeCount;
357}
358
359const std::vector<ROOT::Experimental::DescriptorId_t>
360ROOT::Experimental::RNTupleInspector::GetFieldsByName(const std::regex &fieldNamePattern, bool searchInSubFields) const
361{
362 std::vector<DescriptorId_t> fieldIds;
363
364 for (auto &[fldId, fldInfo] : fFieldTreeInfo) {
365
366 if (!searchInSubFields && fldInfo.GetDescriptor().GetParentId() != fDescriptor->GetFieldZeroId()) {
367 continue;
368 }
369
370 if (std::regex_match(fldInfo.GetDescriptor().GetFieldName(), fieldNamePattern)) {
371 fieldIds.emplace_back(fldId);
372 }
373 }
374
375 return fieldIds;
376}
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:303
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t format
std::string & operator+=(std::string &left, const TString &right)
Definition TString.h:490
static std::string GetTypeName(EColumnType type)
static std::unique_ptr< RColumnElementBase > Generate(EColumnType type)
If CppT == void, use the default C++ type for the given column type.
The available trivial, native content types of a column.
Base class for all ROOT issued exceptions.
Definition RError.hxx:78
Holds column-level storage information.
Inspect on-disk and storage-related information of an RNTuple.
const RFieldTreeInfo & GetFieldTreeInfo(DescriptorId_t fieldId) const
Get storage information for a given (sub)field by ID.
const std::vector< DescriptorId_t > GetFieldsByName(const std::regex &fieldNamePattern, bool searchInSubFields=true) const
Get the IDs of (sub-)fields whose name matches the given string.
size_t GetColumnCountByType(EColumnType colType) const
Get the number of columns of a given type present in the RNTuple.
std::string GetCompressionSettingsAsString() const
Get a string describing compression settings of the RNTuple being inspected.
static std::unique_ptr< RNTupleInspector > Create(RNTuple *sourceNTuple)
Create a new RNTupleInspector.
const std::vector< DescriptorId_t > GetColumnsByType(EColumnType colType)
Get the IDs of all columns with the given type.
void PrintColumnTypeInfo(ENTupleInspectorPrintFormat format=ENTupleInspectorPrintFormat::kTable, std::ostream &output=std::cout)
Print storage information per column type.
RFieldTreeInfo CollectFieldTreeInfo(DescriptorId_t fieldId)
Recursively gather field-level information.
std::unique_ptr< RNTupleDescriptor > fDescriptor
std::vector< DescriptorId_t > GetColumnsByFieldId(DescriptorId_t fieldId) const
Get the columns that make up the given field, including its subfields.
RNTupleInspector(std::unique_ptr< Detail::RPageSource > pageSource)
void CollectColumnInfo()
Gather column-level and RNTuple-level information.
size_t GetFieldCountByType(const std::regex &typeNamePattern, bool searchInSubFields=true) const
Get the number of fields of a given type or class present in the RNTuple.
const RColumnInfo & GetColumnInfo(DescriptorId_t physicalColumnId) const
Get storage information for a given column.
std::unique_ptr< TH1D > GetColumnTypeInfoAsHist(ENTupleInspectorHist histKind, std::string_view histName="", std::string_view histTitle="")
Get a histogram showing information for each column type present.
std::unique_ptr< Detail::RPageSource > fPageSource
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:512
std::unique_ptr< Detail::RPageSource > MakePageSource(const RNTupleReadOptions &options=RNTupleReadOptions())
Create a page source from the RNTuple object.
Definition RNTuple.cxx:383
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition TFile.cxx:4075
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr DescriptorId_t kInvalidDescriptorId
EValues
Note: this is only temporarily a struct and will become a enum class hence the name.
Definition Compression.h:85
static std::string AlgorithmToString(EAlgorithm::EValues algorithm)
static void output()