Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleInspector.cxx
Go to the documentation of this file.
1/// \file RNTupleInspector.cxx
2/// \author Florine de Geus <florine.willemijn.de.geus@cern.ch>
3/// \date 2023-01-09
4/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
5/// is welcome!
6
7/*************************************************************************
8 * Copyright (C) 1995-2023, Rene Brun and Fons Rademakers. *
9 * All rights reserved. *
10 * *
11 * For the licensing terms see $ROOTSYS/LICENSE. *
12 * For the list of contributors see $ROOTSYS/README/CREDITS. *
13 *************************************************************************/
14
16#include <ROOT/RError.hxx>
20
21#include <TFile.h>
22
23#include <algorithm>
24#include <cstring>
25#include <deque>
26#include <exception>
27#include <functional>
28#include <iomanip>
29#include <iostream>
30
32
33ROOT::Experimental::RNTupleInspector::RNTupleInspector(std::unique_ptr<ROOT::Internal::RPageSource> pageSource)
34 : fPageSource(std::move(pageSource))
35{
36 fPageSource->Attach();
37 auto descriptorGuard = fPageSource->GetSharedDescriptorGuard();
39
42}
43
44// NOTE: outlined to avoid including RPageStorage in the header
46
48{
49 fCompressedSize = 0;
50 fUncompressedSize = 0;
51
52 for (const auto &colDesc : fDescriptor.GetColumnIterable()) {
53 if (colDesc.IsAliasColumn())
54 continue;
55
56 auto colId = colDesc.GetPhysicalId();
57
58 // We generate the default memory representation for the given column type in order
59 // to report the size _in memory_ of column elements.
60 std::uint32_t elemSize = RColumnElementBase::Generate(colDesc.GetType())->GetSize();
61 std::uint64_t nElems = 0;
62 std::vector<std::uint64_t> compressedPageSizes{};
63
64 for (const auto &clusterDescriptor : fDescriptor.GetClusterIterable()) {
65 if (!clusterDescriptor.ContainsColumn(colId)) {
66 continue;
67 }
68
69 auto columnRange = clusterDescriptor.GetColumnRange(colId);
70 if (columnRange.IsSuppressed())
71 continue;
72
73 nElems += columnRange.GetNElements();
74
75 if (!fCompressionSettings && columnRange.GetCompressionSettings()) {
76 fCompressionSettings = columnRange.GetCompressionSettings();
77 } else if (fCompressionSettings && columnRange.GetCompressionSettings() &&
78 (*fCompressionSettings != *columnRange.GetCompressionSettings())) {
79 // Note that currently all clusters and columns are compressed with the same settings and it is not yet
80 // possible to do otherwise. This means that currently, this exception should never be thrown, but this
81 // could change in the future.
82 throw RException(R__FAIL("compression setting mismatch between column ranges (" +
83 std::to_string(*fCompressionSettings) + " vs " +
84 std::to_string(*columnRange.GetCompressionSettings()) +
85 ") for column with physical ID " + std::to_string(colId)));
86 }
87
88 const auto &pageRange = clusterDescriptor.GetPageRange(colId);
89
90 for (const auto &page : pageRange.GetPageInfos()) {
91 compressedPageSizes.emplace_back(page.GetLocator().GetNBytesOnStorage());
92 fUncompressedSize += page.GetNElements() * elemSize;
93 }
94 }
95
96 fCompressedSize +=
97 std::accumulate(compressedPageSizes.begin(), compressedPageSizes.end(), static_cast<std::uint64_t>(0));
99 }
100}
101
104{
105 std::uint64_t compressedSize = 0;
106 std::uint64_t uncompressedSize = 0;
107
108 for (const auto &colDescriptor : fDescriptor.GetColumnIterable(fieldId)) {
109 auto colInfo = GetColumnInspector(colDescriptor.GetPhysicalId());
110 compressedSize += colInfo.GetCompressedSize();
111 uncompressedSize += colInfo.GetUncompressedSize();
112 }
113
114 for (const auto &subFieldDescriptor : fDescriptor.GetFieldIterable(fieldId)) {
115 auto subFieldId = subFieldDescriptor.GetId();
116
117 auto subFieldInfo = CollectFieldTreeInfo(subFieldId);
118
119 compressedSize += subFieldInfo.GetCompressedSize();
120 uncompressedSize += subFieldInfo.GetUncompressedSize();
121 }
122
123 auto fieldInfo = RFieldTreeInspector(fDescriptor.GetFieldDescriptor(fieldId), compressedSize, uncompressedSize);
124 fFieldTreeInfo.emplace(fieldId, fieldInfo);
125 return fieldInfo;
126}
127
128std::vector<ROOT::DescriptorId_t>
130{
131 std::vector<ROOT::DescriptorId_t> colIds;
132 std::deque<ROOT::DescriptorId_t> fieldIdQueue{fieldId};
133
134 while (!fieldIdQueue.empty()) {
135 auto currId = fieldIdQueue.front();
136 fieldIdQueue.pop_front();
137
138 for (const auto &col : fDescriptor.GetColumnIterable(currId)) {
139 if (col.IsAliasColumn()) {
140 continue;
141 }
142
143 colIds.emplace_back(col.GetPhysicalId());
144 }
145
146 for (const auto &fld : fDescriptor.GetFieldIterable(currId)) {
147 fieldIdQueue.push_back(fld.GetId());
148 }
149 }
150
151 return colIds;
152}
153
154std::unique_ptr<ROOT::Experimental::RNTupleInspector>
160
161std::unique_ptr<ROOT::Experimental::RNTupleInspector>
163{
165 return std::unique_ptr<RNTupleInspector>(new RNTupleInspector(std::move(pageSource)));
166}
167
169{
170 if (!fCompressionSettings)
171 return "unknown";
172
173 int algorithm = *fCompressionSettings / 100;
174 int level = *fCompressionSettings - (algorithm * 100);
175
177 " (level " + std::to_string(level) + ")";
178}
179
180//------------------------------------------------------------------------------
181
184{
185 if (physicalColumnId > fDescriptor.GetNPhysicalColumns()) {
186 throw RException(R__FAIL("No column with physical ID " + std::to_string(physicalColumnId) + " present"));
187 }
188
189 return fColumnInfo.at(physicalColumnId);
190}
191
193{
194 size_t typeCount = 0;
195
196 for (auto &[colId, colInfo] : fColumnInfo) {
197 if (colInfo.GetType() == colType) {
198 ++typeCount;
199 }
200 }
201
202 return typeCount;
203}
204
205std::vector<ROOT::DescriptorId_t>
207{
208 std::vector<ROOT::DescriptorId_t> colIds;
209
210 for (const auto &[colId, colInfo] : fColumnInfo) {
211 if (colInfo.GetType() == colType)
212 colIds.emplace_back(colId);
213 }
214
215 return colIds;
216}
217
218std::vector<ROOT::ENTupleColumnType> ROOT::Experimental::RNTupleInspector::GetColumnTypes()
219{
220 std::set<ROOT::ENTupleColumnType> colTypes;
221
222 for (const auto &[colId, colInfo] : fColumnInfo) {
223 colTypes.emplace(colInfo.GetType());
224 }
225
226 return std::vector(colTypes.begin(), colTypes.end());
227}
228
230{
231 struct ColumnTypeInfo {
232 std::uint64_t nElems = 0;
233 std::uint64_t compressedSize = 0;
234 std::uint64_t uncompressedSize = 0;
235 std::uint64_t nPages = 0;
236 std::uint32_t count = 0;
237
239 {
240 this->count++;
241 this->nElems += colInfo.GetNElements();
242 this->compressedSize += colInfo.GetCompressedSize();
243 this->uncompressedSize += colInfo.GetUncompressedSize();
244 this->nPages += colInfo.GetNPages();
245 }
246
247 // Helper method to calculate compression factor
248 float GetCompressionFactor() const
249 {
250 if (compressedSize == 0)
251 return 1.0;
252 return static_cast<float>(uncompressedSize) / static_cast<float>(compressedSize);
253 }
254 };
255
256 std::map<ENTupleColumnType, ColumnTypeInfo> colTypeInfo;
257
258 // Collect information for each column
259 for (const auto &[colId, colInfo] : fColumnInfo) {
260 colTypeInfo[colInfo.GetType()] += colInfo;
261 }
262
263 switch (format) {
265 output << " column type | count | # elements | compressed bytes | uncompressed bytes | compression ratio | "
266 "# pages \n"
267 << "----------------|---------|-------------|------------------|--------------------|-------------------|-"
268 "------\n";
269 for (const auto &[colType, typeInfo] : colTypeInfo)
270 output << std::setw(15) << RColumnElementBase::GetColumnTypeName(colType) << " |" << std::setw(8)
271 << typeInfo.count << " |" << std::setw(12) << typeInfo.nElems << " |" << std::setw(17)
272 << typeInfo.compressedSize << " |" << std::setw(19) << typeInfo.uncompressedSize << " |" << std::fixed
273 << std::setprecision(3) << std::setw(18) << typeInfo.GetCompressionFactor() << " |" << std::setw(6)
274 << typeInfo.nPages << " \n";
275 break;
277 output << "columnType,count,nElements,compressedSize,uncompressedSize,compressionFactor,nPages\n";
278 for (const auto &[colType, typeInfo] : colTypeInfo) {
279 output << RColumnElementBase::GetColumnTypeName(colType) << "," << typeInfo.count << "," << typeInfo.nElems
280 << "," << typeInfo.compressedSize << "," << typeInfo.uncompressedSize << "," << std::fixed
281 << std::setprecision(3) << typeInfo.GetCompressionFactor() << "," << typeInfo.nPages << '\n';
282 }
283 break;
284 default: R__ASSERT(false && "Invalid print format");
285 }
286}
287
288std::unique_ptr<TH1D>
290 std::string_view histName, std::string_view histTitle)
291{
292 if (histName.empty()) {
293 switch (histKind) {
294 case ENTupleInspectorHist::kCount: histName = "colTypeCountHist"; break;
295 case ENTupleInspectorHist::kNElems: histName = "colTypeElemCountHist"; break;
296 case ENTupleInspectorHist::kCompressedSize: histName = "colTypeCompSizeHist"; break;
297 case ENTupleInspectorHist::kUncompressedSize: histName = "colTypeUncompSizeHist"; break;
298 default: throw RException(R__FAIL("Unknown histogram type"));
299 }
300 }
301
302 if (histTitle.empty()) {
303 switch (histKind) {
304 case ENTupleInspectorHist::kCount: histTitle = "Column count by type"; break;
305 case ENTupleInspectorHist::kNElems: histTitle = "Number of elements by column type"; break;
306 case ENTupleInspectorHist::kCompressedSize: histTitle = "Compressed size by column type"; break;
307 case ENTupleInspectorHist::kUncompressedSize: histTitle = "Uncompressed size by column type"; break;
308 default: throw RException(R__FAIL("Unknown histogram type"));
309 }
310 }
311
312 auto hist = std::make_unique<TH1D>(std::string(histName).c_str(), std::string(histTitle).c_str(), 1, 0, 1);
313
314 double data;
315 for (const auto &[colId, colInfo] : fColumnInfo) {
316 switch (histKind) {
317 case ENTupleInspectorHist::kCount: data = 1.; break;
318 case ENTupleInspectorHist::kNElems: data = colInfo.GetNElements(); break;
319 case ENTupleInspectorHist::kCompressedSize: data = colInfo.GetCompressedSize(); break;
320 case ENTupleInspectorHist::kUncompressedSize: data = colInfo.GetUncompressedSize(); break;
321 default: throw RException(R__FAIL("Unknown histogram type"));
322 }
323
324 hist->AddBinContent(hist->GetXaxis()->FindBin(RColumnElementBase::GetColumnTypeName(colInfo.GetType())), data);
325 }
326
327 return hist;
328}
329
330std::unique_ptr<TH1D>
332 std::string histName, std::string histTitle, size_t nBins)
333{
334 if (histTitle.empty())
335 histTitle = "Page size distribution for column with ID " + std::to_string(physicalColumnId);
336
337 return GetPageSizeDistribution({physicalColumnId}, histName, histTitle, nBins);
338}
339
341 std::string histName,
342 std::string histTitle, size_t nBins)
343{
344 if (histName.empty())
345 histName = "pageSizeHistCol" + std::string{RColumnElementBase::GetColumnTypeName(colType)};
346 if (histTitle.empty())
347 histTitle =
348 "Page size distribution for columns with type " + std::string{RColumnElementBase::GetColumnTypeName(colType)};
349
350 auto perTypeHist = GetPageSizeDistribution({colType}, histName, histTitle, nBins);
351
352 if (perTypeHist->GetNhists() < 1)
353 return std::make_unique<TH1D>(histName.c_str(), histTitle.c_str(), 64, 0, 0);
354
355 auto hist = std::unique_ptr<TH1D>(dynamic_cast<TH1D *>(perTypeHist->GetHists()->First()));
356
357 hist->SetName(histName.c_str());
358 hist->SetTitle(histTitle.c_str());
359 hist->SetXTitle("Page size (B)");
360 hist->SetYTitle("N_{pages}");
361 return hist;
362}
363
364std::unique_ptr<TH1D>
366 std::string histName, std::string histTitle, size_t nBins)
367{
368 auto hist = std::make_unique<TH1D>();
369
370 if (histName.empty())
371 histName = "pageSizeHist";
372 hist->SetName(histName.c_str());
373 if (histTitle.empty())
374 histTitle = "Page size distribution";
375 hist->SetTitle(histTitle.c_str());
376 hist->SetXTitle("Page size (B)");
377 hist->SetYTitle("N_{pages}");
378
379 std::vector<std::uint64_t> pageSizes;
380 std::for_each(colIds.begin(), colIds.end(), [this, &pageSizes](const auto colId) {
381 auto colInfo = GetColumnInspector(colId);
382 pageSizes.insert(pageSizes.end(), colInfo.GetCompressedPageSizes().begin(),
383 colInfo.GetCompressedPageSizes().end());
384 });
385
386 if (!pageSizes.empty()) {
387 auto histMinMax = std::minmax_element(pageSizes.begin(), pageSizes.end());
388 hist->SetBins(nBins, *histMinMax.first,
389 *histMinMax.second + ((*histMinMax.second - *histMinMax.first) / static_cast<double>(nBins)));
390
391 for (const auto pageSize : pageSizes) {
392 hist->Fill(pageSize);
393 }
394 }
395
396 return hist;
397}
398
399std::unique_ptr<THStack>
400ROOT::Experimental::RNTupleInspector::GetPageSizeDistribution(std::initializer_list<ROOT::ENTupleColumnType> colTypes,
401 std::string histName, std::string histTitle, size_t nBins)
402{
403 if (histName.empty())
404 histName = "pageSizeHist";
405 if (histTitle.empty())
406 histTitle = "Per-column type page size distribution";
407
408 auto stackedHist = std::make_unique<THStack>(histName.c_str(), histTitle.c_str());
409
410 double histMin = std::numeric_limits<double>::max();
411 double histMax = 0;
412 std::map<ROOT::ENTupleColumnType, std::vector<std::uint64_t>> pageSizes;
413
414 std::vector<ROOT::ENTupleColumnType> colTypeVec = colTypes;
415 if (std::empty(colTypes)) {
416 colTypeVec = GetColumnTypes();
417 }
418
419 for (const auto colType : colTypeVec) {
420 auto colIds = GetColumnsByType(colType);
421
422 if (colIds.empty())
423 continue;
424
425 std::vector<std::uint64_t> pageSizesForColType;
426 std::for_each(colIds.cbegin(), colIds.cend(), [this, &pageSizesForColType](const auto colId) {
427 auto colInfo = GetColumnInspector(colId);
428 pageSizesForColType.insert(pageSizesForColType.end(), colInfo.GetCompressedPageSizes().begin(),
429 colInfo.GetCompressedPageSizes().end());
430 });
431 if (pageSizesForColType.empty())
432 continue;
433
435
436 auto histMinMax = std::minmax_element(pageSizesForColType.begin(), pageSizesForColType.end());
437 histMin = std::min(histMin, static_cast<double>(*histMinMax.first));
438 histMax = std::max(histMax, static_cast<double>(*histMinMax.second));
439 }
440
441 for (const auto &[colType, pageSizesForColType] : pageSizes) {
442 auto hist = std::make_unique<TH1D>(
445 histMax + ((histMax - histMin) / static_cast<double>(nBins)));
446
447 for (const auto pageSize : pageSizesForColType) {
448 hist->Fill(pageSize);
449 }
450
451 stackedHist->Add(hist.release());
452 }
453
454 return stackedHist;
455}
456
457//------------------------------------------------------------------------------
458
461{
462 if (fieldId >= fDescriptor.GetNFields()) {
463 throw RException(R__FAIL("No field with ID " + std::to_string(fieldId) + " present"));
464 }
465
466 return fFieldTreeInfo.at(fieldId);
467}
468
471{
472 auto fieldId = fDescriptor.FindFieldId(fieldName);
473
475 throw RException(R__FAIL("Could not find field `" + std::string(fieldName) + "`"));
476 }
477
478 return GetFieldTreeInspector(fieldId);
479}
480
482 bool includeSubfields) const
483{
484 size_t typeCount = 0;
485
486 for (auto &[fldId, fldInfo] : fFieldTreeInfo) {
487 if (!includeSubfields && fldInfo.GetDescriptor().GetParentId() != fDescriptor.GetFieldZeroId()) {
488 continue;
489 }
490
491 if (std::regex_match(fldInfo.GetDescriptor().GetTypeName(), typeNamePattern)) {
492 typeCount++;
493 }
494 }
495
496 return typeCount;
497}
498
499std::vector<ROOT::DescriptorId_t>
501{
502 std::vector<ROOT::DescriptorId_t> fieldIds;
503
504 for (auto &[fldId, fldInfo] : fFieldTreeInfo) {
505
506 if (!searchInSubfields && fldInfo.GetDescriptor().GetParentId() != fDescriptor.GetFieldZeroId()) {
507 continue;
508 }
509
510 if (std::regex_match(fldInfo.GetDescriptor().GetFieldName(), fieldNamePattern)) {
511 fieldIds.emplace_back(fldId);
512 }
513 }
514
515 return fieldIds;
516}
517
519 std::ostream &output) const
520{
521 const auto &tupleDescriptor = GetDescriptor();
522 const bool isZeroField = fieldDescriptor.GetParentId() == ROOT::kInvalidDescriptorId;
523 if (isZeroField) {
524 output << "digraph D {\n";
525 output << "node[shape=box]\n";
526 }
527 const std::string &nodeId = (isZeroField) ? "0" : std::to_string(fieldDescriptor.GetId() + 1);
528 const std::string &description = fieldDescriptor.GetFieldDescription();
529 const std::uint32_t &version = fieldDescriptor.GetFieldVersion();
530
531 auto htmlEscape = [&](const std::string &in) -> std::string {
532 std::string out;
533 out.reserve(in.size());
534 for (const char &c : in) {
535 switch (c) {
536 case '&': out += "&amp;"; break;
537 case '<': out += "&lt;"; break;
538 case '>': out += "&gt;"; break;
539 case '\"': out += "&quot;"; break;
540 case '\'': out += "&#39;"; break;
541 default: out += c; break;
542 }
543 }
544 return out;
545 };
546
547 output << nodeId << "[label=<";
548 if (!isZeroField) {
549 output << "<b>Name: </b>" << htmlEscape(fieldDescriptor.GetFieldName()) << "<br></br>";
550 output << "<b>Type: </b>" << htmlEscape(fieldDescriptor.GetTypeName()) << "<br></br>";
551 output << "<b>ID: </b>" << std::to_string(fieldDescriptor.GetId()) << "<br></br>";
552 if (description != "")
553 output << "<b>Description: </b>" << htmlEscape(description) << "<br></br>";
554 if (version != 0)
555 output << "<b>Version: </b>" << version << "<br></br>";
556 } else
557 output << "<b>RFieldZero</b>";
558 output << ">]\n";
559 for (const auto &childFieldId : fieldDescriptor.GetLinkIds()) {
560 const auto &childFieldDescriptor = tupleDescriptor.GetFieldDescriptor(childFieldId);
561 output << nodeId + "->" + std::to_string(childFieldDescriptor.GetId() + 1) + "\n";
562 PrintFieldTreeAsDot(childFieldDescriptor, output);
563 }
564 if (isZeroField)
565 output << "}";
566}
567
568namespace {
569
/// One frame of the speedscope output: two descriptive strings and the [opening, closing)
/// positions of the frame, whose difference is printed as the frame's size in bytes.
struct SpeedscopeFrame {
   /// Written as the frame's "name".
   std::string fPrimaryString;
   /// Written after "Type: " in the frame's "file" entry.
   std::string fSecondaryString;
   /// Position at which the frame opens.
   std::uint64_t fOpeningPosition{0};
   /// Position at which the frame closes.
   std::uint64_t fClosingPosition{0};
};
576
577static void PrintSpeedscopeFrames(const std::vector<SpeedscopeFrame> &frames, std::ostream &output)
578{
579 output << "{\n";
580 output << " \"$schema\":\"https://www.speedscope.app/file-format-schema.json\",\n";
581 output << " \"shared\":{\n";
582 output << " \"frames\":[\n";
583
584 for (std::size_t i = 0; i < frames.size(); ++i) {
585 output << " { \"name\":\"" << frames[i].fPrimaryString
586 << "\", \"file\":\"Type: " << frames[i].fSecondaryString
587 << ", Size: " << frames[i].fClosingPosition - frames[i].fOpeningPosition << "B\" }"
588 << (i + 1 < frames.size() ? ",\n" : "\n");
589 }
590
591 output << " ]\n";
592 output << " },\n";
593 output << " \"profiles\":[\n";
594 output << " {\n";
595 output << " \"type\":\"evented\",\n";
596 output << " \"name\":\"Flattened Timeline\",\n";
597 output << " \"unit\":\"bytes\",\n";
598 output << " \"startValue\":0,\n";
599 output << " \"endValue\":" << frames.back().fClosingPosition << ",\n";
600 output << " \"events\":[\n";
601
602 bool first = true;
603
604 // Parameter idx Index of the frame being processed
605 // Parameter limit
606 // - If the frame is not root: Closing Position of its father
607 // - If the frame is root: Closing Position of the last element of frames
608 // Returns the next index to be processed
609 std::function<std::size_t(std::size_t, std::uint32_t)> processRecursive = [&](std::size_t nextIdxToProcess,
610 std::uint32_t limit) -> std::size_t {
611 while (nextIdxToProcess < frames.size() && frames[nextIdxToProcess].fOpeningPosition < limit) {
612 const std::size_t currentIdx = nextIdxToProcess;
613
614 if (!first)
615 output << ",\n";
616
617 output << " {\"type\":\"O\",\"frame\":" << currentIdx
618 << ",\"at\":" << frames[currentIdx].fOpeningPosition << "}";
619 first = false;
620
622
623 output << ",\n {\"type\":\"C\",\"frame\":" << currentIdx
624 << ",\"at\":" << frames[currentIdx].fClosingPosition << "}";
625 }
626 return nextIdxToProcess;
627 };
628
629 processRecursive(0, frames.back().fClosingPosition);
630
631 output << "\n ]\n";
632 output << " }\n";
633 output << " ]\n";
634 output << "}\n";
635}
636} // namespace
637
639{
640 // There is only one format at the moment
642
643 const auto &tupleDescriptor = GetDescriptor();
645 const auto &rootFieldDescriptor = tupleDescriptor.GetFieldDescriptor(rootId);
646
647 std::vector<SpeedscopeFrame> frames;
648 std::uint64_t positionCursor = 0;
649
650 // Returns size of the visited field
651 auto visitFieldsRecursive = [&](auto &self, const ROOT::RFieldDescriptor &fieldDescriptor) -> std::size_t {
652 SpeedscopeFrame fieldSpeedscopeFrame;
653 fieldSpeedscopeFrame.fPrimaryString = tupleDescriptor.GetQualifiedFieldName(fieldDescriptor.GetId());
654 fieldSpeedscopeFrame.fSecondaryString = fieldDescriptor.GetTypeName();
655 fieldSpeedscopeFrame.fOpeningPosition = positionCursor;
656 frames.push_back(fieldSpeedscopeFrame);
657
658 const std::size_t fieldSpeedscopeFrameIndex = frames.size() - 1;
659
660 std::size_t subTreeSize = 0;
661 const auto &childIds = fieldDescriptor.GetLinkIds();
662
663 for (const auto &childFieldId : childIds) {
664 const auto &childFieldDescriptor = tupleDescriptor.GetFieldDescriptor(childFieldId);
666 }
667
668 for (const auto &columnDescriptor : tupleDescriptor.GetColumnIterable(fieldDescriptor.GetId())) {
669 const auto &columnInfo = GetColumnInspector(columnDescriptor.GetPhysicalId());
670 std::size_t columnSize = columnInfo.GetCompressedSize();
671
672 SpeedscopeFrame columnSpeedscopeFrame;
673 columnSpeedscopeFrame.fPrimaryString = tupleDescriptor.GetQualifiedFieldName(fieldDescriptor.GetId()) +
674 " [col#" + std::to_string(columnDescriptor.GetPhysicalId()) + "]";
675 columnSpeedscopeFrame.fSecondaryString =
677 columnSpeedscopeFrame.fOpeningPosition = positionCursor;
679 columnSpeedscopeFrame.fClosingPosition = positionCursor;
680 frames.push_back(columnSpeedscopeFrame);
682 }
683
685
686 return subTreeSize;
687 };
688
690
692}
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:322
#define c(i)
Definition RSha256.hxx:101
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t format
std::string & operator+=(std::string &left, const TString &right)
Definition TString.h:497
The available trivial, native content types of a column.
Provides column-level storage information.
Inspect on-disk and storage-related information of an RNTuple.
std::vector< ROOT::DescriptorId_t > GetFieldsByName(const std::regex &fieldNamePattern, bool searchInSubfields=true) const
Get the IDs of (sub-)fields whose name matches the given string.
const RFieldTreeInspector & GetFieldTreeInspector(ROOT::DescriptorId_t fieldId) const
Get storage information for a given (sub)field by ID.
std::unique_ptr< TH1D > GetPageSizeDistribution(ROOT::DescriptorId_t physicalColumnId, std::string histName="", std::string histTitle="", size_t nBins=64)
Get a histogram containing the size distribution of the compressed pages for an individual column.
size_t GetColumnCountByType(ROOT::ENTupleColumnType colType) const
Get the number of columns of a given type present in the RNTuple.
std::vector< ROOT::ENTupleColumnType > GetColumnTypes()
Get all column types present in the RNTuple being inspected.
void PrintSchemaProfile(ESchemaProfileFormat format, std::ostream &output=std::cout) const
Print a string that represents the tree of the (sub)fields and columns of an RNTuple in a format whic...
size_t GetFieldCountByType(const std::regex &typeNamePattern, bool searchInSubfields=true) const
Get the number of fields of a given type or class present in the RNTuple.
std::vector< ROOT::DescriptorId_t > GetColumnsByType(ROOT::ENTupleColumnType colType)
Get the IDs of all columns with the given type.
std::string GetCompressionSettingsAsString() const
Get a string describing compression settings of the RNTuple being inspected.
RFieldTreeInspector CollectFieldTreeInfo(ROOT::DescriptorId_t fieldId)
Recursively gather field-level information.
RNTupleInspector(std::unique_ptr< ROOT::Internal::RPageSource > pageSource)
void PrintColumnTypeInfo(ENTupleInspectorPrintFormat format=ENTupleInspectorPrintFormat::kTable, std::ostream &output=std::cout)
Print storage information per column type.
const RColumnInspector & GetColumnInspector(ROOT::DescriptorId_t physicalColumnId) const
Get storage information for a given column.
std::unique_ptr< ROOT::Internal::RPageSource > fPageSource
static std::unique_ptr< RNTupleInspector > Create(const RNTuple &sourceNTuple)
Create a new RNTupleInspector.
void CollectColumnInfo()
Gather column-level and RNTuple-level information.
void PrintFieldTreeAsDot(const ROOT::RFieldDescriptor &fieldDescriptor, std::ostream &output=std::cout) const
Print a .dot string that represents the tree of the (sub)fields of an RNTuple.
std::vector< ROOT::DescriptorId_t > GetAllColumnsOfField(ROOT::DescriptorId_t fieldId) const
Get the columns that make up the given field, including its subfields.
std::unique_ptr< TH1D > GetColumnTypeInfoAsHist(ENTupleInspectorHist histKind, std::string_view histName="", std::string_view histTitle="")
Get a histogram showing information for each column type present.
A column element encapsulates the translation between basic C++ types and their column representation...
static const char * GetColumnTypeName(ROOT::ENTupleColumnType type)
static std::unique_ptr< RColumnElementBase > Generate(ROOT::ENTupleColumnType type)
If CppT == void, use the default C++ type for the given column type.
static std::unique_ptr< RPageSourceFile > CreateFromAnchor(const RNTuple &anchor, const ROOT::RNTupleReadOptions &options=ROOT::RNTupleReadOptions())
Used from the RNTuple class to build a datasource if the anchor is already available.
static std::unique_ptr< RPageSource > Create(std::string_view ntupleName, std::string_view location, const ROOT::RNTupleReadOptions &options=ROOT::RNTupleReadOptions())
Guess the concrete derived page source from the file name (location)
Base class for all ROOT issued exceptions.
Definition RError.hxx:78
Metadata stored for every field of an RNTuple.
ROOT::DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level RNTuple data fields.
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:67
const_iterator begin() const
const_iterator end() const
1-D histogram with a double per channel (see TH1 documentation)
Definition TH1.h:926
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition TString.cxx:2459
@ kSpeedscopeJSON
https://www.speedscope.app/file-format-schema.json
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr DescriptorId_t kInvalidDescriptorId
EValues
Note: this is only temporarily a struct and will become a enum class hence the name convention used.
Definition Compression.h:88
static std::string AlgorithmToString(EAlgorithm::EValues algorithm)