50 fUncompressedSize = 0;
52 for (
const auto &
colDesc : fDescriptor.GetColumnIterable()) {
75 if (!fCompressionSettings &&
columnRange.GetCompressionSettings()) {
76 fCompressionSettings =
columnRange.GetCompressionSettings();
77 }
else if (fCompressionSettings &&
columnRange.GetCompressionSettings() &&
78 (*fCompressionSettings != *
columnRange.GetCompressionSettings())) {
83 std::to_string(*fCompressionSettings) +
" vs " +
84 std::to_string(*
columnRange.GetCompressionSettings()) +
85 ") for column with physical ID " + std::to_string(
colId)));
128std::vector<ROOT::DescriptorId_t>
131 std::vector<ROOT::DescriptorId_t>
colIds;
138 for (
const auto &col : fDescriptor.GetColumnIterable(
currId)) {
139 if (col.IsAliasColumn()) {
143 colIds.emplace_back(col.GetPhysicalId());
146 for (
const auto &
fld : fDescriptor.GetFieldIterable(
currId)) {
154std::unique_ptr<ROOT::Experimental::RNTupleInspector>
161std::unique_ptr<ROOT::Experimental::RNTupleInspector>
170 if (!fCompressionSettings)
173 int algorithm = *fCompressionSettings / 100;
174 int level = *fCompressionSettings - (
algorithm * 100);
177 " (level " + std::to_string(level) +
")";
205std::vector<ROOT::DescriptorId_t>
208 std::vector<ROOT::DescriptorId_t>
colIds;
220 std::set<ROOT::ENTupleColumnType>
colTypes;
236 std::uint32_t count = 0;
241 this->nElems +=
colInfo.GetNElements();
242 this->compressedSize +=
colInfo.GetCompressedSize();
243 this->uncompressedSize +=
colInfo.GetUncompressedSize();
244 this->nPages +=
colInfo.GetNPages();
248 float GetCompressionFactor()
const
256 std::map<ENTupleColumnType, ColumnTypeInfo>
colTypeInfo;
265 output <<
" column type | count | # elements | compressed bytes | uncompressed bytes | compression ratio | "
267 <<
"----------------|---------|-------------|------------------|--------------------|-------------------|-"
271 <<
typeInfo.count <<
" |" << std::setw(12) <<
typeInfo.nElems <<
" |" << std::setw(17)
272 <<
typeInfo.compressedSize <<
" |" << std::setw(19) <<
typeInfo.uncompressedSize <<
" |" << std::fixed
273 << std::setprecision(3) << std::setw(18) <<
typeInfo.GetCompressionFactor() <<
" |" << std::setw(6)
277 output <<
"columnType,count,nElements,compressedSize,uncompressedSize,compressionFactor,nPages\n";
280 <<
"," <<
typeInfo.compressedSize <<
"," <<
typeInfo.uncompressedSize <<
"," << std::fixed
281 << std::setprecision(3) <<
typeInfo.GetCompressionFactor() <<
"," <<
typeInfo.nPages <<
'\n';
284 default:
R__ASSERT(
false &&
"Invalid print format");
290 std::string_view histName, std::string_view
histTitle)
292 if (histName.empty()) {
312 auto hist = std::make_unique<TH1D>(std::string(histName).c_str(), std::string(
histTitle).c_str(), 1, 0, 1);
332 std::string histName, std::string
histTitle,
size_t nBins)
341 std::string histName,
344 if (histName.empty())
353 return std::make_unique<TH1D>(histName.c_str(),
histTitle.c_str(), 64, 0, 0);
355 auto hist = std::unique_ptr<TH1D>(
dynamic_cast<TH1D *
>(
perTypeHist->GetHists()->First()));
357 hist->SetName(histName.c_str());
359 hist->SetXTitle(
"Page size (B)");
360 hist->SetYTitle(
"N_{pages}");
366 std::string histName, std::string
histTitle,
size_t nBins)
368 auto hist = std::make_unique<TH1D>();
370 if (histName.empty())
371 histName =
"pageSizeHist";
372 hist->SetName(histName.c_str());
376 hist->SetXTitle(
"Page size (B)");
377 hist->SetYTitle(
"N_{pages}");
381 auto colInfo = GetColumnInspector(colId);
382 pageSizes.insert(pageSizes.end(), colInfo.GetCompressedPageSizes().begin(),
383 colInfo.GetCompressedPageSizes().end());
399std::unique_ptr<THStack>
401 std::string histName, std::string
histTitle,
size_t nBins)
403 if (histName.empty())
404 histName =
"pageSizeHist";
406 histTitle =
"Per-column type page size distribution";
410 double histMin = std::numeric_limits<double>::max();
412 std::map<ROOT::ENTupleColumnType, std::vector<std::uint64_t>>
pageSizes;
427 auto colInfo = GetColumnInspector(colId);
428 pageSizesForColType.insert(pageSizesForColType.end(), colInfo.GetCompressedPageSizes().begin(),
429 colInfo.GetCompressedPageSizes().end());
442 auto hist = std::make_unique<TH1D>(
462 if (
fieldId >= fDescriptor.GetNFields()) {
466 return fFieldTreeInfo.at(
fieldId);
478 return GetFieldTreeInspector(
fieldId);
499std::vector<ROOT::DescriptorId_t>
502 std::vector<ROOT::DescriptorId_t>
fieldIds;
519 std::ostream &output)
const
524 output <<
"digraph D {\n";
525 output <<
"node[shape=box]\n";
531 auto htmlEscape = [&](
const std::string &
in) -> std::string {
533 out.reserve(
in.size());
534 for (
const char &
c :
in) {
536 case '&': out +=
"&";
break;
537 case '<': out +=
"<";
break;
538 case '>': out +=
">";
break;
539 case '\"': out +=
""";
break;
540 case '\'': out +=
"'";
break;
541 default: out +=
c;
break;
547 output << nodeId <<
"[label=<";
551 output <<
"<b>ID: </b>" << std::to_string(
fieldDescriptor.GetId()) <<
"<br></br>";
555 output <<
"<b>Version: </b>" <<
version <<
"<br></br>";
557 output <<
"<b>RFieldZero</b>";
570struct SpeedscopeFrame {
571 std::string fPrimaryString;
572 std::string fSecondaryString;
573 std::uint64_t fOpeningPosition = 0;
574 std::uint64_t fClosingPosition = 0;
580 output <<
" \"$schema\":\"https://www.speedscope.app/file-format-schema.json\",\n";
581 output <<
" \"shared\":{\n";
582 output <<
" \"frames\":[\n";
584 for (std::size_t i = 0; i <
frames.size(); ++i) {
585 output <<
" { \"name\":\"" <<
frames[i].fPrimaryString
586 <<
"\", \"file\":\"Type: " <<
frames[i].fSecondaryString
587 <<
", Size: " <<
frames[i].fClosingPosition -
frames[i].fOpeningPosition <<
"B\" }"
588 << (i + 1 <
frames.size() ?
",\n" :
"\n");
593 output <<
" \"profiles\":[\n";
595 output <<
" \"type\":\"evented\",\n";
596 output <<
" \"name\":\"Flattened Timeline\",\n";
597 output <<
" \"unit\":\"bytes\",\n";
598 output <<
" \"startValue\":0,\n";
599 output <<
" \"endValue\":" <<
frames.back().fClosingPosition <<
",\n";
600 output <<
" \"events\":[\n";
610 std::uint32_t limit) -> std::size_t {
617 output <<
" {\"type\":\"O\",\"frame\":" <<
currentIdx
623 output <<
",\n {\"type\":\"C\",\"frame\":" <<
currentIdx
647 std::vector<SpeedscopeFrame>
frames;
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult<T>.
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t format
std::string & operator+=(std::string &left, const TString &right)
The available trivial, native content types of a column.
Provides column-level storage information.
Provides field-level storage information.
Inspect on-disk and storage-related information of an RNTuple.
std::vector< ROOT::DescriptorId_t > GetFieldsByName(const std::regex &fieldNamePattern, bool searchInSubfields=true) const
Get the IDs of (sub-)fields whose name matches the given string.
const RFieldTreeInspector & GetFieldTreeInspector(ROOT::DescriptorId_t fieldId) const
Get storage information for a given (sub)field by ID.
std::unique_ptr< TH1D > GetPageSizeDistribution(ROOT::DescriptorId_t physicalColumnId, std::string histName="", std::string histTitle="", size_t nBins=64)
Get a histogram containing the size distribution of the compressed pages for an individual column.
size_t GetColumnCountByType(ROOT::ENTupleColumnType colType) const
Get the number of columns of a given type present in the RNTuple.
std::vector< ROOT::ENTupleColumnType > GetColumnTypes()
Get all column types present in the RNTuple being inspected.
void PrintSchemaProfile(ESchemaProfileFormat format, std::ostream &output=std::cout) const
Print a string that represents the tree of the (sub)fields and columns of an RNTuple in a format whic...
size_t GetFieldCountByType(const std::regex &typeNamePattern, bool searchInSubfields=true) const
Get the number of fields of a given type or class present in the RNTuple.
std::vector< ROOT::DescriptorId_t > GetColumnsByType(ROOT::ENTupleColumnType colType)
Get the IDs of all columns with the given type.
std::string GetCompressionSettingsAsString() const
Get a string describing compression settings of the RNTuple being inspected.
RFieldTreeInspector CollectFieldTreeInfo(ROOT::DescriptorId_t fieldId)
Recursively gather field-level information.
RNTupleInspector(std::unique_ptr< ROOT::Internal::RPageSource > pageSource)
void PrintColumnTypeInfo(ENTupleInspectorPrintFormat format=ENTupleInspectorPrintFormat::kTable, std::ostream &output=std::cout)
Print storage information per column type.
const RColumnInspector & GetColumnInspector(ROOT::DescriptorId_t physicalColumnId) const
Get storage information for a given column.
std::unique_ptr< ROOT::Internal::RPageSource > fPageSource
static std::unique_ptr< RNTupleInspector > Create(const RNTuple &sourceNTuple)
Create a new RNTupleInspector.
void CollectColumnInfo()
Gather column-level and RNTuple-level information.
void PrintFieldTreeAsDot(const ROOT::RFieldDescriptor &fieldDescriptor, std::ostream &output=std::cout) const
Print a .dot string that represents the tree of the (sub)fields of an RNTuple.
std::vector< ROOT::DescriptorId_t > GetAllColumnsOfField(ROOT::DescriptorId_t fieldId) const
Get the columns that make up the given field, including its subfields.
std::unique_ptr< TH1D > GetColumnTypeInfoAsHist(ENTupleInspectorHist histKind, std::string_view histName="", std::string_view histTitle="")
Get a histogram showing information for each column type present.
ROOT::RNTupleDescriptor fDescriptor
A column element encapsulates the translation between basic C++ types and their column representation...
static const char * GetColumnTypeName(ROOT::ENTupleColumnType type)
static std::unique_ptr< RColumnElementBase > Generate(ROOT::ENTupleColumnType type)
If CppT == void, use the default C++ type for the given column type.
static std::unique_ptr< RPageSourceFile > CreateFromAnchor(const RNTuple &anchor, const ROOT::RNTupleReadOptions &options=ROOT::RNTupleReadOptions())
Used from the RNTuple class to build a datasource if the anchor is already available.
static std::unique_ptr< RPageSource > Create(std::string_view ntupleName, std::string_view location, const ROOT::RNTupleReadOptions &options=ROOT::RNTupleReadOptions())
Guess the concrete derived page source from the file name (location)
Base class for all ROOT issued exceptions.
Metadata stored for every field of an RNTuple.
ROOT::DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level RNTuple data fields.
Representation of an RNTuple data set in a ROOT file.
const_iterator begin() const
const_iterator end() const
1-D histogram with a double per channel (see TH1 documentation)
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
@ kSpeedscopeJSON
https://www.speedscope.app/file-format-schema.json
ENTupleInspectorPrintFormat
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr DescriptorId_t kInvalidDescriptorId
EValues
Note: this is only temporarily a struct and will become an enum class, hence the naming convention used.
static std::string AlgorithmToString(EAlgorithm::EValues algorithm)