Logo ROOT  
Reference Guide
Loading...
Searching...
No Matches
RNTupleDescriptorFmt.cxx
Go to the documentation of this file.
1/// \file RNTupleDescriptorFmt.cxx
2/// \author Jakob Blomer <jblomer@cern.ch>
3/// \date 2019-08-25
4
5/*************************************************************************
6 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
7 * All rights reserved. *
8 * *
9 * For the licensing terms see $ROOTSYS/LICENSE. *
10 * For the list of contributors see $ROOTSYS/README/CREDITS. *
11 *************************************************************************/
12
15#include <ROOT/RNTupleTypes.hxx>
16
17#include <algorithm>
18#include <iomanip>
19#include <ostream>
20#include <unordered_map>
21#include <vector>
22
23namespace {
24
/// Per-cluster totals gathered by PrintInfo() for its cluster listing.
///
/// All counters are 64 bit wide: they are fed from 64 bit sources (number of entries, summed page sizes),
/// and a 32 bit accumulator would silently wrap around for clusters beyond 4 GiB.
/// Equality and ordering only look at the first entry of the cluster, which is what PrintInfo() sorts by.
struct ClusterInfo {
   std::uint64_t fFirstEntry = 0;
   std::uint64_t fNPages = 0;
   std::uint64_t fNEntries = 0;
   std::uint64_t fNBytesOnStorage = 0;
   std::uint64_t fNBytesInMemory = 0;

   bool operator==(const ClusterInfo &other) const { return fFirstEntry == other.fFirstEntry; }

   bool operator<(const ClusterInfo &other) const { return fFirstEntry < other.fFirstEntry; }
};
36
37struct ColumnInfo {
38 ROOT::DescriptorId_t fPhysicalColumnId = 0;
39 ROOT::DescriptorId_t fLogicalColumnId = 0;
40 ROOT::DescriptorId_t fFieldId = 0;
41 std::uint64_t fNElements = 0;
42 std::uint64_t fNPages = 0;
43 std::uint64_t fNBytesOnStorage = 0;
44 std::uint32_t fElementSize = 0;
45 std::uint32_t fColumnIndex = 0;
46 std::uint16_t fRepresentationIndex = 0;
48 std::string fFieldName;
49 std::string fFieldDescription;
50
51 bool operator<(const ColumnInfo &other) const
52 {
53 if (fFieldName == other.fFieldName) {
54 if (fRepresentationIndex == other.fRepresentationIndex)
55 return fColumnIndex < other.fColumnIndex;
56 return fRepresentationIndex < other.fRepresentationIndex;
57 }
58 return fFieldName < other.fFieldName;
59 }
60};
61
62std::string GetFieldName(ROOT::DescriptorId_t fieldId, const ROOT::RNTupleDescriptor &ntupleDesc)
63{
64 const auto &fieldDesc = ntupleDesc.GetFieldDescriptor(fieldId);
65 if (fieldDesc.GetParentId() == ROOT::kInvalidDescriptorId)
66 return fieldDesc.GetFieldName();
67 return GetFieldName(fieldDesc.GetParentId(), ntupleDesc) + "." + fieldDesc.GetFieldName();
68}
69
70std::string GetFieldDescription(ROOT::DescriptorId_t fFieldId, const ROOT::RNTupleDescriptor &ntupleDesc)
71{
72 const auto &fieldDesc = ntupleDesc.GetFieldDescriptor(fFieldId);
73 return fieldDesc.GetFieldDescription();
74}
75
76} // anonymous namespace
77
78void ROOT::RNTupleDescriptor::PrintInfo(std::ostream &output) const
79{
80 std::vector<ColumnInfo> columns;
81 std::vector<ClusterInfo> clusters;
82 std::unordered_map<ROOT::DescriptorId_t, unsigned int> cluster2Idx;
83 clusters.reserve(fClusterDescriptors.size());
84 for (const auto &cluster : fClusterDescriptors) {
85 ClusterInfo info;
86 info.fFirstEntry = cluster.second.GetFirstEntryIndex();
87 info.fNEntries = cluster.second.GetNEntries();
88 cluster2Idx[cluster.first] = clusters.size();
89 clusters.emplace_back(info);
90 }
91
92 std::uint64_t nBytesOnStorage = 0;
93 std::uint64_t nBytesInMemory = 0;
94 std::uint64_t nPages = 0;
95 int compression = -1;
96 for (const auto &column : fColumnDescriptors) {
97 // Alias columns (columns of projected fields) don't contribute to the storage consumption. Count them
98 // but don't add the the page sizes to the overall volume.
99 if (column.second.IsAliasColumn())
100 continue;
101
102 // We generate the default memory representation for the given column type in order
103 // to report the size _in memory_ of column elements
104 auto elementSize = ROOT::Internal::RColumnElementBase::Generate(column.second.GetType())->GetSize();
105
106 ColumnInfo info;
107 info.fPhysicalColumnId = column.second.GetPhysicalId();
108 info.fLogicalColumnId = column.second.GetLogicalId();
109 info.fFieldId = column.second.GetFieldId();
110 info.fColumnIndex = column.second.GetIndex();
111 info.fElementSize = elementSize;
112 info.fType = column.second.GetType();
113 info.fRepresentationIndex = column.second.GetRepresentationIndex();
114
115 for (const auto &cluster : fClusterDescriptors) {
116 auto columnRange = cluster.second.GetColumnRange(column.second.GetPhysicalId());
117 if (columnRange.IsSuppressed())
118 continue;
119
120 info.fNElements += columnRange.GetNElements();
121 if (compression == -1 && columnRange.GetCompressionSettings()) {
122 compression = *columnRange.GetCompressionSettings();
123 }
124 const auto &pageRange = cluster.second.GetPageRange(column.second.GetPhysicalId());
125 auto idx = cluster2Idx[cluster.first];
126 for (const auto &page : pageRange.GetPageInfos()) {
127 nBytesOnStorage += page.GetLocator().GetNBytesOnStorage();
128 nBytesInMemory += page.GetNElements() * elementSize;
129 clusters[idx].fNBytesOnStorage += page.GetLocator().GetNBytesOnStorage();
130 clusters[idx].fNBytesInMemory += page.GetNElements() * elementSize;
131 ++clusters[idx].fNPages;
132 info.fNBytesOnStorage += page.GetLocator().GetNBytesOnStorage();
133 ++info.fNPages;
134 ++nPages;
135 }
136 }
137 columns.emplace_back(info);
138 }
139 auto headerSize = GetOnDiskHeaderSize();
140 auto footerSize = GetOnDiskFooterSize();
141 output << "============================================================\n";
142 output << "NTUPLE: " << GetName() << "\n";
143 output << "Compression: " << compression << "\n";
144 output << "------------------------------------------------------------\n";
145 output << " # Entries: " << GetNEntries() << "\n";
146 output << " # Fields: " << GetNFields() << "\n";
147 output << " # Columns: " << GetNPhysicalColumns() << "\n";
148 output << " # Alias Columns: " << GetNLogicalColumns() - GetNPhysicalColumns() << "\n";
149 output << " # Pages: " << nPages << "\n";
150 output << " # Clusters: " << GetNClusters() << "\n";
151 output << " Size on storage: " << nBytesOnStorage << " B" << "\n";
152 output << " Compression rate: " << std::fixed << std::setprecision(2)
153 << float(nBytesInMemory) / float(nBytesOnStorage) << "\n";
154 output << " Header size: " << headerSize << " B"
155 << "\n";
156 output << " Footer size: " << footerSize << " B"
157 << "\n";
158 output << " Metadata / data: " << std::fixed << std::setprecision(3)
159 << float(headerSize + footerSize) / float(nBytesOnStorage) << "\n";
160 output << "------------------------------------------------------------\n";
161 output << "CLUSTER DETAILS\n";
162 output << "------------------------------------------------------------" << std::endl;
163
164 std::sort(clusters.begin(), clusters.end());
165 for (unsigned int i = 0; i < clusters.size(); ++i) {
166 output << " # " << std::setw(5) << i << " Entry range: [" << clusters[i].fFirstEntry << ".."
167 << clusters[i].fFirstEntry + clusters[i].fNEntries - 1 << "] -- " << clusters[i].fNEntries << "\n";
168 output << " " << " # Pages: " << clusters[i].fNPages << "\n";
169 output << " " << " Size on storage: " << clusters[i].fNBytesOnStorage << " B\n";
170 output << " " << " Compression: " << std::fixed << std::setprecision(2)
171 << float(clusters[i].fNBytesInMemory) / float(float(clusters[i].fNBytesOnStorage)) << std::endl;
172 }
173
174 output << "------------------------------------------------------------\n";
175 output << "COLUMN DETAILS\n";
176 output << "------------------------------------------------------------\n";
177 for (auto &col : columns) {
178 col.fFieldName = GetFieldName(col.fFieldId, *this).substr(1);
179 col.fFieldDescription = GetFieldDescription(col.fFieldId, *this);
180 }
181 std::sort(columns.begin(), columns.end());
182 for (const auto &col : columns) {
183 auto avgPageSize = (col.fNPages == 0) ? 0 : (col.fNBytesOnStorage / col.fNPages);
184 auto avgElementsPerPage = (col.fNPages == 0) ? 0 : (col.fNElements / col.fNPages);
185 std::string nameAndType = std::string(" ") + col.fFieldName + " [#" + std::to_string(col.fColumnIndex);
186 if (col.fRepresentationIndex > 0)
187 nameAndType += " / R." + std::to_string(col.fRepresentationIndex);
188 nameAndType += "] -- " + std::string{ROOT::Internal::RColumnElementBase::GetColumnTypeName(col.fType)};
189 std::string id = std::string("{id:") + std::to_string(col.fLogicalColumnId) + "}";
190 if (col.fLogicalColumnId != col.fPhysicalColumnId)
191 id += " --alias--> " + std::to_string(col.fPhysicalColumnId);
192 output << nameAndType << std::setw(60 - nameAndType.length()) << id << "\n";
193 if (!col.fFieldDescription.empty())
194 output << " Description: " << col.fFieldDescription << "\n";
195 output << " # Elements: " << col.fNElements << "\n";
196 output << " # Pages: " << col.fNPages << "\n";
197 output << " Avg elements / page: " << avgElementsPerPage << "\n";
198 output << " Avg page size: " << avgPageSize << " B\n";
199 output << " Size on storage: " << col.fNBytesOnStorage << " B\n";
200 output << " Compression: " << std::fixed << std::setprecision(2)
201 << float(col.fElementSize * col.fNElements) / float(col.fNBytesOnStorage) << "\n";
202 output << "............................................................" << std::endl;
203 }
204}
Bool_t operator<(const TDatime &d1, const TDatime &d2)
Definition TDatime.h:106
Bool_t operator==(const TDatime &d1, const TDatime &d2)
Definition TDatime.h:102
static const char * GetColumnTypeName(ROOT::ENTupleColumnType type)
static std::unique_ptr< RColumnElementBase > Generate(ROOT::ENTupleColumnType type)
If CppT == void, use the default C++ type for the given column type.
const std::string & GetFieldDescription() const
const std::string & GetFieldName() const
The on-storage metadata of an RNTuple.
const RFieldDescriptor & GetFieldDescriptor(ROOT::DescriptorId_t fieldId) const
std::uint64_t GetOnDiskFooterSize() const
const std::string & GetName() const
ROOT::NTupleSize_t GetNEntries() const
We know the number of entries from adding the cluster summaries.
std::unordered_map< ROOT::DescriptorId_t, RClusterDescriptor > fClusterDescriptors
Potentially a subset of all the available clusters.
std::size_t GetNClusters() const
std::size_t GetNPhysicalColumns() const
void PrintInfo(std::ostream &output) const
std::unordered_map< ROOT::DescriptorId_t, RColumnDescriptor > fColumnDescriptors
std::size_t GetNFields() const
std::uint64_t GetOnDiskHeaderSize() const
std::size_t GetNLogicalColumns() const
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr DescriptorId_t kInvalidDescriptorId
ENTupleColumnType