Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleDescriptorFmt.cxx
Go to the documentation of this file.
1/// \file RNTupleDescriptorFmt.cxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2019-08-25
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
18#include <ROOT/RNTupleUtil.hxx>
19
20#include <algorithm>
21#include <iomanip>
22#include <ostream>
23#include <unordered_map>
24#include <vector>
25
26namespace {
27
/// Per-cluster figures collected by PrintInfo for the "CLUSTER DETAILS" section.
///
/// All counters are 64 bit wide, like the corresponding counters in ColumnInfo: they are
/// accumulated page by page and must not wrap around at 2^32.
struct ClusterInfo {
   std::uint64_t fFirstEntry = 0;     ///< Index of the first entry of the cluster; the only key for == and <
   std::uint64_t fNPages = 0;         ///< Number of pages of the cluster, summed over all columns
   std::uint64_t fNEntries = 0;       ///< Number of entries of the cluster
   std::uint64_t fBytesOnStorage = 0; ///< Sum of the on-storage sizes of the cluster's pages
   std::uint64_t fBytesInMemory = 0;  ///< Sum of (number of elements * in-memory element size) of the pages

   /// Two infos are considered equal if they start at the same entry; the other members are ignored.
   bool operator==(const ClusterInfo &other) const { return fFirstEntry == other.fFirstEntry; }

   /// Orders cluster infos by their first entry, which is what std::sort uses to list clusters in entry order.
   bool operator<(const ClusterInfo &other) const { return fFirstEntry < other.fFirstEntry; }
};
39
40struct ColumnInfo {
41 ROOT::Experimental::DescriptorId_t fPhysicalColumnId = 0;
42 ROOT::Experimental::DescriptorId_t fLogicalColumnId = 0;
44 std::uint64_t fNElements = 0;
45 std::uint64_t fNPages = 0;
46 std::uint64_t fBytesOnStorage = 0;
47 std::uint32_t fElementSize = 0;
48 std::uint32_t fColumnIndex = 0;
49 std::uint16_t fRepresentationIndex = 0;
51 std::string fFieldName;
52 std::string fFieldDescription;
53
54 bool operator<(const ColumnInfo &other) const
55 {
56 if (fFieldName == other.fFieldName) {
57 if (fRepresentationIndex == other.fRepresentationIndex)
58 return fColumnIndex < other.fColumnIndex;
59 return fRepresentationIndex < other.fRepresentationIndex;
60 }
61 return fFieldName < other.fFieldName;
62 }
63};
64
65std::string
67{
68 const auto &fieldDesc = ntupleDesc.GetFieldDescriptor(fieldId);
69 if (fieldDesc.GetParentId() == ROOT::Experimental::kInvalidDescriptorId)
70 return fieldDesc.GetFieldName();
71 return GetFieldName(fieldDesc.GetParentId(), ntupleDesc) + "." + fieldDesc.GetFieldName();
72}
73
74std::string GetFieldDescription(ROOT::Experimental::DescriptorId_t fFieldId,
76{
77 const auto &fieldDesc = ntupleDesc.GetFieldDescriptor(fFieldId);
78 return fieldDesc.GetFieldDescription();
79}
80
81} // anonymous namespace
82
84{
85 std::vector<ColumnInfo> columns;
86 std::vector<ClusterInfo> clusters;
87 std::unordered_map<DescriptorId_t, unsigned int> cluster2Idx;
88 clusters.reserve(fClusterDescriptors.size());
89 for (const auto &cluster : fClusterDescriptors) {
90 ClusterInfo info;
91 info.fFirstEntry = cluster.second.GetFirstEntryIndex();
92 info.fNEntries = cluster.second.GetNEntries();
93 cluster2Idx[cluster.first] = clusters.size();
94 clusters.emplace_back(info);
95 }
96
97 std::uint64_t bytesOnStorage = 0;
98 std::uint64_t bytesInMemory = 0;
99 std::uint64_t nPages = 0;
100 int compression = -1;
101 for (const auto &column : fColumnDescriptors) {
102 // Alias columns (columns of projected fields) don't contribute to the storage consumption. Count them
103 // but don't add the the page sizes to the overall volume.
104 if (column.second.IsAliasColumn())
105 continue;
106
107 // We generate the default memory representation for the given column type in order
108 // to report the size _in memory_ of column elements
109 auto elementSize = Internal::RColumnElementBase::Generate(column.second.GetType())->GetSize();
110
111 ColumnInfo info;
112 info.fPhysicalColumnId = column.second.GetPhysicalId();
113 info.fLogicalColumnId = column.second.GetLogicalId();
114 info.fFieldId = column.second.GetFieldId();
115 info.fColumnIndex = column.second.GetIndex();
116 info.fElementSize = elementSize;
117 info.fType = column.second.GetType();
118 info.fRepresentationIndex = column.second.GetRepresentationIndex();
119
120 for (const auto &cluster : fClusterDescriptors) {
121 auto columnRange = cluster.second.GetColumnRange(column.second.GetPhysicalId());
122 if (columnRange.fIsSuppressed)
123 continue;
124
125 info.fNElements += columnRange.fNElements;
126 if (compression == -1) {
127 compression = columnRange.fCompressionSettings;
128 }
129 const auto &pageRange = cluster.second.GetPageRange(column.second.GetPhysicalId());
130 auto idx = cluster2Idx[cluster.first];
131 for (const auto &page : pageRange.fPageInfos) {
132 bytesOnStorage += page.fLocator.fBytesOnStorage;
133 bytesInMemory += page.fNElements * elementSize;
134 clusters[idx].fBytesOnStorage += page.fLocator.fBytesOnStorage;
135 clusters[idx].fBytesInMemory += page.fNElements * elementSize;
136 ++clusters[idx].fNPages;
137 info.fBytesOnStorage += page.fLocator.fBytesOnStorage;
138 ++info.fNPages;
139 ++nPages;
140 }
141 }
142 columns.emplace_back(info);
143 }
144 auto headerSize = GetOnDiskHeaderSize();
145 auto footerSize = GetOnDiskFooterSize();
146 output << "============================================================\n";
147 output << "NTUPLE: " << GetName() << "\n";
148 output << "Compression: " << compression << "\n";
149 output << "------------------------------------------------------------\n";
150 output << " # Entries: " << GetNEntries() << "\n";
151 output << " # Fields: " << GetNFields() << "\n";
152 output << " # Columns: " << GetNPhysicalColumns() << "\n";
153 output << " # Alias Columns: " << GetNLogicalColumns() - GetNPhysicalColumns() << "\n";
154 output << " # Pages: " << nPages << "\n";
155 output << " # Clusters: " << GetNClusters() << "\n";
156 output << " Size on storage: " << bytesOnStorage << " B"
157 << "\n";
158 output << " Compression rate: " << std::fixed << std::setprecision(2)
159 << float(bytesInMemory) / float(bytesOnStorage) << "\n";
160 output << " Header size: " << headerSize << " B"
161 << "\n";
162 output << " Footer size: " << footerSize << " B"
163 << "\n";
164 output << " Meta-data / data: " << std::fixed << std::setprecision(3)
165 << float(headerSize + footerSize) / float(bytesOnStorage) << "\n";
166 output << "------------------------------------------------------------\n";
167 output << "CLUSTER DETAILS\n";
168 output << "------------------------------------------------------------" << std::endl;
169
170 std::sort(clusters.begin(), clusters.end());
171 for (unsigned int i = 0; i < clusters.size(); ++i) {
172 output << " # " << std::setw(5) << i << " Entry range: [" << clusters[i].fFirstEntry << ".."
173 << clusters[i].fFirstEntry + clusters[i].fNEntries - 1 << "] -- " << clusters[i].fNEntries << "\n";
174 output << " "
175 << " # Pages: " << clusters[i].fNPages << "\n";
176 output << " "
177 << " Size on storage: " << clusters[i].fBytesOnStorage << " B\n";
178 output << " "
179 << " Compression: " << std::fixed << std::setprecision(2)
180 << float(clusters[i].fBytesInMemory) / float(float(clusters[i].fBytesOnStorage)) << std::endl;
181 }
182
183 output << "------------------------------------------------------------\n";
184 output << "COLUMN DETAILS\n";
185 output << "------------------------------------------------------------\n";
186 for (auto &col : columns) {
187 col.fFieldName = GetFieldName(col.fFieldId, *this).substr(1);
188 col.fFieldDescription = GetFieldDescription(col.fFieldId, *this);
189 }
190 std::sort(columns.begin(), columns.end());
191 for (const auto &col : columns) {
192 auto avgPageSize = (col.fNPages == 0) ? 0 : (col.fBytesOnStorage / col.fNPages);
193 auto avgElementsPerPage = (col.fNPages == 0) ? 0 : (col.fNElements / col.fNPages);
194 std::string nameAndType = std::string(" ") + col.fFieldName + " [#" + std::to_string(col.fColumnIndex);
195 if (col.fRepresentationIndex > 0)
196 nameAndType += " / R." + std::to_string(col.fRepresentationIndex);
197 nameAndType += "] -- " + Internal::RColumnElementBase::GetTypeName(col.fType);
198 std::string id = std::string("{id:") + std::to_string(col.fLogicalColumnId) + "}";
199 if (col.fLogicalColumnId != col.fPhysicalColumnId)
200 id += " --alias--> " + std::to_string(col.fPhysicalColumnId);
201 output << nameAndType << std::setw(60 - nameAndType.length()) << id << "\n";
202 if (!col.fFieldDescription.empty())
203 output << " Description: " << col.fFieldDescription << "\n";
204 output << " # Elements: " << col.fNElements << "\n";
205 output << " # Pages: " << col.fNPages << "\n";
206 output << " Avg elements / page: " << avgElementsPerPage << "\n";
207 output << " Avg page size: " << avgPageSize << " B\n";
208 output << " Size on storage: " << col.fBytesOnStorage << " B\n";
209 output << " Compression: " << std::fixed << std::setprecision(2)
210 << float(col.fElementSize * col.fNElements) / float(col.fBytesOnStorage) << "\n";
211 output << "............................................................" << std::endl;
212 }
213}
Bool_t operator<(const TDatime &d1, const TDatime &d2)
Definition TDatime.h:106
Bool_t operator==(const TDatime &d1, const TDatime &d2)
Definition TDatime.h:102
The available trivial, native content types of a column.
static std::string GetTypeName(EColumnType type)
static std::unique_ptr< RColumnElementBase > Generate(EColumnType type)
If CppT == void, use the default C++ type for the given column type.
const std::string & GetFieldName() const
const std::string & GetFieldDescription() const
The on-storage meta-data of an ntuple.
std::unordered_map< DescriptorId_t, RClusterDescriptor > fClusterDescriptors
May contain only a subset of all the available clusters, e.g.
std::unordered_map< DescriptorId_t, RColumnDescriptor > fColumnDescriptors
NTupleSize_t GetNEntries() const
We know the number of entries from adding the cluster summaries.
const RFieldDescriptor & GetFieldDescriptor(DescriptorId_t fieldId) const
void PrintInfo(std::ostream &output) const
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr DescriptorId_t kInvalidDescriptorId
static void output()