24#include <unordered_map>
30 std::uint64_t fFirstEntry = 0;
31 std::uint32_t fNPages = 0;
32 std::uint32_t fNEntries = 0;
33 std::uint32_t fBytesOnStorage = 0;
34 std::uint32_t fBytesInMemory = 0;
37 return fFirstEntry == other.fFirstEntry;
40 bool operator <(
const ClusterInfo &other)
const {
41 return fFirstEntry < other.fFirstEntry;
48 std::uint64_t fLocalOrder = 0;
49 std::uint64_t fNElements = 0;
50 std::uint64_t fNPages = 0;
51 std::uint64_t fBytesOnStorage = 0;
52 std::uint32_t fElementSize = 0;
54 std::string fFieldName;
55 std::string fFieldDescription;
57 bool operator <(
const ColumnInfo &other)
const {
58 if (fFieldName == other.fFieldName)
59 return fLocalOrder < other.fLocalOrder;
60 return fFieldName < other.fFieldName;
70 return GetFieldName(fieldDesc.GetParentId(), ntupleDesc) +
"." + fieldDesc.GetFieldName();
84 std::vector<ColumnInfo> columns;
85 std::vector<ClusterInfo> clusters;
86 std::unordered_map<DescriptorId_t, unsigned int> cluster2Idx;
89 info.fFirstEntry = cluster.second.GetFirstEntryIndex();
90 info.fNEntries = cluster.second.GetNEntries();
91 cluster2Idx[cluster.first] = clusters.size();
92 clusters.emplace_back(info);
95 std::uint64_t bytesOnStorage = 0;
96 std::uint64_t bytesInMemory = 0;
97 std::uint64_t nPages = 0;
105 info.fColumnId = column.second.GetId();
106 info.fFieldId = column.second.GetFieldId();
107 info.fLocalOrder = column.second.GetIndex();
108 info.fElementSize = elementSize;
109 info.fType = column.second.GetModel().GetType();
112 auto columnRange = cluster.second.GetColumnRange(column.first);
113 info.fNElements += columnRange.fNElements;
114 if (compression == -1) {
115 compression = columnRange.fCompressionSettings;
117 const auto &pageRange = cluster.second.GetPageRange(column.first);
118 auto idx = cluster2Idx[cluster.first];
119 for (
const auto &page : pageRange.fPageInfos) {
120 bytesOnStorage += page.fLocator.fBytesOnStorage;
121 bytesInMemory += page.fNElements * elementSize;
122 clusters[idx].fBytesOnStorage += page.fLocator.fBytesOnStorage;
123 clusters[idx].fBytesInMemory += page.fNElements * elementSize;
124 ++clusters[idx].fNPages;
125 info.fBytesOnStorage += page.fLocator.fBytesOnStorage;
130 columns.emplace_back(info);
134 output <<
"============================================================" << std::endl;
136 output <<
"Compression: " << compression << std::endl;
137 output <<
"------------------------------------------------------------" << std::endl;
141 output <<
" # Pages: " << nPages << std::endl;
143 output <<
" Size on storage: " << bytesOnStorage <<
" B" << std::endl;
144 output <<
" Compression rate: " << std::fixed << std::setprecision(2)
145 << float(bytesInMemory) / float(bytesOnStorage) << std::endl;
146 output <<
" Header size: " << headerSize <<
" B" << std::endl;
147 output <<
" Footer size: " << footerSize <<
" B" << std::endl;
148 output <<
" Meta-data / data: " << std::fixed << std::setprecision(3)
149 << float(headerSize + footerSize) / float(bytesOnStorage) << std::endl;
150 output <<
"------------------------------------------------------------" << std::endl;
151 output <<
"CLUSTER DETAILS" << std::endl;
152 output <<
"------------------------------------------------------------" << std::endl;
154 std::sort(clusters.begin(), clusters.end());
155 for (
unsigned int i = 0; i < clusters.size(); ++i) {
156 output <<
" # " << std::setw(5) << i
157 <<
" Entry range: [" << clusters[i].fFirstEntry <<
".."
158 << clusters[i].fFirstEntry + clusters[i].fNEntries - 1 <<
"] -- " << clusters[i].fNEntries << std::endl;
160 <<
" # Pages: " << clusters[i].fNPages << std::endl;
162 <<
" Size on storage: " << clusters[i].fBytesOnStorage <<
" B" << std::endl;
164 <<
" Compression: " << std::fixed << std::setprecision(2)
165 << float(clusters[i].fBytesInMemory) / float(
float(clusters[i].fBytesOnStorage)) << std::endl;
168 output <<
"------------------------------------------------------------" << std::endl;
169 output <<
"COLUMN DETAILS" << std::endl;
170 output <<
"------------------------------------------------------------" << std::endl;
171 for (
auto &col : columns) {
172 col.fFieldName = GetFieldName(col.fFieldId, *
this).substr(1);
173 col.fFieldDescription = GetFieldDescription(col.fFieldId, *
this);
175 std::sort(columns.begin(), columns.end());
176 for (
const auto &col : columns) {
177 auto avgPageSize = (col.fNPages == 0) ? 0 : (col.fBytesOnStorage / col.fNPages);
178 auto avgElementsPerPage = (col.fNPages == 0) ? 0 : (col.fNElements / col.fNPages);
179 std::string nameAndType = std::string(
" ") + col.fFieldName +
" [#" + std::to_string(col.fLocalOrder) +
"]"
181 std::string
id = std::string(
"{id:") + std::to_string(col.fColumnId) +
"}";
182 output << nameAndType << std::setw(60 - nameAndType.length()) <<
id << std::endl;
183 if (!col.fFieldDescription.empty())
184 output <<
" Description: " << col.fFieldDescription << std::endl;
185 output <<
" # Elements: " << col.fNElements << std::endl;
186 output <<
" # Pages: " << col.fNPages << std::endl;
187 output <<
" Avg elements / page: " << avgElementsPerPage << std::endl;
188 output <<
" Avg page size: " << avgPageSize <<
" B" << std::endl;
189 output <<
" Size on storage: " << col.fBytesOnStorage <<
" B" << std::endl;
190 output <<
" Compression: " << std::fixed << std::setprecision(2)
191 << float(col.fElementSize * col.fNElements) / float(col.fBytesOnStorage) << std::endl;
192 output <<
"............................................................" << std::endl;
Bool_t operator<(const TDatime &d1, const TDatime &d2)
Bool_t operator==(const TDatime &d1, const TDatime &d2)
static std::unique_ptr< RColumnElementBase > Generate(EColumnType type)
static std::string GetTypeName(EColumnType type)
The available trivial, native content types of a column.
std::string GetFieldName() const
std::string GetFieldDescription() const
The on-storage meta-data of an ntuple.
std::unordered_map< DescriptorId_t, RClusterDescriptor > fClusterDescriptors
May contain only a subset of all the available clusters, e.g.
std::string GetName() const
std::uint64_t GetOnDiskHeaderSize() const
std::size_t GetNClusters() const
std::unordered_map< DescriptorId_t, RColumnDescriptor > fColumnDescriptors
NTupleSize_t GetNEntries() const
We know the number of entries from adding the cluster summaries.
std::size_t GetNFields() const
std::uint64_t GetOnDiskFooterSize() const
const RFieldDescriptor & GetFieldDescriptor(DescriptorId_t fieldId) const
std::size_t GetNColumns() const
void PrintInfo(std::ostream &output) const
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr DescriptorId_t kInvalidDescriptorId