23#include <unordered_map>
29 std::uint64_t fFirstEntry = 0;
30 std::uint32_t fNPages = 0;
31 std::uint32_t fNEntries = 0;
32 std::uint32_t fNBytesOnStorage = 0;
33 std::uint32_t fNBytesInMemory = 0;
// Equality for ClusterInfo records: two records compare equal when their
// fFirstEntry values match. No other member (page count, entry count, byte
// sizes) takes part in the comparison.
35 bool operator==(
const ClusterInfo &other)
const {
return fFirstEntry == other.fFirstEntry; }
// Ordering for ClusterInfo records: compares fFirstEntry only, so sorting a
// container of ClusterInfo with the default comparison arranges the records
// by ascending first entry. This is the comparison picked up by the
// comparator-less std::sort over the `clusters` vector in this file.
37 bool operator<(
const ClusterInfo &other)
const {
return fFirstEntry < other.fFirstEntry; }
44 std::uint64_t fNElements = 0;
45 std::uint64_t fNPages = 0;
46 std::uint64_t fNBytesOnStorage = 0;
47 std::uint32_t fElementSize = 0;
48 std::uint32_t fColumnIndex = 0;
49 std::uint16_t fRepresentationIndex = 0;
51 std::string fFieldName;
52 std::string fFieldDescription;
54 bool operator<(
const ColumnInfo &other)
const
56 if (fFieldName == other.fFieldName) {
57 if (fRepresentationIndex == other.fRepresentationIndex)
58 return fColumnIndex < other.fColumnIndex;
59 return fRepresentationIndex < other.fRepresentationIndex;
61 return fFieldName < other.fFieldName;
71 return GetFieldName(fieldDesc.GetParentId(), ntupleDesc) +
"." + fieldDesc.GetFieldName();
85 std::vector<ColumnInfo> columns;
86 std::vector<ClusterInfo> clusters;
87 std::unordered_map<DescriptorId_t, unsigned int> cluster2Idx;
91 info.fFirstEntry = cluster.second.GetFirstEntryIndex();
92 info.fNEntries = cluster.second.GetNEntries();
93 cluster2Idx[cluster.first] = clusters.size();
94 clusters.emplace_back(info);
97 std::uint64_t nBytesOnStorage = 0;
98 std::uint64_t nBytesInMemory = 0;
99 std::uint64_t nPages = 0;
100 int compression = -1;
104 if (column.second.IsAliasColumn())
112 info.fPhysicalColumnId = column.second.GetPhysicalId();
113 info.fLogicalColumnId = column.second.GetLogicalId();
114 info.fFieldId = column.second.GetFieldId();
115 info.fColumnIndex = column.second.GetIndex();
116 info.fElementSize = elementSize;
117 info.fType = column.second.GetType();
118 info.fRepresentationIndex = column.second.GetRepresentationIndex();
121 auto columnRange = cluster.second.GetColumnRange(column.second.GetPhysicalId());
122 if (columnRange.fIsSuppressed)
125 info.fNElements += columnRange.fNElements;
126 if (compression == -1) {
127 compression = columnRange.fCompressionSettings;
129 const auto &pageRange = cluster.second.GetPageRange(column.second.GetPhysicalId());
130 auto idx = cluster2Idx[cluster.first];
131 for (
const auto &page : pageRange.fPageInfos) {
132 nBytesOnStorage += page.fLocator.GetNBytesOnStorage();
133 nBytesInMemory += page.fNElements * elementSize;
134 clusters[idx].fNBytesOnStorage += page.fLocator.GetNBytesOnStorage();
135 clusters[idx].fNBytesInMemory += page.fNElements * elementSize;
136 ++clusters[idx].fNPages;
137 info.fNBytesOnStorage += page.fLocator.GetNBytesOnStorage();
142 columns.emplace_back(info);
146 output <<
"============================================================\n";
148 output <<
"Compression: " << compression <<
"\n";
149 output <<
"------------------------------------------------------------\n";
154 output <<
" # Pages: " << nPages <<
"\n";
156 output <<
" Size on storage: " << nBytesOnStorage <<
" B" <<
"\n";
157 output <<
" Compression rate: " << std::fixed << std::setprecision(2)
158 << float(nBytesInMemory) / float(nBytesOnStorage) <<
"\n";
159 output <<
" Header size: " << headerSize <<
" B"
161 output <<
" Footer size: " << footerSize <<
" B"
163 output <<
" Meta-data / data: " << std::fixed << std::setprecision(3)
164 << float(headerSize + footerSize) / float(nBytesOnStorage) <<
"\n";
165 output <<
"------------------------------------------------------------\n";
166 output <<
"CLUSTER DETAILS\n";
167 output <<
"------------------------------------------------------------" << std::endl;
169 std::sort(clusters.begin(), clusters.end());
170 for (
unsigned int i = 0; i < clusters.size(); ++i) {
171 output <<
" # " << std::setw(5) << i <<
" Entry range: [" << clusters[i].fFirstEntry <<
".."
172 << clusters[i].fFirstEntry + clusters[i].fNEntries - 1 <<
"] -- " << clusters[i].fNEntries <<
"\n";
173 output <<
" " <<
" # Pages: " << clusters[i].fNPages <<
"\n";
174 output <<
" " <<
" Size on storage: " << clusters[i].fNBytesOnStorage <<
" B\n";
175 output <<
" " <<
" Compression: " << std::fixed << std::setprecision(2)
176 << float(clusters[i].fNBytesInMemory) / float(
float(clusters[i].fNBytesOnStorage)) << std::endl;
179 output <<
"------------------------------------------------------------\n";
180 output <<
"COLUMN DETAILS\n";
181 output <<
"------------------------------------------------------------\n";
182 for (
auto &col : columns) {
183 col.fFieldName = GetFieldName(col.fFieldId, *
this).substr(1);
184 col.fFieldDescription = GetFieldDescription(col.fFieldId, *
this);
186 std::sort(columns.begin(), columns.end());
187 for (
const auto &col : columns) {
188 auto avgPageSize = (col.fNPages == 0) ? 0 : (col.fNBytesOnStorage / col.fNPages);
189 auto avgElementsPerPage = (col.fNPages == 0) ? 0 : (col.fNElements / col.fNPages);
190 std::string nameAndType = std::string(
" ") + col.fFieldName +
" [#" + std::to_string(col.fColumnIndex);
191 if (col.fRepresentationIndex > 0)
192 nameAndType +=
" / R." + std::to_string(col.fRepresentationIndex);
194 std::string
id = std::string(
"{id:") + std::to_string(col.fLogicalColumnId) +
"}";
195 if (col.fLogicalColumnId != col.fPhysicalColumnId)
196 id +=
" --alias--> " + std::to_string(col.fPhysicalColumnId);
197 output << nameAndType << std::setw(60 - nameAndType.length()) <<
id <<
"\n";
198 if (!col.fFieldDescription.empty())
199 output <<
" Description: " << col.fFieldDescription <<
"\n";
200 output <<
" # Elements: " << col.fNElements <<
"\n";
201 output <<
" # Pages: " << col.fNPages <<
"\n";
202 output <<
" Avg elements / page: " << avgElementsPerPage <<
"\n";
203 output <<
" Avg page size: " << avgPageSize <<
" B\n";
204 output <<
" Size on storage: " << col.fNBytesOnStorage <<
" B\n";
205 output <<
" Compression: " << std::fixed << std::setprecision(2)
206 << float(col.fElementSize * col.fNElements) / float(col.fNBytesOnStorage) <<
"\n";
207 output <<
"............................................................" << std::endl;
Bool_t operator<(const TDatime &d1, const TDatime &d2)
Bool_t operator==(const TDatime &d1, const TDatime &d2)
The available trivial, native content types of a column.
static const char * GetColumnTypeName(EColumnType type)
static std::unique_ptr< RColumnElementBase > Generate(EColumnType type)
If CppT == void, use the default C++ type for the given column type.
const std::string & GetFieldName() const
const std::string & GetFieldDescription() const
The on-storage meta-data of an ntuple.
std::unordered_map< DescriptorId_t, RClusterDescriptor > fClusterDescriptors
May contain only a subset of all the available clusters, e.g.
std::size_t GetNLogicalColumns() const
const std::string & GetName() const
std::uint64_t GetOnDiskHeaderSize() const
std::size_t GetNClusters() const
std::unordered_map< DescriptorId_t, RColumnDescriptor > fColumnDescriptors
NTupleSize_t GetNEntries() const
We know the number of entries from adding the cluster summaries.
std::size_t GetNFields() const
std::uint64_t GetOnDiskFooterSize() const
const RFieldDescriptor & GetFieldDescriptor(DescriptorId_t fieldId) const
std::size_t GetNPhysicalColumns() const
void PrintInfo(std::ostream &output) const
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr DescriptorId_t kInvalidDescriptorId