Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleDescriptorFmt.cxx
Go to the documentation of this file.
/// \file RNTupleDescriptorFmt.cxx
/// \ingroup NTuple ROOT7
/// \author Jakob Blomer <jblomer@cern.ch>
/// \date 2019-08-25
/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
/// is welcome!
7
/*************************************************************************
 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers.               *
 * All rights reserved.                                                  *
 *                                                                       *
 * For the licensing terms see $ROOTSYS/LICENSE.                         *
 * For the list of contributors see $ROOTSYS/README/CREDITS.             *
 *************************************************************************/
15
17#include <ROOT/RColumnModel.hxx>
19#include <ROOT/RNTupleUtil.hxx>
20
21#include <algorithm>
22#include <iomanip>
23#include <ostream>
24#include <unordered_map>
25#include <vector>
26
27namespace {
28
/// Per-cluster statistics collected while printing the ntuple summary.
///
/// All counters are 64 bit wide: the byte counters are accumulated page by page, and a
/// 32-bit accumulator would silently wrap once a cluster exceeds 4 GiB.
struct ClusterInfo {
   std::uint64_t fFirstEntry = 0;     ///< Index of the first entry stored in the cluster
   std::uint64_t fNPages = 0;         ///< Number of pages, summed over all columns
   std::uint64_t fNEntries = 0;       ///< Number of entries in the cluster
   std::uint64_t fBytesOnStorage = 0; ///< Sum of the on-storage sizes of the cluster's pages
   std::uint64_t fBytesInMemory = 0;  ///< Sum of (elements * in-memory element size) of the cluster's pages

   /// Two infos are considered equal if they start at the same entry; the counters are ignored.
   bool operator ==(const ClusterInfo &other) const {
      return fFirstEntry == other.fFirstEntry;
   }

   /// Orders clusters by their first entry; this is the order used when sorting with std::sort.
   bool operator <(const ClusterInfo &other) const {
      return fFirstEntry < other.fFirstEntry;
   }
};
44
45struct ColumnInfo {
48 std::uint64_t fLocalOrder = 0;
49 std::uint64_t fNElements = 0;
50 std::uint64_t fNPages = 0;
51 std::uint64_t fBytesOnStorage = 0;
52 std::uint32_t fElementSize = 0;
54 std::string fFieldName;
55
56 bool operator <(const ColumnInfo &other) const {
57 if (fFieldName == other.fFieldName)
58 return fLocalOrder < other.fLocalOrder;
59 return fFieldName < other.fFieldName;
60 }
61};
62
63static std::string GetFieldName(ROOT::Experimental::DescriptorId_t fieldId,
65{
66 const auto &fieldDesc = ntupleDesc.GetFieldDescriptor(fieldId);
67 if (fieldDesc.GetParentId() == ROOT::Experimental::kInvalidDescriptorId)
68 return fieldDesc.GetFieldName();
69 return GetFieldName(fieldDesc.GetParentId(), ntupleDesc) + "." + fieldDesc.GetFieldName();
70}
71
72static std::string GetColumnTypeName(ROOT::Experimental::EColumnType type)
73{
74 switch (type) {
76 return "Bit";
78 return "Byte";
80 return "Int32";
82 return "Int64";
84 return "Real32";
86 return "Real64";
88 return "Index";
90 return "Switch";
91 default:
92 return "UNKNOWN";
93 }
94}
95
96} // anonymous namespace
97
99{
100 std::vector<ColumnInfo> columns;
101 std::vector<ClusterInfo> clusters;
102 std::unordered_map<DescriptorId_t, unsigned int> cluster2Idx;
103 for (const auto &cluster : fClusterDescriptors) {
104 ClusterInfo info;
105 info.fFirstEntry = cluster.second.GetFirstEntryIndex();
106 info.fNEntries = cluster.second.GetNEntries();
107 cluster2Idx[cluster.first] = clusters.size();
108 clusters.emplace_back(info);
109 }
110
111 std::uint64_t bytesOnStorage = 0;
112 std::uint64_t bytesInMemory = 0;
113 std::uint64_t nPages = 0;
114 int compression = -1;
115 for (const auto &column : fColumnDescriptors) {
116 // We generate the default memory representation for the given column type in order
117 // to report the size _in memory_ of column elements
118 auto elementSize = Detail::RColumnElementBase::Generate(column.second.GetModel().GetType())->GetSize();
119
120 ColumnInfo info;
121 info.fColumnId = column.second.GetId();
122 info.fFieldId = column.second.GetFieldId();
123 info.fLocalOrder = column.second.GetIndex();
124 info.fElementSize = elementSize;
125 info.fType = column.second.GetModel().GetType();
126
127 for (const auto &cluster : fClusterDescriptors) {
128 auto columnRange = cluster.second.GetColumnRange(column.first);
129 info.fNElements += columnRange.fNElements;
130 if (compression == -1) {
131 compression = columnRange.fCompressionSettings;
132 }
133 const auto &pageRange = cluster.second.GetPageRange(column.first);
134 auto idx = cluster2Idx[cluster.first];
135 for (const auto &page : pageRange.fPageInfos) {
136 bytesOnStorage += page.fLocator.fBytesOnStorage;
137 bytesInMemory += page.fNElements * elementSize;
138 clusters[idx].fBytesOnStorage += page.fLocator.fBytesOnStorage;
139 clusters[idx].fBytesInMemory += page.fNElements * elementSize;
140 ++clusters[idx].fNPages;
141 info.fBytesOnStorage += page.fLocator.fBytesOnStorage;
142 ++info.fNPages;
143 ++nPages;
144 }
145 }
146 columns.emplace_back(info);
147 }
148 auto headerSize = GetHeaderSize();
149 auto footerSize = GetFooterSize();
150 output << "============================================================" << std::endl;
151 output << "NTUPLE: " << GetName() << std::endl;
152 output << "Compression: " << compression << std::endl;
153 output << "------------------------------------------------------------" << std::endl;
154 output << " # Entries: " << GetNEntries() << std::endl;
155 output << " # Fields: " << GetNFields() << std::endl;
156 output << " # Columns: " << GetNColumns() << std::endl;
157 output << " # Pages: " << nPages << std::endl;
158 output << " # Clusters: " << GetNClusters() << std::endl;
159 output << " Size on storage: " << bytesOnStorage << " B" << std::endl;
160 output << " Compression rate: " << std::fixed << std::setprecision(2)
161 << float(bytesInMemory) / float(bytesOnStorage) << std::endl;
162 output << " Header size: " << headerSize << " B" << std::endl;
163 output << " Footer size: " << footerSize << " B" << std::endl;
164 output << " Meta-data / data: " << std::fixed << std::setprecision(3)
165 << float(headerSize + footerSize) / float(bytesOnStorage) << std::endl;
166 output << "------------------------------------------------------------" << std::endl;
167 output << "CLUSTER DETAILS" << std::endl;
168 output << "------------------------------------------------------------" << std::endl;
169
170 std::sort(clusters.begin(), clusters.end());
171 for (unsigned int i = 0; i < clusters.size(); ++i) {
172 output << " # " << std::setw(5) << i
173 << " Entry range: [" << clusters[i].fFirstEntry << ".."
174 << clusters[i].fFirstEntry + clusters[i].fNEntries - 1 << "] -- " << clusters[i].fNEntries << std::endl;
175 output << " "
176 << " # Pages: " << clusters[i].fNPages << std::endl;
177 output << " "
178 << " Size on storage: " << clusters[i].fBytesOnStorage << " B" << std::endl;
179 output << " "
180 << " Compression: " << std::fixed << std::setprecision(2)
181 << float(clusters[i].fBytesInMemory) / float(float(clusters[i].fBytesOnStorage)) << std::endl;
182 }
183
184 output << "------------------------------------------------------------" << std::endl;
185 output << "COLUMN DETAILS" << std::endl;
186 output << "------------------------------------------------------------" << std::endl;
187 for (auto &col : columns)
188 col.fFieldName = GetFieldName(col.fFieldId, *this).substr(1);
189 std::sort(columns.begin(), columns.end());
190 for (const auto &col : columns) {
191 auto avgPageSize = (col.fNPages == 0) ? 0 : (col.fBytesOnStorage / col.fNPages);
192 auto avgElementsPerPage = (col.fNPages == 0) ? 0 : (col.fNElements / col.fNPages);
193 std::string nameAndType = std::string(" ") + col.fFieldName + " [#" + std::to_string(col.fLocalOrder) + "]"
194 + " -- " + GetColumnTypeName(col.fType);
195 std::string id = std::string("{id:") + std::to_string(col.fColumnId) + "}";
196 output << nameAndType << std::setw(60 - nameAndType.length()) << id << std::endl;
197 output << " # Elements: " << col.fNElements << std::endl;
198 output << " # Pages: " << col.fNPages << std::endl;
199 output << " Avg elements / page: " << avgElementsPerPage << std::endl;
200 output << " Avg page size: " << avgPageSize << " B" << std::endl;
201 output << " Size on storage: " << col.fBytesOnStorage << " B" << std::endl;
202 output << " Compression: " << std::fixed << std::setprecision(2)
203 << float(col.fElementSize * col.fNElements) / float(col.fBytesOnStorage) << std::endl;
204 output << "............................................................" << std::endl;
205 }
206}
Bool_t operator<(const TDatime &d1, const TDatime &d2)
Definition TDatime.h:106
Bool_t operator==(const TDatime &d1, const TDatime &d2)
Definition TDatime.h:102
int type
Definition TGX11.cxx:121
static std::unique_ptr< RColumnElementBase > Generate(EColumnType type)
The available trivial, native content types of a column.
The on-storage meta-data of an ntuple.
std::unordered_map< DescriptorId_t, RClusterDescriptor > fClusterDescriptors
May contain only a subset of all the available clusters, e.g.
std::unordered_map< DescriptorId_t, RColumnDescriptor > fColumnDescriptors
const RFieldDescriptor & GetFieldDescriptor(DescriptorId_t fieldId) const
void PrintInfo(std::ostream &output) const
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr DescriptorId_t kInvalidDescriptorId
static void output(int code)
Definition gifencode.c:226