33 std::unique_ptr<ROOT::Experimental::Internal::RPageSource> pageSource)
34 : fPageSource(std::move(pageSource))
37 auto descriptorGuard =
fPageSource->GetSharedDescriptorGuard();
47 fUncompressedSize = 0;
49 for (
const auto &colDesc : fDescriptor->GetColumnIterable()) {
50 if (colDesc.IsAliasColumn())
53 auto colId = colDesc.GetPhysicalId();
57 auto colType = colDesc.GetModel().GetType();
59 std::uint64_t nElems = 0;
60 std::vector<std::uint64_t> compressedPageSizes{};
62 for (
const auto &clusterDescriptor : fDescriptor->GetClusterIterable()) {
63 if (!clusterDescriptor.ContainsColumn(colId)) {
67 auto columnRange = clusterDescriptor.GetColumnRange(colId);
68 nElems += columnRange.fNElements;
70 if (fCompressionSettings == -1) {
71 fCompressionSettings = columnRange.fCompressionSettings;
72 }
else if (fCompressionSettings != columnRange.fCompressionSettings &&
78 std::to_string(fCompressionSettings) +
" vs " +
79 std::to_string(columnRange.fCompressionSettings) +
80 ") for column with physical ID " + std::to_string(colId)));
83 const auto &pageRange = clusterDescriptor.GetPageRange(colId);
85 for (
const auto &page : pageRange.fPageInfos) {
86 compressedPageSizes.emplace_back(page.fLocator.fBytesOnStorage);
87 fUncompressedSize += page.fNElements * elemSize;
91 fCompressedSize += std::accumulate(compressedPageSizes.begin(), compressedPageSizes.end(), 0);
92 fColumnInfo.emplace(colId,
RColumnInspector(colDesc, compressedPageSizes, elemSize, nElems));
99 std::uint64_t compressedSize = 0;
100 std::uint64_t uncompressedSize = 0;
102 for (
const auto &colDescriptor : fDescriptor->GetColumnIterable(fieldId)) {
103 auto colInfo = GetColumnInspector(colDescriptor.GetPhysicalId());
104 compressedSize += colInfo.GetCompressedSize();
105 uncompressedSize += colInfo.GetUncompressedSize();
108 for (
const auto &subFieldDescriptor : fDescriptor->GetFieldIterable(fieldId)) {
111 auto subFieldInfo = CollectFieldTreeInfo(subFieldId);
113 compressedSize += subFieldInfo.GetCompressedSize();
114 uncompressedSize += subFieldInfo.GetUncompressedSize();
117 auto fieldInfo =
RFieldTreeInspector(fDescriptor->GetFieldDescriptor(fieldId), compressedSize, uncompressedSize);
118 fFieldTreeInfo.emplace(fieldId, fieldInfo);
122std::vector<ROOT::Experimental::DescriptorId_t>
125 std::vector<DescriptorId_t> colIds;
126 std::deque<DescriptorId_t> fieldIdQueue{fieldId};
128 while (!fieldIdQueue.empty()) {
129 auto currId = fieldIdQueue.front();
130 fieldIdQueue.pop_front();
132 for (
const auto &col : fDescriptor->GetColumnIterable(currId)) {
133 if (col.IsAliasColumn()) {
137 colIds.emplace_back(col.GetPhysicalId());
140 for (
const auto &fld : fDescriptor->GetFieldIterable(currId)) {
141 fieldIdQueue.push_back(fld.GetId());
148std::unique_ptr<ROOT::Experimental::RNTupleInspector>
156 return std::unique_ptr<RNTupleInspector>(
new RNTupleInspector(std::move(pageSource)));
159std::unique_ptr<ROOT::Experimental::RNTupleInspector>
163 return std::unique_ptr<RNTupleInspector>(
new RNTupleInspector(std::move(pageSource)));
168 int algorithm = fCompressionSettings / 100;
169 int level = fCompressionSettings - (algorithm * 100);
172 " (level " + std::to_string(level) +
")";
180 if (physicalColumnId > fDescriptor->GetNPhysicalColumns()) {
181 throw RException(
R__FAIL(
"No column with physical ID " + std::to_string(physicalColumnId) +
" present"));
184 return fColumnInfo.at(physicalColumnId);
189 size_t typeCount = 0;
191 for (
auto &[colId, colInfo] : fColumnInfo) {
192 if (colInfo.GetType() == colType) {
200const std::vector<ROOT::Experimental::DescriptorId_t>
203 std::vector<DescriptorId_t> colIds;
205 for (
const auto &[colId, colInfo] : fColumnInfo) {
206 if (colInfo.GetType() == colType)
207 colIds.emplace_back(colId);
215 std::set<EColumnType> colTypes;
217 for (
const auto &[colId, colInfo] : fColumnInfo) {
218 colTypes.emplace(colInfo.GetType());
221 return std::vector(colTypes.begin(), colTypes.end());
226 struct ColumnTypeInfo {
228 std::uint64_t nElems, compressedSize, uncompressedSize;
239 std::map<EColumnType, ColumnTypeInfo> colTypeInfo;
241 for (
const auto &[colId, colInfo] : fColumnInfo) {
242 colTypeInfo[colInfo.GetType()] += colInfo;
247 output <<
" column type | count | # elements | compressed bytes | uncompressed bytes\n"
248 <<
"----------------|---------|-----------------|-------------------|--------------------" << std::endl;
249 for (
const auto &[colType, typeInfo] : colTypeInfo) {
251 << typeInfo.count <<
" |" << std::setw(16) << typeInfo.nElems <<
" |" << std::setw(18)
252 << typeInfo.compressedSize <<
" |" << std::setw(18) << typeInfo.uncompressedSize <<
" " << std::endl;
256 output <<
"columnType,count,nElements,compressedSize,uncompressedSize" << std::endl;
257 for (
const auto &[colType, typeInfo] : colTypeInfo) {
259 <<
"," << typeInfo.compressedSize <<
"," << typeInfo.uncompressedSize << std::endl;
268 std::string_view histName, std::string_view histTitle)
270 if (histName.empty()) {
280 if (histTitle.empty()) {
290 auto hist = std::make_unique<TH1D>(std::string(histName).c_str(), std::string(histTitle).c_str(), 1, 0, 1);
293 for (
const auto &[colId, colInfo] : fColumnInfo) {
310 std::string histName,
311 std::string histTitle,
size_t nBins)
313 if (histTitle.empty())
314 histTitle =
"Page size distribution for column with ID " + std::to_string(physicalColumnId);
316 return GetPageSizeDistribution({physicalColumnId}, histName, histTitle, nBins);
321 std::string histName, std::string histTitle,
size_t nBins)
323 if (histName.empty())
325 if (histTitle.empty())
328 auto perTypeHist = GetPageSizeDistribution({colType}, histName, histTitle, nBins);
330 if (perTypeHist->GetNhists() < 1)
331 return std::make_unique<TH1D>(histName.c_str(), histTitle.c_str(), 64, 0, 0);
333 auto hist = std::unique_ptr<TH1D>(
dynamic_cast<TH1D *
>(perTypeHist->GetHists()->First()));
335 hist->SetName(histName.c_str());
336 hist->SetTitle(histTitle.c_str());
337 hist->SetXTitle(
"Page size (B)");
338 hist->SetYTitle(
"N_{pages}");
344 std::string histName, std::string histTitle,
size_t nBins)
346 auto hist = std::make_unique<TH1D>();
348 if (histName.empty())
349 histName =
"pageSizeHist";
350 hist->SetName(histName.c_str());
351 if (histTitle.empty())
352 histTitle =
"Page size distribution";
353 hist->SetTitle(histTitle.c_str());
354 hist->SetXTitle(
"Page size (B)");
355 hist->SetYTitle(
"N_{pages}");
357 std::vector<std::uint64_t> pageSizes;
358 std::for_each(colIds.begin(), colIds.end(), [
this, &pageSizes](
const auto colId) {
359 auto colInfo = GetColumnInspector(colId);
360 pageSizes.insert(pageSizes.end(), colInfo.GetCompressedPageSizes().begin(),
361 colInfo.GetCompressedPageSizes().end());
364 auto histMinMax = std::minmax_element(pageSizes.begin(), pageSizes.end());
365 hist->SetBins(nBins, *histMinMax.first,
366 *histMinMax.second + ((*histMinMax.second - *histMinMax.first) /
static_cast<double>(nBins)));
368 for (
const auto pageSize : pageSizes) {
369 hist->Fill(pageSize);
376 std::initializer_list<ROOT::Experimental::EColumnType> colTypes, std::string histName, std::string histTitle,
379 if (histName.empty())
380 histName =
"pageSizeHist";
381 if (histTitle.empty())
382 histTitle =
"Per-column type page size distribution";
384 auto stackedHist = std::make_unique<THStack>(histName.c_str(), histTitle.c_str());
386 double histMin = std::numeric_limits<double>::max();
388 std::map<EColumnType, std::vector<std::uint64_t>> pageSizes;
390 std::vector<EColumnType> colTypeVec = colTypes;
391 if (std::empty(colTypes)) {
392 colTypeVec = GetColumnTypes();
395 for (
const auto colType : colTypeVec) {
396 auto colIds = GetColumnsByType(colType);
401 std::vector<std::uint64_t> pageSizesForColType;
402 std::for_each(colIds.cbegin(), colIds.cend(), [
this, &pageSizesForColType](
const auto colId) {
403 auto colInfo = GetColumnInspector(colId);
404 pageSizesForColType.insert(pageSizesForColType.end(), colInfo.GetCompressedPageSizes().begin(),
405 colInfo.GetCompressedPageSizes().end());
407 pageSizes.emplace(colType, pageSizesForColType);
409 auto histMinMax = std::minmax_element(pageSizesForColType.begin(), pageSizesForColType.end());
410 histMin = std::min(histMin,
static_cast<double>(*histMinMax.first));
411 histMax = std::max(histMax,
static_cast<double>(*histMinMax.second));
414 for (
const auto &[colType, pageSizesForColType] : pageSizes) {
415 auto hist = std::make_unique<TH1D>(
418 histMax + ((histMax - histMin) /
static_cast<double>(nBins)));
420 for (
const auto pageSize : pageSizesForColType) {
421 hist->Fill(pageSize);
424 stackedHist->Add(hist.release());
435 if (fieldId >= fDescriptor->GetNFields()) {
436 throw RException(
R__FAIL(
"No field with ID " + std::to_string(fieldId) +
" present"));
439 return fFieldTreeInfo.at(fieldId);
448 throw RException(
R__FAIL(
"Could not find field `" + std::string(fieldName) +
"`"));
451 return GetFieldTreeInspector(fieldId);
455 bool includeSubFields)
const
457 size_t typeCount = 0;
459 for (
auto &[fldId, fldInfo] : fFieldTreeInfo) {
460 if (!includeSubFields && fldInfo.GetDescriptor().GetParentId() != fDescriptor->GetFieldZeroId()) {
464 if (std::regex_match(fldInfo.GetDescriptor().GetTypeName(), typeNamePattern)) {
472const std::vector<ROOT::Experimental::DescriptorId_t>
475 std::vector<DescriptorId_t> fieldIds;
477 for (
auto &[fldId, fldInfo] : fFieldTreeInfo) {
479 if (!searchInSubFields && fldInfo.GetDescriptor().GetParentId() != fDescriptor->GetFieldZeroId()) {
483 if (std::regex_match(fldInfo.GetDescriptor().GetFieldName(), fieldNamePattern)) {
484 fieldIds.emplace_back(fldId);
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t format
std::string & operator+=(std::string &left, const TString &right)
The available trivial, native content types of a column.
static std::string GetTypeName(EColumnType type)
static std::unique_ptr< RColumnElementBase > Generate(EColumnType type)
If CppT == void, use the default C++ type for the given column type.
static std::unique_ptr< RPageSourceFile > CreateFromAnchor(const RNTuple &anchor, const RNTupleReadOptions &options=RNTupleReadOptions())
Used from the RNTuple class to build a datasource if the anchor is already available.
static std::unique_ptr< RPageSource > Create(std::string_view ntupleName, std::string_view location, const RNTupleReadOptions &options=RNTupleReadOptions())
Guess the concrete derived page source from the file name (location)
Base class for all ROOT issued exceptions.
Provides column-level storage information.
std::uint64_t GetCompressedSize() const
std::uint64_t GetUncompressedSize() const
std::uint64_t GetNElements() const
Provides field-level storage information.
Inspect on-disk and storage-related information of an RNTuple.
const RColumnInspector & GetColumnInspector(DescriptorId_t physicalColumnId) const
Get storage information for a given column.
const std::vector< DescriptorId_t > GetFieldsByName(const std::regex &fieldNamePattern, bool searchInSubFields=true) const
Get the IDs of (sub-)fields whose name matches the given string.
std::unique_ptr< Internal::RPageSource > fPageSource
const std::vector< EColumnType > GetColumnTypes()
Get all column types present in the RNTuple being inspected.
size_t GetColumnCountByType(EColumnType colType) const
Get the number of columns of a given type present in the RNTuple.
RNTupleInspector(std::unique_ptr< Internal::RPageSource > pageSource)
std::string GetCompressionSettingsAsString() const
Get a string describing compression settings of the RNTuple being inspected.
static std::unique_ptr< RNTupleInspector > Create(RNTuple *sourceNTuple)
Create a new RNTupleInspector.
std::unique_ptr< TH1D > GetPageSizeDistribution(DescriptorId_t physicalColumnId, std::string histName="", std::string histTitle="", size_t nBins=64)
Get a histogram containing the size distribution of the compressed pages for an individual column.
const std::vector< DescriptorId_t > GetColumnsByType(EColumnType colType)
Get the IDs of all columns with the given type.
void PrintColumnTypeInfo(ENTupleInspectorPrintFormat format=ENTupleInspectorPrintFormat::kTable, std::ostream &output=std::cout)
Print storage information per column type.
RFieldTreeInspector CollectFieldTreeInfo(DescriptorId_t fieldId)
Recursively gather field-level information.
std::unique_ptr< RNTupleDescriptor > fDescriptor
std::vector< DescriptorId_t > GetColumnsByFieldId(DescriptorId_t fieldId) const
Get the columns that make up the given field, including its subfields.
void CollectColumnInfo()
Gather column-level and RNTuple-level information.
size_t GetFieldCountByType(const std::regex &typeNamePattern, bool searchInSubFields=true) const
Get the number of fields of a given type or class present in the RNTuple.
const RFieldTreeInspector & GetFieldTreeInspector(DescriptorId_t fieldId) const
Get storage information for a given (sub)field by ID.
std::unique_ptr< TH1D > GetColumnTypeInfoAsHist(ENTupleInspectorHist histKind, std::string_view histName="", std::string_view histTitle="")
Get a histogram showing information for each column type present.
Representation of an RNTuple data set in a ROOT file.
1-D histogram with a double per channel (see TH1 documentation)
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
ENTupleInspectorPrintFormat
constexpr int kUnknownCompressionSettings
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr DescriptorId_t kInvalidDescriptorId
EValues
Note: this is only temporarily a struct and will become an enum class, hence the naming convention used.
static std::string AlgorithmToString(EAlgorithm::EValues algorithm)