doc/hackathon/RNTupleMerger_8cxx_source.html

/// \file RNTupleMerger.cxx

/// \author Jakob Blomer <jblomer@cern.ch>, Max Orok <maxwellorok@gmail.com>, Alaettin Serhan Mete <amete@anl.gov>,

/// Giacomo Parolini <giacomo.parolini@cern.ch>

/// \date 2020-07-08

/// \warning This is part of the ROOT 7 prototype! It will

/// change without notice. It might trigger earthquakes. Feedback is welcome!


/*************************************************************************

 * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers.               *

 * All rights reserved.                                                  *

 *                                                                       *

 * For the licensing terms see $ROOTSYS/LICENSE.                         *

 * For the list of contributors see $ROOTSYS/README/CREDITS.             *

 *************************************************************************/


#include <ROOT/RError.hxx>

#include <ROOT/RNTuple.hxx>

#include <ROOT/RNTupleDescriptor.hxx>

#include <ROOT/RNTupleMerger.hxx>

#include <ROOT/RNTupleModel.hxx>

#include <ROOT/RNTupleTypes.hxx>

#include <ROOT/RNTupleUtils.hxx>

#include <ROOT/RPageAllocator.hxx>

#include <ROOT/RPageStorageFile.hxx>

#include <ROOT/RPageStorage.hxx>

#include <ROOT/RClusterPool.hxx>

#include <ROOT/RNTupleSerialize.hxx>

#include <ROOT/RNTupleZip.hxx>

#include <ROOT/RColumnElementBase.hxx>

#include <TROOT.h>

#include <TFileMergeInfo.h>

#include <TFile.h>

#include <TKey.h>


#include <algorithm>

#include <deque>

#include <initializer_list>

#include <unordered_map>

#include <vector>


using ROOT::ENTupleColumnType;

using ROOT::RNTupleModel;

using ROOT::Internal::MakeUninitArray;

using ROOT::Internal::RCluster;

using ROOT::Internal::RColumnElementBase;

using ROOT::Internal::RNTupleSerializer;

using ROOT::Internal::RPageSink;

using ROOT::Internal::RPageSource;

using ROOT::Internal::RPageSourceFile;

using ROOT::Internal::RPageStorage;


using namespace ROOT::Experimental::Internal;


// Returns the log channel ("ROOT.NTuple.Merge") used for the diagnostics emitted by the merging code.
// The channel object is a function-local static: it is constructed on the first call and the same
// instance is handed out by every following call.
static ROOT::RLogChannel &NTupleMergeLog()
{
   static ROOT::RLogChannel sMergeChannel("ROOT.NTuple.Merge");
   return sMergeChannel;
}


// TFile options parsing

// -------------------------------------------------------------------------------------


static bool BeginsWithDelimitedWord(const TString &str, const char *word)

{

   const Ssiz_t wordLen = strlen(word);

   if (str.Length() < wordLen)

      return false;

   if (!str.BeginsWith(word, TString::ECaseCompare::kIgnoreCase))

      return false;

   return str.Length() == wordLen || str(wordLen) == ' ';

}


// Parses an option with the format `option=Value` out of the option string `opts`.
//
// \param opts the full option string
// \param pattern the option name including the trailing '=' (matched case-insensitively)
// \param validValues the accepted value names, each paired with the value to return for it
// \return the value paired with the first name in `validValues` that follows the first occurrence of `pattern`
//         as a delimited word (see BeginsWithDelimitedWord); nullopt if `pattern` is not found, nothing follows
//         it, or what follows is none of the valid names.
template <typename T>
static std::optional<T> ParseStringOption(const TString &opts, const char *pattern,
                                          std::initializer_list<std::pair<const char *, T>> validValues)
{
   const Ssiz_t patternLen = strlen(pattern);
   assert(pattern[patternLen - 1] == '='); // we want to parse options with the format `option=Value`

   const auto patternPos = opts.Index(pattern, 0, TString::ECaseCompare::kIgnoreCase);
   if (patternPos < 0)
      return std::nullopt;

   // Position of the first character after the '='
   const auto valuePos = patternPos + patternLen;
   if (opts.Length() <= valuePos)
      return std::nullopt;

   const auto valueStr = TString(opts(valuePos, opts.Length() - valuePos));
   for (const auto &candidate : validValues) {
      if (BeginsWithDelimitedWord(valueStr, candidate.first))
         return candidate.second;
   }

   return std::nullopt;
}


static std::optional<ENTupleMergingMode> ParseOptionMergingMode(const TString &opts)

{

   return ParseStringOption<ENTupleMergingMode>(opts, "rntuple.MergingMode=",

                                                {

                                                   {"Filter", ENTupleMergingMode::kFilter},

                                                   {"Union", ENTupleMergingMode::kUnion},

                                                   {"Strict", ENTupleMergingMode::kStrict},

                                                });

}


static std::optional<ENTupleMergeErrBehavior> ParseOptionErrBehavior(const TString &opts)

{

   return ParseStringOption<ENTupleMergeErrBehavior>(opts, "rntuple.ErrBehavior=",

                                                     {

                                                        {"Abort", ENTupleMergeErrBehavior::kAbort},

                                                        {"Skip", ENTupleMergeErrBehavior::kSkip},

                                                     });

}


static std::optional<ENTupleMergeVersionBehavior> ParseOptionVersionBehavior(const TString &opts)

{

   return ParseStringOption<ENTupleMergeVersionBehavior>(

      opts, "rntuple.VersionBehavior=",

      {

         {"WarnOnHigherVersion", ENTupleMergeVersionBehavior::kWarnOnHigherVersion},

         {"AbortOnHigherVersion", ENTupleMergeVersionBehavior::kAbortOnHigherVersion},

      });

}


// -------------------------------------------------------------------------------------


// Entry point for TFileMerger. Internally calls RNTupleMerger::Merge().
//
// `inputs` must contain at least 3 entries, in this order:
//   1. an object whose name is the name of the RNTuple to merge;
//   2. the output TFile;
//   3. the input files. Entries that are not TFiles, or that do not contain an RNTuple with the requested
//      name, are skipped (an info message is logged).
// `mergeInfo->fOptions` is scanned for "DefaultCompression", "FirstSrcCompression", "rntuple.ExtraVerbose",
// "rntuple.MergingMode=", "rntuple.ErrBehavior=" and "rntuple.VersionBehavior=".
//
// If the output file already has an RNTuple with the requested name, its descriptor is used to initialize
// the destination, so that the sources are merged into it.
//
// Returns 0 on success and -1 on failure; failures are reported through NTupleMergeLog().
Long64_t ROOT::RNTuple::Merge(TCollection *inputs, TFileMergeInfo *mergeInfo)
// IMPORTANT: this function must not throw, as it is used in exception-unsafe code (TFileMerger).
try {
   // Check the inputs
   if (!inputs || inputs->GetEntries() < 3 || !mergeInfo) {
      R__LOG_ERROR(NTupleMergeLog()) << "Invalid inputs.";
      return -1;
   }

   // Parse the input parameters
   TIter itr(inputs);

   // First entry is the RNTuple name
   std::string ntupleName = std::string(itr()->GetName());

   // Second entry is the output file
   TObject *secondArg = itr();
   TFile *outFile = dynamic_cast<TFile *>(secondArg);
   if (!outFile) {
      // `secondArg` itself may be null: don't dereference it blindly just to build the error message.
      R__LOG_ERROR(NTupleMergeLog()) << "Second input parameter should be a TFile, but it's a "
                                     << (secondArg ? secondArg->ClassName() : "nullptr") << ".";
      return -1;
   }

   // Check if the output file already has a key with that name
   TKey *outKey = outFile->FindKey(ntupleName.c_str());
   // NOTE(review): confirm who owns the object returned by TKey::ReadObject(); it is never deleted here.
   ROOT::RNTuple *outNTuple = nullptr;
   if (outKey) {
      outNTuple = outKey->ReadObject<ROOT::RNTuple>();
      if (!outNTuple) {
         R__LOG_ERROR(NTupleMergeLog()) << "Output file already has key, but not of type RNTuple!";
         return -1;
      }
      // In principle, we should already be working on the RNTuple object from the output file, but just continue with
      // pointer we just got.
   }

   const bool defaultComp = mergeInfo->fOptions.Contains("DefaultCompression");
   const bool firstSrcComp = mergeInfo->fOptions.Contains("FirstSrcCompression");
   const bool extraVerbose = mergeInfo->fOptions.Contains("rntuple.ExtraVerbose");
   if (defaultComp && firstSrcComp) {
      // this should never happen through hadd, but a user may call RNTuple::Merge() from custom code.
      R__LOG_WARNING(NTupleMergeLog()) << "Passed both options \"DefaultCompression\" and \"FirstSrcCompression\": "
                                          "only the latter will apply.";
   }
   std::optional<std::uint32_t> compression;
   if (firstSrcComp) {
      // user passed -ff or -fk: use the same compression as the first RNTuple we find in the sources.
      // (do nothing here, the compression will be fetched below)
   } else if (!defaultComp) {
      // compression was explicitly passed by the user: use it.
      compression = outFile->GetCompressionSettings();
   } else {
      // user passed no compression-related options: use default
      compression = RCompressionSetting::EDefaults::kUseGeneralPurpose;
      R__LOG_INFO(NTupleMergeLog()) << "Using the default compression: " << *compression;
   }

   // The remaining entries are the input files
   std::vector<std::unique_ptr<RPageSourceFile>> sources;
   std::vector<RPageSource *> sourcePtrs;

   while (TObject *input = itr()) {
      TFile *inFile = dynamic_cast<TFile *>(input);
      ROOT::RNTuple *anchor = inFile ? inFile->Get<ROOT::RNTuple>(ntupleName.c_str()) : nullptr;
      if (!anchor) {
         // `inFile` is null when the input is not a TFile, so the name must come from the generic TObject.
         R__LOG_INFO(NTupleMergeLog()) << "No RNTuple anchor named '" << ntupleName << "' from file '"
                                       << input->GetName() << "'";
         continue;
      }

      auto source = RPageSourceFile::CreateFromAnchor(*anchor);
      if (!compression) {
         // Get the compression of this RNTuple and use it as the output compression.
         // We currently assume all column ranges have the same compression, so we just peek at the first one.
         source->Attach(RNTupleSerializer::EDescriptorDeserializeMode::kRaw);
         auto descriptor = source->GetSharedDescriptorGuard();
         auto clusterIter = descriptor->GetClusterIterable();
         auto firstCluster = clusterIter.begin();
         if (firstCluster == clusterIter.end()) {
            R__LOG_ERROR(NTupleMergeLog())
               << "Asked to use the first source's compression as the output compression, but the "
                  "first source (file '"
               << inFile->GetName()
               << "') has an empty RNTuple, therefore the output compression could not be "
                  "determined.";
            return -1;
         }
         auto colRangeIter = (*firstCluster).GetColumnRangeIterable();
         auto firstColRange = colRangeIter.begin();
         if (firstColRange == colRangeIter.end()) {
            R__LOG_ERROR(NTupleMergeLog())
               << "Asked to use the first source's compression as the output compression, but the "
                  "first source (file '"
               << inFile->GetName()
               << "') has an empty RNTuple, therefore the output compression could not be "
                  "determined.";
            return -1;
         }
         compression = (*firstColRange).GetCompressionSettings().value();
         R__LOG_INFO(NTupleMergeLog()) << "Using the first RNTuple's compression: " << *compression;
      }
      sources.push_back(std::move(source));
   }

   // The compression is still unknown only if we were asked to take it from the first source but no input
   // provided a usable RNTuple. This must be a runtime check (not an assert), because `compression` is
   // dereferenced right below also in release builds.
   if (!compression) {
      R__LOG_ERROR(NTupleMergeLog()) << "Could not determine the output compression: none of the inputs contains "
                                        "an RNTuple named '"
                                     << ntupleName << "'.";
      return -1;
   }

   RNTupleWriteOptions writeOpts;
   writeOpts.SetCompression(*compression);
   auto destination = std::make_unique<ROOT::Internal::RPageSinkFile>(ntupleName, *outFile, writeOpts);
   std::unique_ptr<ROOT::RNTupleModel> model;
   // If we already have an existing RNTuple, copy over its descriptor to support incremental merging
   if (outNTuple) {
      auto outSource = RPageSourceFile::CreateFromAnchor(*outNTuple);
      outSource->Attach(RNTupleSerializer::EDescriptorDeserializeMode::kForWriting);
      auto desc = outSource->GetSharedDescriptorGuard();
      model = destination->InitFromDescriptor(desc.GetRef(), true /* copyClusters */);
   }

   // Interface conversion
   sourcePtrs.reserve(sources.size());
   for (const auto &s : sources) {
      sourcePtrs.push_back(s.get());
   }

   // Now merge
   RNTupleMerger merger{std::move(destination), std::move(model)};
   RNTupleMergeOptions mergerOpts;
   mergerOpts.fCompressionSettings = *compression;
   mergerOpts.fExtraVerbose = extraVerbose;
   if (auto mergingMode = ParseOptionMergingMode(mergeInfo->fOptions)) {
      mergerOpts.fMergingMode = *mergingMode;
   }
   if (auto errBehavior = ParseOptionErrBehavior(mergeInfo->fOptions)) {
      mergerOpts.fErrBehavior = *errBehavior;
   }
   if (auto versionBehavior = ParseOptionVersionBehavior(mergeInfo->fOptions)) {
      mergerOpts.fVersionBehavior = *versionBehavior;
   }
   merger.Merge(sourcePtrs, mergerOpts).ThrowOnError();

   // Provide the caller with a merged anchor object (even though we've already
   // written it).
   auto *mergedAnchor = outFile->Get<ROOT::RNTuple>(ntupleName.c_str());
   if (!mergedAnchor) {
      R__LOG_ERROR(NTupleMergeLog()) << "Could not read back the merged RNTuple '" << ntupleName
                                     << "' from the output file.";
      return -1;
   }
   *this = *mergedAnchor;

   return 0;
} catch (const std::exception &ex) {
   R__LOG_ERROR(NTupleMergeLog()) << "Exception thrown while merging: " << ex.what();
   return -1;
} catch (...) {
   // Nothing may escape from this function (see the note at the top), not even non-standard exceptions.
   R__LOG_ERROR(NTupleMergeLog()) << "Unknown exception thrown while merging.";
   return -1;
}


namespace {

// Functor used to change the compression of a page to `fMergeOptions.fCompressionSettings`
// (no compression if the setting is unset or 0).
// The page referenced by `fSealedPage` is decompressed, optionally re-compressed, and `fSealedPage` is then
// re-pointed to `fBuffer`, which holds the result.
struct RChangeCompressionFunc {
   const RColumnElementBase &fSrcColElement;
   const RColumnElementBase &fDstColElement;
   const RNTupleMergeOptions &fMergeOptions;

   RPageStorage::RSealedPage &fSealedPage;
   ROOT::Internal::RPageAllocator &fPageAlloc;
   std::byte *fBuffer;
   std::size_t fBufSize;
   const ROOT::RNTupleWriteOptions &fWriteOpts;

   void operator()() const
   {
      assert(fSrcColElement.GetIdentifier() == fDstColElement.GetIdentifier());

      fSealedPage.VerifyChecksumIfEnabled().ThrowOnError();

      const auto packedSize = fSrcColElement.GetPackedSize(fSealedPage.GetNElements());
      const auto targetCompression = fMergeOptions.fCompressionSettings.value_or(0);
      const bool mustRecompress = targetCompression != 0;

      // When re-compressing, the page is first unzipped into a scratch buffer and then zipped into `fBuffer`;
      // otherwise it is unzipped straight into `fBuffer`.
      // TODO: this buffer could be kept and reused across pages
      std::unique_ptr<std::byte[]> scratch;
      if (mustRecompress)
         scratch = MakeUninitArray<std::byte>(packedSize);
      std::byte *const unpacked = mustRecompress ? scratch.get() : fBuffer;

      ROOT::Internal::RNTupleDecompressor::Unzip(fSealedPage.GetBuffer(), fSealedPage.GetDataSize(), packedSize,
                                                 unpacked);

      const auto checksumSize = fWriteOpts.GetEnablePageChecksums() * sizeof(std::uint64_t);
      std::size_t payloadSize = packedSize;
      if (mustRecompress) {
         assert(fBuffer != unpacked);
         assert(fBufSize >= packedSize + checksumSize);
         payloadSize = ROOT::Internal::RNTupleCompressor::Zip(unpacked, packedSize, targetCompression, fBuffer);
      }

      fSealedPage = {fBuffer, payloadSize + checksumSize, fSealedPage.GetNElements(), fSealedPage.GetHasChecksum()};
      fSealedPage.ChecksumIfEnabled();
   }
};


struct RResealFunc {

   const RColumnElementBase &fSrcColElement;

   const RColumnElementBase &fDstColElement;

   const RNTupleMergeOptions &fMergeOptions;


   RPageStorage::RSealedPage &fSealedPage;

   ROOT::Internal::RPageAllocator &fPageAlloc;

   std::byte *fBuffer;

   std::size_t fBufSize;

   const ROOT::RNTupleWriteOptions &fWriteOpts;


   void operator()() const

   {

      auto page = RPageSource::UnsealPage(fSealedPage, fSrcColElement, fPageAlloc).Unwrap();

      RPageSink::RSealPageConfig sealConf;

      sealConf.fElement = &fDstColElement;

      sealConf.fPage = &page;

      sealConf.fBuffer = fBuffer;

      sealConf.fCompressionSettings = *fMergeOptions.fCompressionSettings;

      sealConf.fWriteChecksum = fWriteOpts.GetEnablePageChecksums();

      assert(fBufSize >= fSealedPage.GetDataSize() + fSealedPage.GetHasChecksum() * sizeof(std::uint64_t));

      auto refSealedPage = RPageSink::SealPage(sealConf);

      fSealedPage = refSealedPage;

   }

};


// Runs a callable either through the task group `fGroup`, if there is one, or directly in the calling thread.
struct RTaskVisitor {
   std::optional<ROOT::Experimental::TTaskGroup> &fGroup;

   template <typename T>
   void operator()(T &&f)
   {
      if (!fGroup) {
         // No task group available: execute immediately.
         f();
         return;
      }
      fGroup->Run(f);
   }
};


// Pairs the descriptor of a field of the source RNTuple (`fSrc`) with the descriptor of the corresponding
// field of the destination RNTuple (`fDst`). Only pointers are stored: the descriptors must outlive this object.
struct RCommonField {

   const ROOT::RFieldDescriptor *fSrc;

   const ROOT::RFieldDescriptor *fDst;


   RCommonField(const ROOT::RFieldDescriptor &src, const ROOT::RFieldDescriptor &dst) : fSrc(&src), fDst(&dst) {}

};


// Result of CompareDescriptorStructure(): how the top-level fields of a source descriptor relate to those of the
// destination descriptor.
struct RDescriptorsComparison {

   // Top-level fields of the destination that have no same-named field in the source
   std::vector<const ROOT::RFieldDescriptor *> fExtraDstFields;

   // Top-level fields of the source that have no same-named field in the destination
   std::vector<const ROOT::RFieldDescriptor *> fExtraSrcFields;

   // Top-level fields that exist, with the same name, in both the source and the destination
   std::vector<RCommonField> fCommonFields;

};


// Id and type of a column; used as the mapped value of ColumnIdMap_t.
// NOTE(review): presumably describes the column as it exists in the destination RNTuple -- confirm against the
// code that fills the map.
struct RColumnOutInfo {

   ROOT::DescriptorId_t fColumnId;

   ENTupleColumnType fColumnType;

};


// Maps a column, identified by a string key, to its RColumnOutInfo:
// { fully.qualified.fieldName.colInputId => colOutputInfo }

using ColumnIdMap_t = std::unordered_map<std::string, RColumnOutInfo>;


// Groups the merge information of a set of columns into two lists.
// NOTE(review): judging by the member names, "extra dst" columns exist only in the destination while "common"
// columns exist in both source and destination -- confirm against the code that fills this struct.
struct RColumnInfoGroup {

   std::vector<RColumnMergeInfo> fExtraDstColumns;

   // These are sorted by InputId

   std::vector<RColumnMergeInfo> fCommonColumns;

};


} // namespace


// These structs cannot be in the anonymous namespace because they're used in RNTupleMerger's private interface.

namespace ROOT::Experimental::Internal {


// Describes one column taking part in a merge: its name, its ids in the source and destination RNTuples, its
// type, and the descriptors it belongs to.
struct RColumnMergeInfo {

   // This column name is built as a dot-separated concatenation of the ancestry of

   // the columns' parent fields' names plus the index of the column itself.

   // e.g. "Muon.pt.x._0"

   std::string fColumnName;

   // The column id in the source RNTuple

   ROOT::DescriptorId_t fInputId;

   // The corresponding column id in the destination RNTuple (the mapping happens in AddColumnsFromField())

   ROOT::DescriptorId_t fOutputId;

   // The on-disk type of the column
   ENTupleColumnType fColumnType;

   // If nullopt, use the default in-memory type

   std::optional<std::type_index> fInMemoryType;

   // Descriptor of the field this column belongs to (not owned)
   const ROOT::RFieldDescriptor *fParentFieldDescriptor;

   // Descriptor of the RNTuple used to look up the ancestors of the parent field (not owned)
   const ROOT::RNTupleDescriptor *fParentNTupleDescriptor;

};


// Data related to a single call of RNTupleMerger::Merge()
// All references and pointers are non-owning: the referenced sources, sink, options and descriptors must outlive
// this object.


struct RNTupleMergeData {

   // The sources being merged
   std::span<RPageSource *> fSources;

   // The sink the merged data is written to
   RPageSink &fDestination;

   const RNTupleMergeOptions &fMergeOpts;

   // Descriptor of the destination, obtained from `fDestination` at construction
   const ROOT::RNTupleDescriptor &fDstDescriptor;

   // Descriptor of a source; null until assigned.
   // NOTE(review): presumably points to the descriptor of the source currently being processed -- confirm.
   const ROOT::RNTupleDescriptor *fSrcDescriptor = nullptr;


   std::vector<RColumnMergeInfo> fColumns;

   ColumnIdMap_t fColumnIdMap;


   // Entry number passed to RPageSink::UpdateSchema() when the destination model is extended.
   // NOTE(review): presumably the number of entries written to the destination so far -- confirm.
   ROOT::NTupleSize_t fNumDstEntries = 0;


   RNTupleMergeData(std::span<RPageSource *> sources, RPageSink &destination, const RNTupleMergeOptions &mergeOpts)

      : fSources{sources}, fDestination{destination}, fMergeOpts{mergeOpts}, fDstDescriptor{destination.GetDescriptor()}

   {

   }


};


// Containers holding the sealed pages handled during a merge, together with the buffers backing them.
struct RSealedPageMergeData {

   // We use a std::deque so that references to the contained SealedPageSequence_t, and its iterators, are

   // never invalidated.

   std::deque<RPageStorage::SealedPageSequence_t> fPagesV;

   // NOTE(review): presumably each group refers to one of the sequences stored in `fPagesV` -- confirm.
   std::vector<RPageStorage::RSealedPageGroup> fGroups;

   // Owned byte buffers.
   // NOTE(review): presumably the storage of the pages referenced above -- confirm against the code filling it.
   std::vector<std::unique_ptr<std::byte[]>> fBuffers;

};


// Prints an optional value range as "(min, max)", or as "(null)" if the optional is empty.
std::ostream &operator<<(std::ostream &os, const std::optional<ROOT::RColumnDescriptor::RValueRange> &x)
{
   if (!x)
      return os << "(null)";

   return os << '(' << x->fMin << ", " << x->fMax << ')';
}


} // namespace ROOT::Experimental::Internal


// Returns true if one of the two column types is the split-encoded version of the other one
// (e.g. kInt32 and kSplitInt32), in either order. Two identical types yield false.
static bool IsSplitOrUnsplitVersionOf(ENTupleColumnType a, ENTupleColumnType b)
{
   using Type_t = ENTupleColumnType;

   // Every (unsplit, split) pair of column types
   // clang-format off
   static constexpr std::pair<Type_t, Type_t> kSplitPairs[] = {
      {Type_t::kInt16,   Type_t::kSplitInt16},
      {Type_t::kInt32,   Type_t::kSplitInt32},
      {Type_t::kInt64,   Type_t::kSplitInt64},
      {Type_t::kUInt16,  Type_t::kSplitUInt16},
      {Type_t::kUInt32,  Type_t::kSplitUInt32},
      {Type_t::kUInt64,  Type_t::kSplitUInt64},
      {Type_t::kIndex32, Type_t::kSplitIndex32},
      {Type_t::kIndex64, Type_t::kSplitIndex64},
      {Type_t::kReal32,  Type_t::kSplitReal32},
      {Type_t::kReal64,  Type_t::kSplitReal64},
   };
   // clang-format on

   for (const auto &[unsplit, split] : kSplitPairs) {
      const bool matchesForward = a == unsplit && b == split;
      const bool matchesBackward = a == split && b == unsplit;
      if (matchesForward || matchesBackward)
         return true;
   }
   return false;
}


/// Compares the top level fields of `dst` and `src` and determines whether they can be merged or not.
/// In addition, returns the differences between `dst` and `src`'s structures.
///
/// Top-level fields are matched by name. Every pair of matching fields, and recursively every pair of their
/// subfields (matched by position), must agree on: being projected or not (and, if projected, on the qualified
/// name of the projection source), type name, type checksum (if both have one), type version, field version,
/// structural role, number of columns and number of subfields. Corresponding columns must have the same type
/// (or be the split/unsplit version of each other), the same number of bits on storage and the same value range;
/// source columns with a representation index higher than 0 are rejected.
///
/// \param dst descriptor of the destination, i.e. the "previously-seen" fields ("old" in the error messages)
/// \param src descriptor of the source being merged ("new" in the error messages)
/// \return on success, the top-level fields that are only in `dst`, only in `src`, or in both;
///         on failure, an error whose message lists all the incompatibilities found, one per line.
static ROOT::RResult<RDescriptorsComparison>
CompareDescriptorStructure(const ROOT::RNTupleDescriptor &dst, const ROOT::RNTupleDescriptor &src)
{
   // Cases:
   // 1. dst == src
   // 2. dst has fields that src hasn't
   // 3. src has fields that dst hasn't
   // 4. dst and src have fields that differ (compatible or incompatible)

   std::vector<std::string> errors;
   RDescriptorsComparison res;

   std::vector<RCommonField> commonFields;

   for (const auto &dstField : dst.GetTopLevelFields()) {
      const auto srcFieldId = src.FindFieldId(dstField.GetFieldName());
      if (srcFieldId != ROOT::kInvalidDescriptorId) {
         const auto &srcField = src.GetFieldDescriptor(srcFieldId);
         commonFields.push_back({srcField, dstField});
      } else {
         res.fExtraDstFields.emplace_back(&dstField);
      }
   }
   for (const auto &srcField : src.GetTopLevelFields()) {
      const auto dstFieldId = dst.FindFieldId(srcField.GetFieldName());
      if (dstFieldId == ROOT::kInvalidDescriptorId)
         res.fExtraSrcFields.push_back(&srcField);
   }

   // Check compatibility of common fields
   auto fieldsToCheck = commonFields;
   // NOTE: using index-based for loop because the collection may get extended by the iteration
   for (std::size_t fieldIdx = 0; fieldIdx < fieldsToCheck.size(); ++fieldIdx) {
      // NOTE: taking a copy (it's just two pointers): a reference into `fieldsToCheck` would be invalidated by
      // the push_back() at the end of this iteration.
      const auto field = fieldsToCheck[fieldIdx];

      // NOTE: field.fSrc and field.fDst have the same name by construction
      const auto &fieldName = field.fSrc->GetFieldName();

      // Require that fields are both projected or both not projected
      bool projCompatible = field.fSrc->IsProjectedField() == field.fDst->IsProjectedField();
      if (!projCompatible) {
         std::stringstream ss;
         ss << "Field `" << fieldName << "` is incompatible with previously-seen field with that name because the "
            << (field.fSrc->IsProjectedField() ? "new" : "old") << " one is projected and the other isn't";
         errors.push_back(ss.str());
      } else if (field.fSrc->IsProjectedField()) {
         // if both fields are projected, verify that they point to the same real field
         const auto srcName = src.GetQualifiedFieldName(field.fSrc->GetProjectionSourceId());
         const auto dstName = dst.GetQualifiedFieldName(field.fDst->GetProjectionSourceId());
         if (srcName != dstName) {
            std::stringstream ss;
            ss << "Field `" << fieldName
               << "` is projected to a different field than a previously-seen field with the same name (old: "
               << dstName << ", new: " << srcName << ")";
            errors.push_back(ss.str());
         }
      }

      // Require that fields types match
      // TODO(gparolini): allow non-identical but compatible types
      const auto &srcTyName = field.fSrc->GetTypeName();
      const auto &dstTyName = field.fDst->GetTypeName();
      if (srcTyName != dstTyName) {
         std::stringstream ss;
         ss << "Field `" << fieldName
            << "` has a type incompatible with a previously-seen field with the same name: (old: " << dstTyName
            << ", new: " << srcTyName << ")";
         errors.push_back(ss.str());
      }

      // Require that type checksums match
      const auto srcTyChk = field.fSrc->GetTypeChecksum();
      const auto dstTyChk = field.fDst->GetTypeChecksum();
      if (srcTyChk && dstTyChk && *srcTyChk != *dstTyChk) {
         std::stringstream ss;
         ss << "Field `" << fieldName
            << "` has a different type checksum than previously-seen field with the same name";
         errors.push_back(ss.str());
      }

      // Require that type versions match
      const auto srcTyVer = field.fSrc->GetTypeVersion();
      const auto dstTyVer = field.fDst->GetTypeVersion();
      if (srcTyVer != dstTyVer) {
         std::stringstream ss;
         ss << "Field `" << fieldName
            << "` has a different type version than previously-seen field with the same name (old: " << dstTyVer
            << ", new: " << srcTyVer << ")";
         errors.push_back(ss.str());
      }

      // Require that field versions match
      const auto srcFldVer = field.fSrc->GetFieldVersion();
      const auto dstFldVer = field.fDst->GetFieldVersion();
      if (srcFldVer != dstFldVer) {
         std::stringstream ss;
         ss << "Field `" << fieldName
            << "` has a different field version than previously-seen field with the same name (old: " << dstFldVer
            << ", new: " << srcFldVer << ")";
         errors.push_back(ss.str());
      }

      // Require that structural roles match
      const auto srcRole = field.fSrc->GetStructure();
      const auto dstRole = field.fDst->GetStructure();
      if (srcRole != dstRole) {
         std::stringstream ss;
         ss << "Field `" << fieldName
            << "` has a different structural role than previously-seen field with the same name (old: " << dstRole
            << ", new: " << srcRole << ")";
         errors.push_back(ss.str());
      }

      // Require that column representations match
      const auto srcNCols = field.fSrc->GetLogicalColumnIds().size();
      const auto dstNCols = field.fDst->GetLogicalColumnIds().size();
      if (srcNCols != dstNCols) {
         std::stringstream ss;
         ss << "Field `" << fieldName
            << "` has a different number of columns than previously-seen field with the same name (old: " << dstNCols
            << ", new: " << srcNCols << ")";
         errors.push_back(ss.str());
      } else {
         for (auto i = 0u; i < srcNCols; ++i) {
            const auto srcColId = field.fSrc->GetLogicalColumnIds()[i];
            const auto dstColId = field.fDst->GetLogicalColumnIds()[i];
            const auto &srcCol = src.GetColumnDescriptor(srcColId);
            const auto &dstCol = dst.GetColumnDescriptor(dstColId);
            // TODO(gparolini): currently we refuse to merge columns of different types unless they are Split/non-Split
            // version of the same type, because we know how to treat that specific case. We should also properly handle
            // different but compatible types.
            // NOTE: as in all other messages, "old" refers to the destination and "new" to the source.
            if (srcCol.GetType() != dstCol.GetType() &&
                !IsSplitOrUnsplitVersionOf(srcCol.GetType(), dstCol.GetType())) {
               std::stringstream ss;
               ss << i << "-th column of field `" << fieldName
                  << "` has a different column type of the same column on the previously-seen field with the same name "
                     "(old: "
                  << RColumnElementBase::GetColumnTypeName(dstCol.GetType())
                  << ", new: " << RColumnElementBase::GetColumnTypeName(srcCol.GetType()) << ")";
               errors.push_back(ss.str());
            }
            if (srcCol.GetBitsOnStorage() != dstCol.GetBitsOnStorage()) {
               std::stringstream ss;
               ss << i << "-th column of field `" << fieldName
                  << "` has a different number of bits of the same column on the previously-seen field with the same "
                     "name "
                     "(old: "
                  << dstCol.GetBitsOnStorage() << ", new: " << srcCol.GetBitsOnStorage() << ")";
               errors.push_back(ss.str());
            }
            if (srcCol.GetValueRange() != dstCol.GetValueRange()) {
               std::stringstream ss;
               ss << i << "-th column of field `" << fieldName
                  << "` has a different value range of the same column on the previously-seen field with the same name "
                     "(old: "
                  << dstCol.GetValueRange() << ", new: " << srcCol.GetValueRange() << ")";
               errors.push_back(ss.str());
            }
            if (srcCol.GetRepresentationIndex() > 0) {
               std::stringstream ss;
               ss << i << "-th column of field `" << fieldName
                  << "` has a representation index higher than 0. This is not supported yet by the merger.";
               errors.push_back(ss.str());
            }
         }
      }

      // Require that subfields are compatible
      const auto &srcLinks = field.fSrc->GetLinkIds();
      const auto &dstLinks = field.fDst->GetLinkIds();
      if (srcLinks.size() != dstLinks.size()) {
         std::stringstream ss;
         ss << "Field `" << fieldName
            << "` has a different number of children than previously-seen field with the same name (old: "
            << dstLinks.size() << ", new: " << srcLinks.size() << ")";
         errors.push_back(ss.str());
      } else {
         // Queue the subfields, pairwise, so that they get checked by a later iteration of the outer loop.
         for (std::size_t linkIdx = 0, linkNum = srcLinks.size(); linkIdx < linkNum; ++linkIdx) {
            const auto &srcSubfield = src.GetFieldDescriptor(srcLinks[linkIdx]);
            const auto &dstSubfield = dst.GetFieldDescriptor(dstLinks[linkIdx]);
            fieldsToCheck.push_back(RCommonField{srcSubfield, dstSubfield});
         }
      }
   }

   // Build a bullet list with one line per error
   std::string errMsg;
   for (const auto &err : errors)
      errMsg += std::string("\n  * ") + err;

   if (!errMsg.empty())
      errMsg = errMsg.substr(1); // strip initial newline

   if (errMsg.length())
      return R__FAIL(errMsg);

   res.fCommonFields = std::move(commonFields);

   return ROOT::RResult(res);
}


// Applies late model extension to the destination, adding all `newFields` to it.
//
// \param newFields descriptors (belonging to `*mergeData.fSrcDescriptor`) of the fields to add; must not be empty
// \param dstModel the model of the destination; it is unfrozen, extended and frozen again
// \param mergeData the data of the current merge; its destination sink receives the schema update
// \param commonFields extended with one (source field, new destination field) pair per added field
// \return success, or a failure carrying the report of the RException thrown by the schema update
[[nodiscard]]
static ROOT::RResult<void>
ExtendDestinationModel(std::span<const ROOT::RFieldDescriptor *> newFields, ROOT::RNTupleModel &dstModel,
                       RNTupleMergeData &mergeData, std::vector<RCommonField> &commonFields)
{
   assert(newFields.size() > 0); // no point in calling this with 0 new cols

   dstModel.Unfreeze();
   ROOT::Internal::RNTupleModelChangeset changeset{dstModel};

   if (mergeData.fMergeOpts.fExtraVerbose) {
      const bool severalFields = newFields.size() > 1;

      // Comma-separated list of the back-quoted names of the new fields
      std::string fieldList;
      for (const auto *fieldDesc : newFields) {
         if (!fieldList.empty())
            fieldList += ", ";
         fieldList += '`';
         fieldList += fieldDesc->GetFieldName();
         fieldList += '`';
      }

      R__LOG_INFO(NTupleMergeLog()) << "destination doesn't contain field" << (severalFields ? "s " : " ")
                                    << fieldList << ": adding " << (severalFields ? "them" : "it")
                                    << " to the destination model (entry #" << mergeData.fNumDstEntries << ").";
   }

   const auto *srcDesc = mergeData.fSrcDescriptor;

   changeset.fAddedFields.reserve(newFields.size());
   // First pass: the non-projected fields...
   for (const auto *fieldDesc : newFields) {
      if (fieldDesc->IsProjectedField())
         continue;
      changeset.AddField(fieldDesc->CreateField(*srcDesc));
   }
   // ...second pass: the projected fields, which are mapped to fields looked up by name in the destination model.
   for (const auto *fieldDesc : newFields) {
      if (!fieldDesc->IsProjectedField())
         continue;

      ROOT::Internal::RProjectedFields::FieldMap_t projectionMap;
      auto projectedField = fieldDesc->CreateField(*srcDesc);
      const auto &projectionTarget =
         dstModel.GetConstField(srcDesc->GetQualifiedFieldName(fieldDesc->GetProjectionSourceId()));
      projectionMap[projectedField.get()] = &projectionTarget;

      // Each subfield of a projected field is mapped to its own projection source.
      for (const auto &sub : *projectedField) {
         const auto subTargetId = srcDesc->GetFieldDescriptor(sub.GetOnDiskId()).GetProjectionSourceId();
         const auto &subTarget = dstModel.GetConstField(srcDesc->GetQualifiedFieldName(subTargetId));
         projectionMap[&sub] = &subTarget;
      }

      changeset.fAddedProjectedFields.emplace_back(projectedField.get());
      ROOT::Internal::GetProjectedFieldsOfModel(dstModel).Add(std::move(projectedField), projectionMap);
   }
   dstModel.Freeze();

   try {
      mergeData.fDestination.UpdateSchema(changeset, mergeData.fNumDstEntries);
   } catch (const ROOT::RException &ex) {
      return R__FAIL(ex.GetError().GetReport());
   }

   // The new fields now exist on both sides: register them as common fields.
   commonFields.reserve(commonFields.size() + newFields.size());
   for (const auto *srcField : newFields) {
      const auto dstFieldId = mergeData.fDstDescriptor.FindFieldId(srcField->GetFieldName());
      commonFields.emplace_back(*srcField, mergeData.fDstDescriptor.GetFieldDescriptor(dstFieldId));
   }

   return ROOT::RResult<void>::Success();
}


// Generates default (zero) values for the given columns.
//
// For every column in `columns` that needs data, this synthesizes zero-filled pages covering
// `nEntriesToGenerate` entries, seals them with the destination's compression/checksum settings and appends
// them to `sealedPageData` (one page sequence + one page group per generated page).
//
// \param nEntriesToGenerate  Number of entries to cover. If 0, nothing is generated.
// \param columns             Columns to generate pages for. A column is skipped if it is not the first logical
//                            column of its parent field, or if any ancestor field is a Collection or a Variant.
// \param sealedPageData      Receives the sealed pages, their groups and the buffers backing them.
// \param pageAlloc           Allocator used for the temporary (unsealed) zero pages.
// \param dstDescriptor       Descriptor used to look up the column type via `column.fOutputId`.
// \param mergeData           Provides the target compression settings and the destination's write options.
// \return Success, or an error if one of the columns belongs to a streamer field (no sensible default exists).
[[nodiscard]]
static ROOT::RResult<void>
GenerateZeroPagesForColumns(size_t nEntriesToGenerate, std::span<const RColumnMergeInfo> columns,
                            RSealedPageMergeData &sealedPageData, ROOT::Internal::RPageAllocator &pageAlloc,
                            const ROOT::RNTupleDescriptor &dstDescriptor, const RNTupleMergeData &mergeData)
{
   if (!nEntriesToGenerate)
      return ROOT::RResult<void>::Success();

   for (const auto &column : columns) {
      const ROOT::RFieldDescriptor *field = column.fParentFieldDescriptor;

      // Skip all auxiliary columns
      assert(!field->GetLogicalColumnIds().empty());
      if (field->GetLogicalColumnIds()[0] != column.fInputId)
         continue;

      // Check if this column is a child of a Collection or a Variant. If so, it has no data
      // and can be skipped.
      // While walking up the hierarchy we also accumulate the repetition count of all ancestors, which
      // determines how many elements a single entry of this column contains.
      bool skipColumn = false;
      auto nRepetitions = std::max<std::uint64_t>(field->GetNRepetitions(), 1);
      for (auto parentId = field->GetParentId(); parentId != ROOT::kInvalidDescriptorId;) {
         const ROOT::RFieldDescriptor &parent = column.fParentNTupleDescriptor->GetFieldDescriptor(parentId);
         if (parent.GetStructure() == ROOT::ENTupleStructure::kCollection ||
             parent.GetStructure() == ROOT::ENTupleStructure::kVariant) {
            skipColumn = true;
            break;
         }
         nRepetitions *= std::max<std::uint64_t>(parent.GetNRepetitions(), 1);
         parentId = parent.GetParentId();
      }
      if (skipColumn)
         continue;

      const auto structure = field->GetStructure();

      if (structure == ROOT::ENTupleStructure::kStreamer) {
         return R__FAIL(
            "Destination RNTuple contains a streamer field (" + field->GetFieldName() +
            ") that is not present in one of the sources. "
            "Creating a default value for a streamer field is ill-defined, therefore the merging process will abort.");
      }

      // NOTE: we cannot have a Record here because it has no associated columns.
      R__ASSERT(structure == ROOT::ENTupleStructure::kCollection || structure == ROOT::ENTupleStructure::kVariant ||
                structure == ROOT::ENTupleStructure::kPlain);

      const auto &columnDesc = dstDescriptor.GetColumnDescriptor(column.fOutputId);
      const auto colElement = RColumnElementBase::Generate(columnDesc.GetType());
      const auto nElements = nEntriesToGenerate * nRepetitions;
      const auto nBytesOnStorage = colElement->GetPackedSize(nElements);
      // TODO(gparolini): make this configurable
      constexpr std::size_t kPageSizeLimit = 256 * 1024;
      // TODO(gparolini): consider coalescing the last page if its size is less than some threshold
      // Number of pages = ceil(nBytesOnStorage / kPageSizeLimit)
      const size_t nPages = nBytesOnStorage / kPageSizeLimit + !!(nBytesOnStorage % kPageSizeLimit);
      for (size_t i = 0; i < nPages; ++i) {
         // All pages are full except (possibly) the last one, which holds the remainder.
         const auto pageSize = (i < nPages - 1) ? kPageSizeLimit : nBytesOnStorage - kPageSizeLimit * (nPages - 1);
         const auto checksumSize = RPageStorage::kNBytesPageChecksum;
         const auto bufSize = pageSize + checksumSize;
         assert(pageSize % colElement->GetSize() == 0);
         const auto nElementsPerPage = pageSize / colElement->GetSize();
         auto page = pageAlloc.NewPage(colElement->GetSize(), nElementsPerPage);
         page.GrowUnchecked(nElementsPerPage);
         memset(page.GetBuffer(), 0, page.GetNBytes());

         // Hand the allocation to a smart pointer *before* inserting it: if the container had to grow and
         // threw, a raw `new[]` passed straight to emplace_back would be leaked.
         auto &buffer = sealedPageData.fBuffers.emplace_back(std::unique_ptr<std::byte[]>(new std::byte[bufSize]));
         RPageSink::RSealPageConfig sealConf;
         sealConf.fElement = colElement.get();
         sealConf.fPage = &page;
         sealConf.fBuffer = buffer.get();
         sealConf.fCompressionSettings = mergeData.fMergeOpts.fCompressionSettings.value();
         sealConf.fWriteChecksum = mergeData.fDestination.GetWriteOptions().GetEnablePageChecksums();
         auto sealedPage = RPageSink::SealPage(sealConf);

         // Each generated page becomes its own single-page sequence; the group refers to it through iterators.
         sealedPageData.fPagesV.push_back({sealedPage});
         sealedPageData.fGroups.emplace_back(column.fOutputId, sealedPageData.fPagesV.back().cbegin(),
                                             sealedPageData.fPagesV.back().cend());
      }
   }
   return ROOT::RResult<void>::Success();
}


// Merges all columns appearing both in the source and destination RNTuples, just copying them if their
// compression matches ("fast merge") or by unsealing and resealing them with the proper compression.
//
// \param clusterPool              Pool used to load the on-disk pages of the source cluster.
// \param clusterDesc              Descriptor of the source cluster being merged.
// \param commonColumns            All common columns; only the first `nCommonColumnsInCluster` are processed.
// \param commonColumnSet          Input ids of the columns to load; must have `nCommonColumnsInCluster` entries.
// \param nCommonColumnsInCluster  Number of common columns actually present in this cluster.
// \param sealedPageData           Receives the sealed pages, page groups and any newly allocated buffers.
// \param mergeData                Merge state (source/destination descriptors, options, destination sink).
// \param pageAlloc                Allocator used when zero pages must be synthesized for deferred columns.
// \return Success, or the error reported while generating zero pages. Note that a page checksum mismatch is
//         currently reported by throwing (see the TODO below).
ROOT::RResult<void>
RNTupleMerger::MergeCommonColumns(ROOT::Internal::RClusterPool &clusterPool,
                                  const ROOT::RClusterDescriptor &clusterDesc,
                                  std::span<const RColumnMergeInfo> commonColumns,
                                  const RCluster::ColumnSet_t &commonColumnSet, std::size_t nCommonColumnsInCluster,
                                  RSealedPageMergeData &sealedPageData, const RNTupleMergeData &mergeData,
                                  ROOT::Internal::RPageAllocator &pageAlloc)
{
   assert(nCommonColumnsInCluster == commonColumnSet.size());
   assert(nCommonColumnsInCluster <= commonColumns.size());
   if (nCommonColumnsInCluster == 0)
      return ROOT::RResult<void>::Success();

   const RCluster *cluster = clusterPool.GetCluster(clusterDesc.GetId(), commonColumnSet);
   // we expect the cluster pool to contain the requested set of columns, since they were
   // validated by CompareDescriptorStructure().
   assert(cluster);

   for (size_t colIdx = 0; colIdx < nCommonColumnsInCluster; ++colIdx) {
      const auto &column = commonColumns[colIdx];
      const auto &columnId = column.fInputId;
      R__ASSERT(clusterDesc.ContainsColumn(columnId));

      // The source element uses the source's on-disk type, the destination element the type chosen for the
      // output column; both share the same in-memory type (if one is known).
      const auto &columnDesc = mergeData.fSrcDescriptor->GetColumnDescriptor(columnId);
      const auto srcColElement = column.fInMemoryType
                                    ? ROOT::Internal::GenerateColumnElement(*column.fInMemoryType, columnDesc.GetType())
                                    : RColumnElementBase::Generate(columnDesc.GetType());
      const auto dstColElement = column.fInMemoryType
                                    ? ROOT::Internal::GenerateColumnElement(*column.fInMemoryType, column.fColumnType)
                                    : RColumnElementBase::Generate(column.fColumnType);

      // Now get the pages for this column in this cluster
      const auto &pages = clusterDesc.GetPageRange(columnId);

      RPageStorage::SealedPageSequence_t sealedPages;
      sealedPages.resize(pages.GetPageInfos().size());

      // Each column range potentially has a distinct compression settings
      const auto colRangeCompressionSettings = clusterDesc.GetColumnRange(columnId).GetCompressionSettings().value();

      // Select "merging level". There are 3 levels, from fastest to slowest, depending on the case:
      // L1: compression and encoding of src and dest both match: we can simply copy the page
      // L2: compression of dest doesn't match the src but encoding does: we must recompress the page but can avoid
      //     resealing it.
      // L3: on-disk encoding doesn't match: we need to reseal the page, which implies decompressing and recompressing
      //     it.
      const bool compressionIsDifferent =
         colRangeCompressionSettings != mergeData.fMergeOpts.fCompressionSettings.value();
      const bool needsResealing =
         srcColElement->GetIdentifier().fOnDiskType != dstColElement->GetIdentifier().fOnDiskType;
      const bool needsRecompressing = compressionIsDifferent || needsResealing;

      if (needsRecompressing && mergeData.fMergeOpts.fExtraVerbose) {
         R__LOG_INFO(NTupleMergeLog())
            << (needsResealing ? "Resealing" : "Recompressing") << " column " << column.fColumnName
            << ": { compression: " << colRangeCompressionSettings << " => "
            << mergeData.fMergeOpts.fCompressionSettings.value()
            << ", onDiskType: " << RColumnElementBase::GetColumnTypeName(srcColElement->GetIdentifier().fOnDiskType)
            << " => " << RColumnElementBase::GetColumnTypeName(dstColElement->GetIdentifier().fOnDiskType) << " }";
      }

      // Remember where this column's buffers start: zero-page generation below may append further buffers.
      size_t pageBufferBaseIdx = sealedPageData.fBuffers.size();
      // If the column range already has the right compression we don't need to allocate any new buffer, so we don't
      // bother reserving memory for them.
      if (needsRecompressing)
         sealedPageData.fBuffers.resize(sealedPageData.fBuffers.size() + pages.GetPageInfos().size());

      // If this column is deferred, we may need to fill "holes" until its real start. We fill any missing entry
      // with zeroes, like we do for extraDstColumns.
      // As an optimization, we don't do this for the first source (since we can rely on the FirstElementIndex and
      // deferred column mechanism in that case).
      // TODO: also avoid doing this if we added no real page of this column to the destination yet.
      if (columnDesc.GetFirstElementIndex() > clusterDesc.GetFirstEntryIndex() && mergeData.fNumDstEntries > 0) {
         const auto nMissingEntries = columnDesc.GetFirstElementIndex() - clusterDesc.GetFirstEntryIndex();
         auto res = GenerateZeroPagesForColumns(nMissingEntries, {&column, 1}, sealedPageData, pageAlloc,
                                                mergeData.fDstDescriptor, mergeData);
         if (!res)
            return R__FORWARD_ERROR(res);
      }

      // Loop over the pages
      std::uint64_t pageIdx = 0;
      for (const auto &pageInfo : pages.GetPageInfos()) {
         assert(pageIdx < sealedPages.size());
         assert(sealedPageData.fBuffers.size() == 0 || pageIdx < sealedPageData.fBuffers.size());
         assert(pageInfo.GetLocator().GetType() != RNTupleLocator::kTypePageZero);

         ROOT::Internal::ROnDiskPage::Key key{columnId, pageIdx};
         auto onDiskPage = cluster->GetOnDiskPage(key);

         const auto checksumSize = pageInfo.HasChecksum() * RPageStorage::kNBytesPageChecksum;
         RPageStorage::RSealedPage &sealedPage = sealedPages[pageIdx];
         sealedPage.SetNElements(pageInfo.GetNElements());
         sealedPage.SetHasChecksum(pageInfo.HasChecksum());
         sealedPage.SetBufferSize(pageInfo.GetLocator().GetNBytesOnStorage() + checksumSize);
         // Validate the on-disk page before dereferencing it or reading `GetBufferSize()` bytes from it:
         // the page must exist and its size must match what the page list advertises.
         R__ASSERT(onDiskPage && (onDiskPage->GetSize() == sealedPage.GetBufferSize()));
         sealedPage.SetBuffer(onDiskPage->GetAddress());
         // TODO(gparolini): more graceful error handling (skip the page?)
         sealedPage.VerifyChecksumIfEnabled().ThrowOnError();

         if (needsRecompressing) {
            const auto uncompressedSize = srcColElement->GetSize() * sealedPage.GetNElements();
            auto &buffer = sealedPageData.fBuffers[pageBufferBaseIdx + pageIdx];
            const auto bufSize = uncompressedSize + checksumSize;
            // NOTE: we currently allocate the max possible size for this buffer and don't shrink it afterward.
            // We might want to introduce an option that trades speed for memory usage and shrink the buffer to fit
            // the actual data size after recompressing.
            buffer = MakeUninitArray<std::byte>(bufSize);

            // The functors below rewrite `sealedPage` to point into `buffer`.
            // NOTE(review): RTaskVisitor presumably runs the functor on fTaskGroup when one exists and inline
            // otherwise -- confirm against its definition.
            // clang-format off
            if (needsResealing) {
               RTaskVisitor{fTaskGroup}(RResealFunc{
                  *srcColElement,
                  *dstColElement,
                  mergeData.fMergeOpts,
                  sealedPage,
                  *fPageAlloc,
                  buffer.get(),
                  bufSize,
                  mergeData.fDestination.GetWriteOptions()
               });
            } else {
               RTaskVisitor{fTaskGroup}(RChangeCompressionFunc{
                  *srcColElement,
                  *dstColElement,
                  mergeData.fMergeOpts,
                  sealedPage,
                  *fPageAlloc,
                  buffer.get(),
                  bufSize,
                  mergeData.fDestination.GetWriteOptions()
               });
            }
            // clang-format on
         }

         ++pageIdx;

      } // end of loop over pages

      // All tasks touching `sealedPages` must be done before the sequence is moved into `sealedPageData`.
      if (fTaskGroup)
         fTaskGroup->Wait();

      sealedPageData.fPagesV.push_back(std::move(sealedPages));
      sealedPageData.fGroups.emplace_back(column.fOutputId, sealedPageData.fPagesV.back().cbegin(),
                                          sealedPageData.fPagesV.back().cend());
   } // end loop over common columns

   return ROOT::RResult<void>::Success();
}


// Walks the clusters of `source` in order and appends each of them to the destination.
// All columns in `commonColumns` are assumed to exist (with compatible representations) in both the source's
// and the destination's schema; `extraDstColumns` exist in the destination only and get zero pages.
// Pages may be "fast-merged" (i.e. simply copied with no decompression/recompression) if the target
// compression is unspecified or matches the original compression settings.
ROOT::RResult<void>
RNTupleMerger::MergeSourceClusters(RPageSource &source, std::span<const RColumnMergeInfo> commonColumns,
                                   std::span<const RColumnMergeInfo> extraDstColumns, RNTupleMergeData &mergeData)
{
   ROOT::Internal::RClusterPool clusterPool{source};

   // Scratch list rebuilt for every cluster: its first `extraDstColumns.size()` entries never change,
   // the rest are the common columns absent from the cluster at hand.
   std::vector<RColumnMergeInfo> missingColumns{extraDstColumns.begin(), extraDstColumns.end()};

   const auto &srcDescriptor = *mergeData.fSrcDescriptor;

   // descriptor->GetClusterIterable() doesn't guarantee any specific order, so we start from the first cluster
   // explicitly and follow the "next cluster" links.
   for (ROOT::DescriptorId_t clusterId = srcDescriptor.FindClusterId(0, 0); clusterId != ROOT::kInvalidDescriptorId;
        clusterId = srcDescriptor.FindNextClusterId(clusterId)) {
      const auto &clusterDesc = srcDescriptor.GetClusterDescriptor(clusterId);
      const auto nClusterEntries = clusterDesc.GetNEntries();
      R__ASSERT(nClusterEntries > 0);

      // NOTE: a column being in `commonColumns` does not imply that every source cluster contains it: it may be
      // a deferred column whose real data only starts in a later cluster.
      // `commonColumns` is sorted by input id, so we scan from the back until we hit a column that this cluster
      // contains: every column before it must be contained as well.
      size_t nCommonColumnsInCluster = commonColumns.size();
      for (; nCommonColumnsInCluster > 0; --nCommonColumnsInCluster) {
         if (clusterDesc.ContainsColumn(commonColumns[nCommonColumnsInCluster - 1].fInputId))
            break;
      }
      const auto firstAbsentColumn = commonColumns.begin() + static_cast<std::ptrdiff_t>(nCommonColumnsInCluster);

      // Columns present in the cluster, in the form expected by the cluster pool
      RCluster::ColumnSet_t commonColumnSet;
      commonColumnSet.reserve(nCommonColumnsInCluster);
      for (auto it = commonColumns.begin(); it != firstAbsentColumn; ++it)
         commonColumnSet.emplace(it->fInputId);

      // "Missing columns" = extraDstColumns + common columns absent from this cluster.
      // Drop the leftovers of the previous iteration, then append this cluster's absent columns.
      missingColumns.erase(missingColumns.begin() + static_cast<std::ptrdiff_t>(extraDstColumns.size()),
                           missingColumns.end());
      missingColumns.insert(missingColumns.end(), firstAbsentColumn, commonColumns.end());

      RSealedPageMergeData sealedPageData;

      // Real pages first...
      auto mergeRes = MergeCommonColumns(clusterPool, clusterDesc, commonColumns, commonColumnSet,
                                         nCommonColumnsInCluster, sealedPageData, mergeData, *fPageAlloc);
      if (!mergeRes)
         return R__FORWARD_ERROR(mergeRes);

      // ...then zero pages for everything this cluster lacks.
      auto zeroRes = GenerateZeroPagesForColumns(nClusterEntries, missingColumns, sealedPageData, *fPageAlloc,
                                                 mergeData.fDstDescriptor, mergeData);
      if (!zeroRes)
         return R__FORWARD_ERROR(zeroRes);

      // Commit the pages and the cluster, and account for the new entries
      auto &destination = mergeData.fDestination;
      destination.CommitSealedPageV(sealedPageData.fGroups);
      destination.CommitCluster(nClusterEntries);
      mergeData.fNumDstEntries += nClusterEntries;
   }

   // TODO(gparolini): when we get serious about huge file support (>~ 100GB) we might want to check here
   // the size of the running page list and commit a cluster group when it exceeds some threshold,
   // which would prevent the page list from getting too large.
   // However, as of today, we aren't really handling such huge files, and even relatively big ones
   // such as the CMS dataset have a page list size of about only 2 MB.
   // So currently we simply merge all cluster groups into one.
   return ROOT::RResult<void>::Success();
}


// Maps a field type name and an on-disk column type to the in-memory type to use for the column element.
// Returns std::nullopt when the default in-memory type of the on-disk type should be used.
static std::optional<std::type_index> ColumnInMemoryType(std::string_view fieldType, ENTupleColumnType onDiskType)
{
   // For index and switch columns the on-disk type alone decides, whatever the field type is.
   switch (onDiskType) {
   case ENTupleColumnType::kIndex32:
   case ENTupleColumnType::kSplitIndex32:
   case ENTupleColumnType::kIndex64:
   case ENTupleColumnType::kSplitIndex64: return typeid(ROOT::Internal::RColumnIndex);
   case ENTupleColumnType::kSwitch: return typeid(ROOT::Internal::RColumnSwitch);
   default: break;
   }

   // Otherwise, look the field's type name up among the known fundamental types.
   // clang-format off
   static const std::pair<std::string_view, std::type_index> kFundamentalTypes[] = {
      {"bool",          typeid(bool)},
      {"std::byte",     typeid(std::byte)},
      {"char",          typeid(char)},
      {"std::int8_t",   typeid(std::int8_t)},
      {"std::uint8_t",  typeid(std::uint8_t)},
      {"std::int16_t",  typeid(std::int16_t)},
      {"std::uint16_t", typeid(std::uint16_t)},
      {"std::int32_t",  typeid(std::int32_t)},
      {"std::uint32_t", typeid(std::uint32_t)},
      {"std::int64_t",  typeid(std::int64_t)},
      {"std::uint64_t", typeid(std::uint64_t)},
      {"float",         typeid(float)},
      {"double",        typeid(double)},
   };
   // clang-format on

   for (const auto &[typeName, inMemoryType] : kFundamentalTypes) {
      if (fieldType == typeName)
         return inMemoryType;
   }

   // if the type is not one of those above, we use the default in-memory type.
   return std::nullopt;
}


// Given a field, fill `columns` and `mergeData.fColumnIdMap` with information about all columns belonging to it and its
// subfields. `mergeData.fColumnIdMap` is used to map matching columns from different sources to the same output column
// in the destination. We match columns by their "fully qualified name", which is the concatenation of their ancestor
// fields' names and the column index. By this point, since we called `CompareDescriptorStructure()` earlier, we should
// be guaranteed that two matching columns will have at least compatible representations. NOTE: srcFieldDesc and
// dstFieldDesc may alias.
//
// \param columns       Output list; one entry is appended per (non-alias) column of the field and its subfields.
// \param srcDesc       Descriptor that `srcFieldDesc` (and its columns and subfields) belongs to.
// \param mergeData     Merge state; its column id map is read and extended, its destination descriptor is used
//                      to resolve `dstFieldDesc`'s columns and subfields.
// \param srcFieldDesc  Field whose columns are gathered.
// \param dstFieldDesc  Matching field in the destination.
// \param prefix        Qualified name of the parent field (empty for top-level fields).
static void AddColumnsFromField(std::vector<RColumnMergeInfo> &columns, const ROOT::RNTupleDescriptor &srcDesc,
                                RNTupleMergeData &mergeData, const ROOT::RFieldDescriptor &srcFieldDesc,
                                const ROOT::RFieldDescriptor &dstFieldDesc, const std::string &prefix = "")
{
   const std::string name = prefix + '.' + srcFieldDesc.GetFieldName();

   // We don't want to try and merge alias columns, so a projected field contributes no columns of its own
   // (its subfields are still visited below).
   if (!srcFieldDesc.IsProjectedField()) {
      const auto &srcColumnIds = srcFieldDesc.GetLogicalColumnIds();
      const auto &dstColumnIds = dstFieldDesc.GetLogicalColumnIds();
      columns.reserve(columns.size() + srcColumnIds.size());

      // Since we disallow merging fields of different types, src and dstFieldDesc must have the same type name.
      if (!srcColumnIds.empty())
         assert(srcFieldDesc.GetTypeName() == dstFieldDesc.GetTypeName());

      // NOTE: here we can match the src and dst columns by column index because we forbid merging fields with
      // different column representations.
      for (std::size_t i = 0; i < srcColumnIds.size(); ++i) {
         const auto srcColumnId = srcColumnIds[i];
         const auto &srcColumn = srcDesc.GetColumnDescriptor(srcColumnId);

         RColumnMergeInfo info{};
         info.fColumnName = name + '.' + std::to_string(srcColumn.GetIndex());
         info.fInputId = srcColumn.GetPhysicalId();
         // NOTE(gparolini): the parent field is used when synthesizing zero pages, which happens in 2 situations:
         // 1. when adding extra dst columns (in which case we need to synthesize zero pages for the incoming src), and
         // 2. when merging a deferred column into an existing column (in which case we need to fill the "hole" with
         // zeroes). For the first case srcFieldDesc and dstFieldDesc are the same (see the calling site of this
         // function), but for the second case they're not, and we need to pick the source field because we will then
         // check the column's *input* id inside fParentFieldDescriptor to see if it's a suppressed column (see
         // GenerateZeroPagesForColumns()).
         info.fParentFieldDescriptor = &srcFieldDesc;
         // Save the parent RNTuple descriptor since this may be either the source or destination descriptor
         // depending on whether this is an extraDstField or a commonField. We will need this in
         // GenerateZeroPagesForColumns() to properly walk up the field hierarchy.
         info.fParentNTupleDescriptor = &srcDesc;

         if (auto it = mergeData.fColumnIdMap.find(info.fColumnName); it != mergeData.fColumnIdMap.end()) {
            // Column already known: reuse the output id and type chosen the first time it was seen.
            info.fOutputId = it->second.fColumnId;
            info.fColumnType = it->second.fColumnType;
         } else {
            // New column: it gets the next free output id.
            info.fOutputId = mergeData.fColumnIdMap.size();
            // NOTE(gparolini): map the type of src column to the type of dst column.
            // This mapping is only relevant for common columns and it's done to ensure we keep a consistent
            // on-disk representation of the same column.
            // This is also important to do for first source when it is used to generate the destination sink,
            // because even in that case their column representations may differ.
            // e.g. if the destination has a different compression than the source, an integer column might be
            // zigzag-encoded in the source but not in the destination.
            assert(i < dstColumnIds.size());
            const auto dstColumnId = dstColumnIds[i];
            const auto &dstColumn = mergeData.fDstDescriptor.GetColumnDescriptor(dstColumnId);
            info.fColumnType = dstColumn.GetType();
            mergeData.fColumnIdMap[info.fColumnName] = {info.fOutputId, info.fColumnType};
         }

         if (mergeData.fMergeOpts.fExtraVerbose) {
            R__LOG_INFO(NTupleMergeLog()) << "Adding column " << info.fColumnName << " with log.id " << srcColumnId
                                          << ", phys.id " << srcColumn.GetPhysicalId() << ", type "
                                          << RColumnElementBase::GetColumnTypeName(srcColumn.GetType()) << " -> log.id "
                                          << info.fOutputId << ", type "
                                          << RColumnElementBase::GetColumnTypeName(info.fColumnType);
         }

         info.fInMemoryType = ColumnInMemoryType(srcFieldDesc.GetTypeName(), info.fColumnType);
         columns.emplace_back(std::move(info));
      }
   }

   // Recurse into the subfields, pairing source and destination children by position.
   const auto &srcChildrenIds = srcFieldDesc.GetLinkIds();
   const auto &dstChildrenIds = dstFieldDesc.GetLinkIds();
   assert(srcChildrenIds.size() == dstChildrenIds.size());
   for (std::size_t i = 0; i < srcChildrenIds.size(); ++i) {
      const auto &srcChild = srcDesc.GetFieldDescriptor(srcChildrenIds[i]);
      const auto &dstChild = mergeData.fDstDescriptor.GetFieldDescriptor(dstChildrenIds[i]);
      AddColumnsFromField(columns, srcDesc, mergeData, srcChild, dstChild, name);
   }
}


// Translates the result of a descriptor comparison into per-column merge information.
// As a side effect it fills `mergeData.fColumnIdMap`, which later calls to this function consult in order to
// map already-seen column names to the output id and type chosen for them.
static RColumnInfoGroup GatherColumnInfos(const RDescriptorsComparison &descCmp, const ROOT::RNTupleDescriptor &srcDesc,
                                          RNTupleMergeData &mergeData)
{
   RColumnInfoGroup columnInfos;

   // Fields that only the destination has: the destination descriptor plays both the "source" and the
   // "destination" role.
   for (const ROOT::RFieldDescriptor *dstOnlyField : descCmp.fExtraDstFields)
      AddColumnsFromField(columnInfos.fExtraDstColumns, mergeData.fDstDescriptor, mergeData, *dstOnlyField,
                          *dstOnlyField);

   // Fields shared by source and destination
   for (const auto &[srcField, dstField] : descCmp.fCommonFields)
      AddColumnsFromField(columnInfos.fCommonColumns, srcDesc, mergeData, *srcField, *dstField);

   // Order the common columns by input id, which makes it easy to tell how many of them a given cluster has
   // (each cluster must contain all columns of the previous cluster plus potentially some new ones)
   const auto byInputId = [](const auto &lhs, const auto &rhs) { return lhs.fInputId < rhs.fInputId; };
   auto &commonColumns = columnInfos.fCommonColumns;
   std::sort(commonColumns.begin(), commonColumns.end(), byInputId);

   return columnInfos;
}


// Records in `colIdMap` the logical id and type of every column of `fieldDesc` and, recursively, of its
// subfields. Columns are keyed by "<prefix>.<field name>.<column index>".
static void PrefillColumnMap(const ROOT::RNTupleDescriptor &desc, const ROOT::RFieldDescriptor &fieldDesc,
                             ColumnIdMap_t &colIdMap, const std::string &prefix = "")
{
   const std::string qualifiedName = prefix + '.' + fieldDesc.GetFieldName();

   // This field's own columns
   for (const auto &logicalColumnId : fieldDesc.GetLogicalColumnIds()) {
      const auto &column = desc.GetColumnDescriptor(logicalColumnId);
      const auto key = qualifiedName + '.' + std::to_string(column.GetIndex());

      RColumnOutInfo outInfo{};
      outInfo.fColumnId = column.GetLogicalId();
      outInfo.fColumnType = column.GetType();
      colIdMap[key] = outInfo;
   }

   // Then descend into the subfields, using this field's qualified name as their prefix
   for (const auto &childId : fieldDesc.GetLinkIds())
      PrefillColumnMap(desc, desc.GetFieldDescriptor(childId), colIdMap, qualifiedName);
}


// Creates a merger that writes into `destination`, which must not be null.
// `model` may be null; Merge() requires a model to be present if and only if the destination is already
// initialized (i.e. for incremental merging).
RNTupleMerger::RNTupleMerger(std::unique_ptr<ROOT::Internal::RPagePersistentSink> destination,

                             std::unique_ptr<ROOT::RNTupleModel> model)

   // TODO(gparolini): consider using an arena allocator instead, since we know the precise lifetime

   // of the RNTuples we are going to handle (e.g. we can reset the arena at every source)

   : fDestination(std::move(destination)),

     fPageAlloc(std::make_unique<ROOT::Internal::RPageAllocatorHeap>()),

     fModel(std::move(model))

{

   R__ASSERT(fDestination);


   // A task group is only created when ROOT was built with implicit multi-threading support and it is
   // enabled at runtime; otherwise fTaskGroup is left as default-constructed.
#ifdef R__USE_IMT

   if (ROOT::IsImplicitMTEnabled())

      fTaskGroup = TTaskGroup();

#endif

}


// Convenience constructor: creates a merger without a model by delegating to the two-argument constructor
// with a null model.
RNTupleMerger::RNTupleMerger(std::unique_ptr<ROOT::Internal::RPagePersistentSink> destination)

   : RNTupleMerger(std::move(destination), nullptr)

{

}


// Merges all `sources`, in order, into the destination sink and commits the resulting dataset.
//
// \param sources      Page sources to merge; each one is attached by this function.
// \param mergeOptsIn  Merge options. If no compression is given, the destination's compression is used;
//                     a compression different from the destination's is rejected.
// \return Success, or an error if the options are inconsistent with the destination, if a source has an
//         unsupported version (and the version behavior is not "warn"), if a source is incompatible with the
//         destination and the error behavior is not kSkip, or if merging the pages of a source fails.
//
// With ENTupleMergeErrBehavior::kSkip, a source that is rejected by one of the schema checks is logged and
// skipped, and merging goes on with the next source.
ROOT::RResult<void> RNTupleMerger::Merge(std::span<RPageSource *> sources, const RNTupleMergeOptions &mergeOptsIn)
{
   RNTupleMergeOptions mergeOpts = mergeOptsIn;

   assert(fDestination);

   // Set compression settings if unset and verify it's compatible with the sink
   {
      const auto dstCompSettings = fDestination->GetWriteOptions().GetCompression();
      if (!mergeOpts.fCompressionSettings) {
         mergeOpts.fCompressionSettings = dstCompSettings;
      } else if (*mergeOpts.fCompressionSettings != dstCompSettings) {
         return R__FAIL(std::string("The compression given to RNTupleMergeOptions is different from that of the "
                                    "sink! (opts: ") +
                        std::to_string(*mergeOpts.fCompressionSettings) + ", sink: " + std::to_string(dstCompSettings) +
                        ") This is currently unsupported.");
      }
   }

   // we should have a model if and only if the destination is initialized.
   if (!!fModel != fDestination->IsInitialized()) {
      return R__FAIL(
         "passing an already-initialized destination to RNTupleMerger::Merge (i.e. trying to do incremental "
         "merging) can only be done by providing a valid ROOT::RNTupleModel when constructing the RNTupleMerger.");
   }

   RNTupleMergeData mergeData{sources, *fDestination, mergeOpts};
   mergeData.fNumDstEntries = mergeData.fDestination.GetNEntries();

   if (fModel) {
      // If this is an incremental merging, pre-fill the column id map with the existing destination ids.
      // Otherwise we would generate new output ids that may not match the ones in the destination!
      for (const auto &field : mergeData.fDstDescriptor.GetTopLevelFields()) {
         PrefillColumnMap(fDestination->GetDescriptor(), field, mergeData.fColumnIdMap);
      }
   }

   // How to react to a source that cannot be merged: skip it (with a warning) or abort the whole merge.
   // NOTE: the skip is spelled out as a plain `continue` at each site on purpose. Wrapping it in a
   // `do { ... } while (0)` macro makes the `continue` bind to that do-while instead of the loop over the
   // sources, so the "skipped" source would still be processed.
   const bool skipOnError = mergeOpts.fErrBehavior == ENTupleMergeErrBehavior::kSkip;

   // Merge main loop
   for (RPageSource *source : sources) {
      // We need to make sure the streamer info from the source files is loaded otherwise we may not be able
      // to build the streamer info of user-defined types unless we have their dictionaries available.
      source->LoadStreamerInfo();

      source->Attach(RNTupleSerializer::EDescriptorDeserializeMode::kForWriting);
      auto srcDescriptor = source->GetSharedDescriptorGuard();
      mergeData.fSrcDescriptor = &srcDescriptor.GetRef();

      if (mergeData.fSrcDescriptor->GetVersion() > ROOT::RNTuple::GetCurrentVersion()) {
         if (mergeOpts.fVersionBehavior == ENTupleMergeVersionBehavior::kWarnOnHigherVersion) {
            R__LOG_WARNING(NTupleMergeLog())
               << "RNTuple '" << mergeData.fSrcDescriptor->GetName()
               << "' has a higher format version than the latest supported by this version "
                  "of ROOT. Merging will work but some features may be dropped.";
         } else {
            return R__FAIL("RNTuple '" + mergeData.fSrcDescriptor->GetName() +
                           "' has a higher format version than the latest supported by this version. Refusing to "
                           "merge, since RNTupleMergeOptions::fVersionBehavior is set to AbortOnHigherVersion.");
         }
      }

      // Create sink from the input model if not initialized
      if (!fModel) {
         fModel = fDestination->InitFromDescriptor(srcDescriptor.GetRef(), false /* copyClusters */);
      }

      for (const auto &extraTypeInfoDesc : srcDescriptor->GetExtraTypeInfoIterable())
         fDestination->UpdateExtraTypeInfo(extraTypeInfoDesc);

      auto descCmpRes = CompareDescriptorStructure(mergeData.fDstDescriptor, srcDescriptor.GetRef());
      if (!descCmpRes) {
         const std::string msg = std::string("Source RNTuple has an incompatible schema with the destination:\n") +
                                 descCmpRes.GetError()->GetReport();
         if (!skipOnError)
            return R__FAIL(msg);
         R__LOG_WARNING(NTupleMergeLog()) << "Skipping RNTuple due to: " << msg;
         continue;
      }
      auto descCmp = descCmpRes.Unwrap();

      // If the current source is missing some fields and we're not in Union mode, error
      // (if we are in Union mode, MergeSourceClusters will fill the missing fields with default values).
      if (mergeOpts.fMergingMode != ENTupleMergingMode::kUnion && !descCmp.fExtraDstFields.empty()) {
         std::string msg = "Source RNTuple is missing the following fields:";
         for (const auto *field : descCmp.fExtraDstFields) {
            msg += "\n  " + field->GetFieldName() + " : " + field->GetTypeName();
         }
         if (!skipOnError)
            return R__FAIL(msg);
         R__LOG_WARNING(NTupleMergeLog()) << "Skipping RNTuple due to: " << msg;
         continue;
      }

      // handle extra src fields
      if (descCmp.fExtraSrcFields.size()) {
         if (mergeOpts.fMergingMode == ENTupleMergingMode::kUnion) {
            // late model extension for all fExtraSrcFields in Union mode
            auto res = ExtendDestinationModel(descCmp.fExtraSrcFields, *fModel, mergeData, descCmp.fCommonFields);
            if (!res)
               return R__FORWARD_ERROR(res);
         } else if (mergeOpts.fMergingMode == ENTupleMergingMode::kStrict) {
            // If the current source has extra fields and we're in Strict mode, error
            std::string msg = "Source RNTuple has extra fields that the destination RNTuple doesn't have:";
            for (const auto *field : descCmp.fExtraSrcFields) {
               msg += "\n  " + field->GetFieldName() + " : " + field->GetTypeName();
            }
            if (!skipOnError)
               return R__FAIL(msg);
            R__LOG_WARNING(NTupleMergeLog()) << "Skipping RNTuple due to: " << msg;
            continue;
         }
      }

      // handle extra dst fields & common fields
      auto columnInfos = GatherColumnInfos(descCmp, srcDescriptor.GetRef(), mergeData);
      auto res = MergeSourceClusters(*source, columnInfos.fCommonColumns, columnInfos.fExtraDstColumns, mergeData);
      if (!res)
         return R__FORWARD_ERROR(res);
   } // end loop over sources

   if (fDestination->GetNEntries() == 0)
      R__LOG_WARNING(NTupleMergeLog()) << "Output RNTuple '" << fDestination->GetNTupleName() << "' has no entries.";

   // Commit the output
   fDestination->CommitClusterGroup();
   fDestination->CommitDataset();

   return ROOT::RResult<void>::Success();
}


fBuffer
fBuffer
Definition Converters.cxx:3162

RClusterPool.hxx

RColumnElementBase.hxx

RError.hxx

R__FORWARD_ERROR
#define R__FORWARD_ERROR(res)
Short-hand to return an RResult<T> in an error state (i.e. after checking).
Definition RError.hxx:303

R__FAIL
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:299

R__LOG_WARNING
#define R__LOG_WARNING(...)
Definition RLogger.hxx:357

R__LOG_ERROR
#define R__LOG_ERROR(...)
Definition RLogger.hxx:356

R__LOG_INFO
#define R__LOG_INFO(...)
Definition RLogger.hxx:358

MakeUninitArray
std::unique_ptr< T[]> MakeUninitArray(std::size_t size)
Make an array of default-initialized elements.
Definition RNTupleUtils.hxx:42

RNTupleDescriptor.hxx

ColumnInMemoryType
static std::optional< std::type_index > ColumnInMemoryType(std::string_view fieldType, ENTupleColumnType onDiskType)
Definition RNTupleMerger.cxx:1054

CompareDescriptorStructure
static ROOT::RResult< RDescriptorsComparison > CompareDescriptorStructure(const ROOT::RNTupleDescriptor &dst, const ROOT::RNTupleDescriptor &src)
Compares the top level fields of dst and src and determines whether they can be merged or not.
Definition RNTupleMerger.cxx:474

GenerateZeroPagesForColumns
static ROOT::RResult< void > GenerateZeroPagesForColumns(size_t nEntriesToGenerate, std::span< const RColumnMergeInfo > columns, RSealedPageMergeData &sealedPageData, ROOT::Internal::RPageAllocator &pageAlloc, const ROOT::RNTupleDescriptor &dstDescriptor, const RNTupleMergeData &mergeData)
Definition RNTupleMerger.cxx:744

ParseOptionErrBehavior
static std::optional< ENTupleMergeErrBehavior > ParseOptionErrBehavior(const TString &opts)
Definition RNTupleMerger.cxx:100

NTupleMergeLog
static ROOT::RLogChannel & NTupleMergeLog()
Definition RNTupleMerger.cxx:54

SKIP_OR_ABORT
#define SKIP_OR_ABORT(errMsg)

ParseStringOption
static std::optional< T > ParseStringOption(const TString &opts, const char *pattern, std::initializer_list< std::pair< const char *, T > > validValues)
Definition RNTupleMerger.cxx:73

IsSplitOrUnsplitVersionOf
static bool IsSplitOrUnsplitVersionOf(ENTupleColumnType a, ENTupleColumnType b)
Definition RNTupleMerger.cxx:444

AddColumnsFromField
static void AddColumnsFromField(std::vector< RColumnMergeInfo > &columns, const ROOT::RNTupleDescriptor &srcDesc, RNTupleMergeData &mergeData, const ROOT::RFieldDescriptor &srcFieldDesc, const ROOT::RFieldDescriptor &dstFieldDesc, const std::string &prefix="")
Definition RNTupleMerger.cxx:1089

ParseOptionMergingMode
static std::optional< ENTupleMergingMode > ParseOptionMergingMode(const TString &opts)
Definition RNTupleMerger.cxx:90

PrefillColumnMap
static void PrefillColumnMap(const ROOT::RNTupleDescriptor &desc, const ROOT::RFieldDescriptor &fieldDesc, ColumnIdMap_t &colIdMap, const std::string &prefix="")
Definition RNTupleMerger.cxx:1187

GatherColumnInfos
static RColumnInfoGroup GatherColumnInfos(const RDescriptorsComparison &descCmp, const ROOT::RNTupleDescriptor &srcDesc, RNTupleMergeData &mergeData)
Definition RNTupleMerger.cxx:1168

ExtendDestinationModel
static ROOT::RResult< void > ExtendDestinationModel(std::span< const ROOT::RFieldDescriptor * > newFields, ROOT::RNTupleModel &dstModel, RNTupleMergeData &mergeData, std::vector< RCommonField > &commonFields)
Definition RNTupleMerger.cxx:675

ParseOptionVersionBehavior
static std::optional< ENTupleMergeVersionBehavior > ParseOptionVersionBehavior(const TString &opts)
Definition RNTupleMerger.cxx:109

BeginsWithDelimitedWord
static bool BeginsWithDelimitedWord(const TString &str, const char *word)
Definition RNTupleMerger.cxx:62

RNTupleMerger.hxx

RNTupleModel.hxx

RNTupleSerialize.hxx

RNTupleTypes.hxx

RNTupleUtils.hxx

RNTupleZip.hxx

RNTuple.hxx

RPageAllocator.hxx

RPageStorageFile.hxx

RPageStorage.hxx

b
#define b(i)
Definition RSha256.hxx:100

f
#define f(i)
Definition RSha256.hxx:104

a
#define a(i)
Definition RSha256.hxx:99

size
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix

Ssiz_t
int Ssiz_t
String size (currently int).
Definition RtypesCore.h:81

operator<<
TBuffer & operator<<(TBuffer &buf, const Tmpl *obj)
Definition TBuffer.h:397

R__ASSERT
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125

TFileMergeInfo.h

TFile.h

name
char name[80]
Definition TGX11.cxx:148

TKey.h

err
Double_t err
Definition TMultiDimFit.cxx:1939

operator()
TRObject operator()(const T1 &t1) const
Definition TRFunctionImport__oprtr.h:14

TROOT.h

Long64_t

ENTupleColumnType
The available trivial, native content types of a column.

ROOT::Experimental::Internal::RNTupleMerger
Given a set of RPageSources merge them into an RPagePersistentSink, optionally changing their compres...
Definition RNTupleMerger.hxx:111

ROOT::Experimental::Internal::RNTupleMerger::MergeCommonColumns
ROOT::RResult< void > MergeCommonColumns(ROOT::Internal::RClusterPool &clusterPool, const ROOT::RClusterDescriptor &clusterDesc, std::span< const RColumnMergeInfo > commonColumns, const ROOT::Internal::RCluster::ColumnSet_t &commonColumnSet, std::size_t nCommonColumnsInCluster, RSealedPageMergeData &sealedPageData, const RNTupleMergeData &mergeData, ROOT::Internal::RPageAllocator &pageAlloc)
Definition RNTupleMerger.cxx:827

ROOT::Experimental::Internal::RNTupleMerger::fTaskGroup
std::optional< TTaskGroup > fTaskGroup
Definition RNTupleMerger.hxx:116

ROOT::Experimental::Internal::RNTupleMerger::fModel
std::unique_ptr< ROOT::RNTupleModel > fModel
Definition RNTupleMerger.hxx:117

ROOT::Experimental::Internal::RNTupleMerger::RNTupleMerger
RNTupleMerger(std::unique_ptr< ROOT::Internal::RPagePersistentSink > destination, std::unique_ptr< ROOT::RNTupleModel > model)
Creates a RNTupleMerger with the given destination.
Definition RNTupleMerger.cxx:1206

ROOT::Experimental::Internal::RNTupleMerger::MergeSourceClusters
ROOT::RResult< void > MergeSourceClusters(ROOT::Internal::RPageSource &source, std::span< const RColumnMergeInfo > commonColumns, std::span< const RColumnMergeInfo > extraDstColumns, RNTupleMergeData &mergeData)
Definition RNTupleMerger.cxx:983

ROOT::Experimental::Internal::RNTupleMerger::fDestination
std::unique_ptr< ROOT::Internal::RPagePersistentSink > fDestination
Definition RNTupleMerger.hxx:114

ROOT::Experimental::Internal::RNTupleMerger::fPageAlloc
std::unique_ptr< ROOT::Internal::RPageAllocator > fPageAlloc
Definition RNTupleMerger.hxx:115

ROOT::Experimental::Internal::RNTupleMerger::Merge
RResult< void > Merge(std::span< ROOT::Internal::RPageSource * > sources, const RNTupleMergeOptions &mergeOpts=RNTupleMergeOptions())
Merge a given set of sources into the destination.
Definition RNTupleMerger.cxx:1227

ROOT::Experimental::TTaskGroup
A class to manage the asynchronous execution of work items.
Definition TTaskGroup.hxx:26

ROOT::Internal::RClusterPool
Manages a set of clusters containing compressed and packed pages.
Definition RClusterPool.hxx:55

ROOT::Internal::RClusterPool::GetCluster
RCluster * GetCluster(ROOT::DescriptorId_t clusterId, const RCluster::ColumnSet_t &physicalColumns)
Returns the requested cluster either from the pool or, in case of a cache miss, lets the I/O thread l...
Definition RClusterPool.cxx:178

ROOT::Internal::RCluster
An in-memory subset of the packed and compressed pages of a cluster.
Definition RCluster.hxx:147

ROOT::Internal::RCluster::ColumnSet_t
std::unordered_set< ROOT::DescriptorId_t > ColumnSet_t
Definition RCluster.hxx:149

ROOT::Internal::RCluster::GetOnDiskPage
const ROnDiskPage * GetOnDiskPage(const ROnDiskPage::Key &key) const
Definition RCluster.cxx:30

ROOT::Internal::RColumnElementBase
A column element encapsulates the translation between basic C++ types and their column representation...
Definition RColumnElementBase.hxx:51

ROOT::Internal::RColumnElementBase::GetColumnTypeName
static const char * GetColumnTypeName(ROOT::ENTupleColumnType type)
Definition RColumnElement.cxx:69

ROOT::Internal::RColumnElementBase::Generate
static std::unique_ptr< RColumnElementBase > Generate(ROOT::ENTupleColumnType type)
If CppT == void, use the default C++ type for the given column type.
Definition RColumnElementBase.hxx:142

ROOT::Internal::RColumnElementBase::GetIdentifier
virtual RIdentifier GetIdentifier() const =0

ROOT::Internal::RColumnElementBase::GetPackedSize
std::size_t GetPackedSize(std::size_t nElements=1U) const
Definition RColumnElementBase.hxx:127

ROOT::Internal::RColumnIndex
The in-memory representation of a 32bit or 64bit on-disk index column.
Definition RNTupleTypes.hxx:300

ROOT::Internal::RColumnSwitch
Holds the index and the tag of a kSwitch column.
Definition RNTupleTypes.hxx:330

ROOT::Internal::RNTupleCompressor::Zip
static std::size_t Zip(const void *from, std::size_t nbytes, int compression, void *to)
Returns the size of the compressed data, written into the provided output buffer.
Definition RNTupleZip.hxx:47

ROOT::Internal::RNTupleDecompressor::Unzip
static void Unzip(const void *from, size_t nbytes, size_t dataLen, void *to)
The nbytes parameter provides the size of the from buffer.
Definition RNTupleZip.hxx:105

ROOT::Internal::RNTupleSerializer
A helper class for serializing and deserialization of the RNTuple binary format.
Definition RNTupleSerialize.hxx:64

ROOT::Internal::RNTupleSerializer::EDescriptorDeserializeMode::kRaw
@ kRaw
Deserializes the descriptor as-is without performing any additional fixup.
Definition RNTupleSerialize.hxx:301

ROOT::Internal::RNTupleSerializer::EDescriptorDeserializeMode::kForWriting
@ kForWriting
Deserializes the descriptor and performs fixup on the suppressed column ranges.
Definition RNTupleSerialize.hxx:304

ROOT::Internal::RPageAllocatorHeap
Uses standard C++ memory allocation for the column data pages.
Definition RPageAllocator.hxx:59

ROOT::Internal::RPageAllocator
Abstract interface to allocate and release pages.
Definition RPageAllocator.hxx:36

ROOT::Internal::RPageAllocator::NewPage
virtual RPage NewPage(std::size_t elementSize, std::size_t nElements)=0
Reserves memory large enough to hold nElements of the given size.

ROOT::Internal::RPageSink
Abstract interface to write data into an ntuple.
Definition RPageStorage.hxx:256

ROOT::Internal::RPageSink::GetWriteOptions
const ROOT::RNTupleWriteOptions & GetWriteOptions() const
Returns the sink's write options.
Definition RPageStorage.hxx:305

ROOT::Internal::RPageSink::CommitCluster
virtual std::uint64_t CommitCluster(ROOT::NTupleSize_t nNewEntries)
Finalize the current cluster and create a new one for the following data.
Definition RPageStorage.hxx:379

ROOT::Internal::RPageSink::UpdateSchema
virtual void UpdateSchema(const ROOT::Internal::RNTupleModelChangeset &changeset, ROOT::NTupleSize_t firstEntry)=0
Incorporate incremental changes to the model into the ntuple descriptor.

ROOT::Internal::RPageSink::SealPage
RSealedPage SealPage(const ROOT::Internal::RPage &page, const ROOT::Internal::RColumnElementBase &element)
Helper for streaming a page.
Definition RPageStorage.cxx:757

ROOT::Internal::RPageSink::CommitSealedPageV
virtual void CommitSealedPageV(std::span< RPageStorage::RSealedPageGroup > ranges)=0
Write a vector of preprocessed pages to storage. The corresponding columns must have been added befor...

ROOT::Internal::RPageSink::GetNEntries
virtual ROOT::NTupleSize_t GetNEntries() const =0

ROOT::Internal::RPageSourceFile
Storage provider that reads ntuple pages from a file.
Definition RPageStorageFile.hxx:124

ROOT::Internal::RPageSourceFile::CreateFromAnchor
static std::unique_ptr< RPageSourceFile > CreateFromAnchor(const RNTuple &anchor, const ROOT::RNTupleReadOptions &options=ROOT::RNTupleReadOptions())
Used from the RNTuple class to build a datasource if the anchor is already available.
Definition RPageStorageFile.cxx:389

ROOT::Internal::RPageSource
Abstract interface to read data from an ntuple.
Definition RPageStorage.hxx:569

ROOT::Internal::RPageStorage
Common functionality of an ntuple storage for both reading and writing.
Definition RPageStorage.hxx:70

ROOT::Internal::RPageStorage::kNBytesPageChecksum
static constexpr std::size_t kNBytesPageChecksum
The page checksum is a 64bit xxhash3.
Definition RPageStorage.hxx:73

ROOT::Internal::RPageStorage::SealedPageSequence_t
std::deque< RSealedPage > SealedPageSequence_t
Definition RPageStorage.hxx:130

ROOT::Internal::RPage::GrowUnchecked
void * GrowUnchecked(std::uint32_t nElements)
Increases the number of elements in the page.
Definition RPage.hxx:149

ROOT::Internal::RProjectedFields::FieldMap_t
std::unordered_map< const ROOT::RFieldBase *, const ROOT::RFieldBase * > FieldMap_t
The map keys are the projected target fields, the map values are the backing source fields Note that ...
Definition RNTupleModel.hxx:66

ROOT::Internal::RProjectedFields::Add
RResult< void > Add(std::unique_ptr< ROOT::RFieldBase > field, const FieldMap_t &fieldMap)
Adds a new projected field.
Definition RNTupleModel.cxx:147

ROOT::RClusterDescriptor::RColumnRange::GetCompressionSettings
std::optional< std::uint32_t > GetCompressionSettings() const
Definition RNTupleDescriptor.hxx:334

ROOT::RClusterDescriptor
Metadata for RNTuple clusters.
Definition RNTupleDescriptor.hxx:280

ROOT::RClusterDescriptor::GetNEntries
ROOT::NTupleSize_t GetNEntries() const
Definition RNTupleDescriptor.hxx:515

ROOT::RClusterDescriptor::GetId
ROOT::DescriptorId_t GetId() const
Definition RNTupleDescriptor.hxx:513

ROOT::RClusterDescriptor::GetPageRange
const RPageRange & GetPageRange(ROOT::DescriptorId_t physicalId) const
Definition RNTupleDescriptor.hxx:517

ROOT::RClusterDescriptor::ContainsColumn
bool ContainsColumn(ROOT::DescriptorId_t physicalId) const
Definition RNTupleDescriptor.hxx:520

ROOT::RClusterDescriptor::GetColumnRange
const RColumnRange & GetColumnRange(ROOT::DescriptorId_t physicalId) const
Definition RNTupleDescriptor.hxx:516

ROOT::RClusterDescriptor::GetFirstEntryIndex
ROOT::NTupleSize_t GetFirstEntryIndex() const
Definition RNTupleDescriptor.hxx:514

ROOT::RException
Base class for all ROOT issued exceptions.
Definition RError.hxx:78

ROOT::RFieldDescriptor
Metadata stored for every field of an RNTuple.
Definition RNTupleDescriptor.hxx:120

ROOT::RFieldDescriptor::GetLogicalColumnIds
const std::vector< ROOT::DescriptorId_t > & GetLogicalColumnIds() const
Definition RNTupleDescriptor.hxx:189

ROOT::RFieldDescriptor::GetStructure
ROOT::ENTupleStructure GetStructure() const
Definition RNTupleDescriptor.hxx:185

ROOT::RFieldDescriptor::GetLinkIds
const std::vector< ROOT::DescriptorId_t > & GetLinkIds() const
Definition RNTupleDescriptor.hxx:188

ROOT::RFieldDescriptor::GetParentId
ROOT::DescriptorId_t GetParentId() const
Definition RNTupleDescriptor.hxx:186

ROOT::RFieldDescriptor::GetNRepetitions
std::uint64_t GetNRepetitions() const
Definition RNTupleDescriptor.hxx:184

ROOT::RFieldDescriptor::IsProjectedField
bool IsProjectedField() const
Definition RNTupleDescriptor.hxx:192

ROOT::RFieldDescriptor::GetProjectionSourceId
ROOT::DescriptorId_t GetProjectionSourceId() const
Definition RNTupleDescriptor.hxx:187

ROOT::RFieldDescriptor::GetFieldName
const std::string & GetFieldName() const
Definition RNTupleDescriptor.hxx:180

ROOT::RFieldDescriptor::GetTypeName
const std::string & GetTypeName() const
Definition RNTupleDescriptor.hxx:182

ROOT::RLogChannel
A log configuration for a channel, e.g.
Definition RLogger.hxx:97

ROOT::RNTupleDescriptor
The on-storage metadata of an RNTuple.
Definition RNTupleDescriptor.hxx:699

ROOT::RNTupleDescriptor::GetColumnDescriptor
const RColumnDescriptor & GetColumnDescriptor(ROOT::DescriptorId_t columnId) const
Definition RNTupleDescriptor.hxx:844

ROOT::RNTupleDescriptor::FindNextClusterId
ROOT::DescriptorId_t FindNextClusterId(ROOT::DescriptorId_t clusterId) const
Definition RNTupleDescriptor.cxx:551

ROOT::RNTupleDescriptor::GetFieldDescriptor
const RFieldDescriptor & GetFieldDescriptor(ROOT::DescriptorId_t fieldId) const
Definition RNTupleDescriptor.hxx:840

ROOT::RNTupleDescriptor::GetName
const std::string & GetName() const
Definition RNTupleDescriptor.hxx:882

ROOT::RNTupleDescriptor::FindClusterId
ROOT::DescriptorId_t FindClusterId(ROOT::NTupleSize_t entryIdx) const
Definition RNTupleDescriptor.cxx:499

ROOT::RNTupleDescriptor::GetVersion
std::uint64_t GetVersion() const
Definition RNTupleDescriptor.hxx:834

ROOT::RNTupleDescriptor::GetClusterDescriptor
const RClusterDescriptor & GetClusterDescriptor(ROOT::DescriptorId_t clusterId) const
Definition RNTupleDescriptor.hxx:852

ROOT::RNTupleDescriptor::FindFieldId
ROOT::DescriptorId_t FindFieldId(std::string_view fieldName, ROOT::DescriptorId_t parentId) const
Definition RNTupleDescriptor.cxx:362

ROOT::RNTupleDescriptor::GetTopLevelFields
RFieldDescriptorIterable GetTopLevelFields() const
Definition RNTupleDescriptor.cxx:1430

ROOT::RNTupleDescriptor::GetQualifiedFieldName
std::string GetQualifiedFieldName(ROOT::DescriptorId_t fieldId) const
Walks up the parents of the field ID and returns a field name of the form a.b.c.d In case of invalid ...
Definition RNTupleDescriptor.cxx:381

ROOT::RNTupleLocator::kTypePageZero
@ kTypePageZero
Definition RNTupleTypes.hxx:240

ROOT::RNTupleModel
The RNTupleModel encapsulates the schema of an RNTuple.
Definition RNTupleModel.hxx:137

ROOT::RNTupleModel::Unfreeze
void Unfreeze()
Transitions an RNTupleModel from the frozen state back to the building state, invalidating all previo...
Definition RNTupleModel.cxx:614

ROOT::RNTupleModel::Freeze
void Freeze()
Transitions an RNTupleModel from the building state to the frozen state, disabling adding additional ...
Definition RNTupleModel.cxx:631

ROOT::RNTupleModel::GetConstField
const ROOT::RFieldBase & GetConstField(std::string_view fieldName) const
Definition RNTupleModel.cxx:500

ROOT::RNTupleWriteOptions
Common user-tunable settings for storing RNTuples.
Definition RNTupleWriteOptions.hxx:181

ROOT::RNTupleWriteOptions::GetEnablePageChecksums
bool GetEnablePageChecksums() const
Definition RNTupleWriteOptions.hxx:251

ROOT::RNTupleWriteOptions::SetCompression
void SetCompression(std::uint32_t val)
Definition RNTupleWriteOptions.hxx:218

ROOT::RNTuple
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:67

ROOT::RNTuple::Merge
Long64_t Merge(TCollection *input, TFileMergeInfo *mergeInfo)
RNTuple implements the hadd MergeFile interface. Merge this NTuple with the input list entries.
Definition RNTupleMerger.cxx:121

ROOT::RNTuple::GetCurrentVersion
static constexpr std::uint64_t GetCurrentVersion()
Returns the RNTuple version in the following form: Epoch: 2 most significant bytes Major: next 2 byte...
Definition RNTuple.hxx:89

ROOT::RResult< void >::ThrowOnError
void ThrowOnError()
Short-hand method to throw an exception in the case of errors.
Definition RError.hxx:289

ROOT::RResult
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:197

RPageSink::SealPage
RSealedPage SealPage(const ROOT::Internal::RPage &page, const ROOT::Internal::RColumnElementBase &element)
Helper for streaming a page.
Definition RPageStorage.cxx:757

RPageSource::UnsealPage
static RResult< ROOT::Internal::RPage > UnsealPage(const RSealedPage &sealedPage, const ROOT::Internal::RColumnElementBase &element, ROOT::Internal::RPageAllocator &pageAlloc)
Helper for unstreaming a page.
Definition RPageStorage.cxx:569

RPageStorage::kNBytesPageChecksum
static constexpr std::size_t kNBytesPageChecksum
The page checksum is a 64bit xxhash3.
Definition RPageStorage.hxx:73

TCollection
Collection abstract base class.
Definition TCollection.h:65

TCollection::GetEntries
virtual Int_t GetEntries() const
Definition TCollection.h:180

TDirectoryFile::FindKey
TKey * FindKey(const char *keyname) const override

TDirectoryFile::Get
TObject * Get(const char *namecycle) override
Return pointer to object identified by namecycle.

TFileMergeInfo
A class to pass information from the TFileMerger to the objects being merged.
Definition TFileMergeInfo.h:36

TFileMergeInfo::fOptions
TString fOptions
Additional text based option being passed down to customize the merge.
Definition TFileMergeInfo.h:47

TFile
A file, usually with extension .root, that stores data and code in the form of serialized objects in ...
Definition TFile.h:130

TFile::GetCompressionSettings
Int_t GetCompressionSettings() const
Definition TFile.h:489

TIter
Definition TCollection.h:237

TKey
Book space in a file, create I/O buffers, to fill them, (un)compress them.
Definition TKey.h:28

TKey::ReadObject
T * ReadObject()
To read an object (not deriving from TObject) from the file.
Definition TKey.h:105

TNamed::GetName
const char * GetName() const override
Returns name of object.
Definition TNamed.h:49

TObject
Mother of all ROOT objects.
Definition TObject.h:42

TObject::ClassName
virtual const char * ClassName() const
Returns name of class to which the object belongs.
Definition TObject.cxx:227

TString
Basic string class.
Definition TString.h:138

TString::Length
Ssiz_t Length() const
Definition TString.h:425

TString::kIgnoreCase
@ kIgnoreCase
Definition TString.h:285

TString::Contains
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition TString.h:641

TString::Index
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition TString.h:660

bool

double

x
Double_t x[n]
Definition legend1.C:17

ex
Double_t ex[n]
Definition legend1.C:17

ROOT::Experimental::Internal
Definition RDrawable.hxx:34

ROOT::Experimental::Internal::ENTupleMergingMode::kStrict
@ kStrict
The merger will refuse to merge any 2 RNTuples whose schema doesn't match exactly.
Definition RNTupleMerger.hxx:45

ROOT::Experimental::Internal::ENTupleMergingMode::kUnion
@ kUnion
The merger will update the output model to include all columns from all sources.
Definition RNTupleMerger.hxx:48

ROOT::Experimental::Internal::ENTupleMergingMode::kFilter
@ kFilter
The merger will discard all columns that aren't present in the prototype model (i....
Definition RNTupleMerger.hxx:43

ROOT::Experimental::Internal::ENTupleMergeVersionBehavior::kAbortOnHigherVersion
@ kAbortOnHigherVersion
The merger will refuse to merge RNTuples with higher versions than the latest supported by this ROOT ...
Definition RNTupleMerger.hxx:71

ROOT::Experimental::Internal::ENTupleMergeVersionBehavior::kWarnOnHigherVersion
@ kWarnOnHigherVersion
The merger will emit a warning when merging RNTuples with higher version than the latest supported by...
Definition RNTupleMerger.hxx:68

ROOT::Experimental::Internal::ENTupleMergeErrBehavior::kAbort
@ kAbort
The merger will abort merging as soon as an error is encountered.
Definition RNTupleMerger.hxx:59

ROOT::Experimental::Internal::ENTupleMergeErrBehavior::kSkip
@ kSkip
Upon errors, the merger will skip the current source and continue.
Definition RNTupleMerger.hxx:61

ROOT::Internal::MakeUninitArray
std::unique_ptr< T[]> MakeUninitArray(std::size_t size)
Make an array of default-initialized elements.
Definition RNTupleUtils.hxx:42

ROOT::Internal::GetProjectedFieldsOfModel
RProjectedFields & GetProjectedFieldsOfModel(RNTupleModel &model)
Definition RNTupleModel.cxx:50

ROOT::Internal::GenerateColumnElement
std::unique_ptr< RColumnElementBase > GenerateColumnElement(std::type_index inMemoryType, ROOT::ENTupleColumnType onDiskType)

ROOT
Definition EExecutionPolicy.hxx:4

ROOT::IsImplicitMTEnabled
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition TROOT.cxx:669

ROOT::DescriptorId_t
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
Definition RNTupleTypes.hxx:133

ROOT::NTupleSize_t
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
Definition RNTupleTypes.hxx:129

ROOT::kInvalidDescriptorId
constexpr DescriptorId_t kInvalidDescriptorId
Definition RNTupleTypes.hxx:134

ROOT::ENTupleStructure::kStreamer
@ kStreamer
Definition RNTupleTypes.hxx:112

ROOT::ENTupleStructure::kVariant
@ kVariant
Definition RNTupleTypes.hxx:111

ROOT::ENTupleStructure::kCollection
@ kCollection
Definition RNTupleTypes.hxx:109

ROOT::ENTupleStructure::kPlain
@ kPlain
Definition RNTupleTypes.hxx:108

ROOT::ENTupleColumnType
ENTupleColumnType
Definition RNTupleTypes.hxx:63

ROOT::ENTupleColumnType::kInt32
@ kInt32
Definition RNTupleTypes.hxx:79

ROOT::ENTupleColumnType::kSplitReal64
@ kSplitReal64
Definition RNTupleTypes.hxx:87

ROOT::ENTupleColumnType::kInt64
@ kInt64
Definition RNTupleTypes.hxx:77

ROOT::ENTupleColumnType::kReal64
@ kReal64
Definition RNTupleTypes.hxx:74

ROOT::ENTupleColumnType::kSplitInt64
@ kSplitInt64
Definition RNTupleTypes.hxx:89

ROOT::ENTupleColumnType::kUInt32
@ kUInt32
Definition RNTupleTypes.hxx:80

ROOT::ENTupleColumnType::kInt16
@ kInt16
Definition RNTupleTypes.hxx:81

ROOT::ENTupleColumnType::kSplitIndex64
@ kSplitIndex64
Definition RNTupleTypes.hxx:85

ROOT::ENTupleColumnType::kSplitIndex32
@ kSplitIndex32
Definition RNTupleTypes.hxx:86

ROOT::ENTupleColumnType::kIndex32
@ kIndex32
Definition RNTupleTypes.hxx:67

ROOT::ENTupleColumnType::kIndex64
@ kIndex64
Definition RNTupleTypes.hxx:66

ROOT::ENTupleColumnType::kSplitInt32
@ kSplitInt32
Definition RNTupleTypes.hxx:91

ROOT::ENTupleColumnType::kUInt16
@ kUInt16
Definition RNTupleTypes.hxx:82

ROOT::ENTupleColumnType::kSplitUInt16
@ kSplitUInt16
Definition RNTupleTypes.hxx:94

ROOT::ENTupleColumnType::kUInt64
@ kUInt64
Definition RNTupleTypes.hxx:78

ROOT::ENTupleColumnType::kSplitInt16
@ kSplitInt16
Definition RNTupleTypes.hxx:93

ROOT::ENTupleColumnType::kSplitUInt64
@ kSplitUInt64
Definition RNTupleTypes.hxx:90

ROOT::ENTupleColumnType::kSwitch
@ kSwitch
Definition RNTupleTypes.hxx:70

ROOT::ENTupleColumnType::kReal32
@ kReal32
Definition RNTupleTypes.hxx:75

ROOT::ENTupleColumnType::kSplitReal32
@ kSplitReal32
Definition RNTupleTypes.hxx:88

ROOT::ENTupleColumnType::kSplitUInt32
@ kSplitUInt32
Definition RNTupleTypes.hxx:92

ROOT::Experimental::Internal::RColumnMergeInfo
Definition RNTupleMerger.cxx:389

ROOT::Experimental::Internal::RColumnMergeInfo::fOutputId
ROOT::DescriptorId_t fOutputId
Definition RNTupleMerger.cxx:397

ROOT::Experimental::Internal::RColumnMergeInfo::fColumnName
std::string fColumnName
Definition RNTupleMerger.cxx:393

ROOT::Experimental::Internal::RColumnMergeInfo::fParentFieldDescriptor
const ROOT::RFieldDescriptor * fParentFieldDescriptor
Definition RNTupleMerger.cxx:401

ROOT::Experimental::Internal::RColumnMergeInfo::fInputId
ROOT::DescriptorId_t fInputId
Definition RNTupleMerger.cxx:395

ROOT::Experimental::Internal::RColumnMergeInfo::fInMemoryType
std::optional< std::type_index > fInMemoryType
Definition RNTupleMerger.cxx:400

ROOT::Experimental::Internal::RColumnMergeInfo::fParentNTupleDescriptor
const ROOT::RNTupleDescriptor * fParentNTupleDescriptor
Definition RNTupleMerger.cxx:402

ROOT::Experimental::Internal::RColumnMergeInfo::fColumnType
ENTupleColumnType fColumnType
Definition RNTupleMerger.cxx:398

ROOT::Experimental::Internal::RNTupleMergeData
Definition RNTupleMerger.cxx:406

ROOT::Experimental::Internal::RNTupleMergeData::fSrcDescriptor
const ROOT::RNTupleDescriptor * fSrcDescriptor
Definition RNTupleMerger.cxx:411

ROOT::Experimental::Internal::RNTupleMergeData::fColumnIdMap
ColumnIdMap_t fColumnIdMap
Definition RNTupleMerger.cxx:414

ROOT::Experimental::Internal::RNTupleMergeData::RNTupleMergeData
RNTupleMergeData(std::span< RPageSource * > sources, RPageSink &destination, const RNTupleMergeOptions &mergeOpts)
Definition RNTupleMerger.cxx:418

ROOT::Experimental::Internal::RNTupleMergeData::fSources
std::span< RPageSource * > fSources
Definition RNTupleMerger.cxx:407

ROOT::Experimental::Internal::RNTupleMergeData::fDestination
RPageSink & fDestination
Definition RNTupleMerger.cxx:408

ROOT::Experimental::Internal::RNTupleMergeData::fNumDstEntries
ROOT::NTupleSize_t fNumDstEntries
Definition RNTupleMerger.cxx:416

ROOT::Experimental::Internal::RNTupleMergeData::fMergeOpts
const RNTupleMergeOptions & fMergeOpts
Definition RNTupleMerger.cxx:409

ROOT::Experimental::Internal::RNTupleMergeData::fColumns
std::vector< RColumnMergeInfo > fColumns
Definition RNTupleMerger.cxx:413

ROOT::Experimental::Internal::RNTupleMergeData::fDstDescriptor
const ROOT::RNTupleDescriptor & fDstDescriptor
Definition RNTupleMerger.cxx:410

ROOT::Experimental::Internal::RNTupleMergeOptions
Set of merging options to pass to RNTupleMerger.
Definition RNTupleMerger.hxx:88

ROOT::Experimental::Internal::RNTupleMergeOptions::fMergingMode
ENTupleMergingMode fMergingMode
Determines how the merging treats sources with different models (see ENTupleMergingMode).
Definition RNTupleMerger.hxx:94

ROOT::Experimental::Internal::RNTupleMergeOptions::fVersionBehavior
ENTupleMergeVersionBehavior fVersionBehavior
Determines how the Merge function behaves depending on the RNTuple sources' version.
Definition RNTupleMerger.hxx:98

ROOT::Experimental::Internal::RNTupleMergeOptions::fErrBehavior
ENTupleMergeErrBehavior fErrBehavior
Determines how the Merge function behaves upon merging errors.
Definition RNTupleMerger.hxx:96

ROOT::Experimental::Internal::RNTupleMergeOptions::fCompressionSettings
std::optional< std::uint32_t > fCompressionSettings
If fCompressionSettings is empty (the default), the merger will not change the compression of any of ...
Definition RNTupleMerger.hxx:92

ROOT::Experimental::Internal::RNTupleMergeOptions::fExtraVerbose
bool fExtraVerbose
If true, the merger will emit further diagnostics and information.
Definition RNTupleMerger.hxx:100

ROOT::Experimental::Internal::RSealedPageMergeData
Definition RNTupleMerger.cxx:424

ROOT::Experimental::Internal::RSealedPageMergeData::fGroups
std::vector< RPageStorage::RSealedPageGroup > fGroups
Definition RNTupleMerger.cxx:428

ROOT::Experimental::Internal::RSealedPageMergeData::fBuffers
std::vector< std::unique_ptr< std::byte[]> > fBuffers
Definition RNTupleMerger.cxx:429

ROOT::Experimental::Internal::RSealedPageMergeData::fPagesV
std::deque< RPageStorage::SealedPageSequence_t > fPagesV
Definition RNTupleMerger.cxx:427

ROOT::Internal::RNTupleModelChangeset
The incremental changes to a RNTupleModel.
Definition RNTupleModel.hxx:403

ROOT::Internal::RNTupleModelChangeset::AddField
void AddField(std::unique_ptr< ROOT::RFieldBase > field, std::string_view parentName="")
Definition RNTupleModel.cxx:256

ROOT::Internal::RNTupleModelChangeset::fAddedProjectedFields
std::vector< ROOT::RFieldBase * > fAddedProjectedFields
Points to the projected fields in fModel that were added as part of an updater transaction.
Definition RNTupleModel.hxx:408

ROOT::Internal::RNTupleModelChangeset::fAddedFields
std::vector< ROOT::RFieldBase * > fAddedFields
Points to the fields in fModel that were added as part of an updater transaction.
Definition RNTupleModel.hxx:406

ROOT::Internal::ROnDiskPage::Key
On-disk pages within a page source are identified by the column and page number.
Definition RCluster.hxx:50

ROOT::Internal::RPageStorage::RSealedPage
A sealed page contains the bytes of a page as written to storage (packed & compressed).
Definition RPageStorage.hxx:89

ROOT::Internal::RPageStorage::RSealedPage::VerifyChecksumIfEnabled
RResult< void > VerifyChecksumIfEnabled() const
Definition RPageStorage.cxx:84

ROOT::Internal::RPageStorage::RSealedPage::SetBuffer
void SetBuffer(const void *buffer)
Definition RPageStorage.hxx:108

ROOT::Internal::RPageStorage::RSealedPage::GetNElements
std::uint32_t GetNElements() const
Definition RPageStorage.hxx:118

ROOT::Internal::RPageStorage::RSealedPage::SetHasChecksum
void SetHasChecksum(bool hasChecksum)
Definition RPageStorage.hxx:122

ROOT::Internal::RPageStorage::RSealedPage::SetNElements
void SetNElements(std::uint32_t nElements)
Definition RPageStorage.hxx:119

ROOT::Internal::RPageStorage::RSealedPage::GetBufferSize
std::size_t GetBufferSize() const
Definition RPageStorage.hxx:115

ROOT::Internal::RPageStorage::RSealedPage::SetBufferSize
void SetBufferSize(std::size_t bufferSize)
Definition RPageStorage.hxx:116

ROOT::RCompressionSetting::EDefaults::kUseGeneralPurpose
@ kUseGeneralPurpose
Use the new recommended general-purpose setting; it is a best trade-off between compression ratio/dec...
Definition Compression.h:58

RPageSink::RSealPageConfig
Parameters for the SealPage() method.
Definition RPageStorage.hxx:333

RPageSink::RSealPageConfig::fWriteChecksum
bool fWriteChecksum
Adds a 8 byte little-endian xxhash3 checksum to the page payload.
Definition RPageStorage.hxx:340

RPageSink::RSealPageConfig::fCompressionSettings
std::uint32_t fCompressionSettings
Compression algorithm and level to apply.
Definition RPageStorage.hxx:337

RPageSink::RSealPageConfig::fBuffer
void * fBuffer
Location for sealed output. The memory buffer has to be large enough.
Definition RPageStorage.hxx:345

RPageSink::RSealPageConfig::fPage
const ROOT::Internal::RPage * fPage
Input page to be sealed.
Definition RPageStorage.hxx:334

RPageSink::RSealPageConfig::fElement
const ROOT::Internal::RColumnElementBase * fElement
Corresponds to the page's elements, for size calculation etc.
Definition RPageStorage.hxx:335

RPageStorage::RSealedPage
A sealed page contains the bytes of a page as written to storage (packed & compressed).
Definition RPageStorage.hxx:89

RPageStorage::RSealedPage::VerifyChecksumIfEnabled
RResult< void > VerifyChecksumIfEnabled() const
Definition RPageStorage.cxx:84

RPageStorage::RSealedPage::GetNElements
std::uint32_t GetNElements() const
Definition RPageStorage.hxx:118

RPageStorage::RSealedPage::GetBuffer
const void * GetBuffer() const
Definition RPageStorage.hxx:107

RPageStorage::RSealedPage::ChecksumIfEnabled
void ChecksumIfEnabled()
Definition RPageStorage.cxx:73

RPageStorage::RSealedPage::GetHasChecksum
bool GetHasChecksum() const
Definition RPageStorage.hxx:121

RPageStorage::RSealedPage::GetDataSize
std::size_t GetDataSize() const
Definition RPageStorage.hxx:110