doc/master/RNTupleDS_8cxx_source.html

/// \file RNTupleDS.cxx

/// \ingroup NTuple ROOT7

/// \author Jakob Blomer <jblomer@cern.ch>

/// \author Enrico Guiraud <enrico.guiraud@cern.ch>

/// \date 2018-10-04

/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback

/// is welcome!


/*************************************************************************

 * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers.               *

 * All rights reserved.                                                  *

 *                                                                       *

 * For the licensing terms see $ROOTSYS/LICENSE.                         *

 * For the list of contributors see $ROOTSYS/README/CREDITS.             *

 *************************************************************************/


#include <ROOT/RDF/RColumnReaderBase.hxx>

#include <ROOT/RDataFrame.hxx>

#include <ROOT/RField.hxx>

#include <ROOT/RFieldUtils.hxx>

#include <ROOT/RPageStorageFile.hxx>

#include <ROOT/RNTupleDescriptor.hxx>

#include <ROOT/RNTupleDS.hxx>

#include <ROOT/RNTupleUtil.hxx>

#include <ROOT/RPageStorage.hxx>

#include <string_view>


#include <TError.h>

#include <TSystem.h>


#include <cassert>

#include <memory>

#include <mutex>

#include <string>

#include <vector>

#include <typeinfo>

#include <utility>


// clang-format off

/**

* \class ROOT::Experimental::RNTupleDS

* \ingroup dataframe

* \brief The RDataSource implementation for RNTuple. It lets RDataFrame read RNTuple data.

*

* An RDataFrame that reads RNTuple data can be constructed using FromRNTuple().

*

* For each column containing an array or a collection, a corresponding column `#colname` is available to access

* `colname.size()` without reading and deserializing the collection values.

*

**/

// clang-format on


namespace ROOT {

namespace Experimental {

namespace Internal {


/// An artificial field that transforms an RNTuple column that contains the offset of collections into

/// collection sizes. It is used to provide the "number of" RDF columns for collections, e.g.

/// `R_rdf_sizeof_jets` for a collection named `jets`.

///

/// This field owns the collection offset field but instead of exposing the collection offsets it exposes

/// the collection sizes (offset(N+1) - offset(N)).  For the time being, we offer this functionality only in RDataFrame.

/// TODO(jblomer): consider providing a general set of useful virtual fields as part of RNTuple.


class RRDFCardinalityField final : public ROOT::RFieldBase {
protected:
   /// Returns a brand-new, unnamed cardinality field; the requested name is not used.
   std::unique_ptr<ROOT::RFieldBase> CloneImpl(std::string_view /* newName */) const final
   {
      return std::make_unique<RRDFCardinalityField>();
   }

   /// Initializes the std::size_t located at `where` to zero.
   void ConstructValue(void *where) const final
   {
      auto *count = static_cast<std::size_t *>(where);
      *count = 0;
   }

public:
   /// The field has an empty name and is registered as a non-simple leaf of type "std::size_t".
   RRDFCardinalityField() : ROOT::RFieldBase("", "std::size_t", ROOT::ENTupleStructure::kLeaf, false /* isSimple */) {}
   RRDFCardinalityField(RRDFCardinalityField &&other) = default;
   RRDFCardinalityField &operator=(RRDFCardinalityField &&other) = default;
   ~RRDFCardinalityField() = default;

   /// The field can be backed by any of the four (split) 32/64 bit index column types.
   const RColumnRepresentations &GetColumnRepresentations() const final
   {
      static RColumnRepresentations representations({{ENTupleColumnType::kSplitIndex64},
                                                     {ENTupleColumnType::kIndex64},
                                                     {ENTupleColumnType::kSplitIndex32},
                                                     {ENTupleColumnType::kIndex32}},
                                                    {});
      return representations;
   }

   /// Writing is not supported: the field is only used for reading.
   void GenerateColumns() final { assert(false && "Cardinality fields must only be used for reading"); }

   /// Creates the index column for reading, based on the on-disk information in `desc`.
   void GenerateColumns(const ROOT::RNTupleDescriptor &desc) final
   {
      GenerateColumnsImpl<ROOT::Internal::RColumnIndex>(desc);
   }

   size_t GetValueSize() const final { return sizeof(std::size_t); }
   size_t GetAlignment() const final { return alignof(std::size_t); }

   /// Stores in `to` (a std::size_t) the number of elements of the collection identified by globalIndex.
   void ReadGlobalImpl(ROOT::NTupleSize_t globalIndex, void *to) final
   {
      RNTupleLocalIndex firstItem; // filled by GetCollectionInfo() but not needed here
      ROOT::NTupleSize_t nItems;
      fPrincipalColumn->GetCollectionInfo(globalIndex, &firstItem, &nItems);
      auto *result = static_cast<std::size_t *>(to);
      *result = nItems;
   }

   /// Stores in `to` (a std::size_t) the number of elements of the collection identified by localIndex.
   void ReadInClusterImpl(ROOT::RNTupleLocalIndex localIndex, void *to) final
   {
      RNTupleLocalIndex firstItem; // filled by GetCollectionInfo() but not needed here
      ROOT::NTupleSize_t nItems;
      fPrincipalColumn->GetCollectionInfo(localIndex, &firstItem, &nItems);
      auto *result = static_cast<std::size_t *>(to);
      *result = nItems;
   }
};


/**

 * @brief An artificial field that provides the size of a fixed-size array

 *

 * This is the implementation of `R_rdf_sizeof_column` in case `column` contains

 * fixed-size arrays on disk.

 */


class RArraySizeField final : public ROOT::RFieldBase {
private:
   /// The fixed array length that every read reports.
   std::size_t fArrayLength;

   /// Returns a new, unnamed field reporting the same array length; the requested name is not used.
   std::unique_ptr<ROOT::RFieldBase> CloneImpl(std::string_view) const final
   {
      return std::make_unique<RArraySizeField>(fArrayLength);
   }

   /// Writing is not supported: the field is only used for reading.
   void GenerateColumns() final { assert(false && "RArraySizeField fields must only be used for reading"); }
   /// Nothing to do: the value is a constant held in memory, so no column is created.
   void GenerateColumns(const ROOT::RNTupleDescriptor &) final {}

   /// Stores the fixed array length in `to` (a std::size_t), independent of the entry index.
   void ReadGlobalImpl(ROOT::NTupleSize_t /*globalIndex*/, void *to) final
   {
      *static_cast<std::size_t *>(to) = fArrayLength;
   }

   /// Stores the fixed array length in `to` (a std::size_t), independent of the in-cluster index.
   void ReadInClusterImpl(RNTupleLocalIndex /*localIndex*/, void *to) final
   {
      *static_cast<std::size_t *>(to) = fArrayLength;
   }

public:
   /// \param arrayLength The number of elements of the fixed-size array this field reports.
   /// The constructor is explicit so that a plain integer never silently converts into a field.
   explicit RArraySizeField(std::size_t arrayLength)
      : ROOT::RFieldBase("", "std::size_t", ROOT::ENTupleStructure::kLeaf, false /* isSimple */),
        fArrayLength(arrayLength)
   {
   }

   RArraySizeField(const RArraySizeField &other) = delete;
   RArraySizeField &operator=(const RArraySizeField &other) = delete;
   RArraySizeField(RArraySizeField &&other) = default;
   RArraySizeField &operator=(RArraySizeField &&other) = default;
   ~RArraySizeField() final = default;

   /// Initializes the std::size_t located at `where` to zero.
   void ConstructValue(void *where) const final { *static_cast<std::size_t *>(where) = 0; }
   std::size_t GetValueSize() const final { return sizeof(std::size_t); }
   std::size_t GetAlignment() const final { return alignof(std::size_t); }
};


/// Every RDF column is represented by exactly one RNTuple field


class RNTupleColumnReader : public ROOT::Detail::RDF::RColumnReaderBase {
   using RFieldBase = ROOT::RFieldBase;
   using RPageSource = ROOT::Experimental::Internal::RPageSource;

   RNTupleDS *fDataSource;                     ///< The data source that owns this column reader
   RFieldBase *fProtoField;                    ///< The prototype field from which fField is cloned
   std::unique_ptr<RFieldBase> fField;         ///< The field backing the RDF column
   std::unique_ptr<RFieldBase::RValue> fValue; ///< The memory location used to read from fField
   std::shared_ptr<void> fValuePtr;            ///< Used to reuse the object created by fValue when reconnecting sources
   Long64_t fLastEntry = -1;                   ///< Last entry number that was read
   /// For chains, the logical entry and the physical entry in any particular file can be different.
   /// The entry offset stores the logical entry number (sum of all previous physical entries) when a file of the
   /// corresponding data source was opened.
   Long64_t fEntryOffset = 0;

public:
   /// \param ds The data source; its fFieldId2QualifiedName map is used to translate prototype field IDs.
   /// \param protoField The prototype field that is cloned on every Connect(); not owned by the reader.
   RNTupleColumnReader(RNTupleDS *ds, RFieldBase *protoField) : fDataSource(ds), fProtoField(protoField) {}
   ~RNTupleColumnReader() = default;

   /// Connect the field and its subfields to the page source.
   ///
   /// \param source The page source to read from
   /// \param entryOffset Subtracted from the entry number passed to GetImpl() before reading from `source`
   /// \throws std::runtime_error if the cloned field cannot be connected to the page source
   ///
   /// The reader must be disconnected (fLastEntry == -1) when this method is called.
   void Connect(RPageSource &source, Long64_t entryOffset)
   {
      assert(fLastEntry == -1);
      fEntryOffset = entryOffset;

      // Create a new, real field from the prototype and set its field ID in the context of the given page source
      fField = fProtoField->Clone(fProtoField->GetFieldName());
      {
         auto descGuard = source.GetSharedDescriptorGuard();
         // Set the on-disk field IDs for the field and the subfield.
         // fFieldId2QualifiedName is keyed by the on-disk IDs of the prototype fields; the qualified name is then
         // looked up in the descriptor of `source`.
         fField->SetOnDiskId(
            descGuard->FindFieldId(fDataSource->fFieldId2QualifiedName.at(fProtoField->GetOnDiskId())));
         auto iProto = fProtoField->cbegin();
         auto iReal = fField->begin();
         for (; iReal != fField->end(); ++iProto, ++iReal) {
            iReal->SetOnDiskId(descGuard->FindFieldId(fDataSource->fFieldId2QualifiedName.at(iProto->GetOnDiskId())));
         }
      }

      try {
         ROOT::Internal::CallConnectPageSourceOnField(*fField, source);
      } catch (const ROOT::RException &) {
         // The on-disk type is looked up in `source`, hence with the ID that is valid for `source`.
         auto onDiskType = source.GetSharedDescriptorGuard()->GetFieldDescriptor(fField->GetOnDiskId()).GetTypeName();
         // The column name, in contrast, must be looked up with the prototype field's ID: that is the key space of
         // fFieldId2QualifiedName. Using at() also avoids inserting an empty entry into the shared map.
         std::string msg = "RNTupleDS: invalid type \"" + fField->GetTypeName() + "\" for column \"" +
                           fDataSource->fFieldId2QualifiedName.at(fProtoField->GetOnDiskId()) +
                           "\" with on-disk type \"" + onDiskType + "\"";
         throw std::runtime_error(msg);
      }

      if (fValuePtr) {
         // When the reader reconnects to a new file, the fValuePtr is already set
         fValue = std::make_unique<RFieldBase::RValue>(fField->BindValue(fValuePtr));
         fValuePtr = nullptr;
      } else {
         // For the first file, create a new object for this field (reader)
         fValue = std::make_unique<RFieldBase::RValue>(fField->CreateValue());
      }
   }

   /// Drops the field and the value and resets the last-read entry.
   /// \param keepValue If true (and a value exists), the value's object is kept in fValuePtr so that the next
   /// Connect() binds to it instead of creating a new object.
   void Disconnect(bool keepValue)
   {
      if (fValue && keepValue) {
         fValuePtr = fValue->GetPtr<void>();
      }
      fValue = nullptr;
      fField = nullptr;
      fLastEntry = -1;
   }

   /// Returns a pointer to the value of the given entry. The value is only read if `entry` differs from the entry
   /// of the previous call.
   void *GetImpl(Long64_t entry) final
   {
      if (entry != fLastEntry) {
         fValue->Read(entry - fEntryOffset);
         fLastEntry = entry;
      }
      return fValue->GetPtr<void>().get();
   }
};


} // namespace Internal


/// Defaulted destructor, defined out of line in this translation unit.
RNTupleDS::~RNTupleDS() = default;


/// Registers the RDF columns for the on-disk field `fieldId` and, recursively, for its subfields.
///
/// \param desc The descriptor that contains the field
/// \param colName The RDF column name built up so far (empty for the root field)
/// \param fieldId The on-disk ID of the field to process
/// \param fieldInfos The enclosing collections / fixed-size arrays, outermost first. Taken by value so that every
///        recursion level works on its own copy.
/// \param convertToRVec Whether enclosing collections are exposed as ROOT::RVec (otherwise they keep their
///        on-disk type)
///
/// For every exposed column, the name, the type name and a prototype field are appended to fColumnNames,
/// fColumnTypes and fProtoFields, respectively.
void RNTupleDS::AddField(const ROOT::RNTupleDescriptor &desc, std::string_view colName, ROOT::DescriptorId_t fieldId,
                         std::vector<RNTupleDS::RFieldInfo> fieldInfos, bool convertToRVec)
{
   // As an example for the mapping of RNTuple fields to RDF columns, let's consider an RNTuple
   // using the following types and with a top-level field named "event" of type Event:
   //
   // struct Event {
   //    int id;
   //    std::vector<Track> tracks;
   // };
   // struct Track {
   //    std::vector<Hit> hits;
   // };
   // struct Hit {
   //    float x;
   //    float y;
   // };
   //
   // AddField() will be called from the constructor with the RNTuple root field (ENTupleStructure::kRecord).
   // From there, we recurse into the "event" sub field (also ENTupleStructure::kRecord) and further down the
   // tree of sub fields and expose the following RDF columns (the cardinality fields are created with the
   // type name "std::size_t"):
   //
   // "event"                             [Event]
   // "event.id"                          [int]
   // "event.tracks"                      [RVec<Track>]
   // "R_rdf_sizeof_event.tracks"         [std::size_t]
   // "event.tracks.hits"                 [RVec<RVec<Hit>>]
   // "R_rdf_sizeof_event.tracks.hits"    [RVec<std::size_t>]
   // "event.tracks.hits.x"               [RVec<RVec<float>>]
   // "R_rdf_sizeof_event.tracks.hits.x"  [RVec<std::size_t>]
   // "event.tracks.hits.y"               [RVec<RVec<float>>]
   // "R_rdf_sizeof_event.tracks.hits.y"  [RVec<std::size_t>]

   const auto &fieldDesc = desc.GetFieldDescriptor(fieldId);
   const auto &nRepetitions = fieldDesc.GetNRepetitions();
   if ((fieldDesc.GetStructure() == ROOT::ENTupleStructure::kCollection) || (nRepetitions > 0)) {
      // The field is a collection or a fixed-size array.
      // We open a new collection scope with fieldID being the inner most collection. E.g. for "event.tracks.hits",
      // fieldInfos would already contain the fieldID of "event.tracks"
      fieldInfos.emplace_back(fieldId, nRepetitions);
   }

   if (fieldDesc.GetStructure() == ROOT::ENTupleStructure::kCollection) {
      // Inner fields of collections are provided as projected collections of only that inner field,
      // E.g. we provide a projected collection RVec<RVec<float>> for "event.tracks.hits.x" in the example
      // above.
      bool representableAsRVec =
         convertToRVec && (fieldDesc.GetTypeName().substr(0, 19) == "ROOT::VecOps::RVec<" ||
                           fieldDesc.GetTypeName().substr(0, 12) == "std::vector<" || fieldDesc.GetTypeName() == "");
      const auto &f = *desc.GetFieldIterable(fieldDesc.GetId()).begin();
      AddField(desc, colName, f.GetId(), fieldInfos, representableAsRVec);

      // Note that at the end of the recursion, we handled the inner sub collections as well as the
      // collection as whole, so we are done.
      return;

   } else if (nRepetitions > 0) {
      // Fixed-size array, same logic as ROOT::RVec.
      const auto &f = *desc.GetFieldIterable(fieldDesc.GetId()).begin();
      AddField(desc, colName, f.GetId(), fieldInfos);
      return;
   } else if (fieldDesc.GetStructure() == ROOT::ENTupleStructure::kRecord) {
      // Inner fields of records are provided as individual RDF columns, e.g. "event.id"
      for (const auto &f : desc.GetFieldIterable(fieldDesc.GetId())) {
         auto innerName = colName.empty() ? f.GetFieldName() : (std::string(colName) + "." + f.GetFieldName());
         // Inner fields of collections of records are always exposed as ROOT::RVec
         AddField(desc, innerName, f.GetId(), fieldInfos);
      }
   }

   // The fieldID could be the root field or the class of fieldId might not be loaded.
   // In these cases, only the inner fields are exposed as RDF columns.
   auto fieldOrException = ROOT::RFieldBase::Create(fieldDesc.GetFieldName(), fieldDesc.GetTypeName());
   if (!fieldOrException)
      return;
   auto valueField = fieldOrException.Unwrap();
   valueField->SetOnDiskId(fieldId);
   for (auto &f : *valueField) {
      f.SetOnDiskId(desc.FindFieldId(f.GetFieldName(), f.GetParent()->GetOnDiskId()));
   }
   std::unique_ptr<ROOT::RFieldBase> cardinalityField;
   // Collections get the additional "number of" RDF column (e.g. "R_rdf_sizeof_tracks")
   if (!fieldInfos.empty()) {
      const auto &info = fieldInfos.back();
      if (info.fNRepetitions > 0) {
         cardinalityField = std::make_unique<ROOT::Experimental::Internal::RArraySizeField>(info.fNRepetitions);
      } else {
         cardinalityField = std::make_unique<ROOT::Experimental::Internal::RRDFCardinalityField>();
      }
      cardinalityField->SetOnDiskId(info.fFieldId);
   }

   // Wrap the value field (and the cardinality field) into the enclosing collections, from the inner-most to the
   // outer-most one.
   for (auto i = fieldInfos.rbegin(); i != fieldInfos.rend(); ++i) {
      const auto &fieldInfo = *i;

      if (fieldInfo.fNRepetitions > 0) {
         // Fixed-size array, read it as ROOT::RVec in memory
         valueField = std::make_unique<ROOT::RArrayAsRVecField>("", std::move(valueField), fieldInfo.fNRepetitions);
      } else {
         // Actual collection. A std::vector or ROOT::RVec gets added as a ROOT::RVec. All other collection types keep
         // their original type.
         if (convertToRVec) {
            valueField = std::make_unique<ROOT::RRVecField>("", std::move(valueField));
         } else {
            auto outerFieldType = desc.GetFieldDescriptor(fieldInfo.fFieldId).GetTypeName();
            valueField = ROOT::RFieldBase::Create("", outerFieldType).Unwrap();
         }
      }

      valueField->SetOnDiskId(fieldInfo.fFieldId);

      // Skip the inner-most collection level to construct the cardinality column
      // It's taken care of by the `if (!fieldInfos.empty())` scope above
      if (i != fieldInfos.rbegin()) {
         if (fieldInfo.fNRepetitions > 0) {
            // This collection level refers to a fixed-size array
            cardinalityField =
               std::make_unique<ROOT::RArrayAsRVecField>("", std::move(cardinalityField), fieldInfo.fNRepetitions);
         } else {
            // This collection level refers to an RVec
            cardinalityField = std::make_unique<ROOT::RRVecField>("", std::move(cardinalityField));
         }

         cardinalityField->SetOnDiskId(fieldInfo.fFieldId);
      }
   }

   if (cardinalityField) {
      fColumnNames.emplace_back("R_rdf_sizeof_" + std::string(colName));
      fColumnTypes.emplace_back(cardinalityField->GetTypeName());
      fProtoFields.emplace_back(std::move(cardinalityField));
   }

   // Note: fieldInfos is a by-value parameter that is not used past this point, so there is no need to
   // append the current field to it.
   fColumnNames.emplace_back(colName);
   fColumnTypes.emplace_back(valueField->GetTypeName());
   fProtoFields.emplace_back(std::move(valueField));
}


/// Constructs the data source from an already created page source.
/// The page source is attached, its descriptor is cloned into fPrincipalDescriptor, and the page source becomes the
/// first element of the staging area. Afterwards, the RDF columns are built starting from the root field.
RNTupleDS::RNTupleDS(std::unique_ptr<Internal::RPageSource> pageSource)
{
   pageSource->Attach();
   fPrincipalDescriptor = pageSource->GetSharedDescriptorGuard()->Clone();
   fStagingArea.emplace_back(std::move(pageSource));

   // The root field is not enclosed by any collection and has an empty column name
   std::vector<ROOT::Experimental::RNTupleDS::RFieldInfo> noEnclosingCollections;
   const auto rootFieldId = fPrincipalDescriptor.GetFieldZeroId();
   AddField(fPrincipalDescriptor, "", rootFieldId, noEnclosingCollections);
}


namespace {


/// Returns the read options shared by all page sources created in this file.
/// On first use, the cluster bunch size is taken from the environment variable ROOT_RNTUPLE_CLUSTERBUNCHSIZE
/// if it is set and non-empty; a value of 0 is replaced by 1.
const ROOT::RNTupleReadOptions &GetOpts()
{
   // The setting is global for now and has to be chosen through the environment before the program runs.
   // std::call_once makes sure that the options are configured exactly once, also with multiple threads.
   static ROOT::RNTupleReadOptions opts;
   static std::once_flag flag;
   std::call_once(flag, []() {
      const char *rawValue = gSystem->Getenv("ROOT_RNTUPLE_CLUSTERBUNCHSIZE");
      if (rawValue == nullptr || strlen(rawValue) == 0)
         return;

      auto bunchSize = std::stoul(std::string{rawValue});
      if (bunchSize == 0)
         bunchSize = 1;
      ROOT::Internal::RNTupleReadOptionsManip::SetClusterBunchSize(opts, bunchSize);
   });
   return opts;
}


std::unique_ptr<ROOT::Experimental::Internal::RPageSource>

CreatePageSource(std::string_view ntupleName, std::string_view fileName)

{

   return ROOT::Experimental::Internal::RPageSource::Create(ntupleName, fileName, GetOpts());

}

} // namespace


/// Constructs the data source for the RNTuple `ntupleName` stored in the single file `fileName`.
/// Delegates to the page source constructor.
RNTupleDS::RNTupleDS(std::string_view ntupleName, std::string_view fileName)
   : RNTupleDS(CreatePageSource(ntupleName, fileName))
{}


/// Constructs the data source from an RNTuple anchor; `ntuple` is dereferenced and must not be null.
/// Delegates to the page source constructor.
RNTupleDS::RNTupleDS(RNTuple *ntuple)
   : RNTupleDS(ROOT::Experimental::Internal::RPageSourceFile::CreateFromAnchor(*ntuple))
{}


/// Constructs the data source for a chain: the RNTuple `ntupleName` is read from every file in `fileNames`.
/// The first file is opened right away (through the delegated page source constructor) to read the schema.
///
/// \throws std::out_of_range if `fileNames` is empty (bounds-checked access instead of undefined behavior)
RNTupleDS::RNTupleDS(std::string_view ntupleName, const std::vector<std::string> &fileNames)
   : RNTupleDS(CreatePageSource(ntupleName, fileNames.at(0)))
{
   fNTupleName = ntupleName;
   fFileNames = fileNames;
   // One staging slot per file; the slot of the first file is already populated by the delegated constructor
   fStagingArea.resize(fFileNames.size());
}


/// Always returns an empty record: column readers are handed out by GetColumnReaders() instead.
RDF::RDataSource::Record_t RNTupleDS::GetColumnReadersImpl(std::string_view /* name */, const std::type_info & /* ti */)
{
   // This datasource uses the newer GetColumnReaders() API
   return RDF::RDataSource::Record_t{};
}


/// Creates the column reader of the given processing slot for the column `name`.
///
/// \param slot The processing slot; the reader is registered in fActiveColumnReaders[slot]
/// \param name The RDF column name; it is assumed to be one of fColumnNames
/// \param tid The type requested by the caller. If it differs from the column's default type (and the column is
///        not an "R_rdf_sizeof_" cardinality column), an alternative prototype field of the requested type is
///        created and cached in fAlternativeProtoFields.
/// \return The new column reader. The reader is not yet connected to a page source.
/// \throws std::runtime_error if no field can be created for the requested type
std::unique_ptr<ROOT::Detail::RDF::RColumnReaderBase>
RNTupleDS::GetColumnReaders(unsigned int slot, std::string_view name, const std::type_info &tid)
{
   // At this point we can assume that `name` will be found in fColumnNames
   const auto index = std::distance(fColumnNames.begin(), std::find(fColumnNames.begin(), fColumnNames.end(), name));
   const auto requestedType = ROOT::Internal::GetRenormalizedTypeName(ROOT::Internal::RDF::TypeID2TypeName(tid));

   ROOT::RFieldBase *field;
   // If the field corresponding to the provided name is not a cardinality column and the requested type is different
   // from the proto field that was created when the data source was constructed, we first have to create an
   // alternative proto field for the column reader. Otherwise, we can directly use the existing proto field.
   if (name.substr(0, 13) != "R_rdf_sizeof_" && requestedType != fColumnTypes[index]) {
      auto &altProtoFields = fAlternativeProtoFields[index];
      auto altProtoField = std::find_if(altProtoFields.begin(), altProtoFields.end(),
                                        [&requestedType](const std::unique_ptr<ROOT::RFieldBase> &fld) {
                                           return fld->GetTypeName() == requestedType;
                                        });
      if (altProtoField != altProtoFields.end()) {
         field = altProtoField->get();
      } else {
         auto newAltProtoFieldOrException = ROOT::RFieldBase::Create(std::string(name), requestedType);
         if (!newAltProtoFieldOrException) {
            // The column name is enclosed in quotes, just like the type name
            throw std::runtime_error("RNTupleDS: Could not create field with type \"" + requestedType +
                                     "\" for column \"" + std::string(name) + "\"");
         }
         auto newAltProtoField = newAltProtoFieldOrException.Unwrap();
         newAltProtoField->SetOnDiskId(fProtoFields[index]->GetOnDiskId());
         // Take the raw pointer before the ownership moves into the cache
         field = newAltProtoField.get();
         altProtoFields.emplace_back(std::move(newAltProtoField));
      }
   } else {
      field = fProtoFields[index].get();
   }

   // Map the field's and subfields' IDs to qualified names so that we can later connect the fields to
   // other page sources from the chain
   fFieldId2QualifiedName[field->GetOnDiskId()] = fPrincipalDescriptor.GetQualifiedFieldName(field->GetOnDiskId());
   for (const auto &s : *field) {
      fFieldId2QualifiedName[s.GetOnDiskId()] = fPrincipalDescriptor.GetQualifiedFieldName(s.GetOnDiskId());
   }

   auto reader = std::make_unique<Internal::RNTupleColumnReader>(this, field);
   // The data source only keeps a non-owning pointer; ownership goes to the caller
   fActiveColumnReaders[slot].emplace_back(reader.get());

   return reader;
}


/// Main loop of the staging thread (the thread is started in Initialize()).
/// In every iteration, the thread waits until either staging is requested (fIsReadyForStaging) or termination is
/// requested (fStagingThreadShouldTerminate). On a staging request, it runs StageNextSources() and then signals
/// through fHasNextSources and fCvStaging that the next sources are available.
void RNTupleDS::ExecStaging()
{
   while (true) {
      std::unique_lock lock(fMutexStaging);
      fCvStaging.wait(lock, [this] { return fIsReadyForStaging || fStagingThreadShouldTerminate; });
      if (fStagingThreadShouldTerminate)
         return;

      // The previously staged sources must have been consumed before new ones are staged
      assert(!fHasNextSources);
      // Runs while fMutexStaging is held
      StageNextSources();
      fHasNextSources = true;
      fIsReadyForStaging = false;

      // Release the mutex before notifying the waiting thread
      lock.unlock();
      fCvStaging.notify_one();
   }
}


/// Opens the page sources for the next batch of files (at most fNSlots files, starting at fNextFileIndex) and loads
/// their structure. The page sources are stored in fStagingArea. Stops early if termination of the staging thread
/// was requested.
void RNTupleDS::StageNextSources()
{
   const auto nFiles = fFileNames.empty() ? 1 : fFileNames.size();
   for (auto i = fNextFileIndex; (i < nFiles) && ((i - fNextFileIndex) < fNSlots); ++i) {
      if (fStagingThreadShouldTerminate)
         return;

      auto &stagedSource = fStagingArea[i];
      if (stagedSource) {
         // Only the first file can already be open: it was used to read the schema
         assert(i == 0);
         continue;
      }

      stagedSource = CreatePageSource(fNTupleName, fFileNames[i]);
      stagedSource->LoadStructure();
   }
}


/// Fills fNextRanges (which must be empty on entry) with at most fNSlots entry ranges taken from the files starting
/// at fNextFileIndex, and advances fNextFileIndex accordingly. Page sources are taken from fStagingArea if present,
/// otherwise they are created here. Files without entries do not produce a range.
///
/// Two scheduling modes are used:
///   - at least as many remaining files as slots: one range per file, covering the whole file
///   - fewer remaining files than slots: every file is split on cluster boundaries into several ranges, each with
///     its own page source
void RNTupleDS::PrepareNextRanges()
{
   assert(fNextRanges.empty());
   // Without a list of file names, the data source works on exactly one page source
   auto nFiles = fFileNames.empty() ? 1 : fFileNames.size();
   auto nRemainingFiles = nFiles - fNextFileIndex;
   if (nRemainingFiles == 0)
      return;

   // Easy work scheduling: one file per slot. We skip empty files (files without entries).
   if (nRemainingFiles >= fNSlots) {
      while ((fNextRanges.size() < fNSlots) && (fNextFileIndex < nFiles)) {
         REntryRangeDS range;

         // Take the page source out of the staging area (leaves an empty pointer behind)
         std::swap(fStagingArea[fNextFileIndex], range.fSource);

         if (!range.fSource) {
            // Typically, the prestaged source should have been present. Only if some of the files are empty, we need
            // to open and attach files here.
            range.fSource = CreatePageSource(fNTupleName, fFileNames[fNextFileIndex]);
         }
         range.fSource->Attach();
         fNextFileIndex++;

         auto nEntries = range.fSource->GetNEntries();
         if (nEntries == 0)
            continue;

         range.fLastEntry = nEntries; // whole file per slot, i.e. entry range [0..nEntries - 1]
         fNextRanges.emplace_back(std::move(range));
      }
      return;
   }

   // Work scheduling of the tail: multiple slots work on the same file.
   // Every slot still has its own page source but these page sources may open the same file.
   // Again, we need to skip empty files.
   unsigned int nSlotsPerFile = fNSlots / nRemainingFiles;
   for (std::size_t i = 0; (fNextRanges.size() < fNSlots) && (fNextFileIndex < nFiles); ++i) {
      std::unique_ptr<Internal::RPageSource> source;
      // Take the page source out of the staging area (leaves an empty pointer behind)
      std::swap(fStagingArea[fNextFileIndex], source);
      if (!source) {
         // Empty files trigger this condition
         source = CreatePageSource(fNTupleName, fFileNames[fNextFileIndex]);
      }
      source->Attach();
      fNextFileIndex++;

      auto nEntries = source->GetNEntries();
      if (nEntries == 0)
         continue;

      // If last file: use all remaining slots
      if (i == (nRemainingFiles - 1))
         nSlotsPerFile = fNSlots - fNextRanges.size();

      // Collect the entry range [first, first + nEntries) of every cluster, following the cluster chain
      // from FindClusterId(0, 0) via FindNextClusterId()
      std::vector<std::pair<ULong64_t, ULong64_t>> rangesByCluster;
      {
         auto descriptorGuard = source->GetSharedDescriptorGuard();
         auto clusterId = descriptorGuard->FindClusterId(0, 0);
         while (clusterId != kInvalidDescriptorId) {
            const auto &clusterDesc = descriptorGuard->GetClusterDescriptor(clusterId);
            rangesByCluster.emplace_back(std::make_pair<ULong64_t, ULong64_t>(
               clusterDesc.GetFirstEntryIndex(), clusterDesc.GetFirstEntryIndex() + clusterDesc.GetNEntries()));
            clusterId = descriptorGuard->FindNextClusterId(clusterId);
         }
      }
      const unsigned int nRangesByCluster = rangesByCluster.size();

      // Distribute slots equidistantly over the entry range, aligned on cluster boundaries
      const auto nClustersPerSlot = nRangesByCluster / nSlotsPerFile;
      const auto remainder = nRangesByCluster % nSlotsPerFile;
      std::size_t iRange = 0;
      unsigned int iSlot = 0;
      // With fewer clusters than slots for this file, only as many ranges as clusters are created
      const unsigned int N = std::min(nSlotsPerFile, nRangesByCluster);
      for (; iSlot < N; ++iSlot) {
         auto start = rangesByCluster[iRange].first;
         // The first `remainder` slots get one cluster more than the others
         iRange += nClustersPerSlot + static_cast<int>(iSlot < remainder);
         assert(iRange > 0);
         auto end = rangesByCluster[iRange - 1].second;

         REntryRangeDS range;
         // The last range for this file just takes the already opened page source. All previous ranges clone.
         if (iSlot == N - 1) {
            range.fSource = std::move(source);
         } else {
            range.fSource = source->Clone();
         }
         // The second member passed to SetEntryRange() is the number of entries, not the end index
         range.fSource->SetEntryRange({start, end - start});
         range.fFirstEntry = start;
         range.fLastEntry = end;
         fNextRanges.emplace_back(std::move(range));
      }
   } // loop over tail of remaining files
}


/// Returns the next batch of entry ranges as [start, end) pairs of logical entry numbers, i.e. entry numbers that
/// are offset by the number of entries seen so far (fSeenEntries). An empty vector signals that there is no more
/// data. As a side effect, the staging of the next batch of files is requested and, with a single slot, the active
/// column readers are reconnected to the page source of the first current range.
std::vector<std::pair<ULong64_t, ULong64_t>> RNTupleDS::GetEntryRanges()
{
   std::vector<std::pair<ULong64_t, ULong64_t>> ranges;

   // We need to distinguish between single threaded and multi-threaded runs.
   // In single threaded mode, InitSlot is only called once and column readers have to be rewired
   // to new page sources of the chain in GetEntryRanges. In multi-threaded mode, on the other hand,
   // InitSlot is called for every returned range, thus rewiring the column readers takes place in
   // InitSlot and FinalizeSlot.

   if (fNSlots == 1) {
      for (auto r : fActiveColumnReaders[0]) {
         r->Disconnect(true /* keepValue */);
      }
   }

   // If we have fewer files than slots and we run multiple event loops, we can reuse fCurrentRanges and don't need
   // to worry about loading the fNextRanges. I.e., in this case we don't enter the if block.
   if (fCurrentRanges.empty() || (fSeenEntries > 0)) {
      // Otherwise, i.e. start of the first event loop or in the middle of the event loop, prepare the next ranges
      // and swap with the current ones.
      {
         // Block until the staging thread has flagged that the next sources are available
         std::unique_lock lock(fMutexStaging);
         fCvStaging.wait(lock, [this] { return fHasNextSources; });
      }
      PrepareNextRanges();
      if (fNextRanges.empty()) {
         // No more data
         return ranges;
      }

      assert(fNextRanges.size() <= fNSlots);

      fCurrentRanges.clear();
      std::swap(fCurrentRanges, fNextRanges);
   }

   // Stage next batch of files for the next call to GetEntryRanges()
   {
      std::lock_guard _(fMutexStaging);
      fIsReadyForStaging = true;
      fHasNextSources = false;
   }
   fCvStaging.notify_one();

   // Create ranges for the RDF loop manager from the list of REntryRangeDS records.
   // The entry ranges that are relative to the page source in REntryRangeDS are translated into absolute
   // entry ranges, given the current state of the entry cursor.
   // We remember the connection from first absolute entry index of a range to its REntryRangeDS record
   // so that we can properly rewire the column reader in InitSlot
   fFirstEntry2RangeIdx.clear();
   // Number of entries of the ranges processed so far that belong to the current source
   ULong64_t nEntriesPerSource = 0;
   for (std::size_t i = 0; i < fCurrentRanges.size(); ++i) {
      // Several consecutive ranges may operate on the same file (each with their own page source clone).
      // We can detect a change of file when the first entry number jumps back to 0.
      if (fCurrentRanges[i].fFirstEntry == 0) {
         // New source
         fSeenEntries += nEntriesPerSource;
         nEntriesPerSource = 0;
      }
      auto start = fCurrentRanges[i].fFirstEntry + fSeenEntries;
      auto end = fCurrentRanges[i].fLastEntry + fSeenEntries;
      nEntriesPerSource += end - start;

      fFirstEntry2RangeIdx[start] = i;
      ranges.emplace_back(start, end);
   }
   // Account for the entries of the last source
   fSeenEntries += nEntriesPerSource;

   // Single slot: rewire the column readers here (see above); the entry offset is the first logical entry
   if ((fNSlots == 1) && (fCurrentRanges[0].fSource)) {
      for (auto r : fActiveColumnReaders[0]) {
         r->Connect(*fCurrentRanges[0].fSource, ranges[0].first);
      }
   }

   return ranges;
}


/// Connects the column readers of `slot` to the page source of the range that starts at the logical entry
/// `firstEntry`. Does nothing with a single slot.
/// Throws (from the map's at()) if no current range starts at `firstEntry`.
void RNTupleDS::InitSlot(unsigned int slot, ULong64_t firstEntry)
{
   if (fNSlots != 1) {
      const auto rangeIdx = fFirstEntry2RangeIdx.at(firstEntry);
      for (auto reader : fActiveColumnReaders[slot]) {
         auto &range = fCurrentRanges[rangeIdx];
         // The entry offset is the difference between the logical and the in-source first entry of the range
         reader->Connect(*range.fSource, firstEntry - range.fFirstEntry);
      }
   }
}


/// Disconnects the column readers of `slot` while keeping their value objects for the next Connect().
/// Does nothing with a single slot.
void RNTupleDS::FinalizeSlot(unsigned int slot)
{
   if (fNSlots != 1) {
      for (auto reader : fActiveColumnReaders[slot])
         reader->Disconnect(true /* keepValue */);
   }
}


/// Returns the type name of the RDF column `colName`.
/// \throws std::runtime_error if there is no column with that name
std::string RNTupleDS::GetTypeName(std::string_view colName) const
{
   const auto firstName = fColumnNames.begin();
   const auto lastName = fColumnNames.end();
   const auto namePos = std::find(firstName, lastName, colName);

   if (namePos != lastName) {
      // Column names and column types are stored at the same index
      return fColumnTypes[std::distance(firstName, namePos)];
   }

   throw std::runtime_error(std::string("RNTupleDS: There is no column with name \"") + std::string(colName) + "\"");
}


/// Returns true if `colName` is one of the RDF columns provided by this data source.
bool RNTupleDS::HasColumn(std::string_view colName) const
{
   const auto lastName = fColumnNames.end();
   const auto namePos = std::find(fColumnNames.begin(), lastName, colName);
   return namePos != lastName;
}


/// Prepares the data source for an event loop: resets the entry and file cursors and the staging flags, and starts
/// the staging thread (which runs ExecStaging()). Staging is requested right away unless the ranges of a previous
/// event loop can be reused.
void RNTupleDS::Initialize()
{
   fSeenEntries = 0;
   fNextFileIndex = 0;
   // The flags are reset before the staging thread is started
   fIsReadyForStaging = fHasNextSources = fStagingThreadShouldTerminate = false;
   fThreadStaging = std::thread(&RNTupleDS::ExecStaging, this);
   assert(fNextRanges.empty());

   if (fCurrentRanges.empty() || (fFileNames.size() > fNSlots)) {
      // First event loop or large number of files: start the staging process.
      {
         std::lock_guard _(fMutexStaging);
         fIsReadyForStaging = true;
      }
      fCvStaging.notify_one();
   } else {
      // Otherwise, we will reuse fCurrentRanges. Make sure that staging and preparing next ranges will be a noop
      // (already at the end of the list of files).
      // Without a list of file names there is exactly one source, hence the lower bound of 1.
      fNextFileIndex = std::max(fFileNames.size(), std::size_t(1));
   }
}


// Winds down an event loop: disconnects the column readers of every slot (without keeping their
// values), asks the staging thread to terminate and joins it, and, for chains with more files
// than slots, drops the current and next ranges and resets the staging area.
void RNTupleDS::Finalize()
{
   constexpr bool keepValue = false;
   for (unsigned int slot = 0; slot < fNSlots; ++slot) {
      for (auto *reader : fActiveColumnReaders[slot]) {
         reader->Disconnect(keepValue);
      }
   }

   // Raise the termination flag under the staging mutex, then wake up the staging thread and wait for it.
   {
      std::lock_guard stagingLock(fMutexStaging);
      fStagingThreadShouldTerminate = true;
   }
   fCvStaging.notify_one();
   fThreadStaging.join();

   const auto nFiles = fFileNames.size();
   if (!(nFiles > fNSlots))
      return;

   // The chain has more files than there are slots: the files opened at the end of the event loop
   // won't be reused when the event loop restarts, so we can close them.
   fCurrentRanges.clear();
   fNextRanges.clear();
   fStagingArea.clear();
   fStagingArea.resize(nFiles);
}


// Records the number of processing slots and sizes fActiveColumnReaders to one list per slot.
//
// \param nSlots  Number of slots; asserted to be greater than zero.
//
// The number of slots is asserted to be still unset (zero) when this function is called.
void RNTupleDS::SetNSlots(unsigned int nSlots)
{
   assert(fNSlots == 0);
   assert(nSlots > 0);

   fNSlots = nSlots;
   fActiveColumnReaders.resize(nSlots);
}


} // namespace Experimental

} // namespace ROOT


// Builds an RDataFrame on top of an RNTupleDS data source constructed from an ntuple name and a
// single file name.
//
// \param ntupleName  Forwarded to the RNTupleDS constructor.
// \param fileName    Forwarded to the RNTupleDS constructor.
ROOT::RDataFrame ROOT::RDF::Experimental::FromRNTuple(std::string_view ntupleName, std::string_view fileName)
{
   auto dataSource = std::make_unique<ROOT::Experimental::RNTupleDS>(ntupleName, fileName);
   return ROOT::RDataFrame(std::move(dataSource));
}


// Builds an RDataFrame on top of an RNTupleDS data source constructed from an ntuple name and a
// list of file names.
//
// \param ntupleName  Forwarded to the RNTupleDS constructor.
// \param fileNames   Forwarded to the RNTupleDS constructor.
ROOT::RDataFrame
ROOT::RDF::Experimental::FromRNTuple(std::string_view ntupleName, const std::vector<std::string> &fileNames)
{
   auto dataSource = std::make_unique<ROOT::Experimental::RNTupleDS>(ntupleName, fileNames);
   return ROOT::RDataFrame(std::move(dataSource));
}


RColumnReaderBase.hxx

RDataFrame.hxx

RFieldUtils.hxx

RField.hxx

RNTupleDS.hxx

RNTupleDescriptor.hxx

RNTupleUtil.hxx

RPageStorageFile.hxx

RPageStorage.hxx

f
#define f(i)
Definition RSha256.hxx:104

size
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix

Long64_t
long long Long64_t
Definition RtypesCore.h:69

ULong64_t
unsigned long long ULong64_t
Definition RtypesCore.h:70

TRangeDynCast
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Definition TCollection.h:358

TError.h

N
#define N

r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Definition TGWin32VirtualXProxy.cxx:168

index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Definition TGWin32VirtualXProxy.cxx:168

name
char name[80]
Definition TGX11.cxx:110

TSystem.h

gSystem
R__EXTERN TSystem * gSystem
Definition TSystem.h:572

_
#define _(A, B)
Definition cfortran.h:108

ROOT::Detail::RDF::RColumnReaderBase
Definition RColumnReaderBase.hxx:28

ROOT::Detail::TRangeCast
Definition TCollection.h:311

ROOT::Experimental::Internal::RArraySizeField
An artificial field that provides the size of a fixed-size array.
Definition RNTupleDS.cxx:122

ROOT::Experimental::Internal::RArraySizeField::~RArraySizeField
~RArraySizeField() final=default

ROOT::Experimental::Internal::RArraySizeField::RArraySizeField
RArraySizeField(std::size_t arrayLength)
Definition RNTupleDS.cxx:142

ROOT::Experimental::Internal::RArraySizeField::CloneImpl
std::unique_ptr< ROOT::RFieldBase > CloneImpl(std::string_view) const final
Called by Clone(), which additionally copies the on-disk ID.
Definition RNTupleDS.cxx:126

ROOT::Experimental::Internal::RArraySizeField::operator=
RArraySizeField & operator=(RArraySizeField &&other)=default

ROOT::Experimental::Internal::RArraySizeField::ConstructValue
void ConstructValue(void *where) const final
Constructs value in a given location of size at least GetValueSize(). Called by the base class' Creat...
Definition RNTupleDS.cxx:153

ROOT::Experimental::Internal::RArraySizeField::ReadInClusterImpl
void ReadInClusterImpl(RNTupleLocalIndex, void *to) final
Definition RNTupleDS.cxx:136

ROOT::Experimental::Internal::RArraySizeField::GenerateColumns
void GenerateColumns(const ROOT::RNTupleDescriptor &) final
Implementations in derived classes should create the backing columns corresponding to the field type ...
Definition RNTupleDS.cxx:131

ROOT::Experimental::Internal::RArraySizeField::ReadGlobalImpl
void ReadGlobalImpl(ROOT::NTupleSize_t, void *to) final
Definition RNTupleDS.cxx:132

ROOT::Experimental::Internal::RArraySizeField::fArrayLength
std::size_t fArrayLength
Definition RNTupleDS.cxx:124

ROOT::Experimental::Internal::RArraySizeField::RArraySizeField
RArraySizeField(RArraySizeField &&other)=default

ROOT::Experimental::Internal::RArraySizeField::GetValueSize
std::size_t GetValueSize() const final
The number of bytes taken by a value of the appropriate type.
Definition RNTupleDS.cxx:154

ROOT::Experimental::Internal::RArraySizeField::RArraySizeField
RArraySizeField(const RArraySizeField &other)=delete

ROOT::Experimental::Internal::RArraySizeField::GetAlignment
std::size_t GetAlignment() const final
As a rule of thumb, the alignment is equal to the size of the type.
Definition RNTupleDS.cxx:155

ROOT::Experimental::Internal::RArraySizeField::GenerateColumns
void GenerateColumns() final
Implementations in derived classes should create the backing columns corresponding to the field type ...
Definition RNTupleDS.cxx:130

ROOT::Experimental::Internal::RArraySizeField::operator=
RArraySizeField & operator=(const RArraySizeField &other)=delete

ROOT::Experimental::Internal::RNTupleColumnReader
Every RDF column is represented by exactly one RNTuple field.
Definition RNTupleDS.cxx:159

ROOT::Experimental::Internal::RNTupleColumnReader::fField
std::unique_ptr< RFieldBase > fField
The field backing the RDF column.
Definition RNTupleDS.cxx:165

ROOT::Experimental::Internal::RNTupleColumnReader::fEntryOffset
Long64_t fEntryOffset
For chains, the logical entry and the physical entry in any particular file can be different.
Definition RNTupleDS.cxx:172

ROOT::Experimental::Internal::RNTupleColumnReader::fValue
std::unique_ptr< RFieldBase::RValue > fValue
The memory location used to read from fField.
Definition RNTupleDS.cxx:166

ROOT::Experimental::Internal::RNTupleColumnReader::RNTupleColumnReader
RNTupleColumnReader(RNTupleDS *ds, RFieldBase *protoField)
Definition RNTupleDS.cxx:175

ROOT::Experimental::Internal::RNTupleColumnReader::fLastEntry
Long64_t fLastEntry
Last entry number that was read.
Definition RNTupleDS.cxx:168

ROOT::Experimental::Internal::RNTupleColumnReader::fDataSource
RNTupleDS * fDataSource
The data source that owns this column reader.
Definition RNTupleDS.cxx:163

ROOT::Experimental::Internal::RNTupleColumnReader::Connect
void Connect(RPageSource &source, Long64_t entryOffset)
Connect the field and its subfields to the page source.
Definition RNTupleDS.cxx:179

ROOT::Experimental::Internal::RNTupleColumnReader::Disconnect
void Disconnect(bool keepValue)
Definition RNTupleDS.cxx:218

ROOT::Experimental::Internal::RNTupleColumnReader::GetImpl
void * GetImpl(Long64_t entry) final
Definition RNTupleDS.cxx:228

ROOT::Experimental::Internal::RNTupleColumnReader::fValuePtr
std::shared_ptr< void > fValuePtr
Used to reuse the object created by fValue when reconnecting sources.
Definition RNTupleDS.cxx:167

ROOT::Experimental::Internal::RNTupleColumnReader::fProtoField
RFieldBase * fProtoField
The prototype field from which fField is cloned.
Definition RNTupleDS.cxx:164

ROOT::Experimental::Internal::RNTupleColumnReader::~RNTupleColumnReader
~RNTupleColumnReader()=default

ROOT::Experimental::Internal::RPageSource
Abstract interface to read data from an ntuple.
Definition RPageStorage.hxx:561

ROOT::Experimental::Internal::RPageSource::Create
static std::unique_ptr< RPageSource > Create(std::string_view ntupleName, std::string_view location, const ROOT::RNTupleReadOptions &options=ROOT::RNTupleReadOptions())
Guess the concrete derived page source from the file name (location)
Definition RPageStorage.cxx:164

ROOT::Experimental::Internal::RRDFCardinalityField
An artificial field that transforms an RNTuple column that contains the offset of collections into co...
Definition RNTupleDS.cxx:64

ROOT::Experimental::Internal::RRDFCardinalityField::ReadInClusterImpl
void ReadInClusterImpl(ROOT::RNTupleLocalIndex localIndex, void *to) final
Get the number of elements of the collection identified by clusterIndex.
Definition RNTupleDS.cxx:107

ROOT::Experimental::Internal::RRDFCardinalityField::ReadGlobalImpl
void ReadGlobalImpl(ROOT::NTupleSize_t globalIndex, void *to) final
Get the number of elements of the collection identified by globalIndex.
Definition RNTupleDS.cxx:98

ROOT::Experimental::Internal::RRDFCardinalityField::RRDFCardinalityField
RRDFCardinalityField(RRDFCardinalityField &&other)=default

ROOT::Experimental::Internal::RRDFCardinalityField::GenerateColumns
void GenerateColumns(const ROOT::RNTupleDescriptor &desc) final
Implementations in derived classes should create the backing columns corresponding to the field type ...
Definition RNTupleDS.cxx:89

ROOT::Experimental::Internal::RRDFCardinalityField::CloneImpl
std::unique_ptr< ROOT::RFieldBase > CloneImpl(std::string_view) const final
Called by Clone(), which additionally copies the on-disk ID.
Definition RNTupleDS.cxx:66

ROOT::Experimental::Internal::RRDFCardinalityField::GetColumnRepresentations
const RColumnRepresentations & GetColumnRepresentations() const final
Implementations in derived classes should return a static RColumnRepresentations object.
Definition RNTupleDS.cxx:78

ROOT::Experimental::Internal::RRDFCardinalityField::GenerateColumns
void GenerateColumns() final
Implementations in derived classes should create the backing columns corresponding to the field type ...
Definition RNTupleDS.cxx:88

ROOT::Experimental::Internal::RRDFCardinalityField::operator=
RRDFCardinalityField & operator=(RRDFCardinalityField &&other)=default

ROOT::Experimental::Internal::RRDFCardinalityField::GetValueSize
size_t GetValueSize() const final
The number of bytes taken by a value of the appropriate type.
Definition RNTupleDS.cxx:94

ROOT::Experimental::Internal::RRDFCardinalityField::GetAlignment
size_t GetAlignment() const final
As a rule of thumb, the alignment is equal to the size of the type.
Definition RNTupleDS.cxx:95

ROOT::Experimental::Internal::RRDFCardinalityField::ConstructValue
void ConstructValue(void *where) const final
Constructs value in a given location of size at least GetValueSize(). Called by the base class' Creat...
Definition RNTupleDS.cxx:70

ROOT::Experimental::Internal::RRDFCardinalityField::RRDFCardinalityField
RRDFCardinalityField()
Definition RNTupleDS.cxx:73

ROOT::Experimental::Internal::RRDFCardinalityField::~RRDFCardinalityField
~RRDFCardinalityField()=default

ROOT::Experimental::RNTupleDS
The RDataSource implementation for RNTuple.
Definition RNTupleDS.hxx:45

ROOT::Experimental::RNTupleDS::GetColumnReaders
std::unique_ptr< ROOT::Detail::RDF::RColumnReaderBase > GetColumnReaders(unsigned int, std::string_view, const std::type_info &) final
If the other GetColumnReaders overload returns an empty vector, this overload will be called instead.
Definition RNTupleDS.cxx:443

ROOT::Experimental::RNTupleDS::SetNSlots
void SetNSlots(unsigned int nSlots) final
Inform RDataSource of the number of processing slots (i.e.
Definition RNTupleDS.cxx:781

ROOT::Experimental::RNTupleDS::AddField
void AddField(const ROOT::RNTupleDescriptor &desc, std::string_view colName, ROOT::DescriptorId_t fieldId, std::vector< RFieldInfo > fieldInfos, bool convertToRVec=true)
Provides the RDF column "colName" given the field identified by fieldID.
Definition RNTupleDS.cxx:242

ROOT::Experimental::RNTupleDS::ExecStaging
void ExecStaging()
The main function of the fThreadStaging background thread.
Definition RNTupleDS.cxx:489

ROOT::Experimental::RNTupleDS::fPrincipalDescriptor
ROOT::RNTupleDescriptor fPrincipalDescriptor
A clone of the first page source's descriptor.
Definition RNTupleDS.hxx:59

ROOT::Experimental::RNTupleDS::fProtoFields
std::vector< std::unique_ptr< ROOT::RFieldBase > > fProtoFields
We prepare a prototype field for every column.
Definition RNTupleDS.hxx:87

ROOT::Experimental::RNTupleDS::GetColumnReadersImpl
Record_t GetColumnReadersImpl(std::string_view name, const std::type_info &) final
type-erased vector of pointers to pointers to column values - one per slot
Definition RNTupleDS.cxx:436

ROOT::Experimental::RNTupleDS::fThreadStaging
std::thread fThreadStaging
The background thread that runs StageNextSources()
Definition RNTupleDS.hxx:112

ROOT::Experimental::RNTupleDS::Initialize
void Initialize() final
Convenience method called before starting an event-loop.
Definition RNTupleDS.cxx:736

ROOT::Experimental::RNTupleDS::fNextFileIndex
std::size_t fNextFileIndex
Index into fFileNames to the next file to process.
Definition RNTupleDS.hxx:81

ROOT::Experimental::RNTupleDS::fColumnNames
std::vector< std::string > fColumnNames
Definition RNTupleDS.hxx:97

ROOT::Experimental::RNTupleDS::fFirstEntry2RangeIdx
std::unordered_map< ULong64_t, std::size_t > fFirstEntry2RangeIdx
Maps the first entries from the ranges of the last GetEntryRanges() call to their corresponding index...
Definition RNTupleDS.hxx:109

ROOT::Experimental::RNTupleDS::Finalize
void Finalize() final
Convenience method called after concluding an event-loop.
Definition RNTupleDS.cxx:758

ROOT::Experimental::RNTupleDS::GetTypeName
std::string GetTypeName(std::string_view colName) const final
Type of a column as a string, e.g.
Definition RNTupleDS.cxx:718

ROOT::Experimental::RNTupleDS::~RNTupleDS
~RNTupleDS() final

ROOT::Experimental::RNTupleDS::InitSlot
void InitSlot(unsigned int slot, ULong64_t firstEntry) final
Convenience method called at the start of the data processing associated to a slot.
Definition RNTupleDS.cxx:697

ROOT::Experimental::RNTupleDS::FinalizeSlot
void FinalizeSlot(unsigned int slot) final
Convenience method called at the end of the data processing associated to a slot.
Definition RNTupleDS.cxx:708

ROOT::Experimental::RNTupleDS::fActiveColumnReaders
std::vector< std::vector< Internal::RNTupleColumnReader * > > fActiveColumnReaders
List of column readers returned by GetColumnReaders() organized by slot.
Definition RNTupleDS.hxx:101

ROOT::Experimental::RNTupleDS::GetEntryRanges
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges() final
Return ranges of entries to distribute to tasks.
Definition RNTupleDS.cxx:619

ROOT::Experimental::RNTupleDS::fCurrentRanges
std::vector< REntryRangeDS > fCurrentRanges
Basis for the ranges returned by the last GetEntryRanges() call.
Definition RNTupleDS.hxx:104

ROOT::Experimental::RNTupleDS::fStagingThreadShouldTerminate
bool fStagingThreadShouldTerminate
Is true when the I/O thread should quit.
Definition RNTupleDS.hxx:122

ROOT::Experimental::RNTupleDS::fHasNextSources
bool fHasNextSources
Is true when the staging thread has populated the next batch of files to fStagingArea.
Definition RNTupleDS.hxx:120

ROOT::Experimental::RNTupleDS::fNextRanges
std::vector< REntryRangeDS > fNextRanges
Basis for the ranges populated by the PrepareNextRanges() call.
Definition RNTupleDS.hxx:105

ROOT::Experimental::RNTupleDS::fMutexStaging
std::mutex fMutexStaging
Protects the shared state between the main thread and the I/O thread.
Definition RNTupleDS.hxx:114

ROOT::Experimental::RNTupleDS::StageNextSources
void StageNextSources()
Starting from fNextFileIndex, opens the next fNSlots files.
Definition RNTupleDS.cxx:507

ROOT::Experimental::RNTupleDS::HasColumn
bool HasColumn(std::string_view colName) const final
Checks if the dataset has a certain column.
Definition RNTupleDS.cxx:731

ROOT::Experimental::RNTupleDS::fStagingArea
std::vector< std::unique_ptr< ROOT::Experimental::Internal::RPageSource > > fStagingArea
The staging area is relevant for chains of files, i.e.
Definition RNTupleDS.hxx:80

ROOT::Experimental::RNTupleDS::PrepareNextRanges
void PrepareNextRanges()
Populates fNextRanges with the next set of entry ranges.
Definition RNTupleDS.cxx:524

ROOT::Experimental::RNTupleDS::fAlternativeProtoFields
std::unordered_map< std::size_t, std::vector< std::unique_ptr< ROOT::RFieldBase > > > fAlternativeProtoFields
Columns may be requested with types other than with which they were initially added as proto fields.
Definition RNTupleDS.hxx:92

ROOT::Experimental::RNTupleDS::fFileNames
std::vector< std::string > fFileNames
Definition RNTupleDS.hxx:63

ROOT::Experimental::RNTupleDS::fNTupleName
std::string fNTupleName
The data source may be constructed with an ntuple name and a list of files.
Definition RNTupleDS.hxx:62

ROOT::Experimental::RNTupleDS::fColumnTypes
std::vector< std::string > fColumnTypes
Definition RNTupleDS.hxx:98

ROOT::Experimental::RNTupleDS::RNTupleDS
RNTupleDS(std::unique_ptr< ROOT::Experimental::Internal::RPageSource > pageSource)
Definition RNTupleDS.cxx:381

ROOT::Experimental::RNTupleDS::fFieldId2QualifiedName
std::unordered_map< ROOT::DescriptorId_t, std::string > fFieldId2QualifiedName
Connects the IDs of active proto fields and their subfields to their fully qualified name (a....
Definition RNTupleDS.hxx:96

ROOT::Experimental::RNTupleDS::fSeenEntries
ULong64_t fSeenEntries
The number of entries so far returned by GetEntryRanges()
Definition RNTupleDS.hxx:103

ROOT::Experimental::RNTupleDS::fCvStaging
std::condition_variable fCvStaging
Signal for the state information of fIsReadyForStaging and fHasNextSources.
Definition RNTupleDS.hxx:116

ROOT::Experimental::RNTupleDS::fIsReadyForStaging
bool fIsReadyForStaging
Is true when the staging thread should start working.
Definition RNTupleDS.hxx:118

ROOT::Internal::RColumn::GetCollectionInfo
void GetCollectionInfo(const ROOT::NTupleSize_t globalIndex, RNTupleLocalIndex *collectionStart, ROOT::NTupleSize_t *collectionSize)
For offset columns only, look at the two adjacent values that define a collection's coordinates.
Definition RColumn.hxx:285

ROOT::Internal::RNTupleReadOptionsManip::SetClusterBunchSize
static void SetClusterBunchSize(RNTupleReadOptions &options, unsigned int val)
Definition RNTupleReadOptions.hxx:121

ROOT::RDF::RDataSource::fNSlots
unsigned int fNSlots
Definition RDataSource.hxx:150

ROOT::RDF::RDataSource::Record_t
std::vector< void * > Record_t
Definition RDataSource.hxx:145

ROOT::RDataFrame
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...
Definition RDataFrame.hxx:41

ROOT::RException
Base class for all ROOT issued exceptions.
Definition RError.hxx:79

ROOT::RFieldBase::RColumnRepresentations
The list of column representations a field can have.
Definition RFieldBase.hxx:186

ROOT::RFieldBase
A field translates read and write calls from/to underlying columns to/from tree values.
Definition RFieldBase.hxx:83

ROOT::RFieldBase::fPrincipalColumn
ROOT::Internal::RColumn * fPrincipalColumn
All fields that have columns have a distinct main column.
Definition RFieldBase.hxx:287

ROOT::RFieldBase::cbegin
RConstSchemaIterator cbegin() const
Definition RFieldBase.cxx:831

ROOT::RFieldBase::GetFieldName
const std::string & GetFieldName() const
Definition RFieldBase.hxx:582

ROOT::RFieldBase::Create
static RResult< std::unique_ptr< RFieldBase > > Create(const std::string &fieldName, const std::string &typeName, const ROOT::RCreateFieldOptions &options, const ROOT::RNTupleDescriptor *desc, ROOT::DescriptorId_t fieldId)
Factory method to resurrect a field from the stored on-disk type information.
Definition RFieldBase.cxx:293

ROOT::RFieldBase::GetOnDiskId
ROOT::DescriptorId_t GetOnDiskId() const
Definition RFieldBase.hxx:599

ROOT::RFieldBase::Clone
std::unique_ptr< RFieldBase > Clone(std::string_view newName) const
Copies the field and its subfields using a possibly new name and a new, unconnected set of columns.
Definition RFieldBase.cxx:627

ROOT::RNTupleDescriptor::RFieldDescriptorIterable::begin
RIterator begin()
Definition RNTupleDescriptor.hxx:952

ROOT::RNTupleDescriptor
The on-storage metadata of an RNTuple.
Definition RNTupleDescriptor.hxx:635

ROOT::RNTupleDescriptor::GetFieldIterable
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
Definition RNTupleDescriptor.cxx:1325

ROOT::RNTupleDescriptor::GetFieldDescriptor
const RFieldDescriptor & GetFieldDescriptor(ROOT::DescriptorId_t fieldId) const
Definition RNTupleDescriptor.hxx:749

ROOT::RNTupleDescriptor::GetFieldZeroId
ROOT::DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level RNTuple data fields.
Definition RNTupleDescriptor.hxx:805

ROOT::RNTupleDescriptor::FindFieldId
ROOT::DescriptorId_t FindFieldId(std::string_view fieldName, ROOT::DescriptorId_t parentId) const
Definition RNTupleDescriptor.cxx:343

ROOT::RNTupleDescriptor::GetQualifiedFieldName
std::string GetQualifiedFieldName(ROOT::DescriptorId_t fieldId) const
Walks up the parents of the field ID and returns a field name of the form a.b.c.d In case of invalid ...
Definition RNTupleDescriptor.cxx:362

ROOT::RNTupleLocalIndex
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
Definition RNTupleUtil.hxx:132

ROOT::RNTupleReadOptions
Common user-tunable settings for reading RNTuples.
Definition RNTupleReadOptions.hxx:80

ROOT::RNTuple
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:69

ROOT::RRangeCast::begin
const_iterator begin() const
Definition RRangeCast.hxx:104

ROOT::RRangeCast::end
const_iterator end() const
Definition RRangeCast.hxx:105

TSystem::Getenv
virtual const char * Getenv(const char *env)
Get environment variable.
Definition TSystem.cxx:1676

ROOT::Internal::RDF::TypeID2TypeName
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure...
Definition RDFUtils.cxx:121

ROOT::Internal::CallConnectPageSourceOnField
void CallConnectPageSourceOnField(RFieldBase &, ROOT::Experimental::Internal::RPageSource &)
Definition RFieldBase.cxx:84

ROOT::Internal::GetRenormalizedTypeName
std::string GetRenormalizedTypeName(const std::string &metaNormalizedName)
Given a type name normalized by ROOT meta, renormalize it for RNTuple. E.g., insert the std:: prefix.
Definition RFieldUtils.cxx:225

ROOT::Minuit2::GradientParameterSpace::Internal
@ Internal

ROOT::RDF::Experimental::FromRNTuple
RDataFrame FromRNTuple(std::string_view ntupleName, std::string_view fileName)
Definition RNTupleDS.cxx:791

ROOT
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Definition EExecutionPolicy.hxx:4

ROOT::DescriptorId_t
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
Definition RNTupleUtil.hxx:128

ROOT::NTupleSize_t
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
Definition RNTupleUtil.hxx:124

ROOT::kInvalidDescriptorId
constexpr DescriptorId_t kInvalidDescriptorId
Definition RNTupleUtil.hxx:129

ROOT::ENTupleStructure::kRecord
@ kRecord

ROOT::ENTupleStructure::kCollection
@ kCollection

ROOT::ENTupleStructure::kLeaf
@ kLeaf

ROOT::ENTupleColumnType::kSplitIndex64
@ kSplitIndex64

ROOT::ENTupleColumnType::kSplitIndex32
@ kSplitIndex32

ROOT::ENTupleColumnType::kIndex32
@ kIndex32

ROOT::ENTupleColumnType::kIndex64
@ kIndex64

ROOT::Experimental::RNTupleDS::REntryRangeDS
The PrepareNextRanges() method populates the fNextRanges list with REntryRangeDS records.
Definition RNTupleDS.hxx:51