Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTuple.cxx
Go to the documentation of this file.
1/// \file RNTuple.cxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-10-04
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#include <ROOT/RNTuple.hxx>
17
19#include <ROOT/RNTupleModel.hxx>
21#include <ROOT/RPageStorage.hxx>
22#include <ROOT/RPageSinkBuf.hxx>
24#ifdef R__USE_IMT
25#include <ROOT/TTaskGroup.hxx>
26#endif
27
28#include <TBuffer.h>
29#include <TError.h>
30#include <TFile.h>
31#include <TROOT.h> // for IsImplicitMTEnabled()
32
33#include <algorithm>
34#include <exception>
35#include <functional>
36#include <iomanip>
37#include <iostream>
38#include <sstream>
39#include <string>
40#include <unordered_map>
41#include <utility>
42
43#ifdef R__USE_IMT
45{
46 Reset();
47}
48
50{
51 fTaskGroup = std::make_unique<TTaskGroup>();
52}
53
54void ROOT::Experimental::RNTupleImtTaskScheduler::AddTask(const std::function<void(void)> &taskFunc)
55{
56 fTaskGroup->Run(taskFunc);
57}
58
60{
61 fTaskGroup->Wait();
62}
63#endif
64
65//------------------------------------------------------------------------------
66
68{
69 // We must not use the descriptor guard to prevent recursive locking in field.ConnectPageSource
70 model.GetFieldZero()->SetOnDiskId(fSource->GetSharedDescriptorGuard()->GetFieldZeroId());
71 for (auto &field : *model.GetFieldZero()) {
72 // If the model has been created from the descritor, the on-disk IDs are already set.
73 // User-provided models instead need to find their corresponding IDs in the descriptor.
74 if (field.GetOnDiskId() == kInvalidDescriptorId) {
75 field.SetOnDiskId(
76 fSource->GetSharedDescriptorGuard()->FindFieldId(field.GetName(), field.GetParent()->GetOnDiskId()));
77 }
78 field.ConnectPageSource(*fSource);
79 }
80}
81
83{
84#ifdef R__USE_IMT
85 if (IsImplicitMTEnabled()) {
86 fUnzipTasks = std::make_unique<RNTupleImtTaskScheduler>();
87 fSource->SetTaskScheduler(fUnzipTasks.get());
88 }
89#endif
90 fSource->Attach();
91 fMetrics.ObserveMetrics(fSource->GetMetrics());
92}
93
94ROOT::Experimental::RNTupleReader::RNTupleReader(std::unique_ptr<ROOT::Experimental::RNTupleModel> model,
95 std::unique_ptr<ROOT::Experimental::Detail::RPageSource> source)
96 : fSource(std::move(source)), fModel(std::move(model)), fMetrics("RNTupleReader")
97{
98 if (!fSource) {
99 throw RException(R__FAIL("null source"));
100 }
101 if (!fModel) {
102 throw RException(R__FAIL("null model"));
103 }
104 if (!fModel->GetProjectedFields().IsEmpty()) {
105 throw RException(R__FAIL("model has projected fields, which is incompatible with providing a read model"));
106 }
107 fModel->Freeze();
110}
111
112ROOT::Experimental::RNTupleReader::RNTupleReader(std::unique_ptr<ROOT::Experimental::Detail::RPageSource> source)
113 : fSource(std::move(source)), fModel(nullptr), fMetrics("RNTupleReader")
114{
115 if (!fSource) {
116 throw RException(R__FAIL("null source"));
117 }
119}
120
122
123std::unique_ptr<ROOT::Experimental::RNTupleReader>
124ROOT::Experimental::RNTupleReader::Open(std::unique_ptr<RNTupleModel> model, std::string_view ntupleName,
125 std::string_view storage, const RNTupleReadOptions &options)
126{
127 return std::make_unique<RNTupleReader>(std::move(model), Detail::RPageSource::Create(ntupleName, storage, options));
128}
129
130std::unique_ptr<ROOT::Experimental::RNTupleReader>
131ROOT::Experimental::RNTupleReader::Open(std::string_view ntupleName, std::string_view storage,
132 const RNTupleReadOptions &options)
133{
134 return std::make_unique<RNTupleReader>(Detail::RPageSource::Create(ntupleName, storage, options));
135}
136
137std::unique_ptr<ROOT::Experimental::RNTupleReader>
139{
140 return std::make_unique<RNTupleReader>(ntuple->MakePageSource(options));
141}
142
143std::unique_ptr<ROOT::Experimental::RNTupleReader>
145{
146 std::vector<std::unique_ptr<Detail::RPageSource>> sources;
147 for (const auto &n : ntuples) {
148 sources.emplace_back(Detail::RPageSource::Create(n.fNTupleName, n.fStorage, n.fOptions));
149 }
150 return std::make_unique<RNTupleReader>(std::make_unique<Detail::RPageSourceFriends>("_friends", sources));
151}
152
154{
155 if (!fModel) {
156 fModel = fSource->GetSharedDescriptorGuard()->GenerateModel();
157 ConnectModel(*fModel);
158 }
159 return fModel.get();
160}
161
163{
164 // TODO(lesimon): In a later version, these variables may be defined by the user or the ideal width may be read out
165 // from the terminal.
166 char frameSymbol = '*';
167 int width = 80;
168 /*
169 if (width < 30) {
170 output << "The width is too small! Should be at least 30." << std::endl;
171 return;
172 }
173 */
174 switch (what) {
176 std::string name;
177 std::unique_ptr<RNTupleModel> fullModel;
178 {
179 auto descriptorGuard = fSource->GetSharedDescriptorGuard();
180 name = descriptorGuard->GetName();
181 fullModel = descriptorGuard->GenerateModel();
182 }
183
184 for (int i = 0; i < (width / 2 + width % 2 - 4); ++i)
185 output << frameSymbol;
186 output << " NTUPLE ";
187 for (int i = 0; i < (width / 2 - 4); ++i)
188 output << frameSymbol;
189 output << std::endl;
190 // FitString defined in RFieldVisitor.cxx
191 output << frameSymbol << " N-Tuple : " << RNTupleFormatter::FitString(name, width - 13) << frameSymbol
192 << std::endl; // prints line with name of ntuple
193 output << frameSymbol << " Entries : " << RNTupleFormatter::FitString(std::to_string(GetNEntries()), width - 13)
194 << frameSymbol << std::endl; // prints line with number of entries
195
196 // Traverses through all fields to gather information needed for printing.
197 RPrepareVisitor prepVisitor;
198 // Traverses through all fields to do the actual printing.
199 RPrintSchemaVisitor printVisitor(output);
200
201 // Note that we do not need to connect the model, we are only looking at its tree of fields
202 fullModel->GetFieldZero()->AcceptVisitor(prepVisitor);
203
204 printVisitor.SetFrameSymbol(frameSymbol);
205 printVisitor.SetWidth(width);
206 printVisitor.SetDeepestLevel(prepVisitor.GetDeepestLevel());
207 printVisitor.SetNumFields(prepVisitor.GetNumFields());
208
209 for (int i = 0; i < width; ++i)
210 output << frameSymbol;
211 output << std::endl;
212 fullModel->GetFieldZero()->AcceptVisitor(printVisitor);
213 for (int i = 0; i < width; ++i)
214 output << frameSymbol;
215 output << std::endl;
216 break;
217 }
218 case ENTupleInfo::kStorageDetails: fSource->GetSharedDescriptorGuard()->PrintInfo(output); break;
219 case ENTupleInfo::kMetrics: fMetrics.Print(output); break;
220 default:
221 // Unhandled case, internal error
222 R__ASSERT(false);
223 }
224}
225
227{
228 if (!fDisplayReader)
229 fDisplayReader = Clone();
230 return fDisplayReader.get();
231}
232
234{
235 auto reader = GetDisplayReader();
236 auto entry = reader->GetModel()->GetDefaultEntry();
237
238 reader->LoadEntry(index);
239 output << "{";
240 for (auto iValue = entry->begin(); iValue != entry->end();) {
241 output << std::endl;
242 RPrintValueVisitor visitor(iValue->GetNonOwningCopy(), output, 1 /* level */);
243 iValue->GetField()->AcceptVisitor(visitor);
244
245 if (++iValue == entry->end()) {
246 output << std::endl;
247 break;
248 } else {
249 output << ",";
250 }
251 }
252 output << "}" << std::endl;
253}
254
256{
257 auto descriptorGuard = fSource->GetSharedDescriptorGuard();
258 if (!fCachedDescriptor || fCachedDescriptor->GetGeneration() != descriptorGuard->GetGeneration())
259 fCachedDescriptor = descriptorGuard->Clone();
260 return fCachedDescriptor.get();
261}
262
263//------------------------------------------------------------------------------
264
265ROOT::Experimental::RNTupleWriter::RNTupleWriter(std::unique_ptr<ROOT::Experimental::RNTupleModel> model,
266 std::unique_ptr<ROOT::Experimental::Detail::RPageSink> sink)
267 : fSink(std::move(sink)), fModel(std::move(model)), fMetrics("RNTupleWriter")
268{
269 if (!fModel) {
270 throw RException(R__FAIL("null model"));
271 }
272 if (!fSink) {
273 throw RException(R__FAIL("null sink"));
274 }
275 fModel->Freeze();
276#ifdef R__USE_IMT
277 if (IsImplicitMTEnabled()) {
278 fZipTasks = std::make_unique<RNTupleImtTaskScheduler>();
279 fSink->SetTaskScheduler(fZipTasks.get());
280 }
281#endif
282 fSink->Create(*fModel.get());
283 fMetrics.ObserveMetrics(fSink->GetMetrics());
284
285 const auto &writeOpts = fSink->GetWriteOptions();
286 fMaxUnzippedClusterSize = writeOpts.GetMaxUnzippedClusterSize();
287 // First estimate is a factor 2 compression if compression is used at all
288 const int scale = writeOpts.GetCompression() ? 2 : 1;
289 fUnzippedClusterSizeEst = scale * writeOpts.GetApproxZippedClusterSize();
290}
291
293{
294 try {
295 CommitCluster(true /* commitClusterGroup */);
296 fSink->CommitDataset();
297 } catch (const RException &err) {
298 R__LOG_ERROR(NTupleLog()) << "failure committing ntuple: " << err.GetError().GetReport();
299 }
300}
301
302std::unique_ptr<ROOT::Experimental::RNTupleWriter>
303ROOT::Experimental::RNTupleWriter::Recreate(std::unique_ptr<RNTupleModel> model, std::string_view ntupleName,
304 std::string_view storage, const RNTupleWriteOptions &options)
305{
306 return std::make_unique<RNTupleWriter>(std::move(model), Detail::RPageSink::Create(ntupleName, storage, options));
307}
308
309std::unique_ptr<ROOT::Experimental::RNTupleWriter>
310ROOT::Experimental::RNTupleWriter::Append(std::unique_ptr<RNTupleModel> model, std::string_view ntupleName, TFile &file,
311 const RNTupleWriteOptions &options)
312{
313 auto sink = std::make_unique<Detail::RPageSinkFile>(ntupleName, file, options);
314 if (options.GetUseBufferedWrite()) {
315 auto bufferedSink = std::make_unique<Detail::RPageSinkBuf>(std::move(sink));
316 return std::make_unique<RNTupleWriter>(std::move(model), std::move(bufferedSink));
317 }
318 return std::make_unique<RNTupleWriter>(std::move(model), std::move(sink));
319}
320
322{
323 if (fNEntries == fLastCommittedClusterGroup)
324 return;
325 fSink->CommitClusterGroup();
326 fLastCommittedClusterGroup = fNEntries;
327}
328
330{
331 if (fNEntries == fLastCommitted) {
332 if (commitClusterGroup)
333 CommitClusterGroup();
334 return;
335 }
336 if (fSink->GetWriteOptions().GetHasSmallClusters() &&
337 (fUnzippedClusterSize > RNTupleWriteOptions::kMaxSmallClusterSize))
338 {
339 throw RException(R__FAIL("invalid attempt to write a cluster > 512MiB with 'small clusters' option enabled"));
340 }
341 for (auto &field : *fModel->GetFieldZero()) {
342 field.CommitCluster();
343 }
344 fNBytesCommitted += fSink->CommitCluster(fNEntries);
345 fNBytesFilled += fUnzippedClusterSize;
346
347 // Cap the compression factor at 1000 to prevent overflow of fUnzippedClusterSizeEst
348 const float compressionFactor =
349 std::min(1000.f, static_cast<float>(fNBytesFilled) / static_cast<float>(fNBytesCommitted));
350 fUnzippedClusterSizeEst =
351 compressionFactor * static_cast<float>(fSink->GetWriteOptions().GetApproxZippedClusterSize());
352
353 fLastCommitted = fNEntries;
354 fUnzippedClusterSize = 0;
355
356 if (commitClusterGroup)
357 CommitClusterGroup();
358}
359
360//------------------------------------------------------------------------------
361
363 : fOffset(0), fDefaultEntry(std::move(defaultEntry))
364{
365}
366
367//------------------------------------------------------------------------------
368
370{
371 static TClassRef RNTupleAnchorClass("ROOT::Experimental::Internal::RFileNTupleAnchor");
372
373 if (buf.IsReading()) {
374 RNTupleAnchorClass->ReadBuffer(buf, static_cast<Internal::RFileNTupleAnchor *>(this));
375 R__ASSERT(buf.GetParent() && buf.GetParent()->InheritsFrom("TFile"));
376 fFile = reinterpret_cast<TFile *>(buf.GetParent());
377 } else {
378 RNTupleAnchorClass->WriteBuffer(buf, static_cast<Internal::RFileNTupleAnchor *>(this));
379 }
380}
381
382std::unique_ptr<ROOT::Experimental::Detail::RPageSource>
384{
385 if (!fFile)
386 throw RException(R__FAIL("This RNTuple object was not streamed from a file"));
387
388 // TODO(jblomer): Add RRawFile factory that create a raw file from a TFile. This may then duplicate the file
389 // descriptor (to avoid re-open). There could also be a raw file that uses a TFile as a "backend" for TFile cases
390 // that are unsupported by raw file.
391 auto path = fFile->GetEndpointUrl()->GetFile();
392 return Detail::RPageSourceFile::CreateFromAnchor(GetAnchor(), path, options);
393}
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:303
#define R__LOG_ERROR(...)
Definition RLogger.hxx:362
#define R__ASSERT(e)
Definition TError.h:118
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t width
char name[80]
Definition TGX11.cxx:110
void SetOnDiskId(DescriptorId_t id)
Definition RField.cxx:622
void ObserveMetrics(RNTupleMetrics &observee)
static std::unique_ptr< RPageSink > Create(std::string_view ntupleName, std::string_view location, const RNTupleWriteOptions &options=RNTupleWriteOptions())
Guess the concrete derived page source from the file name (location)
static std::unique_ptr< RPageSourceFile > CreateFromAnchor(const Internal::RFileNTupleAnchor &anchor, std::string_view path, const RNTupleReadOptions &options)
Used from the RNTuple class to build a datasource if the anchor is already available.
static std::unique_ptr< RPageSource > Create(std::string_view ntupleName, std::string_view location, const RNTupleReadOptions &options=RNTupleReadOptions())
Guess the concrete derived page source from the file name (location)
RCollectionNTupleWriter(std::unique_ptr< REntry > defaultEntry)
Definition RNTuple.cxx:362
std::string GetReport() const
Format a dignostics report, e.g. for an exception message.
Definition RError.cxx:25
Base class for all ROOT issued exceptions.
Definition RError.hxx:78
const RError & GetError() const
Definition RError.hxx:82
The on-storage meta-data of an ntuple.
std::unique_ptr< RNTupleDescriptor > Clone() const
static std::string FitString(const std::string &str, int availableSpace)
void Reset() final
Start a new set of tasks.
Definition RNTuple.cxx:49
void AddTask(const std::function< void(void)> &taskFunc) final
Take a callable that represents a task.
Definition RNTuple.cxx:54
void Wait() final
Blocks until all scheduled tasks finished.
Definition RNTuple.cxx:59
The RNTupleModel encapulates the schema of an ntuple.
RFieldZero * GetFieldZero() const
Common user-tunable settings for reading ntuples.
An RNTuple that is used to read data from storage.
Definition RNTuple.hxx:101
static std::unique_ptr< RNTupleReader > OpenFriends(std::span< ROpenSpec > ntuples)
Open RNTuples as one virtual, horizontally combined ntuple.
Definition RNTuple.cxx:144
RNTupleReader * GetDisplayReader()
Definition RNTuple.cxx:226
void Show(NTupleSize_t index, std::ostream &output=std::cout)
Shows the values of the i-th entry/row, starting with 0 for the first entry.
Definition RNTuple.cxx:233
const RNTupleDescriptor * GetDescriptor()
Returns a cached copy of the page source descriptor.
Definition RNTuple.cxx:255
static std::unique_ptr< RNTupleReader > Open(std::unique_ptr< RNTupleModel > model, std::string_view ntupleName, std::string_view storage, const RNTupleReadOptions &options=RNTupleReadOptions())
Throws an exception if the model is null.
Definition RNTuple.cxx:124
std::unique_ptr< Detail::RPageSource > fSource
Definition RNTuple.hxx:107
void ConnectModel(const RNTupleModel &model)
Definition RNTuple.cxx:67
RNTupleReader(std::unique_ptr< RNTupleModel > model, std::unique_ptr< Detail::RPageSource > source)
The user imposes an ntuple model, which must be compatible with the model found in the data on storag...
Definition RNTuple.cxx:94
void PrintInfo(const ENTupleInfo what=ENTupleInfo::kSummary, std::ostream &output=std::cout)
Prints a detailed summary of the ntuple, including a list of fields.
Definition RNTuple.cxx:162
std::unique_ptr< RNTupleModel > fModel
Needs to be destructed before fSource.
Definition RNTuple.hxx:109
Common user-tunable settings for storing ntuples.
static constexpr std::uint64_t kMaxSmallClusterSize
A maximum size of 512MB still allows for a vector of bool to be stored in a small cluster.
NTupleSize_t fUnzippedClusterSizeEst
Estimator of uncompressed cluster size, taking into account the estimated compression ratio.
Definition RNTuple.hxx:382
RNTupleWriter(std::unique_ptr< RNTupleModel > model, std::unique_ptr< Detail::RPageSink > sink)
Throws an exception if the model or the sink is null.
Definition RNTuple.cxx:265
std::size_t fMaxUnzippedClusterSize
Limit for committing cluster no matter the other tunables.
Definition RNTuple.hxx:380
void CommitCluster(bool commitClusterGroup=false)
Ensure that the data from the so far seen Fill calls has been written to storage.
Definition RNTuple.cxx:329
std::unique_ptr< RNTupleModel > fModel
Needs to be destructed before fSink.
Definition RNTuple.hxx:367
Detail::RNTupleMetrics fMetrics
Definition RNTuple.hxx:368
std::unique_ptr< Detail::RPageSink > fSink
Definition RNTuple.hxx:365
static std::unique_ptr< RNTupleWriter > Recreate(std::unique_ptr< RNTupleModel > model, std::string_view ntupleName, std::string_view storage, const RNTupleWriteOptions &options=RNTupleWriteOptions())
Throws an exception if the model is null.
Definition RNTuple.cxx:303
std::unique_ptr< Detail::RPageStorage::RTaskScheduler > fZipTasks
The page sink's parallel page compression scheduler if IMT is on.
Definition RNTuple.hxx:364
static std::unique_ptr< RNTupleWriter > Append(std::unique_ptr< RNTupleModel > model, std::string_view ntupleName, TFile &file, const RNTupleWriteOptions &options=RNTupleWriteOptions())
Throws an exception if the model is null.
Definition RNTuple.cxx:310
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:512
std::unique_ptr< Detail::RPageSource > MakePageSource(const RNTupleReadOptions &options=RNTupleReadOptions())
Create a page source from the RNTuple object.
Definition RNTuple.cxx:383
void Streamer(TBuffer &)
Definition RNTuple.cxx:369
Visitor used for a pre-processing run to collect information needed by another visitor class.
Contains settings for printing and prints a summary of an RField instance.
Renders a JSON value corresponding to the field.
Buffer base class used for serializing objects.
Definition TBuffer.h:43
TObject * GetParent() const
Return pointer to parent of this buffer.
Definition TBuffer.cxx:262
Bool_t IsReading() const
Definition TBuffer.h:86
TClassRef is used to implement a permanent reference to a TClass object.
Definition TClassRef.h:28
Int_t ReadBuffer(TBuffer &b, void *pointer, Int_t version, UInt_t start, UInt_t count)
Function called by the Streamer functions to deserialize information from buffer b into object at p.
Definition TClass.cxx:6758
Int_t WriteBuffer(TBuffer &b, void *pointer, const char *info="")
Function called by the Streamer functions to serialize object at p to buffer b.
Definition TClass.cxx:6779
A ROOT file is composed of a header, followed by consecutive data records (TKey instances) with a wel...
Definition TFile.h:53
virtual Bool_t InheritsFrom(const char *classname) const
Returns kTRUE if object inherits from class "classname".
Definition TObject.cxx:525
const Int_t n
Definition legend1.C:16
RLogChannel & NTupleLog()
Log channel for RNTuple diagnostics.
ENTupleInfo
Listing of the different options that can be printed by RNTupleReader::GetInfo()
Definition RNTuple.hxx:59
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr DescriptorId_t kInvalidDescriptorId
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition TROOT.cxx:570
Definition file.py:1
static const char * what
Definition stlLoader.cc:6
Entry point for an RNTuple in a ROOT file.
Definition RMiniFile.hxx:65
static void output()