Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTuple.cxx
Go to the documentation of this file.
1/// \file RNTuple.cxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-10-04
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#include <ROOT/RNTuple.hxx>
17
19#include <ROOT/RNTupleModel.hxx>
21#include <ROOT/RPageStorage.hxx>
22#include <ROOT/RPageSinkBuf.hxx>
24#ifdef R__USE_IMT
25#include <ROOT/TTaskGroup.hxx>
26#endif
27
28#include <TError.h>
29#include <TROOT.h> // for IsImplicitMTEnabled()
30
31#include <algorithm>
32#include <exception>
33#include <functional>
34#include <iomanip>
35#include <iostream>
36#include <sstream>
37#include <string>
38#include <unordered_map>
39#include <utility>
40
41
42#ifdef R__USE_IMT
44{
45 Reset();
46}
47
49{
50 fTaskGroup = std::make_unique<TTaskGroup>();
51}
52
53
54void ROOT::Experimental::RNTupleImtTaskScheduler::AddTask(const std::function<void(void)> &taskFunc)
55{
56 fTaskGroup->Run(taskFunc);
57}
58
59
61{
62 fTaskGroup->Wait();
63}
64#endif
65
66
67//------------------------------------------------------------------------------
68
69
71 const auto &desc = fSource->GetDescriptor();
72 model.GetFieldZero()->SetOnDiskId(desc.GetFieldZeroId());
73 for (auto &field : *model.GetFieldZero()) {
74 // If the model has been created from the descriptor, the on-disk IDs are already set.
75 // User-provided models instead need to find their corresponding IDs in the descriptor.
76 if (field.GetOnDiskId() == kInvalidDescriptorId) {
77 field.SetOnDiskId(desc.FindFieldId(field.GetName(), field.GetParent()->GetOnDiskId()));
78 }
79 field.ConnectPageSource(*fSource);
80 }
81}
82
84{
85#ifdef R__USE_IMT
86 if (IsImplicitMTEnabled()) {
87 fUnzipTasks = std::make_unique<RNTupleImtTaskScheduler>();
88 fSource->SetTaskScheduler(fUnzipTasks.get());
89 }
90#endif
91 fSource->Attach();
92 fMetrics.ObserveMetrics(fSource->GetMetrics());
93}
94
96 std::unique_ptr<ROOT::Experimental::RNTupleModel> model,
97 std::unique_ptr<ROOT::Experimental::Detail::RPageSource> source)
98 : fSource(std::move(source))
99 , fModel(std::move(model))
100 , fMetrics("RNTupleReader")
101{
102 if (!fSource) {
103 throw RException(R__FAIL("null source"));
104 }
105 if (!fModel) {
106 throw RException(R__FAIL("null model"));
107 }
110}
111
112ROOT::Experimental::RNTupleReader::RNTupleReader(std::unique_ptr<ROOT::Experimental::Detail::RPageSource> source)
113 : fSource(std::move(source))
114 , fModel(nullptr)
115 , fMetrics("RNTupleReader")
116{
117 if (!fSource) {
118 throw RException(R__FAIL("null source"));
119 }
121}
122
124
125std::unique_ptr<ROOT::Experimental::RNTupleReader> ROOT::Experimental::RNTupleReader::Open(
126 std::unique_ptr<RNTupleModel> model,
127 std::string_view ntupleName,
128 std::string_view storage,
129 const RNTupleReadOptions &options)
130{
131 return std::make_unique<RNTupleReader>(std::move(model), Detail::RPageSource::Create(ntupleName, storage, options));
132}
133
134std::unique_ptr<ROOT::Experimental::RNTupleReader> ROOT::Experimental::RNTupleReader::Open(
135 std::string_view ntupleName,
136 std::string_view storage,
137 const RNTupleReadOptions &options)
138{
139 return std::make_unique<RNTupleReader>(Detail::RPageSource::Create(ntupleName, storage, options));
140}
141
142std::unique_ptr<ROOT::Experimental::RNTupleReader> ROOT::Experimental::RNTupleReader::OpenFriends(
143 std::span<ROpenSpec> ntuples)
144{
145 std::vector<std::unique_ptr<Detail::RPageSource>> sources;
146 for (const auto &n : ntuples) {
147 sources.emplace_back(Detail::RPageSource::Create(n.fNTupleName, n.fStorage, n.fOptions));
148 }
149 return std::make_unique<RNTupleReader>(std::make_unique<Detail::RPageSourceFriends>("_friends", sources));
150}
151
153{
154 if (!fModel) {
155 fModel = fSource->GetDescriptor().GenerateModel();
156 ConnectModel(*fModel);
157 }
158 return fModel.get();
159}
160
162{
163 // TODO(lesimon): In a later version, these variables may be defined by the user or the ideal width may be read out from the terminal.
164 char frameSymbol = '*';
165 int width = 80;
166 /*
167 if (width < 30) {
168 output << "The width is too small! Should be at least 30." << std::endl;
169 return;
170 }
171 */
172 std::string name = fSource->GetDescriptor().GetName();
173 switch (what) {
175 for (int i = 0; i < (width/2 + width%2 - 4); ++i)
176 output << frameSymbol;
177 output << " NTUPLE ";
178 for (int i = 0; i < (width/2 - 4); ++i)
179 output << frameSymbol;
180 output << std::endl;
181 // FitString defined in RFieldVisitor.cxx
182 output << frameSymbol << " N-Tuple : " << RNTupleFormatter::FitString(name, width-13) << frameSymbol << std::endl; // prints line with name of ntuple
183 output << frameSymbol << " Entries : " << RNTupleFormatter::FitString(std::to_string(GetNEntries()), width - 13) << frameSymbol << std::endl; // prints line with number of entries
184
185 // Traverses through all fields to gather information needed for printing.
186 RPrepareVisitor prepVisitor;
187 // Traverses through all fields to do the actual printing.
188 RPrintSchemaVisitor printVisitor(output);
189
190 // Note that we do not need to connect the model, we are only looking at its tree of fields
191 auto fullModel = fSource->GetDescriptor().GenerateModel();
192 fullModel->GetFieldZero()->AcceptVisitor(prepVisitor);
193
194 printVisitor.SetFrameSymbol(frameSymbol);
195 printVisitor.SetWidth(width);
196 printVisitor.SetDeepestLevel(prepVisitor.GetDeepestLevel());
197 printVisitor.SetNumFields(prepVisitor.GetNumFields());
198
199 for (int i = 0; i < width; ++i)
200 output << frameSymbol;
201 output << std::endl;
202 fullModel->GetFieldZero()->AcceptVisitor(printVisitor);
203 for (int i = 0; i < width; ++i)
204 output << frameSymbol;
205 output << std::endl;
206 break;
207 }
209 fSource->GetDescriptor().PrintInfo(output);
210 break;
212 fMetrics.Print(output);
213 break;
214 default:
215 // Unhandled case, internal error
216 R__ASSERT(false);
217 }
218}
219
220
222{
223 if (!fDisplayReader)
224 fDisplayReader = Clone();
225 return fDisplayReader.get();
226}
227
228
230{
231 RNTupleReader *reader = this;
232 REntry *entry = nullptr;
233 // Don't accidentally trigger loading of the entire model
234 if (fModel)
235 entry = fModel->GetDefaultEntry();
236
237 switch(format) {
239 reader = GetDisplayReader();
240 entry = reader->GetModel()->GetDefaultEntry();
241 // Fall through
243 if (!entry) {
244 output << "{}" << std::endl;
245 break;
246 }
247
248 reader->LoadEntry(index);
249 output << "{";
250 for (auto iValue = entry->begin(); iValue != entry->end(); ) {
251 output << std::endl;
252 RPrintValueVisitor visitor(*iValue, output, 1 /* level */);
253 iValue->GetField()->AcceptVisitor(visitor);
254
255 if (++iValue == entry->end()) {
256 output << std::endl;
257 break;
258 } else {
259 output << ",";
260 }
261 }
262 output << "}" << std::endl;
263 break;
264 default:
265 // Unhandled case, internal error
266 R__ASSERT(false);
267 }
268}
269
270
271//------------------------------------------------------------------------------
272
273
275 std::unique_ptr<ROOT::Experimental::RNTupleModel> model,
276 std::unique_ptr<ROOT::Experimental::Detail::RPageSink> sink)
277 : fSink(std::move(sink))
278 , fModel(std::move(model))
279 , fMetrics("RNTupleWriter")
280{
281 if (!fModel) {
282 throw RException(R__FAIL("null model"));
283 }
284 if (!fSink) {
285 throw RException(R__FAIL("null sink"));
286 }
287#ifdef R__USE_IMT
288 if (IsImplicitMTEnabled()) {
289 fZipTasks = std::make_unique<RNTupleImtTaskScheduler>();
290 fSink->SetTaskScheduler(fZipTasks.get());
291 }
292#endif
293 fSink->Create(*fModel.get());
294 fMetrics.ObserveMetrics(fSink->GetMetrics());
295
296 const auto &writeOpts = fSink->GetWriteOptions();
297 fMaxUnzippedClusterSize = writeOpts.GetMaxUnzippedClusterSize();
298 // First estimate is a factor 2 compression if compression is used at all
299 const int scale = writeOpts.GetCompression() ? 2 : 1;
300 fUnzippedClusterSizeEst = scale * writeOpts.GetApproxZippedClusterSize();
301}
302
304{
305 CommitCluster();
306 fSink->CommitDataset();
307}
308
309std::unique_ptr<ROOT::Experimental::RNTupleWriter> ROOT::Experimental::RNTupleWriter::Recreate(
310 std::unique_ptr<RNTupleModel> model,
311 std::string_view ntupleName,
312 std::string_view storage,
313 const RNTupleWriteOptions &options)
314{
315 return std::make_unique<RNTupleWriter>(std::move(model), Detail::RPageSink::Create(ntupleName, storage, options));
316}
317
318std::unique_ptr<ROOT::Experimental::RNTupleWriter> ROOT::Experimental::RNTupleWriter::Append(
319 std::unique_ptr<RNTupleModel> model,
320 std::string_view ntupleName,
321 TFile &file,
322 const RNTupleWriteOptions &options)
323{
324 auto sink = std::make_unique<Detail::RPageSinkFile>(ntupleName, file, options);
325 if (options.GetUseBufferedWrite()) {
326 auto bufferedSink = std::make_unique<Detail::RPageSinkBuf>(std::move(sink));
327 return std::make_unique<RNTupleWriter>(std::move(model), std::move(bufferedSink));
328 }
329 return std::make_unique<RNTupleWriter>(std::move(model), std::move(sink));
330}
331
332
334{
335 if (fNEntries == fLastCommitted) return;
336 for (auto& field : *fModel->GetFieldZero()) {
337 field.Flush();
338 field.CommitCluster();
339 }
340 fNBytesCommitted += fSink->CommitCluster(fNEntries);
341 fNBytesFilled += fUnzippedClusterSize;
342
343 // Cap the compression factor at 1000 to prevent overflow of fUnzippedClusterSizeEst
344 const float compressionFactor = std::min(1000.f,
345 static_cast<float>(fNBytesFilled) / static_cast<float>(fNBytesCommitted));
346 fUnzippedClusterSizeEst =
347 compressionFactor * static_cast<float>(fSink->GetWriteOptions().GetApproxZippedClusterSize());
348
349 fLastCommitted = fNEntries;
350 fUnzippedClusterSize = 0;
351}
352
353
354//------------------------------------------------------------------------------
355
356
358 : fOffset(0), fDefaultEntry(std::move(defaultEntry))
359{
360}
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:291
include TDocParser_001 C image html pict1_TDocParser_001 png width
#define R__ASSERT(e)
Definition TError.h:118
char name[80]
Definition TGX11.cxx:110
void SetOnDiskId(DescriptorId_t id)
Definition RField.hxx:248
void ObserveMetrics(RNTupleMetrics &observee)
static std::unique_ptr< RPageSink > Create(std::string_view ntupleName, std::string_view location, const RNTupleWriteOptions &options=RNTupleWriteOptions())
Guess the concrete derived page sink from the file name (location)
static std::unique_ptr< RPageSource > Create(std::string_view ntupleName, std::string_view location, const RNTupleReadOptions &options=RNTupleReadOptions())
Guess the concrete derived page source from the file name (location)
RCollectionNTupleWriter(std::unique_ptr< REntry > defaultEntry)
Definition RNTuple.cxx:357
The REntry is a collection of values in an ntuple corresponding to a complete row in the data set.
Definition REntry.hxx:42
Base class for all ROOT issued exceptions.
Definition RError.hxx:114
static std::string FitString(const std::string &str, int availableSpace)
void Reset() final
Start a new set of tasks.
Definition RNTuple.cxx:48
void AddTask(const std::function< void(void)> &taskFunc) final
Take a callable that represents a task.
Definition RNTuple.cxx:54
void Wait() final
Blocks until all scheduled tasks finished.
Definition RNTuple.cxx:60
The RNTupleModel encapsulates the schema of an ntuple.
RFieldZero * GetFieldZero() const
Common user-tunable settings for reading ntuples.
An RNTuple that is used to read data from storage.
Definition RNTuple.hxx:102
static std::unique_ptr< RNTupleReader > OpenFriends(std::span< ROpenSpec > ntuples)
Open RNTuples as one virtual, horizontally combined ntuple.
Definition RNTuple.cxx:142
RNTupleReader * GetDisplayReader()
Definition RNTuple.cxx:221
static std::unique_ptr< RNTupleReader > Open(std::unique_ptr< RNTupleModel > model, std::string_view ntupleName, std::string_view storage, const RNTupleReadOptions &options=RNTupleReadOptions())
Throws an exception if the model is null.
Definition RNTuple.cxx:125
std::unique_ptr< Detail::RPageSource > fSource
Definition RNTuple.hxx:108
void ConnectModel(const RNTupleModel &model)
Definition RNTuple.cxx:70
RNTupleReader(std::unique_ptr< RNTupleModel > model, std::unique_ptr< Detail::RPageSource > source)
The user imposes an ntuple model, which must be compatible with the model found in the data on storag...
Definition RNTuple.cxx:95
void LoadEntry(NTupleSize_t index)
Analogous to Fill(), fills the default entry of the model.
Definition RNTuple.hxx:237
void PrintInfo(const ENTupleInfo what=ENTupleInfo::kSummary, std::ostream &output=std::cout)
Prints a detailed summary of the ntuple, including a list of fields.
Definition RNTuple.cxx:161
std::unique_ptr< RNTupleModel > fModel
Needs to be destructed before fSource.
Definition RNTuple.hxx:110
void Show(NTupleSize_t index, const ENTupleShowFormat format=ENTupleShowFormat::kCurrentModelJSON, std::ostream &output=std::cout)
Shows the values of the i-th entry/row, starting with 0 for the first entry.
Definition RNTuple.cxx:229
Common user-tunable settings for storing ntuples.
void CommitCluster()
Ensure that the data from the so far seen Fill calls has been written to storage.
Definition RNTuple.cxx:333
NTupleSize_t fUnzippedClusterSizeEst
Estimator of uncompressed cluster size, taking into account the estimated compression ratio.
Definition RNTuple.hxx:372
RNTupleWriter(std::unique_ptr< RNTupleModel > model, std::unique_ptr< Detail::RPageSink > sink)
Throws an exception if the model or the sink is null.
Definition RNTuple.cxx:274
std::size_t fMaxUnzippedClusterSize
Limit for committing cluster no matter the other tunables.
Definition RNTuple.hxx:370
std::unique_ptr< RNTupleModel > fModel
Needs to be destructed before fSink.
Definition RNTuple.hxx:358
Detail::RNTupleMetrics fMetrics
Definition RNTuple.hxx:359
std::unique_ptr< Detail::RPageSink > fSink
Definition RNTuple.hxx:356
static std::unique_ptr< RNTupleWriter > Recreate(std::unique_ptr< RNTupleModel > model, std::string_view ntupleName, std::string_view storage, const RNTupleWriteOptions &options=RNTupleWriteOptions())
Throws an exception if the model is null.
Definition RNTuple.cxx:309
std::unique_ptr< Detail::RPageStorage::RTaskScheduler > fZipTasks
The page sink's parallel page compression scheduler if IMT is on.
Definition RNTuple.hxx:355
static std::unique_ptr< RNTupleWriter > Append(std::unique_ptr< RNTupleModel > model, std::string_view ntupleName, TFile &file, const RNTupleWriteOptions &options=RNTupleWriteOptions())
Throws an exception if the model is null.
Definition RNTuple.cxx:318
Visitor used for a pre-processing run to collect information needed by another visitor class.
Contains settings for printing and prints a summary of an RField instance.
Renders a JSON value corresponding to the field.
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition TFile.h:54
const Int_t n
Definition legend1.C:16
ENTupleInfo
Listing of the different options that can be printed by RNTupleReader::PrintInfo()
Definition RNTuple.hxx:51
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
ENTupleShowFormat
Listing of the different entry output formats of RNTupleReader::Show()
Definition RNTuple.hxx:60
constexpr DescriptorId_t kInvalidDescriptorId
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition TROOT.cxx:558
Definition file.py:1
static const char * what
Definition stlLoader.cc:6
static void output(int code)
Definition gifencode.c:226