Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RDFSnapshotHelpers.cxx
Go to the documentation of this file.
1/**
2 \file RDFSnapshotHelpers.cxx
3 \ingroup dataframe
4 \author Enrico Guiraud, CERN
5 \author Danilo Piparo, CERN
6 \date 2016-12
7 \author Vincenzo Eduardo Padulano
8 \author Stephan Hageboeck
9 \date 2025-06
10*/
11
12/*************************************************************************
13 * Copyright (C) 1995-2025, Rene Brun and Fons Rademakers. *
14 * All rights reserved. *
15 * *
16 * For the licensing terms see $ROOTSYS/LICENSE. *
17 * For the list of contributors see $ROOTSYS/README/CREDITS. *
18 *************************************************************************/
19
21
22#include <ROOT/REntry.hxx>
23#include <ROOT/RNTuple.hxx>
24#include <ROOT/RNTupleDS.hxx>
26#include <ROOT/RTTreeDS.hxx>
28
29#include <TBranchObject.h>
30#include <TClassEdit.h>
31#include <TDictionary.h>
32#include <TDataType.h>
33#include <TFile.h>
34#include <TLeaf.h>
35#include <TTreeReader.h>
36
37namespace {
38
40{
41 if (inputTree) {
42 if (auto *getBranchRes = inputTree->GetBranch(branchName.c_str()))
43 return getBranchRes;
44
45 // try harder
46 if (auto *findBranchRes = inputTree->FindBranch(branchName.c_str()))
47 return findBranchRes;
48 }
49 return nullptr;
50}
51
53 const std::string &outputBranchName, int basketSize, void *address)
54{
55 if (!inputBranch)
56 return;
57 const auto STLKind = TClassEdit::IsSTLCont(inputBranch->GetClassName());
58 if (STLKind == ROOT::ESTLType::kSTLvector || STLKind == ROOT::ESTLType::kROOTRVec)
59 return;
60 // must construct the leaflist for the output branch and create the branch in the output tree
61 const auto *leaf = static_cast<TLeaf *>(inputBranch->GetListOfLeaves()->UncheckedAt(0));
62 if (!leaf)
63 return;
64 const auto bname = leaf->GetName();
65 auto *sizeLeaf = leaf->GetLeafCount();
66 const auto sizeLeafName = sizeLeaf ? std::string(sizeLeaf->GetName()) : std::to_string(leaf->GetLenStatic());
67
68 // We proceed only if branch is a fixed-or-variable-sized array
69 if (sizeLeaf || leaf->GetLenStatic() > 1) {
71 // The output array branch `bname` has dynamic size stored in leaf `sizeLeafName`, but that leaf has not been
72 // added to the output tree yet. However, the size leaf has to be available for the creation of the array
73 // branch to be successful. So we create the size leaf here.
75 // Use the custom basket size when one was requested (basketSize > 0); otherwise keep the basket size of the original size-leaf branch.
76 const auto bufSize = (basketSize > 0) ? basketSize : sizeLeaf->GetBranch()->GetBasketSize();
77 // The null branch address is a placeholder. It will be set when SetBranchesHelper is called for `sizeLeafName`
78 auto *outputBranch = outputTree.Branch(sizeLeafName.c_str(), static_cast<void *>(nullptr),
79 (sizeLeafName + '/' + sizeTypeStr).c_str(), bufSize);
81 }
82
83 const auto btype = leaf->GetTypeName();
85 if (rootbtype == ' ') {
86 Warning("Snapshot",
87 "RDataFrame::Snapshot: could not correctly construct a leaflist for C-style array in column %s. The "
88 "leaf is of type '%s'. This column will not be written out.",
89 bname, btype);
90 return;
91 }
92
93 const auto leaflist = std::string(bname) + "[" + sizeLeafName + "]/" + rootbtype;
94 // Use the custom basket size when one was requested (basketSize > 0); otherwise keep the basket size of the input branch.
95 const auto bufSize = (basketSize > 0) ? basketSize : inputBranch->GetBasketSize();
96 void *addressForBranch = [address]() -> void * {
97 if (address) {
98 // Address here points to a ROOT::RVec<std::byte> coming from RTreeUntypedArrayColumnReader. We know we need
99 // its buffer, so we cast it and extract the address of the buffer
100 auto *rawRVec = reinterpret_cast<ROOT::RVec<std::byte> *>(address);
101 return rawRVec->data();
102 }
103 return nullptr;
104 }();
105 auto *outputBranch = outputTree.Branch(outputBranchName.c_str(), addressForBranch, leaflist.c_str(), bufSize);
106 outputBranch->SetTitle(inputBranch->GetTitle());
108 }
109}
110
111void SetBranchAddress(TBranch *inputBranch, TBranch &outputBranch, void *&outputBranchAddress, bool isCArray,
112 void *valueAddress)
113{
114 const static TClassRef TBOClRef("TBranchObject");
115 if (inputBranch && inputBranch->IsA() == TBOClRef) {
116 outputBranch.SetAddress(reinterpret_cast<void **>(inputBranch->GetAddress()));
117 } else if (outputBranch.IsA() != TBranch::Class()) {
120 } else {
121 void *correctAddress = [valueAddress, isCArray]() -> void * {
122 if (isCArray) {
123 // Address here points to a ROOT::RVec<std::byte> coming from RTreeUntypedArrayColumnReader. We know we
124 // need its buffer, so we cast it and extract the address of the buffer
125 auto *rawRVec = reinterpret_cast<ROOT::RVec<std::byte> *>(valueAddress);
126 return rawRVec->data();
127 }
128 return valueAddress;
129 }();
130 outputBranch.SetAddress(correctAddress);
132 }
133}
134
137 int bufSize)
138{
139 // Logic taken from
140 // TTree::BranchImpRef(
141 // const char* branchname, TClass* ptrClass, EDataType datatype, void* addobj, Int_t bufsize, Int_t splitlevel)
143 if (rootTypeChar == ' ') {
144 Warning("Snapshot",
145 "RDataFrame::Snapshot: could not correctly construct a leaflist for fundamental type in column %s. This "
146 "column will not be written out.",
147 outputBranchName.c_str());
148 return;
149 }
150 std::string leafList{outputBranchName + '/' + rootTypeChar};
151 auto *outputBranch = outputTree.Branch(outputBranchName.c_str(), valueAddress, leafList.c_str(), bufSize);
153}
154
155/// Ensure that the TTree with the resulting snapshot can be written to the target TFile. This means checking that the
156/// TFile can be opened in the mode specified in `opts`, deleting any existing TTrees in case
157/// `opts.fOverwriteIfExists = true`, or throwing an error otherwise.
159 const std::string &fileName)
160{
161 TString fileMode = opts.fMode;
162 fileMode.ToLower();
163 if (fileMode != "update")
164 return;
165
166 // output file opened in "update" mode: must check whether output TTree is already present in file
167 std::unique_ptr<TFile> outFile{TFile::Open(fileName.c_str(), "update")};
168 if (!outFile || outFile->IsZombie())
169 throw std::invalid_argument("Snapshot: cannot open file \"" + fileName + "\" in update mode");
170
171 TObject *outTree = outFile->Get(treeName.c_str());
172 if (outTree == nullptr)
173 return;
174
175 // object called treeName is already present in the file
176 if (opts.fOverwriteIfExists) {
177 if (outTree->InheritsFrom("TTree")) {
178 static_cast<TTree *>(outTree)->Delete("all");
179 } else {
180 outFile->Delete(treeName.c_str());
181 }
182 } else {
183 const std::string msg = "Snapshot: tree \"" + treeName + "\" already present in file \"" + fileName +
184 "\". If you want to delete the original tree and write another, please set "
185 "RSnapshotOptions::fOverwriteIfExists to true.";
186 throw std::invalid_argument(msg);
187 }
188}
189
190/// Ensure that the RNTuple with the resulting snapshot can be written to the target TFile. This means checking that the
191/// TFile can be opened in the mode specified in `opts`, deleting any existing RNTuples in case
192/// `opts.fOverwriteIfExists = true`, or throwing an error otherwise.
194 const std::string &fileName)
195{
196 TString fileMode = opts.fMode;
197 fileMode.ToLower();
198 if (fileMode != "update")
199 return;
200
201 // output file opened in "update" mode: must check whether output RNTuple is already present in file
202 std::unique_ptr<TFile> outFile{TFile::Open(fileName.c_str(), "update")};
203 if (!outFile || outFile->IsZombie())
204 throw std::invalid_argument("Snapshot: cannot open file \"" + fileName + "\" in update mode");
205
206 auto *outNTuple = outFile->Get<ROOT::RNTuple>(ntupleName.c_str());
207
208 if (outNTuple) {
209 if (opts.fOverwriteIfExists) {
210 outFile->Delete((ntupleName + ";*").c_str());
211 return;
212 } else {
213 const std::string msg = "Snapshot: RNTuple \"" + ntupleName + "\" already present in file \"" + fileName +
214 "\". If you want to delete the original ntuple and write another, please set "
215 "the 'fOverwriteIfExists' option to true in RSnapshotOptions.";
216 throw std::invalid_argument(msg);
217 }
218 }
219
220 // Also check if there is any object other than an RNTuple with the provided ntupleName.
221 TObject *outObj = outFile->Get(ntupleName.c_str());
222
223 if (!outObj)
224 return;
225
226 // An object called ntupleName is already present in the file.
227 if (opts.fOverwriteIfExists) {
228 if (auto tree = dynamic_cast<TTree *>(outObj)) {
229 tree->Delete("all");
230 } else {
231 outFile->Delete((ntupleName + ";*").c_str());
232 }
233 } else {
234 const std::string msg = "Snapshot: object \"" + ntupleName + "\" already present in file \"" + fileName +
235 "\". If you want to delete the original object and write a new RNTuple, please set "
236 "the 'fOverwriteIfExists' option to true in RSnapshotOptions.";
237 throw std::invalid_argument(msg);
238 }
239}
240
242 int basketSize, const std::string &inputBranchName, const std::string &outputBranchName,
243 const std::type_info &valueTypeID, void *valueAddress, TBranch *&actionHelperBranchPtr,
245{
246
248
249 // Respect the original bufsize and splitlevel arguments
250 // In particular, by keeping splitlevel equal to 0 if this was the case for `inputBranch`, we avoid
251 // writing garbage when unsplit objects cannot be written as split objects (e.g. in case of a polymorphic
252 // TObject branch, see https://bit.ly/2EjLMId ).
253 // A user-provided basket size value takes precedence.
254 const auto bufSize = (basketSize > 0) ? basketSize : (inputBranch ? inputBranch->GetBasketSize() : 32000);
255 const auto splitLevel = inputBranch ? inputBranch->GetSplitLevel() : 99;
256
258 // The output branch was already created, we just need to (re)set its address
261 return;
262 }
263
265 if (dynamic_cast<TDataType *>(dictionary)) {
266 // Branch of fundamental type
268 return;
269 }
270
271 if (!isDefine) {
272 // Cases where we need a leaflist (e.g. C-style arrays)
273 // We only enter this code path if the input value does not come from a Define/Redefine. In those cases, it is
274 // not allowed to create a column of C-style array type, so that can't happen when writing the TTree. This is
275 // currently what prevents writing the wrong branch output type in a scenario where the input branch of the TTree
276 // is a C-style array and then the user is Redefining it with some other type (e.g. a ROOT::RVec).
278 }
280 // A branch was created in the previous function call
282 if (valueAddress) {
283 // valueAddress here points to a ROOT::RVec<std::byte> coming from RTreeUntypedArrayColumnReader. We know we
284 // need its buffer, so we cast it and extract the address of the buffer
285 auto *rawRVec = reinterpret_cast<ROOT::RVec<std::byte> *>(valueAddress);
287 }
288 return;
289 }
290
291 if (auto *classPtr = dynamic_cast<TClass *>(dictionary)) {
293 // Case of unsplit object with polymorphic type
294 if (inputBranch && dynamic_cast<TBranchObject *>(inputBranch) && valueAddress)
296 inputBranch->GetAddress(), bufSize, splitLevel);
297 // General case, with valid address
298 else if (valueAddress)
302 // No value was passed, we're just creating a hollow branch to populate the dataset schema
303 else
304 outputBranch = outputTree.Branch(outputBranchName.c_str(), classPtr->GetName(), nullptr, bufSize);
306 return;
307 }
308
309 // We are not aware of other cases
310 throw std::logic_error(
311 "RDataFrame::Snapshot: something went wrong when creating a TTree branch, please report this as a bug.");
312}
313} // namespace
314
316{
317 auto it = std::find(fNames.begin(), fNames.end(), name);
318 if (it == fNames.end())
319 return nullptr;
320 return fBranches[std::distance(fNames.begin(), it)];
321}
322
324{
325 if (auto it = std::find(fNames.begin(), fNames.end(), name); it != fNames.end())
326 return fIsCArray[std::distance(fNames.begin(), it)];
327 return false;
328}
329
330void ROOT::Internal::RDF::RBranchSet::Insert(const std::string &name, TBranch *address, bool isCArray)
331{
332 if (address == nullptr) {
333 throw std::logic_error("Trying to insert a null branch address.");
334 }
335 if (std::find(fBranches.begin(), fBranches.end(), address) != fBranches.end()) {
336 throw std::logic_error("Trying to insert a branch address that's already present.");
337 }
338 if (std::find(fNames.begin(), fNames.end(), name) != fNames.end()) {
339 throw std::logic_error("Trying to insert a branch name that's already present.");
340 }
341 fNames.emplace_back(name);
342 fBranches.emplace_back(address);
343 fIsCArray.push_back(isCArray);
344}
345
347{
348 fBranches.clear();
349 fNames.clear();
350 fIsCArray.clear();
351}
352
354{
355 std::vector<TBranch *> branchesWithNullAddress;
356 std::copy_if(fBranches.begin(), fBranches.end(), std::back_inserter(branchesWithNullAddress),
357 [](TBranch *b) { return b->GetAddress() == nullptr; });
358
359 if (branchesWithNullAddress.empty())
360 return;
361
362 // otherwise build error message and throw
363 std::vector<std::string> missingBranchNames;
365 std::back_inserter(missingBranchNames), [](TBranch *b) { return b->GetName(); });
366 std::string msg = "RDataFrame::Snapshot:";
367 if (missingBranchNames.size() == 1) {
368 msg += " branch " + missingBranchNames[0] +
369 " is needed as it provides the size for one or more branches containing dynamically sized arrays, but "
370 "it is";
371 } else {
372 msg += " branches ";
373 for (const auto &bName : missingBranchNames)
374 msg += bName + ", ";
375 msg.resize(msg.size() - 2); // remove last ", "
376 msg += " are needed as they provide the size of other branches containing dynamically sized arrays, but they are";
377 }
378 msg += " not part of the set of branches that are being written out.";
379 throw std::runtime_error(msg);
380}
381
383 std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames,
384 const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector<bool> &&isDefine,
386 const std::vector<const std::type_info *> &colTypeIDs)
387 : fFileName(filename),
388 fDirName(dirname),
389 fTreeName(treename),
390 fOptions(options),
391 fInputBranchNames(vbnames),
392 fOutputBranchNames(ReplaceDotWithUnderscore(bnames)),
393 fBranches(vbnames.size(), nullptr),
394 fBranchAddresses(vbnames.size(), nullptr),
395 fIsDefine(std::move(isDefine)),
396 fOutputLoopManager(loopManager),
397 fInputLoopManager(inputLM),
398 fInputColumnTypeIDs(colTypeIDs)
399{
401}
402
403// Define special member methods here where the definition of all the data member types is available
407 ROOT::Internal::RDF::UntypedSnapshotTTreeHelper &&) noexcept = default;
408
410{
411 if (!fTreeName.empty() /*not moved from*/ && !fOutputFile /* did not run */ && fOptions.fLazy) {
412 const auto fileOpenMode = [&]() {
413 TString checkupdate = fOptions.fMode;
414 checkupdate.ToLower();
415 return checkupdate == "update" ? "updated" : "created";
416 }();
417 Warning("Snapshot",
418 "A lazy Snapshot action was booked but never triggered. The tree '%s' in output file '%s' was not %s. "
419 "In case it was desired instead, remember to trigger the Snapshot operation, by storing "
420 "its result in a variable and for example calling the GetValue() method on it.",
421 fTreeName.c_str(), fFileName.c_str(), fileOpenMode);
422 }
423}
424
426{
427 // We ask the input RLoopManager if it has a TTree. We cannot rely on getting this information when constructing
428 // this action helper, since the TTree might change e.g. when ChangeSpec is called in-between distributed tasks.
429 if (auto treeDS = dynamic_cast<ROOT::Internal::RDF::RTTreeDS *>(fInputLoopManager->GetDataSource()))
430 fInputTree = treeDS->GetTree();
431 fBranchAddressesNeedReset = true;
432}
433
434void ROOT::Internal::RDF::UntypedSnapshotTTreeHelper::Exec(unsigned int, const std::vector<void *> &values)
435{
436 if (!fBranchAddressesNeedReset) {
437 UpdateCArraysPtrs(values);
438 } else {
439 SetBranches(values);
440 fBranchAddressesNeedReset = false;
441 }
442
443 fOutputTree->Fill();
444}
445
447{
448 // This code deals with branches which hold C arrays of variable size. It can happen that the buffers
449 // associated with those branches are re-allocated. As a result, the value of the pointer can change,
450 // leaving an invalid pointer associated with the branch of the output tree.
451 // With this code, we set the value of the pointer in the output branch anew when needed.
452 assert(values.size() == fBranches.size());
453 auto nValues = values.size();
454 for (decltype(nValues) i{}; i < nValues; i++) {
455 if (fBranches[i] && fOutputBranches.IsCArray(fOutputBranchNames[i])) {
456 // valueAddress here points to a ROOT::RVec<std::byte> coming from RTreeUntypedArrayColumnReader. We know we
457 // need its buffer, so we cast it and extract the address of the buffer
458 auto *rawRVec = reinterpret_cast<ROOT::RVec<std::byte> *>(values[i]);
459 if (auto *data = rawRVec->data(); fBranchAddresses[i] != data) {
460 // reset the branch address
461 fBranches[i]->SetAddress(data);
462 fBranchAddresses[i] = data;
463 }
464 }
465 }
466}
467
469{
470 // create branches in output tree
471 auto nValues = values.size();
472 for (decltype(nValues) i{}; i < nValues; i++) {
473 SetBranchesHelper(fInputTree, *fOutputTree, fOutputBranches, fOptions.fBasketSize, fInputBranchNames[i],
474 fOutputBranchNames[i], *fInputColumnTypeIDs[i], values[i], fBranches[i], fBranchAddresses[i],
475 fIsDefine[i]);
476 }
477 fOutputBranches.AssertNoNullBranchAddresses();
478}
479
481{
482 void *dummyValueAddress{};
484 void *dummyTBranchAddress{};
486 auto nBranches = fInputBranchNames.size();
487 for (decltype(nBranches) i{}; i < nBranches; i++) {
488 SetBranchesHelper(inputTree, outputTree, outputBranches, fOptions.fBasketSize, fInputBranchNames[i],
489 fOutputBranchNames[i], *fInputColumnTypeIDs[i], dummyValueAddress, dummyTBranchPtr,
490 dummyTBranchAddress, fIsDefine[i]);
491 }
492}
493
495{
496 fOutputFile.reset(
497 TFile::Open(fFileName.c_str(), fOptions.fMode.c_str(), /*ftitle=*/"",
498 ROOT::CompressionSettings(fOptions.fCompressionAlgorithm, fOptions.fCompressionLevel)));
499 if (!fOutputFile)
500 throw std::runtime_error("Snapshot: could not create output file " + fFileName);
501
502 TDirectory *outputDir = fOutputFile.get();
503 if (!fDirName.empty()) {
504 TString checkupdate = fOptions.fMode;
505 checkupdate.ToLower();
506 if (checkupdate == "update")
507 outputDir = fOutputFile->mkdir(fDirName.c_str(), "", true); // do not overwrite existing directory
508 else
509 outputDir = fOutputFile->mkdir(fDirName.c_str());
510 }
511
512 fOutputTree = std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/outputDir);
513
514 if (fOptions.fAutoFlush)
515 fOutputTree->SetAutoFlush(fOptions.fAutoFlush);
516}
517
519{
520 assert(fOutputTree != nullptr);
521 assert(fOutputFile != nullptr);
522
523 // There were no entries to fill the TTree with (either the input TTree was empty or no event passed after
524 // filtering). We have already created an empty TTree, now also create the branches to preserve the schema
525 if (fOutputTree->GetEntries() == 0) {
526 SetEmptyBranches(fInputTree, *fOutputTree);
527 }
528 // use AutoSave to flush TTree contents because TTree::Write writes in gDirectory, not in fDirectory
529 fOutputTree->AutoSave("flushbaskets");
530 // must destroy the TTree first, otherwise TFile will delete it too leading to a double delete
531 fOutputTree.reset();
532 fOutputFile->Close();
533
534 // Now connect the data source to the loop manager so it can be used for further processing
535 auto fullTreeName = fDirName.empty() ? fTreeName : fDirName + '/' + fTreeName;
536 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::Internal::RDF::RTTreeDS>(fullTreeName, fFileName));
537}
538
539/**
540 * \brief Create a new UntypedSnapshotTTreeHelper with a different output file name
541 *
542 * \param newName A type-erased string with the output file name
543 * \return UntypedSnapshotTTreeHelper
544 *
545 * This MakeNew implementation is tied to the cloning feature of actions
546 * of the computation graph. In particular, cloning a Snapshot node usually
547 * also involves changing the name of the output file, otherwise the cloned
548 * Snapshot would overwrite the same file.
549 */
552{
553 const std::string finalName = *reinterpret_cast<const std::string *>(newName);
555 fDirName,
556 fTreeName,
557 fInputBranchNames,
558 fOutputBranchNames,
559 fOptions,
560 std::vector<bool>(fIsDefine),
561 fOutputLoopManager,
562 fInputLoopManager,
563 fInputColumnTypeIDs};
564}
565
567 unsigned int nSlots, std::string_view filename, std::string_view dirname, std::string_view treename,
568 const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options,
570 const std::vector<const std::type_info *> &colTypeIDs)
571 : fNSlots(nSlots),
572 fOutputFiles(fNSlots),
573 fOutputTrees(fNSlots),
574 fBranchAddressesNeedReset(fNSlots, 1),
575 fInputTrees(fNSlots),
576 fBranches(fNSlots, std::vector<TBranch *>(vbnames.size(), nullptr)),
577 fBranchAddresses(fNSlots, std::vector<void *>(vbnames.size(), nullptr)),
578 fOutputBranches(fNSlots),
579 fFileName(filename),
580 fDirName(dirname),
581 fTreeName(treename),
582 fOptions(options),
583 fOutputBranchNames(ReplaceDotWithUnderscore(bnames)),
584 fOutputLoopManager(loopManager),
585 fInputLoopManager(inputLM),
586 fInputBranchNames(vbnames),
587 fInputColumnTypeIDs(colTypeIDs),
588 fIsDefine(std::move(isDefine))
589{
591}
592
593// Define special member methods here where the definition of all the data member types is available
598
600{
601 if (!fTreeName.empty() /*not moved from*/ && fOptions.fLazy && !fOutputFiles.empty() &&
602 std::all_of(fOutputFiles.begin(), fOutputFiles.end(), [](const auto &f) { return !f; }) /* never run */) {
603 const auto fileOpenMode = [&]() {
604 TString checkupdate = fOptions.fMode;
605 checkupdate.ToLower();
606 return checkupdate == "update" ? "updated" : "created";
607 }();
608 Warning("Snapshot",
609 "A lazy Snapshot action was booked but never triggered. The tree '%s' in output file '%s' was not %s. "
610 "In case it was desired instead, remember to trigger the Snapshot operation, by storing "
611 "its result in a variable and for example calling the GetValue() method on it.",
612 fTreeName.c_str(), fFileName.c_str(), fileOpenMode);
613 }
614}
615
617{
618 ::TDirectory::TContext c; // do not let tasks change the thread-local gDirectory
619 if (!fOutputFiles[slot]) {
620 // first time this thread executes something, let's create a TBufferMerger output directory
621 fOutputFiles[slot] = fMerger->GetFile();
622 }
623 TDirectory *treeDirectory = fOutputFiles[slot].get();
624 if (!fDirName.empty()) {
625 // call returnExistingDirectory=true since MT can end up making this call multiple times
626 treeDirectory = fOutputFiles[slot]->mkdir(fDirName.c_str(), "", true);
627 }
628 // re-create output tree as we need to create its branches again, with new input variables
629 // TODO we could instead create the output tree and its branches, change addresses of input variables in each task
630 fOutputTrees[slot] =
631 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/treeDirectory);
632 fOutputTrees[slot]->SetBit(TTree::kEntriesReshuffled);
633 // TODO can be removed when RDF supports interleaved TBB task execution properly, see ROOT-10269
634 fOutputTrees[slot]->SetImplicitMT(false);
635 if (fOptions.fAutoFlush)
636 fOutputTrees[slot]->SetAutoFlush(fOptions.fAutoFlush);
637 if (r) {
638 // We could be getting a task-local TTreeReader from the TTreeProcessorMT.
639 fInputTrees[slot] = r->GetTree();
640 } else if (auto treeDS = dynamic_cast<ROOT::Internal::RDF::RTTreeDS *>(fInputLoopManager->GetDataSource())) {
641 fInputTrees[slot] = treeDS->GetTree();
642 }
643 fBranchAddressesNeedReset[slot] = 1; // reset first event flag for this slot
644}
645
647{
648 if (fOutputTrees[slot]->GetEntries() > 0)
649 fOutputFiles[slot]->Write();
650 // clear now to avoid concurrent destruction of output trees and input tree (which has them listed as fClones)
651 fOutputTrees[slot].reset(nullptr);
652 fOutputBranches[slot].Clear();
653}
654
655void ROOT::Internal::RDF::UntypedSnapshotTTreeHelperMT::Exec(unsigned int slot, const std::vector<void *> &values)
656{
657 if (fBranchAddressesNeedReset[slot] == 0) {
658 UpdateCArraysPtrs(slot, values);
659 } else {
660 SetBranches(slot, values);
661 fBranchAddressesNeedReset[slot] = 0;
662 }
663 fOutputTrees[slot]->Fill();
664 auto entries = fOutputTrees[slot]->GetEntries();
665 auto autoFlush = fOutputTrees[slot]->GetAutoFlush();
666 if ((autoFlush > 0) && (entries % autoFlush == 0))
667 fOutputFiles[slot]->Write();
668}
669
671 const std::vector<void *> &values)
672{
673 // This code deals with branches which hold C arrays of variable size. It can happen that the buffers
674 // associated with those branches are re-allocated. As a result, the value of the pointer can change,
675 // leaving an invalid pointer associated with the branch of the output tree.
676 // With this code, we set the value of the pointer in the output branch anew when needed.
677 assert(values.size() == fBranches[slot].size());
678 auto nValues = values.size();
679 for (decltype(nValues) i{}; i < nValues; i++) {
680 if (fBranches[slot][i] && fOutputBranches[slot].IsCArray(fOutputBranchNames[i])) {
681 // valueAddress here points to a ROOT::RVec<std::byte> coming from RTreeUntypedArrayColumnReader. We know we
682 // need its buffer, so we cast it and extract the address of the buffer
683 auto *rawRVec = reinterpret_cast<ROOT::RVec<std::byte> *>(values[i]);
684 if (auto *data = rawRVec->data(); fBranchAddresses[slot][i] != data) {
685 // reset the branch address
686 fBranches[slot][i]->SetAddress(data);
687 fBranchAddresses[slot][i] = data;
688 }
689 }
690 }
691}
692
694 const std::vector<void *> &values)
695{
696 // create branches in output tree
697 auto nValues = values.size();
698 for (decltype(nValues) i{}; i < nValues; i++) {
699 SetBranchesHelper(fInputTrees[slot], *fOutputTrees[slot], fOutputBranches[slot], fOptions.fBasketSize,
700 fInputBranchNames[i], fOutputBranchNames[i], *fInputColumnTypeIDs[i], values[i],
701 fBranches[slot][i], fBranchAddresses[slot][i], fIsDefine[i]);
702 }
703 fOutputBranches[slot].AssertNoNullBranchAddresses();
704}
705
707{
708 void *dummyValueAddress{};
710 void *dummyTBranchAddress{};
712 auto nBranches = fInputBranchNames.size();
713 for (decltype(nBranches) i{}; i < nBranches; i++) {
714 SetBranchesHelper(inputTree, outputTree, outputBranches, fOptions.fBasketSize, fInputBranchNames[i],
715 fOutputBranchNames[i], *fInputColumnTypeIDs[i], dummyValueAddress, dummyTBranchPtr,
716 dummyTBranchAddress, fIsDefine[i]);
717 }
718}
719
721{
722 const auto cs = ROOT::CompressionSettings(fOptions.fCompressionAlgorithm, fOptions.fCompressionLevel);
723 auto outFile =
724 std::unique_ptr<TFile>{TFile::Open(fFileName.c_str(), fOptions.fMode.c_str(), /*ftitle=*/fFileName.c_str(), cs)};
725 if (!outFile)
726 throw std::runtime_error("Snapshot: could not create output file " + fFileName);
727 fOutputFile = outFile.get();
728 fMerger = std::make_unique<ROOT::TBufferMerger>(std::move(outFile));
729}
730
732{
733
734 for (auto &file : fOutputFiles) {
735 if (file) {
736 file->Write();
737 file->Close();
738 }
739 }
740
741 // If there were no entries to fill the TTree with (either the input TTree was empty or no event passed after
742 // filtering), create an empty TTree in the output file and create the branches to preserve the schema
743 auto fullTreeName = fDirName.empty() ? fTreeName : fDirName + '/' + fTreeName;
744 assert(fOutputFile && "Missing output file in Snapshot finalization.");
745 if (!fOutputFile->Get(fullTreeName.c_str())) {
746
747 // First find in which directory we need to write the output TTree
748 TDirectory *treeDirectory = fOutputFile;
749 if (!fDirName.empty()) {
750 treeDirectory = fOutputFile->mkdir(fDirName.c_str(), "", true);
751 }
753
754 // Create the output TTree and create the user-requested branches
755 auto outTree =
756 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/treeDirectory);
757 TTree *inputTree{};
758 if (auto treeDS = dynamic_cast<ROOT::Internal::RDF::RTTreeDS *>(fInputLoopManager->GetDataSource()))
759 inputTree = treeDS->GetTree();
760 SetEmptyBranches(inputTree, *outTree);
761
762 fOutputFile->Write();
763 }
764
765 // flush all buffers to disk by destroying the TBufferMerger
766 fOutputFiles.clear();
767 fMerger.reset();
768
769 // Now connect the data source to the loop manager so it can be used for further processing
770 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::Internal::RDF::RTTreeDS>(fullTreeName, fFileName));
771}
772
773/**
774 * \brief Create a new UntypedSnapshotTTreeHelperMT with a different output file name
775 *
776 * \param newName A type-erased string with the output file name
777 * \return UntypedSnapshotTTreeHelperMT
778 *
779 * This MakeNew implementation is tied to the cloning feature of actions
780 * of the computation graph. In particular, cloning a Snapshot node usually
781 * also involves changing the name of the output file, otherwise the cloned
782 * Snapshot would overwrite the same file.
783 */
786{
787 const std::string finalName = *reinterpret_cast<const std::string *>(newName);
789 finalName,
790 fDirName,
791 fTreeName,
792 fInputBranchNames,
793 fOutputBranchNames,
794 fOptions,
795 std::vector<bool>(fIsDefine),
796 fOutputLoopManager,
797 fInputLoopManager,
798 fInputColumnTypeIDs};
799}
800
802 std::string_view filename, std::string_view dirname, std::string_view ntuplename, const ColumnNames_t &vfnames,
805 const std::vector<const std::type_info *> &colTypeIDs)
806 : fFileName(filename),
807 fDirName(dirname),
808 fNTupleName(ntuplename),
809 fOutputFile(nullptr),
810 fOptions(options),
811 fInputLoopManager(inputLM),
812 fOutputLoopManager(outputLM),
813 fInputFieldNames(vfnames),
814 fOutputFieldNames(ReplaceDotWithUnderscore(fnames)),
815 fWriter(nullptr),
816 fOutputEntry(nullptr),
817 fIsDefine(std::move(isDefine)),
818 fInputColumnTypeIDs(colTypeIDs)
819{
821}
822
823// Define special member methods here where the definition of all the data member types is available
828
830{
831 if (!fNTupleName.empty() && !fOutputLoopManager->GetDataSource() && fOptions.fLazy)
832 Warning("Snapshot", "A lazy Snapshot action was booked but never triggered.");
833}
834
835void ROOT::Internal::RDF::UntypedSnapshotRNTupleHelper::Exec(unsigned int /* slot */, const std::vector<void *> &values)
836{
837 assert(values.size() == fOutputFieldNames.size());
838 for (decltype(values.size()) i = 0; i < values.size(); i++) {
839 fOutputEntry->BindRawPtr(fOutputFieldNames[i], values[i]);
840 }
841 fWriter->Fill();
842}
843
845{
846 auto model = ROOT::RNTupleModel::Create();
847 auto nFields = fOutputFieldNames.size();
848 for (decltype(nFields) i = 0; i < nFields; i++) {
849 // Need to retrieve the type of every field to create as a string
850 // If the input type for a field does not have RTTI, internally we store it as the tag UseNativeDataType. When
851 // that is detected, we need to ask the data source which is the type name based on the on-disk information.
852 const auto typeName = *fInputColumnTypeIDs[i] == typeid(ROOT::Internal::RDF::UseNativeDataType)
853 ? ROOT::Internal::RDF::GetTypeNameWithOpts(*fInputLoopManager->GetDataSource(),
854 fInputFieldNames[i], fOptions.fVector2RVec)
855 : ROOT::Internal::RDF::TypeID2TypeName(*fInputColumnTypeIDs[i]);
856 model->AddField(ROOT::RFieldBase::Create(fOutputFieldNames[i], typeName).Unwrap());
857 }
858 fOutputEntry = &model->GetDefaultEntry();
859
861 writeOptions.SetCompression(fOptions.fCompressionAlgorithm, fOptions.fCompressionLevel);
862
863 fOutputFile.reset(TFile::Open(fFileName.c_str(), fOptions.fMode.c_str()));
864 if (!fOutputFile)
865 throw std::runtime_error("Snapshot: could not create output file " + fFileName);
866
867 TDirectory *outputDir = fOutputFile.get();
868 if (!fDirName.empty()) {
869 TString checkupdate = fOptions.fMode;
870 checkupdate.ToLower();
871 if (checkupdate == "update")
872 outputDir = fOutputFile->mkdir(fDirName.c_str(), "", true); // do not overwrite existing directory
873 else
874 outputDir = fOutputFile->mkdir(fDirName.c_str());
875 }
876
877 fWriter = ROOT::RNTupleWriter::Append(std::move(model), fNTupleName, *outputDir, writeOptions);
878}
879
881{
882 fWriter.reset();
883 // We can now set the data source of the loop manager for the RDataFrame that is returned by the Snapshot call.
884 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::RDF::RNTupleDS>(fDirName + "/" + fNTupleName, fFileName));
885}
886
887/**
888 * Create a new UntypedSnapshotRNTupleHelper with a different output file name.
889 *
890 * \param[in] newName A type-erased string with the output file name
891 * \return UntypedSnapshotRNTupleHelper
892 *
893 * This MakeNew implementation is tied to the cloning feature of actions
894 * of the computation graph. In particular, cloning a Snapshot node usually
895 * also involves changing the name of the output file, otherwise the cloned
896 * Snapshot would overwrite the same file.
897 */
900{
901 const std::string finalName = *reinterpret_cast<const std::string *>(newName);
902 return UntypedSnapshotRNTupleHelper{finalName, fDirName, fNTupleName,
903 fInputFieldNames, fOutputFieldNames, fOptions,
904 fInputLoopManager, fOutputLoopManager, std::vector<bool>(fIsDefine),
905 fInputColumnTypeIDs};
906}
#define b(i)
Definition RSha256.hxx:100
#define f(i)
Definition RSha256.hxx:104
#define c(i)
Definition RSha256.hxx:101
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
Definition TError.cxx:252
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char filename
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
char name[80]
Definition TGX11.cxx:110
static TBranch * SearchForBranch(TTree *tree, const char *name)
Definition TTreePyz.cxx:50
The head node of a RDF computation graph.
void Insert(const std::string &name, TBranch *address, bool isCArray=false)
std::vector< TBranch * > fBranches
std::vector< std::string > fNames
bool IsCArray(const std::string &name) const
TBranch * Get(const std::string &name) const
void Exec(unsigned int, const std::vector< void * > &values)
UntypedSnapshotRNTupleHelper MakeNew(void *newName)
Create a new UntypedSnapshotRNTupleHelper with a different output file name.
UntypedSnapshotRNTupleHelper(std::string_view filename, std::string_view dirname, std::string_view ntuplename, const ColumnNames_t &vfnames, const ColumnNames_t &fnames, const RSnapshotOptions &options, ROOT::Detail::RDF::RLoopManager *inputLM, ROOT::Detail::RDF::RLoopManager *outputLM, std::vector< bool > &&isDefine, const std::vector< const std::type_info * > &colTypeIDs)
UntypedSnapshotTTreeHelperMT(unsigned int nSlots, std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector< bool > &&isDefine, ROOT::Detail::RDF::RLoopManager *loopManager, ROOT::Detail::RDF::RLoopManager *inputLM, const std::vector< const std::type_info * > &colTypeIDs)
void UpdateCArraysPtrs(unsigned int slot, const std::vector< void * > &values)
void SetEmptyBranches(TTree *inputTree, TTree &outputTree)
UntypedSnapshotTTreeHelperMT MakeNew(void *newName, std::string_view="nominal")
Create a new UntypedSnapshotTTreeHelperMT with a different output file name.
void InitTask(TTreeReader *r, unsigned int slot)
void Exec(unsigned int slot, const std::vector< void * > &values)
void SetBranches(unsigned int slot, const std::vector< void * > &values)
UntypedSnapshotTTreeHelper MakeNew(void *newName, std::string_view="nominal")
Create a new UntypedSnapshotTTreeHelper with a different output file name.
void SetEmptyBranches(TTree *inputTree, TTree &outputTree)
void SetBranches(const std::vector< void * > &values)
void Exec(unsigned int, const std::vector< void * > &values)
UntypedSnapshotTTreeHelper(std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector< bool > &&isDefine, ROOT::Detail::RDF::RLoopManager *loopManager, ROOT::Detail::RDF::RLoopManager *inputLM, const std::vector< const std::type_info * > &colTypeIDs)
void UpdateCArraysPtrs(const std::vector< void * > &values)
static RResult< std::unique_ptr< RFieldBase > > Create(const std::string &fieldName, const std::string &typeName, const ROOT::RCreateFieldOptions &options, const ROOT::RNTupleDescriptor *desc, ROOT::DescriptorId_t fieldId)
Factory method to resurrect a field from the stored on-disk type information.
static std::unique_ptr< RNTupleModel > Create()
Common user-tunable settings for storing RNTuples.
static std::unique_ptr< RNTupleWriter > Append(std::unique_ptr< ROOT::RNTupleModel > model, std::string_view ntupleName, TDirectory &fileOrDirectory, const ROOT::RNTupleWriteOptions &options=ROOT::RNTupleWriteOptions())
Creates an RNTupleWriter that writes into an existing TFile or TDirectory, without overwriting its content.
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:67
const_iterator begin() const
const_iterator end() const
A "std::vector"-like collection of values implementing handy operation to analyse them.
Definition RVec.hxx:1525
A Branch for the case of an object.
A TTree is a list of TBranches.
Definition TBranch.h:93
static TClass * Class()
TClassRef is used to implement a permanent reference to a TClass object.
Definition TClassRef.h:29
TClass instances represent classes, structs and namespaces in the ROOT type system.
Definition TClass.h:84
Basic data type descriptor (datatype information is obtained from CINT).
Definition TDataType.h:44
Int_t GetType() const
Definition TDataType.h:68
static TDictionary * GetDictionary(const char *name)
Retrieve the type (class, fundamental type, typedef etc) named "name".
TDirectory::TContext keeps track and restore the current directory.
Definition TDirectory.h:89
Describe directory structure in memory.
Definition TDirectory.h:45
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition TFile.cxx:3765
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
Definition TLeaf.h:57
Mother of all ROOT objects.
Definition TObject.h:41
Basic string class.
Definition TString.h:138
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
Definition TTreeReader.h:46
A TTree represents a columnar dataset.
Definition TTree.h:89
@ kEntriesReshuffled
If set, signals that this TTree is the output of the processing of another TTree, and the entries are reshuffled with respect to the original TTree.
Definition TTree.h:295
std::vector< std::string > ReplaceDotWithUnderscore(const std::vector< std::string > &columnNames)
Replace occurrences of '.' with '_' in each of the given column names.
Definition RDFUtils.cxx:397
char TypeName2ROOTTypeName(const std::string &b)
Convert type name (e.g. "Float_t") to the corresponding ROOT type code character (e.g. 'F').
Definition RDFUtils.cxx:342
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info An empty string is returned in case of failure...
Definition RDFUtils.cxx:178
std::string GetTypeNameWithOpts(const ROOT::RDF::RDataSource &ds, std::string_view colName, bool vector2RVec)
Definition RDFUtils.cxx:627
char TypeID2ROOTTypeName(const std::type_info &tid)
Definition RDFUtils.cxx:206
TBranch * CallBranchImp(TTree &tree, const char *branchname, TClass *ptrClass, void *addobj, Int_t bufsize=32000, Int_t splitlevel=99)
Definition TTree.cxx:10087
TBranch * CallBranchImpRef(TTree &tree, const char *branchname, TClass *ptrClass, EDataType datatype, void *addobj, Int_t bufsize=32000, Int_t splitlevel=99)
Definition TTree.cxx:10081
std::vector< std::string > ColumnNames_t
Namespace for new ROOT classes and functions.
@ kROOTRVec
Definition ESTLType.h:46
@ kSTLvector
Definition ESTLType.h:30
int CompressionSettings(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
ROOT::ESTLType STLKind(std::string_view type)
Converts STL container name to number.
ROOT::ESTLType IsSTLCont(std::string_view type)
type : type name: vector<list<classA,allocator>,allocator> result: 0 : not stl container code of cont...
Tag to let data sources use the native data type when creating a column reader.
Definition Utils.hxx:344
A collection of options to steer the creation of the dataset on file.