Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RDFSnapshotHelpers.cxx
Go to the documentation of this file.
1/**
2 \file RDFSnapshotHelpers.cxx
3 \ingroup dataframe
4 \author Enrico Guiraud, CERN
5 \author Danilo Piparo, CERN
6 \date 2016-12
7 \author Vincenzo Eduardo Padulano
8 \author Stephan Hageboeck
9 \date 2025-06
10*/
11
12/*************************************************************************
13 * Copyright (C) 1995-2025, Rene Brun and Fons Rademakers. *
14 * All rights reserved. *
15 * *
16 * For the licensing terms see $ROOTSYS/LICENSE. *
17 * For the list of contributors see $ROOTSYS/README/CREDITS. *
18 *************************************************************************/
19
21
22#include <ROOT/REntry.hxx>
23#include <ROOT/RFieldToken.hxx>
24#include <ROOT/RNTuple.hxx>
25#include <ROOT/RNTupleDS.hxx>
28#include <ROOT/RTTreeDS.hxx>
30
31#include <TBranchObject.h>
32#include <TClassEdit.h>
33#include <TDictionary.h>
34#include <TDataType.h>
35#include <TFile.h>
36#include <TLeaf.h>
37#include <TTreeReader.h>
38
39#include <algorithm>
40#include <type_traits>
41#include <utility>
42
44// Maintaining the following allows for faster vector resize: std::vector can only move (instead of copy) its
// elements when it reallocates if their move operations are declared non-throwing.
45static_assert(std::is_nothrow_move_assignable_v<RBranchData>);
46static_assert(std::is_nothrow_move_constructible_v<RBranchData>);
47
48namespace {
49
// Check that every output branch in `branches` has a non-null address.
// Returns normally when that is the case; otherwise throws std::runtime_error with a message that lists the
// names of the offending branches.
// NOTE(review): fOutputBranch is dereferenced without a null check, so callers presumably guarantee it is set.
50void AssertNoNullBranchAddresses(const std::vector<RBranchData> &branches)
51{
52 std::vector<TBranch *> branchesWithNullAddress;
53 for (const auto &branchData : branches) {
54 if (branchData.fOutputBranch->GetAddress() == nullptr)
55 branchesWithNullAddress.push_back(branchData.fOutputBranch);
56 }
57
58 if (branchesWithNullAddress.empty())
59 return;
60
61 // otherwise build error message and throw
62 std::vector<std::string> missingBranchNames;
// NOTE(review): the line that opens the call ending on the next line is not visible in this extract.
64 std::back_inserter(missingBranchNames), [](TBranch *b) { return b->GetName(); });
65 std::string msg = "RDataFrame::Snapshot:";
// The wording differs between exactly one and several missing branches.
66 if (missingBranchNames.size() == 1) {
67 msg += " branch " + missingBranchNames[0] +
68 " is needed as it provides the size for one or more branches containing dynamically sized arrays, but "
69 "it is";
70 } else {
71 msg += " branches ";
72 for (const auto &bName : missingBranchNames)
73 msg += bName + ", ";
74 msg.resize(msg.size() - 2); // remove last ", "
75 msg += " are needed as they provide the size of other branches containing dynamically sized arrays, but they are";
76 }
77 msg += " not part of the set of branches that are being written out.";
78 throw std::runtime_error(msg);
79}
80
81TBranch *SearchForBranch(TTree *inputTree, const std::string &branchName)
82{
83 if (inputTree) {
84 if (auto *getBranchRes = inputTree->GetBranch(branchName.c_str()))
85 return getBranchRes;
86
87 // try harder
88 if (auto *findBranchRes = inputTree->FindBranch(branchName.c_str()))
89 return findBranchRes;
90 }
91 return nullptr;
92}
93
// Create, in `outputTree`, the output branch for an input branch that holds a C-style array (fixed or variable
// size) and record it in `*thisBranch`.
// Nothing is created and `thisBranch` is returned as-is when `inputBranch` is null, when its class is a
// std::vector or ROOT::RVec, when it has no first leaf, when the leaf is neither counted by a size leaf nor of
// static length > 1, or when no leaflist type character could be determined (a warning is printed in that case).
// For variable-sized arrays the entry of the size leaf is searched in `outputBranches` (and appended if absent);
// if that entry has no output branch yet, one is created with a null address as a placeholder.
// A positive `basketSize` overrides the basket size of the input branches. `address`, when non-null, is
// interpreted as a ROOT::RVec<std::byte> whose data buffer becomes the address of the new branch.
// NOTE(review): the lines that refresh `sizeLeafIt`/`thisBranch` after the emplace and the definitions of
// `sizeTypeStr` and `rootbtype` are not visible in this extract.
94std::vector<RBranchData>::iterator CreateCStyleArrayBranch(TTree &outputTree, std::vector<RBranchData> &outputBranches,
95 std::vector<RBranchData>::iterator thisBranch,
96 TBranch *inputBranch, int basketSize, void *address)
97{
98 if (!inputBranch)
99 return thisBranch;
100 const auto STLKind = TClassEdit::IsSTLCont(inputBranch->GetClassName());
101 if (STLKind == ROOT::ESTLType::kSTLvector || STLKind == ROOT::ESTLType::kROOTRVec)
102 return thisBranch;
103 // must construct the leaflist for the output branch and create the branch in the output tree
104 const auto *leaf = static_cast<TLeaf *>(inputBranch->GetListOfLeaves()->UncheckedAt(0));
105 if (!leaf)
106 return thisBranch;
107 const auto bname = leaf->GetName();
108 auto *sizeLeaf = leaf->GetLeafCount();
// Either the name of the leaf holding the dynamic size or the static length printed as a number; this string is
// what goes between the square brackets of the leaflist below.
109 const auto sizeLeafName = sizeLeaf ? std::string(sizeLeaf->GetName()) : std::to_string(leaf->GetLenStatic());
110
111 // We proceed only if branch is a fixed-or-variable-sized array
112 if (sizeLeaf || leaf->GetLenStatic() > 1) {
113 if (sizeLeaf) {
114 // The array branch `bname` has dynamic size stored in leaf `sizeLeafName`, so we need to ensure that it's
115 // in the output tree.
116 auto sizeLeafIt =
117 std::find_if(outputBranches.begin(), outputBranches.end(),
118 [&sizeLeafName](const RBranchData &bd) { return bd.fOutputBranchName == sizeLeafName; });
119 if (sizeLeafIt == outputBranches.end()) {
120 // The size leaf is not part of the output branches yet, so emplace an empty slot for it.
121 // This means that iterators need to be updated in case the container reallocates.
122 const auto indexBeforeEmplace = std::distance(outputBranches.begin(), thisBranch);
123 outputBranches.emplace_back("", sizeLeafName, /*isDefine=*/false, /*typeID=*/nullptr);
126 }
127 if (!sizeLeafIt->fOutputBranch) {
128 // The size leaf was emplaced, but not initialised yet
130 // Use original basket size for existing branches otherwise use custom basket size.
131 const auto bufSize = (basketSize > 0) ? basketSize : sizeLeaf->GetBranch()->GetBasketSize();
132 // The null branch address is a placeholder. It will be set when SetBranchesHelper is called for
133 // `sizeLeafName`
134 auto *outputBranch = outputTree.Branch(sizeLeafName.c_str(), static_cast<void *>(nullptr),
135 (sizeLeafName + '/' + sizeTypeStr).c_str(), bufSize);
136 sizeLeafIt->fOutputBranch = outputBranch;
137 }
138 }
139
140 const auto btype = leaf->GetTypeName();
142 if (rootbtype == ' ') {
143 Warning("Snapshot",
144 "RDataFrame::Snapshot: could not correctly construct a leaflist for C-style array in column %s. The "
145 "leaf is of type '%s'. This column will not be written out.",
146 bname, btype);
147 return thisBranch;
148 }
149
// Leaflist has the form "<leaf name>[<size leaf name or static length>]/<type character>".
150 const auto leaflist = std::string(bname) + "[" + sizeLeafName + "]/" + rootbtype;
151 // Use original basket size for existing branches and new basket size for new branches
152 const auto bufSize = (basketSize > 0) ? basketSize : inputBranch->GetBasketSize();
153 void *addressForBranch = [address]() -> void * {
154 if (address) {
155 // Address here points to a ROOT::RVec<std::byte> coming from RTreeUntypedArrayColumnReader. We know we need
156 // its buffer, so we cast it and extract the address of the buffer
157 auto *rawRVec = reinterpret_cast<ROOT::RVec<std::byte> *>(address);
158 return rawRVec->data();
159 }
160 return nullptr;
161 }();
162 thisBranch->fOutputBranch =
163 outputTree.Branch(thisBranch->fOutputBranchName.c_str(), addressForBranch, leaflist.c_str(), bufSize);
164 thisBranch->fOutputBranch->SetTitle(inputBranch->GetTitle());
// Mark the entry so that later address updates take the buffer of the RVec rather than the RVec itself.
165 thisBranch->fIsCArray = true;
166 }
167
168 return thisBranch;
169}
170
171void SetBranchAddress(TBranch *inputBranch, RBranchData &branchData, void *valueAddress)
172{
173 const static TClassRef TBOClRef("TBranchObject");
174 if (inputBranch && inputBranch->IsA() == TBOClRef) {
175 branchData.fOutputBranch->SetAddress(reinterpret_cast<void **>(inputBranch->GetAddress()));
176 } else if (branchData.fOutputBranch->IsA() != TBranch::Class()) {
177 // This is a relatively rare case of a fixed-size array getting redefined
178 branchData.fBranchAddressForCArrays = valueAddress;
179 branchData.fOutputBranch->SetAddress(&branchData.fBranchAddressForCArrays);
180 } else {
181 void *correctAddress = [valueAddress, isCArray = branchData.fIsCArray]() -> void * {
182 if (isCArray) {
183 // Address here points to a ROOT::RVec<std::byte> coming from RTreeUntypedArrayColumnReader. We know we
184 // need its buffer, so we cast it and extract the address of the buffer
185 auto *rawRVec = reinterpret_cast<ROOT::RVec<std::byte> *>(valueAddress);
186 return rawRVec->data();
187 }
188 return valueAddress;
189 }();
190 branchData.fOutputBranch->SetAddress(correctAddress);
191 branchData.fBranchAddressForCArrays = valueAddress;
192 }
193}
194
// Create an output branch for a column of fundamental type using a leaflist of the form "<name>/<type char>".
// If no type character is available (`rootTypeChar` is a blank), a warning is printed and no branch is created.
// NOTE(review): the signature of this function and the definition of `rootTypeChar` are not visible in this
// extract; from the body, it uses `outputTree`, `bd`, `valueAddress` and `bufSize`.
196{
197 // Logic taken from
198 // TTree::BranchImpRef(
199 // const char* branchname, TClass* ptrClass, EDataType datatype, void* addobj, Int_t bufsize, Int_t splitlevel)
201 if (rootTypeChar == ' ') {
202 Warning("Snapshot",
203 "RDataFrame::Snapshot: could not correctly construct a leaflist for fundamental type in column %s. This "
204 "column will not be written out.",
205 bd.fOutputBranchName.c_str());
206 return;
207 }
208 std::string leafList{bd.fOutputBranchName + '/' + rootTypeChar};
209 bd.fOutputBranch = outputTree.Branch(bd.fOutputBranchName.c_str(), valueAddress, leafList.c_str(), bufSize);
210}
211
212/// Ensure that an object with the input name can be written to the target file. This means checking that the
213/// TFile can be opened in the mode specified in `opts`, deleting any pre-existing objects with the same name in case
214/// `opts.fOverwriteIfExists = true`, or throwing an error otherwise.
/// As implemented, the checks are only performed when `opts.fMode` equals "update" (compared case-insensitively);
/// for any other mode the function returns immediately.
/// Throws std::invalid_argument if the file cannot be opened in update mode, or if the object already exists and
/// overwriting was not requested.
// NOTE(review): the first line of the signature is not visible in this extract.
216 const std::string &fileName)
217{
218 TString fileMode = opts.fMode;
219 fileMode.ToLower();
220 if (fileMode != "update")
221 return;
222
223 // output file opened in "update" mode: must check whether target object name is already present in file
224 std::unique_ptr<TFile> outFile{TFile::Open(fileName.c_str(), "update")};
225 if (!outFile || outFile->IsZombie())
226 throw std::invalid_argument("Snapshot: cannot open file \"" + fileName + "\" in update mode");
227
228 // Object is not present in the file, we are good
229 if (!outFile->GetKey(objName.c_str()))
230 return;
231
232 // object called objName is already present in the file
233 if (opts.fOverwriteIfExists) {
234 if (auto existingTree = outFile->Get<TTree>(objName.c_str()); existingTree) {
235 // Special case for TTree. TTree::Delete invalidates the 'this' pointer, so we don't wrap it in a
236 // std::unique_ptr.
237 existingTree->Delete("all");
238 } else {
239 // Ensure deletion of object and all its cycles.
240 outFile->Delete((objName + ";*").c_str());
241 }
242 } else {
243 const std::string msg = "Snapshot: object \"" + objName + "\" already present in file \"" + fileName +
244 "\". If you want to delete the original object and write another, please set the "
245 "'fOverwriteIfExists' option to true in RSnapshotOptions.";
246 throw std::invalid_argument(msg);
247 }
248}
249
// Create, or re-address, the output branch that belongs to one entry of `allBranchData`.
//  - If the output branch already exists and `valueAddress` is non-null, only its address is (re)set.
//  - Otherwise a branch is created depending on the dictionary found for the column type: fundamental type,
//    C-style array (only for columns that do not come from a Define) or class type.
//  - std::logic_error is thrown if none of these cases applies.
// A positive `basketSize` takes precedence over the basket size of the input branch; without an input branch
// 32000 is used. The split level is taken from the input branch, or 99 without one.
// NOTE(review): several lines of this function are not visible in this extract, among them the start of the
// signature, the definition of `branchData` and the calls that create the fundamental-type, C-array and
// class-type branches.
251 std::vector<ROOT::Internal::RDF::RBranchData> &allBranchData, std::size_t currentIndex,
252 int basketSize, void *valueAddress)
253{
// Columns coming from a Define have no input branch to look up.
255 auto *inputBranch = branchData->fIsDefine ? nullptr : SearchForBranch(inputTree, branchData->fInputBranchName);
256
257 if (branchData->fOutputBranch && valueAddress) {
258 // The output branch was already created, we just need to (re)set its address
259 SetBranchAddress(inputBranch, *branchData, valueAddress);
260 return;
261 }
262
263 // Respect the original bufsize and splitlevel arguments
264 // In particular, by keeping splitlevel equal to 0 if this was the case for `inputBranch`, we avoid
265 // writing garbage when unsplit objects cannot be written as split objects (e.g. in case of a polymorphic
266 // TObject branch, see https://bit.ly/2EjLMId ).
267 // A user-provided basket size value takes precedence.
268 const auto bufSize = (basketSize > 0) ? basketSize : (inputBranch ? inputBranch->GetBasketSize() : 32000);
269 const auto splitLevel = inputBranch ? inputBranch->GetSplitLevel() : 99;
270
271 auto *dictionary = TDictionary::GetDictionary(*branchData->fInputTypeID);
272 if (dynamic_cast<TDataType *>(dictionary)) {
273 // Branch of fundamental type
275 return;
276 }
277
278 if (!branchData->fIsDefine) {
279 // Cases where we need a leaflist (e.g. C-style arrays)
280 // We only enter this code path if the input value does not come from a Define/Redefine. In those cases, it is
281 // not allowed to create a column of C-style array type, so that can't happen when writing the TTree. This is
282 // currently what prevents writing the wrong branch output type in a scenario where the input branch of the TTree
283 // is a C-style array and then the user is Redefining it with some other type (e.g. a ROOT::RVec).
285 }
286 if (branchData->fOutputBranch) {
287 // A branch was created in the previous function call
288 if (valueAddress) {
289 // valueAddress here points to a ROOT::RVec<std::byte> coming from RTreeUntypedArrayColumnReader. We know we
290 // need its buffer, so we cast it and extract the address of the buffer
291 auto *rawRVec = reinterpret_cast<ROOT::RVec<std::byte> *>(valueAddress);
292 branchData->fBranchAddressForCArrays = rawRVec->data();
293 }
294 return;
295 }
296
297 if (auto *classPtr = dynamic_cast<TClass *>(dictionary)) {
298 // Case of unsplit object with polymorphic type
299 if (inputBranch && dynamic_cast<TBranchObject *>(inputBranch) && valueAddress)
300 branchData->fOutputBranch =
302 inputBranch->GetAddress(), bufSize, splitLevel);
303 // General case, with valid address
304 else if (valueAddress)
306 outputTree, branchData->fOutputBranchName.c_str(), classPtr, TDataType::GetType(*branchData->fInputTypeID),
308 // No value was passed, we're just creating a hollow branch to populate the dataset schema
309 else
310 branchData->fOutputBranch =
311 outputTree.Branch(branchData->fOutputBranchName.c_str(), classPtr->GetName(), nullptr, bufSize);
312 return;
313 }
314
315 // We are not aware of other cases
316 throw std::logic_error(
317 "RDataFrame::Snapshot: something went wrong when creating a TTree branch, please report this as a bug.");
318}
319
// Determine the compression settings to use for the Snapshot output described by `options`.
// When no compression algorithm was chosen (kUndefined) the default of the output format is returned: ZLIB with
// level 1 for TTree (also used for the kDefault output format) and ZSTD with level 5 for RNTuple.
// Throws std::invalid_argument for any other output format.
// NOTE(review): the signature, the local aliases (`CompAlgo`, `OutputFormat`) and the returns used when an
// algorithm was chosen explicitly are not visible in this extract.
321{
324
325 if (options.fOutputFormat == OutputFormat::kTTree || options.fOutputFormat == OutputFormat::kDefault) {
326 // The default compression settings for TTree is 101
327 if (options.fCompressionAlgorithm == CompAlgo::kUndefined) {
328 return ROOT::CompressionSettings(CompAlgo::kZLIB, 1);
329 }
331 } else if (options.fOutputFormat == OutputFormat::kRNTuple) {
332 // The default compression settings for RNTuple is 505
333 if (options.fCompressionAlgorithm == CompAlgo::kUndefined) {
334 return ROOT::CompressionSettings(CompAlgo::kZSTD, 5);
335 }
337 } else {
338 throw std::invalid_argument("RDataFrame::Snapshot: unrecognized output format");
339 }
340}
341} // namespace
342
// Construct the bookkeeping for one output branch from the input and output branch names, the flag telling
// whether the column comes from a Define, and the type info of the column.
// `fTypeData` is initialised according to the kind of dictionary found for the type; for a class type it becomes
// an EmptyDynamicType that refers to the TClass.
// NOTE(review): the first line of the signature, the definition of `dictionary` and the body of the TDataType
// branch are not visible in this extract.
344 const std::type_info *typeID)
345 : fInputBranchName{std::move(inputBranchName)},
346 fOutputBranchName{std::move(outputBranchName)},
347 fInputTypeID{typeID},
348 fIsDefine{isDefine}
349{
351 if (auto datatype = dynamic_cast<TDataType *>(dictionary); datatype) {
353 } else if (auto tclass = dynamic_cast<TClass *>(dictionary); tclass) {
354 fTypeData = EmptyDynamicType{tclass};
355 }
356}
357
358/// @brief Return a pointer to an empty instance of the type represented by this branch.
359/// For fundamental types, this is simply an 8-byte region of zeroes. For classes, it is an instance created with
360/// TClass::New.
361/// @param pointerToPointer Return a pointer to a pointer, so it can be used directly in TTree::SetBranchAddress().
/// The instance of a class type is created on first use and then kept, so repeated calls return the same object.
// NOTE(review): the signature line is not visible in this extract.
363{
364 if (auto fundamental = std::get_if<FundamentalType>(&fTypeData); fundamental) {
365 assert(!pointerToPointer); // Not used for fundamental types
366 return fundamental->fBytes.data();
367 }
368
// std::get throws std::bad_variant_access if fTypeData holds neither alternative handled here.
369 auto &dynamic = std::get<EmptyDynamicType>(fTypeData);
370 if (!dynamic.fEmptyInstance) {
371 auto *dictionary = TDictionary::GetDictionary(*fInputTypeID);
372 assert(dynamic_cast<TDataType *>(dictionary) ==
373 nullptr); // TDataType should be handled by writing into the local buffer
374
375 auto tclass = dynamic_cast<TClass *>(dictionary);
376 assert(tclass);
// The destructor obtained from the TClass is installed as deleter of the shared_ptr.
377 dynamic.fEmptyInstance = std::shared_ptr<void>{tclass->New(), tclass->GetDestructor()};
378 }
379
380 if (pointerToPointer) {
381 // Make TTree happy (needs a pointer to pointer):
382 dynamic.fRawPtrToEmptyInstance = dynamic.fEmptyInstance.get();
383 return &dynamic.fRawPtrToEmptyInstance;
384 } else {
385 return dynamic.fEmptyInstance.get();
386 }
387}
388
389/// Point the branch address to an empty instance of the type represented by this branch
390/// or write null bytes into the space used by the fundamental type.
391/// This is used in case of variations, when certain defines/actions don't execute. We
392/// nevertheless need to write something, so we point the branch to an empty instance.
/// Nothing is done if no output branch has been created yet.
// NOTE(review): the signature line is not visible in this extract.
394{
395 if (!fOutputBranch)
396 return;
397
398 if (auto fundamental = std::get_if<FundamentalType>(&fTypeData); fundamental) {
// Only the local byte buffer is zeroed here; the branch address is not touched in this case.
399 fundamental->fBytes.fill(std::byte{0});
400 } else {
401 // TTree expects pointer to pointer, to figure out who allocates the object:
402 fOutputBranch->SetAddress(EmptyInstance(/*pointerToPointer=*/true));
403 }
404}
405
// Construct the single-threaded TTree Snapshot helper: stores file, directory and tree names, the options and
// the two loop managers, and creates one RBranchData entry per element of `vbnames`.
// NOTE(review): the first line of the signature, the parameters declaring `loopManager`/`inputLM`, and the
// definition of `outputBranchNames` are not visible in this extract. `isDefine` and `colTypeIDs` are indexed
// with the indices of `vbnames`, so they are presumably expected to have the same size - verify against caller.
407 std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames,
408 const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector<bool> &&isDefine,
410 const std::vector<const std::type_info *> &colTypeIDs)
411 : fFileName(filename),
412 fDirName(dirname),
413 fTreeName(treename),
414 fOptions(options),
415 fOutputLoopManager(loopManager),
416 fInputLoopManager(inputLM)
417{
419
421 fBranchData.reserve(vbnames.size());
422 for (unsigned int i = 0; i < vbnames.size(); ++i) {
423 fBranchData.emplace_back(vbnames[i], std::move(outputBranchNames[i]), isDefine[i], colTypeIDs[i]);
424 }
425}
426
427// Define special member methods here where the definition of all the data member types is available
431 ROOT::Internal::RDF::UntypedSnapshotTTreeHelper &&) noexcept = default;
432
// Emit a warning if a lazy Snapshot was booked but never ran, i.e. the helper still has a tree name (was not
// moved from), no output file was ever opened, and the lazy option is set.
// NOTE(review): the signature line is not visible in this extract.
434{
435 if (!fTreeName.empty() /*not moved from*/ && !fOutputFile /* did not run */ && fOptions.fLazy) {
// Choose the verb used in the warning according to the (case-insensitive) file mode.
436 const auto fileOpenMode = [&]() {
437 TString checkupdate = fOptions.fMode;
438 checkupdate.ToLower();
439 return checkupdate == "update" ? "updated" : "created";
440 }();
441 Warning("Snapshot",
442 "A lazy Snapshot action was booked but never triggered. The tree '%s' in output file '%s' was not %s. "
443 "In case it was desired instead, remember to trigger the Snapshot operation, by storing "
444 "its result in a variable and for example calling the GetValue() method on it.",
445 fTreeName.c_str(), fFileName.c_str(), fileOpenMode);
446 }
447}
448
// Refresh the input tree pointer from the data source of the input loop manager (if that data source is an
// RTTreeDS) and flag that the branch addresses must be set again on the next processed entry.
// NOTE(review): the signature line is not visible in this extract.
450{
451 // We ask the input RLoopManager if it has a TTree. We cannot rely on getting this information when constructing
452 // this action helper, since the TTree might change e.g. when ChangeSpec is called in-between distributed tasks.
453 if (auto treeDS = dynamic_cast<ROOT::Internal::RDF::RTTreeDS *>(fInputLoopManager->GetDataSource()))
454 fInputTree = treeDS->GetTree();
455 fBranchAddressesNeedReset = true;
456}
457
458void ROOT::Internal::RDF::UntypedSnapshotTTreeHelper::Exec(unsigned int, const std::vector<void *> &values)
459{
460 if (!fBranchAddressesNeedReset) {
461 UpdateCArraysPtrs(values);
462 } else {
463 SetBranches(values);
464 fBranchAddressesNeedReset = false;
465 }
466
467 fOutputTree->Fill();
468}
469
// For every branch flagged as C-style array, compare the current data pointer of the value with the one stored
// for the branch and, if they differ, set the new address on the output branch and remember it.
// NOTE(review): the signature line is not visible in this extract; from the body, `values` holds one address per
// element of fBranchData.
471{
472 // This code deals with branches which hold C arrays of variable size. It can happen that the buffers
473 // associated to those is re-allocated. As a result the value of the pointer can change therewith
474 // leaving associated to the branch of the output tree an invalid pointer.
475 // With this code, we set the value of the pointer in the output branch anew when needed.
476 assert(values.size() == fBranchData.size());
477 auto nValues = values.size();
478 for (decltype(nValues) i{}; i < nValues; i++) {
479 if (fBranchData[i].fIsCArray) {
480 // valueAddress here points to a ROOT::RVec<std::byte> coming from RTreeUntypedArrayColumnReader. We know we
481 // need its buffer, so we cast it and extract the address of the buffer
482 auto *rawRVec = reinterpret_cast<ROOT::RVec<std::byte> *>(values[i]);
483 if (auto *data = rawRVec->data(); fBranchData[i].fBranchAddressForCArrays != data) {
484 fBranchData[i].fOutputBranch->SetAddress(data);
485 fBranchData[i].fBranchAddressForCArrays = data;
486 }
487 }
488 }
489}
490
// Create or re-address all output branches for the current values, then verify that no output branch was left
// with a null address (AssertNoNullBranchAddresses throws in that case).
// NOTE(review): the signature line is not visible in this extract. The loop re-reads fBranchData.size() on every
// iteration while `values` is only asserted to match the initial size; confirm that entries appended during the
// loop cannot cause `values[i]` to be read out of range.
492{
493 // create branches in output tree
494 assert(fBranchData.size() == values.size());
495 for (std::size_t i = 0; i < fBranchData.size(); i++) { // fBranchData can grow due to insertions
496 SetBranchesHelper(fInputTree, *fOutputTree, fBranchData, i, fOptions.fBasketSize, values[i]);
497 }
498 AssertNoNullBranchAddresses(fBranchData);
499}
500
// Create all output branches in `outputTree` without any value to point to: a null value address is passed to
// SetBranchesHelper for every entry of fBranchData.
// NOTE(review): the signature line is not visible in this extract.
502{
503 void *dummyValueAddress{};
504 for (std::size_t i = 0; i < fBranchData.size(); i++) { // fBranchData can grow due to insertions
505 SetBranchesHelper(inputTree, outputTree, fBranchData, i, fOptions.fBasketSize, dummyValueAddress);
506 }
507}
508
// Open the output file with the configured mode and compression settings, create the target directory if one
// was requested, and create the (still branch-less) output tree in it.
// Throws std::runtime_error if the output file could not be opened.
// NOTE(review): the signature line is not visible in this extract.
510{
511 fOutputFile.reset(
512 TFile::Open(fFileName.c_str(), fOptions.fMode.c_str(), /*ftitle=*/"", GetSnapshotCompressionSettings(fOptions)));
513 if (!fOutputFile)
514 throw std::runtime_error("Snapshot: could not create output file " + fFileName);
515
516 TDirectory *outputDir = fOutputFile.get();
517 if (!fDirName.empty()) {
518 TString checkupdate = fOptions.fMode;
519 checkupdate.ToLower();
520 if (checkupdate == "update")
521 outputDir = fOutputFile->mkdir(fDirName.c_str(), "", true); // do not overwrite existing directory
522 else
523 outputDir = fOutputFile->mkdir(fDirName.c_str());
524 }
525
526 fOutputTree = std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/outputDir);
527
// An auto-flush setting of 0 leaves the tree's own default untouched.
528 if (fOptions.fAutoFlush)
529 fOutputTree->SetAutoFlush(fOptions.fAutoFlush);
530}
531
// Finish the Snapshot: make sure the output tree has its branches even when it is empty, flush it to the file,
// close the file and attach a data source reading the written tree to the output loop manager.
// NOTE(review): the signature line is not visible in this extract.
533{
534 assert(fOutputTree != nullptr);
535 assert(fOutputFile != nullptr);
536
537 // There were no entries to fill the TTree with (either the input TTree was empty or no event passed after
538 // filtering). We have already created an empty TTree, now also create the branches to preserve the schema
539 if (fOutputTree->GetEntries() == 0) {
540 SetEmptyBranches(fInputTree, *fOutputTree);
541 }
542 // use AutoSave to flush TTree contents because TTree::Write writes in gDirectory, not in fDirectory
543 fOutputTree->AutoSave("flushbaskets");
544 // must destroy the TTree first, otherwise TFile will delete it too leading to a double delete
545 fOutputTree.reset();
546 fOutputFile->Close();
547
548 // Now connect the data source to the loop manager so it can be used for further processing
549 auto fullTreeName = fDirName.empty() ? fTreeName : fDirName + '/' + fTreeName;
550 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::Internal::RDF::RTTreeDS>(fullTreeName, fFileName));
551}
552
553/**
554 * \brief Create a new UntypedSnapshotTTreeHelper with a different output file name
555 *
556 * \param newName A type-erased string with the output file name
557 * \return UntypedSnapshotTTreeHelper
558 *
559 * This MakeNew implementation is tied to the cloning feature of actions
560 * of the computation graph. In particular, cloning a Snapshot node usually
561 * also involves changing the name of the output file, otherwise the cloned
562 * Snapshot would overwrite the same file.
563 */
// NOTE(review): the signature and parts of the return statement are not visible in this extract.
566{
// `newName` is reinterpreted as a pointer to std::string and copied; the caller must pass exactly that type.
567 const std::string finalName = *reinterpret_cast<const std::string *>(newName);
568 std::vector<std::string> inputBranchNames;
569 std::vector<std::string> outputBranchNames;
570 std::vector<bool> isDefine;
571 std::vector<const std::type_info *> inputColumnTypeIDs;
572 for (const auto &bd : fBranchData) {
// Copying stops at the first entry without an input branch name. Entries for size leaves are appended to the
// branch data with an empty input name, so presumably this excludes them from the new helper.
573 if (bd.fInputBranchName.empty())
574 break;
575 inputBranchNames.push_back(bd.fInputBranchName);
576 outputBranchNames.push_back(bd.fOutputBranchName);
577 isDefine.push_back(bd.fIsDefine);
578 inputColumnTypeIDs.push_back(bd.fInputTypeID);
579 }
580
582 fDirName,
583 fTreeName,
584 std::move(inputBranchNames),
585 std::move(outputBranchNames),
586 fOptions,
587 std::move(isDefine),
588 fOutputLoopManager,
589 fInputLoopManager,
591}
592
// Construct the multi-threaded TTree Snapshot helper. The per-slot containers (output files, output trees,
// reset flags, input trees) are sized to `nSlots`, with every reset flag initially set, and each slot gets its
// own vector of RBranchData with one entry per element of `vbnames`.
// NOTE(review): the first line of the signature, the parameters declaring `isDefine`/`loopManager`/`inputLM` and
// the definition of `outputBranchNames` are not visible in this extract.
594 unsigned int nSlots, std::string_view filename, std::string_view dirname, std::string_view treename,
595 const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options,
597 const std::vector<const std::type_info *> &colTypeIDs)
598 : fNSlots(nSlots),
599 fOutputFiles(fNSlots),
600 fOutputTrees(fNSlots),
601 fBranchAddressesNeedReset(fNSlots, 1),
602 fInputTrees(fNSlots),
603 fFileName(filename),
604 fDirName(dirname),
605 fTreeName(treename),
606 fOptions(options),
607 fOutputLoopManager(loopManager),
608 fInputLoopManager(inputLM)
609{
611
613 fBranchData.reserve(fNSlots);
614 for (unsigned int slot = 0; slot < fNSlots; ++slot) {
615 fBranchData.emplace_back();
616 auto &thisSlot = fBranchData.back();
617 thisSlot.reserve(vbnames.size());
618 for (unsigned int i = 0; i < vbnames.size(); ++i) {
// The output names are copied (not moved) because every slot needs its own copy.
619 thisSlot.emplace_back(vbnames[i], outputBranchNames[i], isDefine[i], colTypeIDs[i]);
620 }
621 }
622}
623
624// Define special member methods here where the definition of all the data member types is available
629
// Emit a warning if a lazy Snapshot was booked but never ran, i.e. the helper still has a tree name (was not
// moved from), the lazy option is set, and none of the per-slot output files was ever created.
// NOTE(review): the signature line is not visible in this extract.
631{
632 if (!fTreeName.empty() /*not moved from*/ && fOptions.fLazy && !fOutputFiles.empty() &&
633 std::all_of(fOutputFiles.begin(), fOutputFiles.end(), [](const auto &f) { return !f; }) /* never run */) {
// Choose the verb used in the warning according to the (case-insensitive) file mode.
634 const auto fileOpenMode = [&]() {
635 TString checkupdate = fOptions.fMode;
636 checkupdate.ToLower();
637 return checkupdate == "update" ? "updated" : "created";
638 }();
639 Warning("Snapshot",
640 "A lazy Snapshot action was booked but never triggered. The tree '%s' in output file '%s' was not %s. "
641 "In case it was desired instead, remember to trigger the Snapshot operation, by storing "
642 "its result in a variable and for example calling the GetValue() method on it.",
643 fTreeName.c_str(), fFileName.c_str(), fileOpenMode);
644 }
645}
646
// Prepare processing slot `slot` for a new task: obtain the slot's output file from the merger on first use,
// create a fresh output tree for the slot, record the input tree for the slot and flag that the branch addresses
// must be set again on the next processed entry.
// NOTE(review): the signature line is not visible in this extract; from the body, the function receives a
// pointer `r` (used via `r->GetTree()`, may be null) and the slot index `slot`.
648{
649 ::TDirectory::TContext c; // do not let tasks change the thread-local gDirectory
650 if (!fOutputFiles[slot]) {
651 // first time this thread executes something, let's create a TBufferMerger output directory
652 fOutputFiles[slot] = fMerger->GetFile();
653 }
654 TDirectory *treeDirectory = fOutputFiles[slot].get();
655 if (!fDirName.empty()) {
656 // call returnExistingDirectory=true since MT can end up making this call multiple times
657 treeDirectory = fOutputFiles[slot]->mkdir(fDirName.c_str(), "", true);
658 }
659 // re-create output tree as we need to create its branches again, with new input variables
660 // TODO we could instead create the output tree and its branches, change addresses of input variables in each task
661 fOutputTrees[slot] =
662 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/treeDirectory);
663 fOutputTrees[slot]->SetBit(TTree::kEntriesReshuffled);
664 // TODO can be removed when RDF supports interleaved TBB task execution properly, see ROOT-10269
665 fOutputTrees[slot]->SetImplicitMT(false);
666 if (fOptions.fAutoFlush)
667 fOutputTrees[slot]->SetAutoFlush(fOptions.fAutoFlush);
668 if (r) {
669 // We could be getting a task-local TTreeReader from the TTreeProcessorMT.
670 fInputTrees[slot] = r->GetTree();
671 } else if (auto treeDS = dynamic_cast<ROOT::Internal::RDF::RTTreeDS *>(fInputLoopManager->GetDataSource())) {
672 fInputTrees[slot] = treeDS->GetTree();
673 }
674 fBranchAddressesNeedReset[slot] = 1; // reset first event flag for this slot
675}
676
// End of a task for processing slot `slot`: write the slot's output file if its tree received entries, clear the
// branch pointers kept in the slot's branch data and destroy the slot's output tree.
// NOTE(review): the signature line is not visible in this extract.
678{
679 if (fOutputTrees[slot]->GetEntries() > 0)
680 fOutputFiles[slot]->Write();
681 for (auto &branchData : fBranchData[slot])
682 branchData.ClearBranchPointers(); // The branch pointers will go stale below
683 // clear now to avoid concurrent destruction of output trees and input tree (which has them listed as fClones)
684 fOutputTrees[slot].reset(nullptr);
685}
686
687void ROOT::Internal::RDF::UntypedSnapshotTTreeHelperMT::Exec(unsigned int slot, const std::vector<void *> &values)
688{
689 if (fBranchAddressesNeedReset[slot] == 0) {
690 UpdateCArraysPtrs(slot, values);
691 } else {
692 SetBranches(slot, values);
693 fBranchAddressesNeedReset[slot] = 0;
694 }
695 fOutputTrees[slot]->Fill();
696 auto entries = fOutputTrees[slot]->GetEntries();
697 auto autoFlush = fOutputTrees[slot]->GetAutoFlush();
698 if ((autoFlush > 0) && (entries % autoFlush == 0))
699 fOutputFiles[slot]->Write();
700}
701
// Per-slot variant of the C-array pointer refresh: for every branch of `slot` flagged as C-style array, compare
// the current data pointer of the value with the one stored for the branch and, if they differ, set the new
// address on the output branch and remember it.
// NOTE(review): the first line of the signature is not visible in this extract.
703 const std::vector<void *> &values)
704{
705 // This code deals with branches which hold C arrays of variable size. It can happen that the buffers
706 // associated to those is re-allocated. As a result the value of the pointer can change therewith
707 // leaving associated to the branch of the output tree an invalid pointer.
708 // With this code, we set the value of the pointer in the output branch anew when needed.
709 assert(values.size() == fBranchData[slot].size());
710 auto nValues = values.size();
711 for (decltype(nValues) i{}; i < nValues; i++) {
712 auto &branchData = fBranchData[slot][i];
713 if (branchData.fIsCArray) {
714 // valueAddress here points to a ROOT::RVec<std::byte> coming from RTreeUntypedArrayColumnReader. We know we
715 // need its buffer, so we cast it and extract the address of the buffer
716 auto *rawRVec = reinterpret_cast<ROOT::RVec<std::byte> *>(values[i]);
717 if (auto *data = rawRVec->data(); branchData.fBranchAddressForCArrays != data) {
718 // reset the branch address
719 branchData.fOutputBranch->SetAddress(data);
720 branchData.fBranchAddressForCArrays = data;
721 }
722 }
723 }
724}
725
// Create or re-address all output branches of processing slot `slot` for the current values.
// NOTE(review): the first line of the signature and the last statement of the body are not visible in this
// extract. The loop re-reads branchData.size() on every iteration while `values` is only asserted to match the
// initial size; confirm that entries appended during the loop cannot cause `values[i]` to be read out of range.
727 const std::vector<void *> &values)
728{
729 // create branches in output tree
730 auto &branchData = fBranchData[slot];
731 assert(branchData.size() == values.size());
732 for (std::size_t i = 0; i < branchData.size(); i++) { // branchData can grow due to insertions
733 SetBranchesHelper(fInputTrees[slot], *fOutputTrees[slot], branchData, i, fOptions.fBasketSize, values[i]);
734 }
735
737}
738
// Iterate over the branch data of the first slot with a null value address at hand.
// NOTE(review): the signature and the statement inside the loop are not visible in this extract, so what is
// done per branch cannot be confirmed from here.
740{
741 void *dummyValueAddress{};
742 auto &branchData = fBranchData.front();
743 for (std::size_t i = 0; i < branchData.size(); i++) { // branchData can grow due to insertions
745 }
746}
747
// Open the output file and hand it over to a newly created TBufferMerger. A non-owning pointer to the file is
// kept in fOutputFile.
// Throws std::runtime_error if the output file could not be opened.
// NOTE(review): the signature and the last argument line of the TFile::Open call are not visible in this
// extract.
749{
750 auto outFile =
751 std::unique_ptr<TFile>{TFile::Open(fFileName.c_str(), fOptions.fMode.c_str(), /*ftitle=*/fFileName.c_str(),
753 if (!outFile)
754 throw std::runtime_error("Snapshot: could not create output file " + fFileName);
// The raw pointer must be taken before ownership of the file is moved into the merger.
755 fOutputFile = outFile.get();
756 fMerger = std::make_unique<ROOT::TBufferMerger>(std::move(outFile));
757}
758
// Finish the multi-threaded Snapshot: write and close all per-slot files, create an empty tree with the
// requested branches if the output file has no key for the tree, release the per-slot files and the merger, and
// attach a data source reading the written tree to the output loop manager.
// NOTE(review): the signature and one line after the directory lookup are not visible in this extract.
760{
761
762 for (auto &file : fOutputFiles) {
763 if (file) {
764 file->Write();
765 file->Close();
766 }
767 }
768
769 // If there were no entries to fill the TTree with (either the input TTree was empty or no event passed after
770 // filtering), create an empty TTree in the output file and create the branches to preserve the schema
771 auto fullTreeName = fDirName.empty() ? fTreeName : fDirName + '/' + fTreeName;
772 assert(fOutputFile && "Missing output file in Snapshot finalization.");
773 // Use GetKey to avoid having to deal with memory management of the object in the file
774 if (!fOutputFile->GetKey(fullTreeName.c_str())) {
775
776 // First find in which directory we need to write the output TTree
777 TDirectory *treeDirectory = fOutputFile;
778 if (!fDirName.empty()) {
779 treeDirectory = fOutputFile->mkdir(fDirName.c_str(), "", true);
780 }
782
783 // Create the output TTree and create the user-requested branches
784 auto outTree =
785 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/treeDirectory);
786 TTree *inputTree{};
787 if (auto treeDS = dynamic_cast<ROOT::Internal::RDF::RTTreeDS *>(fInputLoopManager->GetDataSource()))
788 inputTree = treeDS->GetTree();
789 SetEmptyBranches(inputTree, *outTree);
790
791 fOutputFile->Write();
792 }
793
794 // flush all buffers to disk by destroying the TBufferMerger
795 fOutputFiles.clear();
796 fMerger.reset();
797
798 // Now connect the data source to the loop manager so it can be used for further processing
799 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::Internal::RDF::RTTreeDS>(fullTreeName, fFileName));
800}
801
802/**
803 * \brief Create a new UntypedSnapshotTTreeHelperMT with a different output file name
804 *
805 * \param newName A type-erased string with the output file name
806 * \return UntypedSnapshotTTreeHelperMT
807 *
808 * This MakeNew implementation is tied to the cloning feature of actions
809 * of the computation graph. In particular, cloning a Snapshot node usually
810 * also involves changing the name of the output file, otherwise the cloned
811 * Snapshot would overwrite the same file.
812 */
// NOTE(review): the signature and the opening of the return statement are not visible in this extract.
815{
// `newName` is reinterpreted as a pointer to std::string and copied; the caller must pass exactly that type.
816 const std::string finalName = *reinterpret_cast<const std::string *>(newName);
817 std::vector<std::string> inputBranchNames;
818 std::vector<std::string> outputBranchNames;
819 std::vector<bool> isDefine;
820 std::vector<const std::type_info *> inputColumnTypeIDs;
// The branch data of the first slot is used as the template for the new helper.
821 for (const auto &bd : fBranchData.front()) {
// Copying stops at the first entry without an input branch name. Entries for size leaves are appended to the
// branch data with an empty input name, so presumably this excludes them from the new helper.
822 if (bd.fInputBranchName.empty())
823 break;
824 inputBranchNames.push_back(bd.fInputBranchName);
825 outputBranchNames.push_back(bd.fOutputBranchName);
826 isDefine.push_back(bd.fIsDefine);
827 inputColumnTypeIDs.push_back(bd.fInputTypeID);
828 }
829
831 finalName,
832 fDirName,
833 fTreeName,
834 std::move(inputBranchNames),
835 std::move(outputBranchNames),
836 fOptions,
837 std::move(isDefine),
838 fOutputLoopManager,
839 fInputLoopManager,
840 std::move(inputColumnTypeIDs)};
841}
842
844 unsigned int nSlots, std::string_view filename, std::string_view dirname, std::string_view ntuplename,
845 const ColumnNames_t &vfnames, const ColumnNames_t &fnames, const RSnapshotOptions &options,
847 const std::vector<const std::type_info *> &colTypeIDs)
848 : fFileName(filename),
849 fDirName(dirname),
850 fNTupleName(ntuplename),
851 fOptions(options),
852 fInputLoopManager(inputLM),
853 fOutputLoopManager(outputLM),
854 fInputFieldNames(vfnames),
855 fOutputFieldNames(ReplaceDotWithUnderscore(fnames)),
856 fNSlots(nSlots),
857 fFillContexts(nSlots),
858 fEntries(nSlots),
859 fInputColumnTypeIDs(colTypeIDs)
860{
862}
863
864// Define special member methods here where the definition of all the data member types is available
869
871{
872 if (!fNTupleName.empty() /* not moved from */ && !fOutputFile /* did not run */ && fOptions.fLazy)
873 Warning("Snapshot", "A lazy Snapshot action was booked but never triggered.");
874}
875
877{
878 auto model = ROOT::RNTupleModel::CreateBare();
879 auto nFields = fOutputFieldNames.size();
880 fFieldTokens.resize(nFields);
881 for (decltype(nFields) i = 0; i < nFields; i++) {
882 // Need to retrieve the type of every field to create as a string
883 // If the input type for a field does not have RTTI, internally we store it as the tag UseNativeDataType. When
884 // that is detected, we need to ask the data source which is the type name based on the on-disk information.
885 const auto typeName = *fInputColumnTypeIDs[i] == typeid(ROOT::Internal::RDF::UseNativeDataType)
886 ? ROOT::Internal::RDF::GetTypeNameWithOpts(*fInputLoopManager->GetDataSource(),
887 fInputFieldNames[i], fOptions.fVector2RVec)
888 : ROOT::Internal::RDF::TypeID2TypeName(*fInputColumnTypeIDs[i]);
889
890 // Cardinality fields are read-only, so instead we snapshot them as their inner type.
891 if (typeName.substr(0, 25) == "ROOT::RNTupleCardinality<") {
892 // Get "T" from "ROOT::RNTupleCardinality<T>".
893 std::string cardinalityType = typeName.substr(25, typeName.size() - 26);
894 Warning("Snapshot",
895 "Column \"%s\" is a read-only \"%s\" column. It will be snapshot as its inner type \"%s\" instead.",
896 fInputFieldNames[i].c_str(), typeName.c_str(), cardinalityType.c_str());
897 model->AddField(ROOT::RFieldBase::Create(fOutputFieldNames[i], cardinalityType).Unwrap());
898 } else {
899 model->AddField(ROOT::RFieldBase::Create(fOutputFieldNames[i], typeName).Unwrap());
900 }
901 fFieldTokens[i] = model->GetToken(fOutputFieldNames[i]);
902 }
903 model->Freeze();
904
906 writeOptions.SetCompression(GetSnapshotCompressionSettings(fOptions));
907 writeOptions.SetInitialUnzippedPageSize(fOptions.fInitialUnzippedPageSize);
908 writeOptions.SetMaxUnzippedPageSize(fOptions.fMaxUnzippedPageSize);
909 writeOptions.SetApproxZippedClusterSize(fOptions.fApproxZippedClusterSize);
910 writeOptions.SetMaxUnzippedClusterSize(fOptions.fMaxUnzippedClusterSize);
911 writeOptions.SetEnablePageChecksums(fOptions.fEnablePageChecksums);
912 writeOptions.SetEnableSamePageMerging(fOptions.fEnableSamePageMerging);
913
914 fOutputFile.reset(TFile::Open(fFileName.c_str(), fOptions.fMode.c_str()));
915 if (!fOutputFile)
916 throw std::runtime_error("Snapshot: could not create output file " + fFileName);
917
918 TDirectory *outputDir = fOutputFile.get();
919 if (!fDirName.empty()) {
920 TString checkupdate = fOptions.fMode;
921 checkupdate.ToLower();
922 if (checkupdate == "update")
923 outputDir = fOutputFile->mkdir(fDirName.c_str(), "", true); // do not overwrite existing directory
924 else
925 outputDir = fOutputFile->mkdir(fDirName.c_str());
926 }
927
928 // The RNTupleParallelWriter has exclusive access to the underlying TFile, no further synchronization is needed for
929 // calls to Fill() (in Exec) and FlushCluster() (in FinalizeTask).
930 fWriter = ROOT::RNTupleParallelWriter::Append(std::move(model), fNTupleName, *outputDir, writeOptions);
931}
932
934{
935 if (!fFillContexts[slot]) {
936 fFillContexts[slot] = fWriter->CreateFillContext();
937 fEntries[slot] = fFillContexts[slot]->GetModel().CreateBareEntry();
938 }
939}
940
941void ROOT::Internal::RDF::UntypedSnapshotRNTupleHelper::Exec(unsigned int slot, const std::vector<void *> &values)
942{
943 auto &fillContext = fFillContexts[slot];
944 auto &outputEntry = fEntries[slot];
945 assert(values.size() == fFieldTokens.size());
946 for (decltype(values.size()) i = 0; i < values.size(); i++) {
947 outputEntry->BindRawPtr(fFieldTokens[i], values[i]);
948 }
949 fillContext->Fill(*outputEntry);
950}
951
953{
954 // In principle we would not need to flush a cluster here, but we want to benefit from parallelism for compression.
955 // NB: RNTupleFillContext::FlushCluster() is a nop if there is no new entry since the last flush.
956 fFillContexts[slot]->FlushCluster();
957}
958
960{
961 // First clear and destroy all entries, which were created from the RNTupleFillContexts.
962 fEntries.clear();
963 fFillContexts.clear();
964 // Then destroy the RNTupleParallelWriter and write the metadata.
965 fWriter.reset();
966 // We can now set the data source of the loop manager for the RDataFrame that is returned by the Snapshot call.
967 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::RDF::RNTupleDS>(fDirName + "/" + fNTupleName, fFileName));
968}
969
970/**
971 * Create a new UntypedSnapshotRNTupleHelper with a different output file name.
972 *
973 * \param[in] newName A type-erased string with the output file name
974 * \return UntypedSnapshotRNTupleHelper
975 *
976 * This MakeNew implementation is tied to the cloning feature of actions
977 * of the computation graph. In particular, cloning a Snapshot node usually
978 * also involves changing the name of the output file, otherwise the cloned
979 * Snapshot would overwrite the same file.
980 */
983{
984 const std::string finalName = *reinterpret_cast<const std::string *>(newName);
986 fNSlots, finalName, fDirName, fNTupleName, fInputFieldNames,
987 fOutputFieldNames, fOptions, fInputLoopManager, fOutputLoopManager, fInputColumnTypeIDs};
988}
989
990/*
991 * ------------------------------------
992 * Snapshot with systematic variations
993 * ------------------------------------
994 */
995namespace ROOT::Internal::RDF {
996/// An object to store an output file and a tree in one common place to share them between instances
997/// of Snapshot with systematic uncertainties.
999 std::unique_ptr<TFile> fFile;
1000 std::unique_ptr<TTree> fTree;
1001 std::string fDirectoryName;
1003
1004 // Bitmasks to indicate whether syst. uncertainties have been computed. Bound to TBranches, so need to be stable in
1005 // memory.
/// A group of 64 mask bits together with the buffer that is written to the output tree.
struct Bitmask {
   /// Name of the branch that stores this mask.
   std::string branchName;
   /// Working copy of the bits; all bits start cleared.
   std::bitset<64> bitset{};
   /// Heap-allocated and zero-initialised value whose address is handed to the branch.
   /// Being owned through a unique_ptr, the pointee keeps its address when the Bitmask itself is moved.
   std::unique_ptr<uint64_t> branchBuffer = std::make_unique<uint64_t>();
};
1011 std::vector<Bitmask> fBitMasks;
1012
1013 std::unordered_map<std::string, unsigned int> fBranchToVariationMapping;
1014 // The corresponding ROOT dictionary is declared in core/clingutils/src
1015 std::unordered_map<std::string, std::pair<std::string, unsigned int>> fBranchToBitmaskMapping;
1016 unsigned int fNBits = 0;
1017
1020 {
1021 if (!fBranchToBitmaskMapping.empty()) {
1022 fFile->WriteObject(&fBranchToBitmaskMapping,
1023 (std::string{"R_rdf_column_to_bitmask_mapping_"} + fTree->GetName()).c_str());
1024 }
1025 if (fTree) {
1026 // use AutoSave to flush TTree contents because TTree::Write writes in gDirectory, not in fDirectory
1027 fTree->AutoSave("flushbaskets");
1028
1029 // Now connect the data source to the loop manager so it can be used for further processing
1030 std::string tree = fTree->GetName();
1031 if (!fDirectoryName.empty())
1032 tree = fDirectoryName + '/' + tree;
1033 std::string file = fFile->GetName();
1034
1035 fTree.reset();
1036 fFile.reset();
1037
1039 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::Internal::RDF::RTTreeDS>(tree, file));
1040 }
1041 }
1042 SnapshotOutputWriter(SnapshotOutputWriter const &) = delete; // Anyway deleted because of the unique_ptrs
1045 delete; // Can be done, but need to make move-from object safe to destruct
1047
1048 /// Register a branch and corresponding systematic uncertainty.
1049 /// This will create an entry in the mapping from branch names to bitmasks, so the corresponding
1050 /// column can be masked if it doesn't contain valid entries. This mapping is written next to the
1051 /// tree into the output file.
1052 void RegisterBranch(std::string const &branchName, unsigned int variationIndex)
1053 {
1054 if (auto it = fBranchToVariationMapping.find(branchName); it != fBranchToVariationMapping.end()) {
1055 if (variationIndex != it->second) {
1056 throw std::logic_error("Branch " + branchName +
1057 " is being registered with different variation index than the expected one: " +
1058 std::to_string(variationIndex));
1059 }
1060 return;
1061 }
1062
1063 // Neither branch nor systematic are known, so a new entry needs to be created
1064 fNBits = std::max(fNBits, variationIndex);
1065 const auto vectorIndex = variationIndex / 64u;
1066 const auto bitIndex = variationIndex % 64u;
1067
1068 // Create bitmask branches as long as necessary to capture the bit
1069 while (vectorIndex >= fBitMasks.size()) {
1070 std::string bitmaskBranchName =
1071 std::string{"R_rdf_mask_"} + fTree->GetName() + '_' + std::to_string(fBitMasks.size());
1073 fTree->Branch(bitmaskBranchName.c_str(), fBitMasks.back().branchBuffer.get());
1074 }
1075
1077 fBranchToBitmaskMapping[branchName] = std::make_pair(fBitMasks[vectorIndex].branchName, bitIndex);
1078 }
1079
1080 /// Clear all bits, as if none of the variations passed its filter.
1082 {
1083 for (auto &mask : fBitMasks)
1084 mask.bitset.reset();
1085 }
1086
1087 /// Set a bit signalling that the variation at `index` passed its filter.
1088 void SetMaskBit(unsigned int index)
1089 {
1090 const auto vectorIndex = index / 64;
1091 const auto bitIndex = index % 64;
1092 fBitMasks[vectorIndex].bitset.set(bitIndex, true);
1093 }
1094
1095 /// Test if any of the mask bits are set.
1096 bool MaskEmpty() const
1097 {
1098 return std::none_of(fBitMasks.begin(), fBitMasks.end(), [](Bitmask const &mask) { return mask.bitset.any(); });
1099 }
1100
1101 /// Write the current event and the bitmask to the output dataset.
1102 void Write() const
1103 {
1104 if (!fTree)
1105 throw std::runtime_error("The TTree associated to the Snapshot action doesn't exist, any more.");
1106
1107 for (auto const &mask : fBitMasks) {
1108 *mask.branchBuffer = mask.bitset.to_ullong();
1109 }
1110
1111 fTree->Fill();
1112 }
1113};
1114
1115} // namespace ROOT::Internal::RDF
1116
1118 std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames,
1119 const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector<bool> &&isDefine,
1121 const std::vector<const std::type_info *> &colTypeIDs)
1122 : fOptions(options), fInputLoopManager{inputLoopMgr}, fOutputLoopManager{outputLoopMgr}
1123{
1124 EnsureValidSnapshotOutput(fOptions, std::string(treename), std::string(filename));
1125
1127 fOutputHandle = std::make_shared<SnapshotOutputWriter>(
1128 TFile::Open(filename.data(), fOptions.fMode.c_str(), /*ftitle=*/"", GetSnapshotCompressionSettings(fOptions)));
1129 if (!fOutputHandle->fFile)
1130 throw std::runtime_error(std::string{"Snapshot: could not create output file "} + std::string{filename});
1131
1132 TDirectory *outputDir = fOutputHandle->fFile.get();
1133 if (!dirname.empty()) {
1134 fOutputHandle->fDirectoryName = dirname;
1136 checkupdate.ToLower();
1137 if (checkupdate == "update")
1138 outputDir =
1139 fOutputHandle->fFile->mkdir(std::string{dirname}.c_str(), "", true); // do not overwrite existing directory
1140 else
1141 outputDir = fOutputHandle->fFile->mkdir(std::string{dirname}.c_str());
1142 }
1143
1144 fOutputHandle->fTree = std::make_unique<TTree>(std::string{treename}.c_str(), std::string{treename}.c_str(),
1145 fOptions.fSplitLevel, /*dir=*/outputDir);
1146 fOutputHandle->fOutputLoopManager = fOutputLoopManager;
1147 if (fOptions.fAutoFlush)
1148 fOutputHandle->fTree->SetAutoFlush(fOptions.fAutoFlush);
1149
1151
1152 fBranchData.reserve(vbnames.size());
1153 for (unsigned int i = 0; i < vbnames.size(); ++i) {
1154 fOutputHandle->RegisterBranch(outputBranchNames[i], 0);
1155 fBranchData.emplace_back(vbnames[i], outputBranchNames[i], isDefine[i], colTypeIDs[i]);
1156 }
1157}
1158
1159/// Register a new column as a variation of the column at `originalColumnIndex`, and clone its properties.
1160/// If a nominal column is registered here, it is written without changes, but it means that it will be masked
1161/// in case its selection cuts don't pass.
1162/// \param slot Task ID for MT runs.
1163/// \param columnIndex Index where the data of this column will be passed into the helper.
1164/// \param originalColumnIndex If the column being registered is a variation of a "nominal" column, this designates the
1165/// original.
1166/// Properties such as name and output type are cloned from the original.
1167/// \param variationName The variation that this column belongs to. If "nominal" is used, this column is considered as
1168/// the original.
1170 unsigned int columnIndex,
1171 unsigned int originalColumnIndex,
1172 unsigned int variationIndex,
1173 std::string const &variationName)
1174{
1176 fBranchData[columnIndex].fVariationIndex = variationIndex; // The base column has variations
1177 fOutputHandle->RegisterBranch(fBranchData[columnIndex].fOutputBranchName, variationIndex);
1178 } else if (columnIndex >= fBranchData.size()) {
1179 // First task, need to create branches
1180 fBranchData.resize(columnIndex + 1);
1181 auto &bd = fBranchData[columnIndex];
1182 bd = fBranchData[originalColumnIndex];
1183 std::string newOutputName = bd.fOutputBranchName + "__" + variationName;
1184 std::replace(newOutputName.begin(), newOutputName.end(), ':', '_');
1185 bd.fOutputBranchName = std::move(newOutputName);
1186 bd.fVariationIndex = variationIndex;
1187
1188 fOutputHandle->RegisterBranch(bd.fOutputBranchName, variationIndex);
1189 } else {
1190 assert(static_cast<unsigned int>(fBranchData[columnIndex].fVariationIndex) == variationIndex);
1191 }
1192}
1193
1194/// Bind all output branches to RDF columns for the given slots.
1196{
1197 // We ask the input RLoopManager if it has a TTree. We cannot rely on getting this information when constructing
1198 // this action helper, since the TTree might change e.g. when ChangeSpec is called in-between distributed tasks.
1199 if (auto treeDS = dynamic_cast<ROOT::Internal::RDF::RTTreeDS *>(fInputLoopManager->GetDataSource()))
1200 fInputTree = treeDS->GetTree();
1201
1202 // Create all output branches; and bind them to empty values
1203 for (std::size_t i = 0; i < fBranchData.size(); i++) { // fBranchData can grow due to insertions
1204 SetBranchesHelper(fInputTree, *fOutputHandle->fTree, fBranchData, i, fOptions.fBasketSize,
1205 fBranchData[i].EmptyInstance(/*pointerToPointer=*/false));
1206 }
1207
1208 AssertNoNullBranchAddresses(fBranchData);
1209}
1210
1211/// Connect all output fields to the values pointed to by `values`, fill the output dataset,
1212/// call the Fill of the output tree, and clear the mask bits that show whether a variation was reached.
1213void ROOT::Internal::RDF::SnapshotHelperWithVariations::Exec(unsigned int /*slot*/, const std::vector<void *> &values,
1214 std::vector<bool> const &filterPassed)
1215{
1216 // Rebind branch pointers to RDF values
1217 assert(fBranchData.size() == values.size());
1218 for (std::size_t i = 0; i < values.size(); i++) {
1219 const auto variationIndex = fBranchData[i].fVariationIndex;
1220 if (variationIndex < 0) {
1221 // Branch without variations
1222 SetBranchesHelper(fInputTree, *fOutputHandle->fTree, fBranchData, i, fOptions.fBasketSize, values[i]);
1223 } else if (filterPassed[variationIndex]) {
1224 // Branch with variations
1225 const bool fundamentalType = fBranchData[i].WriteValueIfFundamental(values[i]);
1226 if (!fundamentalType) {
1227 SetBranchesHelper(fInputTree, *fOutputHandle->fTree, fBranchData, i, fOptions.fBasketSize, values[i]);
1228 }
1229 fOutputHandle->SetMaskBit(variationIndex);
1230 }
1231 }
1232
1233 assert(!fOutputHandle->MaskEmpty()); // Exec should not have been called if nothing passes
1234
1235 fOutputHandle->Write();
1236 fOutputHandle->ClearMaskBits();
1237 for (auto &branchData : fBranchData) {
1238 branchData.ClearBranchContents();
1239 }
1240}
1241
1243{
1244 fOutputHandle.reset();
1245}
#define b(i)
Definition RSha256.hxx:100
#define f(i)
Definition RSha256.hxx:104
#define c(i)
Definition RSha256.hxx:101
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
Definition TError.cxx:252
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t mask
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char filename
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
static TBranch * SearchForBranch(TTree *tree, const char *name)
Definition TTreePyz.cxx:61
The head node of a RDF computation graph.
void SetDataSource(std::unique_ptr< ROOT::RDF::RDataSource > dataSource)
std::shared_ptr< SnapshotOutputWriter > fOutputHandle
SnapshotHelperWithVariations(std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &, const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector< bool > &&, ROOT::Detail::RDF::RLoopManager *outputLoopMgr, ROOT::Detail::RDF::RLoopManager *inputLoopMgr, const std::vector< const std::type_info * > &colTypeIDs)
void InitTask(TTreeReader *, unsigned int slot)
Bind all output branches to RDF columns for the given slots.
ROOT::Detail::RDF::RLoopManager * fOutputLoopManager
void Exec(unsigned int, const std::vector< void * > &values, std::vector< bool > const &filterPassed)
Connect all output fields to the values pointed to by values, fill the output dataset,...
void RegisterVariedColumn(unsigned int slot, unsigned int columnIndex, unsigned int originalColumnIndex, unsigned int varationIndex, std::string const &variationName)
Register a new column as a variation of the column at originalColumnIndex, and clone its properties.
UntypedSnapshotRNTupleHelper(unsigned int nSlots, std::string_view filename, std::string_view dirname, std::string_view ntuplename, const ColumnNames_t &vfnames, const ColumnNames_t &fnames, const RSnapshotOptions &options, ROOT::Detail::RDF::RLoopManager *inputLM, ROOT::Detail::RDF::RLoopManager *outputLM, const std::vector< const std::type_info * > &colTypeIDs)
void Exec(unsigned int slot, const std::vector< void * > &values)
UntypedSnapshotRNTupleHelper MakeNew(void *newName)
Create a new UntypedSnapshotRNTupleHelper with a different output file name.
void InitTask(TTreeReader *, unsigned int slot)
UntypedSnapshotTTreeHelperMT(unsigned int nSlots, std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector< bool > &&isDefine, ROOT::Detail::RDF::RLoopManager *loopManager, ROOT::Detail::RDF::RLoopManager *inputLM, const std::vector< const std::type_info * > &colTypeIDs)
void UpdateCArraysPtrs(unsigned int slot, const std::vector< void * > &values)
void SetEmptyBranches(TTree *inputTree, TTree &outputTree)
std::vector< std::vector< RBranchData > > fBranchData
UntypedSnapshotTTreeHelperMT MakeNew(void *newName, std::string_view="nominal")
Create a new UntypedSnapshotTTreeHelperMT with a different output file name.
void InitTask(TTreeReader *r, unsigned int slot)
void Exec(unsigned int slot, const std::vector< void * > &values)
void SetBranches(unsigned int slot, const std::vector< void * > &values)
UntypedSnapshotTTreeHelper MakeNew(void *newName, std::string_view="nominal")
Create a new UntypedSnapshotTTreeHelper with a different output file name.
void SetEmptyBranches(TTree *inputTree, TTree &outputTree)
void SetBranches(const std::vector< void * > &values)
void Exec(unsigned int, const std::vector< void * > &values)
UntypedSnapshotTTreeHelper(std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector< bool > &&isDefine, ROOT::Detail::RDF::RLoopManager *loopManager, ROOT::Detail::RDF::RLoopManager *inputLM, const std::vector< const std::type_info * > &colTypeIDs)
void UpdateCArraysPtrs(const std::vector< void * > &values)
static RResult< std::unique_ptr< RFieldBase > > Create(const std::string &fieldName, const std::string &typeName, const ROOT::RCreateFieldOptions &options, const ROOT::RNTupleDescriptor *desc, ROOT::DescriptorId_t fieldId)
Factory method to resurrect a field from the stored on-disk type information.
static std::unique_ptr< RNTupleModel > CreateBare()
Creates a "bare model", i.e. an RNTupleModel with no default entry.
static std::unique_ptr< RNTupleParallelWriter > Append(std::unique_ptr< ROOT::RNTupleModel > model, std::string_view ntupleName, TDirectory &fileOrDirectory, const ROOT::RNTupleWriteOptions &options=ROOT::RNTupleWriteOptions())
Append an RNTuple to the existing file.
Common user-tunable settings for storing RNTuples.
const_iterator begin() const
const_iterator end() const
A "std::vector"-like collection of values implementing handy operation to analyse them.
Definition RVec.hxx:1524
A Branch for the case of an object.
A TTree is a list of TBranches.
Definition TBranch.h:93
static TClass * Class()
TClassRef is used to implement a permanent reference to a TClass object.
Definition TClassRef.h:29
TClass instances represent classes, structs and namespaces in the ROOT type system.
Definition TClass.h:84
Basic data type descriptor (datatype information is obtained from CINT).
Definition TDataType.h:44
Int_t GetType() const
Definition TDataType.h:71
static TDictionary * GetDictionary(const char *name)
Retrieve the type (class, fundamental type, typedef etc) named "name".
TDirectory::TContext keeps track and restore the current directory.
Definition TDirectory.h:89
Describe directory structure in memory.
Definition TDirectory.h:45
A file, usually with extension .root, that stores data and code in the form of serialized objects in ...
Definition TFile.h:130
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition TFile.cxx:3787
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
Definition TLeaf.h:57
Basic string class.
Definition TString.h:138
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
Definition TTreeReader.h:46
A TTree represents a columnar dataset.
Definition TTree.h:89
@ kEntriesReshuffled
If set, signals that this TTree is the output of the processing of another TTree, and the entries are...
Definition TTree.h:305
std::vector< std::string > ReplaceDotWithUnderscore(const std::vector< std::string > &columnNames)
Replace occurrences of '.' with '_' in each of the given column names.
Definition RDFUtils.cxx:415
char TypeName2ROOTTypeName(const std::string &b)
Convert a type name (e.g. "Float_t") to the corresponding ROOT type code character (e.g. 'F').
Definition RDFUtils.cxx:360
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info An empty string is returned in case of failure...
Definition RDFUtils.cxx:191
std::string GetTypeNameWithOpts(const ROOT::RDF::RDataSource &ds, std::string_view colName, bool vector2RVec)
Definition RDFUtils.cxx:645
char TypeID2ROOTTypeName(const std::type_info &tid)
Definition RDFUtils.cxx:219
TBranch * CallBranchImp(TTree &tree, const char *branchname, TClass *ptrClass, void *addobj, Int_t bufsize=32000, Int_t splitlevel=99)
Definition TTree.cxx:10133
TBranch * CallBranchImpRef(TTree &tree, const char *branchname, TClass *ptrClass, EDataType datatype, void *addobj, Int_t bufsize=32000, Int_t splitlevel=99)
Definition TTree.cxx:10127
std::vector< std::string > ColumnNames_t
@ kROOTRVec
Definition ESTLType.h:46
@ kSTLvector
Definition ESTLType.h:30
int CompressionSettings(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
ROOT::ESTLType STLKind(std::string_view type)
Converts STL container name to number.
ROOT::ESTLType IsSTLCont(std::string_view type)
type : type name: vector<list<classA,allocator>,allocator> result: 0 : not stl container code of cont...
Stores empty instances of classes, so a dummy object can be written when a systematic variation doesn...
Stores variations of a fundamental type.
Stores properties of each output branch in a Snapshot.
void * EmptyInstance(bool pointerToPointer)
Return a pointer to an empty instance of the type represented by this branch.
void ClearBranchContents()
Point the branch address to an empty instance of the type represented by this branch or write null by...
std::variant< FundamentalType, EmptyDynamicType > fTypeData
const std::type_info * fInputTypeID
An object to store an output file and a tree in one common place to share them between instances of S...
void Write() const
Write the current event and the bitmask to the output dataset.
void ClearMaskBits()
Clear all bits, as if none of the variations passed its filter.
SnapshotOutputWriter(SnapshotOutputWriter const &)=delete
std::unordered_map< std::string, std::pair< std::string, unsigned int > > fBranchToBitmaskMapping
void RegisterBranch(std::string const &branchName, unsigned int variationIndex)
Register a branch and corresponding systematic uncertainty.
void SetMaskBit(unsigned int index)
Set a bit signalling that the variation at index passed its filter.
bool MaskEmpty() const
Test if any of the mask bits are set.
SnapshotOutputWriter & operator=(SnapshotOutputWriter const &)=delete
std::unordered_map< std::string, unsigned int > fBranchToVariationMapping
SnapshotOutputWriter(SnapshotOutputWriter &&) noexcept=delete
Tag to let data sources use the native data type when creating a column reader.
Definition Utils.hxx:347
EValues
Note: this is only temporarily a struct and will become a enum class hence the name convention used.
Definition Compression.h:88
A collection of options to steer the creation of the dataset on disk through Snapshot().
int fAutoFlush
*(TTree only)* AutoFlush value for output tree
ESnapshotOutputFormat fOutputFormat
Which data format to write to.
std::string fMode
Mode of creation of output file.
ECAlgo fCompressionAlgorithm
Compression algorithm of output file.
int fSplitLevel
*(TTree only)* Split level of output tree
int fCompressionLevel
Compression level of output file.