Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RDFSnapshotHelpers.cxx
Go to the documentation of this file.
1/**
2 \file RDFSnapshotHelpers.cxx
3 \ingroup dataframe
4 \author Enrico Guiraud, CERN
5 \author Danilo Piparo, CERN
6 \date 2016-12
7 \author Vincenzo Eduardo Padulano
8 \author Stephan Hageboeck
9 \date 2025-06
10*/
11
12/*************************************************************************
13 * Copyright (C) 1995-2025, Rene Brun and Fons Rademakers. *
14 * All rights reserved. *
15 * *
16 * For the licensing terms see $ROOTSYS/LICENSE. *
17 * For the list of contributors see $ROOTSYS/README/CREDITS. *
18 *************************************************************************/
19
21
#include <ROOT/RDF/Utils.hxx>
#include <ROOT/REntry.hxx>
#include <ROOT/RFieldToken.hxx>
#include <ROOT/RNTuple.hxx>
#include <ROOT/RNTupleDS.hxx>
#include <ROOT/RTTreeDS.hxx>

#include <TBranchObject.h>
#include <TClassEdit.h>
#include <TDictionary.h>
#include <TDataType.h>
#include <TFile.h>
#include <TLeaf.h>
#include <TTreeReader.h>

#include <algorithm>
#include <type_traits>
#include <utility>
42
44// Maintaining the following allows for faster vector resize:
45static_assert(std::is_nothrow_move_assignable_v<RBranchData>);
46static_assert(std::is_nothrow_move_constructible_v<RBranchData>);
47
48namespace {
49
50void AssertNoNullBranchAddresses(const std::vector<RBranchData> &branches)
51{
52 std::vector<TBranch *> branchesWithNullAddress;
53 for (const auto &branchData : branches) {
54 if (branchData.fOutputBranch->GetAddress() == nullptr)
55 branchesWithNullAddress.push_back(branchData.fOutputBranch);
56 }
57
58 if (branchesWithNullAddress.empty())
59 return;
60
61 // otherwise build error message and throw
62 std::vector<std::string> missingBranchNames;
64 std::back_inserter(missingBranchNames), [](TBranch *b) { return b->GetName(); });
65 std::string msg = "RDataFrame::Snapshot:";
66 if (missingBranchNames.size() == 1) {
67 msg += " branch " + missingBranchNames[0] +
68 " is needed as it provides the size for one or more branches containing dynamically sized arrays, but "
69 "it is";
70 } else {
71 msg += " branches ";
72 for (const auto &bName : missingBranchNames)
73 msg += bName + ", ";
74 msg.resize(msg.size() - 2); // remove last ", "
75 msg += " are needed as they provide the size of other branches containing dynamically sized arrays, but they are";
76 }
77 msg += " not part of the set of branches that are being written out.";
78 throw std::runtime_error(msg);
79}
80
81TBranch *SearchForBranch(TTree *inputTree, const std::string &branchName)
82{
83 if (inputTree) {
84 if (auto *getBranchRes = inputTree->GetBranch(branchName.c_str()))
85 return getBranchRes;
86
87 // try harder
88 if (auto *findBranchRes = inputTree->FindBranch(branchName.c_str()))
89 return findBranchRes;
90 }
91 return nullptr;
92}
93
94std::vector<RBranchData>::iterator CreateCStyleArrayBranch(TTree &outputTree, std::vector<RBranchData> &outputBranches,
95 std::vector<RBranchData>::iterator thisBranch,
96 TBranch *inputBranch, int basketSize, void *address)
97{
98 if (!inputBranch)
99 return thisBranch;
100 const auto STLKind = TClassEdit::IsSTLCont(inputBranch->GetClassName());
101 if (STLKind == ROOT::ESTLType::kSTLvector || STLKind == ROOT::ESTLType::kROOTRVec)
102 return thisBranch;
103 // must construct the leaflist for the output branch and create the branch in the output tree
104 const auto *leaf = static_cast<TLeaf *>(inputBranch->GetListOfLeaves()->UncheckedAt(0));
105 if (!leaf)
106 return thisBranch;
107 const auto bname = leaf->GetName();
108 auto *sizeLeaf = leaf->GetLeafCount();
109 const auto sizeLeafName = sizeLeaf ? std::string(sizeLeaf->GetName()) : std::to_string(leaf->GetLenStatic());
110
111 // We proceed only if branch is a fixed-or-variable-sized array
112 if (sizeLeaf || leaf->GetLenStatic() > 1) {
113 if (sizeLeaf) {
114 // The array branch `bname` has dynamic size stored in leaf `sizeLeafName`, so we need to ensure that it's
115 // in the output tree.
116 auto sizeLeafIt =
117 std::find_if(outputBranches.begin(), outputBranches.end(),
118 [&sizeLeafName](const RBranchData &bd) { return bd.fOutputBranchName == sizeLeafName; });
119 if (sizeLeafIt == outputBranches.end()) {
120 // The size leaf is not part of the output branches yet, so emplace an empty slot for it.
121 // This means that iterators need to be updated in case the container reallocates.
122 const auto indexBeforeEmplace = std::distance(outputBranches.begin(), thisBranch);
123 outputBranches.emplace_back("", sizeLeafName, /*isDefine=*/false, /*typeID=*/nullptr);
126 }
127 if (!sizeLeafIt->fOutputBranch) {
128 // The size leaf was emplaced, but not initialised yet
130 // Use original basket size for existing branches otherwise use custom basket size.
131 const auto bufSize = (basketSize > 0) ? basketSize : sizeLeaf->GetBranch()->GetBasketSize();
132 // The null branch address is a placeholder. It will be set when SetBranchesHelper is called for
133 // `sizeLeafName`
134 auto *outputBranch = outputTree.Branch(sizeLeafName.c_str(), static_cast<void *>(nullptr),
135 (sizeLeafName + '/' + sizeTypeStr).c_str(), bufSize);
136 sizeLeafIt->fOutputBranch = outputBranch;
137 }
138 }
139
140 const auto btype = leaf->GetTypeName();
142 if (rootbtype == ' ') {
143 Warning("Snapshot",
144 "RDataFrame::Snapshot: could not correctly construct a leaflist for C-style array in column %s. The "
145 "leaf is of type '%s'. This column will not be written out.",
146 bname, btype);
147 return thisBranch;
148 }
149
150 const auto leaflist = std::string(bname) + "[" + sizeLeafName + "]/" + rootbtype;
151 // Use original basket size for existing branches and new basket size for new branches
152 const auto bufSize = (basketSize > 0) ? basketSize : inputBranch->GetBasketSize();
153 void *addressForBranch = [address]() -> void * {
154 if (address) {
155 // Address here points to a ROOT::RVec<std::byte> coming from RTreeUntypedArrayColumnReader. We know we need
156 // its buffer, so we cast it and extract the address of the buffer
157 auto *rawRVec = reinterpret_cast<ROOT::RVec<std::byte> *>(address);
158 return rawRVec->data();
159 }
160 return nullptr;
161 }();
162 thisBranch->fOutputBranch =
163 outputTree.Branch(thisBranch->fOutputBranchName.c_str(), addressForBranch, leaflist.c_str(), bufSize);
164 thisBranch->fOutputBranch->SetTitle(inputBranch->GetTitle());
165 thisBranch->fIsCArray = true;
166 }
167
168 return thisBranch;
169}
170
171void SetBranchAddress(TBranch *inputBranch, RBranchData &branchData, void *valueAddress)
172{
173 const static TClassRef TBOClRef("TBranchObject");
174 if (inputBranch && inputBranch->IsA() == TBOClRef) {
175 branchData.fOutputBranch->SetAddress(reinterpret_cast<void **>(inputBranch->GetAddress()));
176 } else if (branchData.fOutputBranch->IsA() != TBranch::Class()) {
177 // This is a relatively rare case of a fixed-size array getting redefined
178 branchData.fBranchAddressForCArrays = valueAddress;
179 branchData.fOutputBranch->SetAddress(&branchData.fBranchAddressForCArrays);
180 } else {
181 void *correctAddress = [valueAddress, isCArray = branchData.fIsCArray]() -> void * {
182 if (isCArray) {
183 // Address here points to a ROOT::RVec<std::byte> coming from RTreeUntypedArrayColumnReader. We know we
184 // need its buffer, so we cast it and extract the address of the buffer
185 auto *rawRVec = reinterpret_cast<ROOT::RVec<std::byte> *>(valueAddress);
186 return rawRVec->data();
187 }
188 return valueAddress;
189 }();
190 branchData.fOutputBranch->SetAddress(correctAddress);
191 branchData.fBranchAddressForCArrays = valueAddress;
192 }
193}
194
196{
197 // Logic taken from
198 // TTree::BranchImpRef(
199 // const char* branchname, TClass* ptrClass, EDataType datatype, void* addobj, Int_t bufsize, Int_t splitlevel)
201 if (rootTypeChar == ' ') {
202 Warning("Snapshot",
203 "RDataFrame::Snapshot: could not correctly construct a leaflist for fundamental type in column %s. This "
204 "column will not be written out.",
205 bd.fOutputBranchName.c_str());
206 return;
207 }
208 std::string leafList{bd.fOutputBranchName + '/' + rootTypeChar};
209 bd.fOutputBranch = outputTree.Branch(bd.fOutputBranchName.c_str(), valueAddress, leafList.c_str(), bufSize);
210}
211
212/// Ensure that the TTree with the resulting snapshot can be written to the target TFile. This means checking that the
213/// TFile can be opened in the mode specified in `opts`, deleting any existing TTrees in case
214/// `opts.fOverwriteIfExists = true`, or throwing an error otherwise.
216 const std::string &fileName)
217{
218 TString fileMode = opts.fMode;
219 fileMode.ToLower();
220 if (fileMode != "update")
221 return;
222
223 // output file opened in "update" mode: must check whether output TTree is already present in file
224 std::unique_ptr<TFile> outFile{TFile::Open(fileName.c_str(), "update")};
225 if (!outFile || outFile->IsZombie())
226 throw std::invalid_argument("Snapshot: cannot open file \"" + fileName + "\" in update mode");
227
228 TObject *outTree = outFile->Get(treeName.c_str());
229 if (outTree == nullptr)
230 return;
231
232 // object called treeName is already present in the file
233 if (opts.fOverwriteIfExists) {
234 if (outTree->InheritsFrom("TTree")) {
235 static_cast<TTree *>(outTree)->Delete("all");
236 } else {
237 outFile->Delete(treeName.c_str());
238 }
239 } else {
240 const std::string msg = "Snapshot: tree \"" + treeName + "\" already present in file \"" + fileName +
241 "\". If you want to delete the original tree and write another, please set "
242 "RSnapshotOptions::fOverwriteIfExists to true.";
243 throw std::invalid_argument(msg);
244 }
245}
246
247/// Ensure that the RNTuple with the resulting snapshot can be written to the target TFile. This means checking that the
248/// TFile can be opened in the mode specified in `opts`, deleting any existing RNTuples in case
249/// `opts.fOverwriteIfExists = true`, or throwing an error otherwise.
251 const std::string &fileName)
252{
253 TString fileMode = opts.fMode;
254 fileMode.ToLower();
255 if (fileMode != "update")
256 return;
257
258 // output file opened in "update" mode: must check whether output RNTuple is already present in file
259 std::unique_ptr<TFile> outFile{TFile::Open(fileName.c_str(), "update")};
260 if (!outFile || outFile->IsZombie())
261 throw std::invalid_argument("Snapshot: cannot open file \"" + fileName + "\" in update mode");
262
263 auto *outNTuple = outFile->Get<ROOT::RNTuple>(ntupleName.c_str());
264
265 if (outNTuple) {
266 if (opts.fOverwriteIfExists) {
267 outFile->Delete((ntupleName + ";*").c_str());
268 return;
269 } else {
270 const std::string msg = "Snapshot: RNTuple \"" + ntupleName + "\" already present in file \"" + fileName +
271 "\". If you want to delete the original ntuple and write another, please set "
272 "the 'fOverwriteIfExists' option to true in RSnapshotOptions.";
273 throw std::invalid_argument(msg);
274 }
275 }
276
277 // Also check if there is any object other than an RNTuple with the provided ntupleName.
278 TObject *outObj = outFile->Get(ntupleName.c_str());
279
280 if (!outObj)
281 return;
282
283 // An object called ntupleName is already present in the file.
284 if (opts.fOverwriteIfExists) {
285 if (auto tree = dynamic_cast<TTree *>(outObj)) {
286 tree->Delete("all");
287 } else {
288 outFile->Delete((ntupleName + ";*").c_str());
289 }
290 } else {
291 const std::string msg = "Snapshot: object \"" + ntupleName + "\" already present in file \"" + fileName +
292 "\". If you want to delete the original object and write a new RNTuple, please set "
293 "the 'fOverwriteIfExists' option to true in RSnapshotOptions.";
294 throw std::invalid_argument(msg);
295 }
296}
297
299 std::vector<ROOT::Internal::RDF::RBranchData> &allBranchData, std::size_t currentIndex,
300 int basketSize, void *valueAddress)
301{
303 auto *inputBranch = branchData->fIsDefine ? nullptr : SearchForBranch(inputTree, branchData->fInputBranchName);
304
305 if (branchData->fOutputBranch && valueAddress) {
306 // The output branch was already created, we just need to (re)set its address
307 SetBranchAddress(inputBranch, *branchData, valueAddress);
308 return;
309 }
310
311 // Respect the original bufsize and splitlevel arguments
312 // In particular, by keeping splitlevel equal to 0 if this was the case for `inputBranch`, we avoid
313 // writing garbage when unsplit objects cannot be written as split objects (e.g. in case of a polymorphic
314 // TObject branch, see https://bit.ly/2EjLMId ).
315 // A user-provided basket size value takes precedence.
316 const auto bufSize = (basketSize > 0) ? basketSize : (inputBranch ? inputBranch->GetBasketSize() : 32000);
317 const auto splitLevel = inputBranch ? inputBranch->GetSplitLevel() : 99;
318
319 auto *dictionary = TDictionary::GetDictionary(*branchData->fInputTypeID);
320 if (dynamic_cast<TDataType *>(dictionary)) {
321 // Branch of fundamental type
323 return;
324 }
325
326 if (!branchData->fIsDefine) {
327 // Cases where we need a leaflist (e.g. C-style arrays)
328 // We only enter this code path if the input value does not come from a Define/Redefine. In those cases, it is
329 // not allowed to create a column of C-style array type, so that can't happen when writing the TTree. This is
330 // currently what prevents writing the wrong branch output type in a scenario where the input branch of the TTree
331 // is a C-style array and then the user is Redefining it with some other type (e.g. a ROOT::RVec).
333 }
334 if (branchData->fOutputBranch) {
335 // A branch was created in the previous function call
336 if (valueAddress) {
337 // valueAddress here points to a ROOT::RVec<std::byte> coming from RTreeUntypedArrayColumnReader. We know we
338 // need its buffer, so we cast it and extract the address of the buffer
339 auto *rawRVec = reinterpret_cast<ROOT::RVec<std::byte> *>(valueAddress);
340 branchData->fBranchAddressForCArrays = rawRVec->data();
341 }
342 return;
343 }
344
345 if (auto *classPtr = dynamic_cast<TClass *>(dictionary)) {
346 // Case of unsplit object with polymorphic type
347 if (inputBranch && dynamic_cast<TBranchObject *>(inputBranch) && valueAddress)
348 branchData->fOutputBranch =
350 inputBranch->GetAddress(), bufSize, splitLevel);
351 // General case, with valid address
352 else if (valueAddress)
354 outputTree, branchData->fOutputBranchName.c_str(), classPtr, TDataType::GetType(*branchData->fInputTypeID),
356 // No value was passed, we're just creating a hollow branch to populate the dataset schema
357 else
358 branchData->fOutputBranch =
359 outputTree.Branch(branchData->fOutputBranchName.c_str(), classPtr->GetName(), nullptr, bufSize);
360 return;
361 }
362
363 // We are not aware of other cases
364 throw std::logic_error(
365 "RDataFrame::Snapshot: something went wrong when creating a TTree branch, please report this as a bug.");
366}
367} // namespace
368
370 const std::type_info *typeID)
371 : fInputBranchName{std::move(inputBranchName)},
372 fOutputBranchName{std::move(outputBranchName)},
373 fInputTypeID{typeID},
374 fIsDefine{isDefine}
375{
377 if (auto datatype = dynamic_cast<TDataType *>(dictionary); datatype) {
379 } else if (auto tclass = dynamic_cast<TClass *>(dictionary); tclass) {
380 fTypeData = EmptyDynamicType{tclass};
381 }
382}
383
384/// @brief Return a pointer to an empty instance of the type represented by this branch.
385/// For fundamental types, this is simply an 8-byte region of zeroes. For classes, it is an instance created with
386/// TClass::New.
387/// @param pointerToPointer Return a pointer to a pointer, so it can be used directly in TTree::SetBranchAddress().
389{
390 if (auto fundamental = std::get_if<FundamentalType>(&fTypeData); fundamental) {
391 assert(!pointerToPointer); // Not used for fundamental types
392 return fundamental->fBytes.data();
393 }
394
395 auto &dynamic = std::get<EmptyDynamicType>(fTypeData);
396 if (!dynamic.fEmptyInstance) {
397 auto *dictionary = TDictionary::GetDictionary(*fInputTypeID);
398 assert(dynamic_cast<TDataType *>(dictionary) ==
399 nullptr); // TDataType should be handled by writing into the local buffer
400
401 auto tclass = dynamic_cast<TClass *>(dictionary);
402 assert(tclass);
403 dynamic.fEmptyInstance = std::shared_ptr<void>{tclass->New(), tclass->GetDestructor()};
404 }
405
406 if (pointerToPointer) {
407 // Make TTree happy (needs a pointer to pointer):
408 dynamic.fRawPtrToEmptyInstance = dynamic.fEmptyInstance.get();
409 return &dynamic.fRawPtrToEmptyInstance;
410 } else {
411 return dynamic.fEmptyInstance.get();
412 }
413}
414
415/// Point the branch address to an empty instance of the type represented by this branch
416/// or write null bytes into the space used by the fundamental type.
417/// This is used in case of variations, when certain defines/actions don't execute. We
418/// nevertheless need to write something, so we point the branch to an empty instance.
420{
421 if (!fOutputBranch)
422 return;
423
424 if (auto fundamental = std::get_if<FundamentalType>(&fTypeData); fundamental) {
425 fundamental->fBytes.fill(std::byte{0});
426 } else {
427 // TTree expects pointer to pointer, to figure out who allocates the object:
428 fOutputBranch->SetAddress(EmptyInstance(/*pointerToPointer=*/true));
429 }
430}
431
433 std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames,
434 const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector<bool> &&isDefine,
436 const std::vector<const std::type_info *> &colTypeIDs)
437 : fFileName(filename),
438 fDirName(dirname),
439 fTreeName(treename),
440 fOptions(options),
441 fOutputLoopManager(loopManager),
442 fInputLoopManager(inputLM)
443{
445
447 fBranchData.reserve(vbnames.size());
448 for (unsigned int i = 0; i < vbnames.size(); ++i) {
449 fBranchData.emplace_back(vbnames[i], std::move(outputBranchNames[i]), isDefine[i], colTypeIDs[i]);
450 }
451}
452
453// Define special member methods here where the definition of all the data member types is available
457 ROOT::Internal::RDF::UntypedSnapshotTTreeHelper &&) noexcept = default;
458
460{
461 if (!fTreeName.empty() /*not moved from*/ && !fOutputFile /* did not run */ && fOptions.fLazy) {
462 const auto fileOpenMode = [&]() {
463 TString checkupdate = fOptions.fMode;
464 checkupdate.ToLower();
465 return checkupdate == "update" ? "updated" : "created";
466 }();
467 Warning("Snapshot",
468 "A lazy Snapshot action was booked but never triggered. The tree '%s' in output file '%s' was not %s. "
469 "In case it was desired instead, remember to trigger the Snapshot operation, by storing "
470 "its result in a variable and for example calling the GetValue() method on it.",
471 fTreeName.c_str(), fFileName.c_str(), fileOpenMode);
472 }
473}
474
476{
477 // We ask the input RLoopManager if it has a TTree. We cannot rely on getting this information when constructing
478 // this action helper, since the TTree might change e.g. when ChangeSpec is called in-between distributed tasks.
479 if (auto treeDS = dynamic_cast<ROOT::Internal::RDF::RTTreeDS *>(fInputLoopManager->GetDataSource()))
480 fInputTree = treeDS->GetTree();
481 fBranchAddressesNeedReset = true;
482}
483
484void ROOT::Internal::RDF::UntypedSnapshotTTreeHelper::Exec(unsigned int, const std::vector<void *> &values)
485{
486 if (!fBranchAddressesNeedReset) {
487 UpdateCArraysPtrs(values);
488 } else {
489 SetBranches(values);
490 fBranchAddressesNeedReset = false;
491 }
492
493 fOutputTree->Fill();
494}
495
497{
498 // This code deals with branches which hold C arrays of variable size. It can happen that the buffers
499 // associated to those is re-allocated. As a result the value of the pointer can change therewith
500 // leaving associated to the branch of the output tree an invalid pointer.
501 // With this code, we set the value of the pointer in the output branch anew when needed.
502 assert(values.size() == fBranchData.size());
503 auto nValues = values.size();
504 for (decltype(nValues) i{}; i < nValues; i++) {
505 if (fBranchData[i].fIsCArray) {
506 // valueAddress here points to a ROOT::RVec<std::byte> coming from RTreeUntypedArrayColumnReader. We know we
507 // need its buffer, so we cast it and extract the address of the buffer
508 auto *rawRVec = reinterpret_cast<ROOT::RVec<std::byte> *>(values[i]);
509 if (auto *data = rawRVec->data(); fBranchData[i].fBranchAddressForCArrays != data) {
510 fBranchData[i].fOutputBranch->SetAddress(data);
511 fBranchData[i].fBranchAddressForCArrays = data;
512 }
513 }
514 }
515}
516
518{
519 // create branches in output tree
520 assert(fBranchData.size() == values.size());
521 for (std::size_t i = 0; i < fBranchData.size(); i++) { // fBranchData can grow due to insertions
522 SetBranchesHelper(fInputTree, *fOutputTree, fBranchData, i, fOptions.fBasketSize, values[i]);
523 }
524 AssertNoNullBranchAddresses(fBranchData);
525}
526
528{
529 void *dummyValueAddress{};
530 for (std::size_t i = 0; i < fBranchData.size(); i++) { // fBranchData can grow due to insertions
531 SetBranchesHelper(inputTree, outputTree, fBranchData, i, fOptions.fBasketSize, dummyValueAddress);
532 }
533}
534
536{
537 fOutputFile.reset(
538 TFile::Open(fFileName.c_str(), fOptions.fMode.c_str(), /*ftitle=*/"",
539 ROOT::CompressionSettings(fOptions.fCompressionAlgorithm, fOptions.fCompressionLevel)));
540 if (!fOutputFile)
541 throw std::runtime_error("Snapshot: could not create output file " + fFileName);
542
543 TDirectory *outputDir = fOutputFile.get();
544 if (!fDirName.empty()) {
545 TString checkupdate = fOptions.fMode;
546 checkupdate.ToLower();
547 if (checkupdate == "update")
548 outputDir = fOutputFile->mkdir(fDirName.c_str(), "", true); // do not overwrite existing directory
549 else
550 outputDir = fOutputFile->mkdir(fDirName.c_str());
551 }
552
553 fOutputTree = std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/outputDir);
554
555 if (fOptions.fAutoFlush)
556 fOutputTree->SetAutoFlush(fOptions.fAutoFlush);
557}
558
560{
561 assert(fOutputTree != nullptr);
562 assert(fOutputFile != nullptr);
563
564 // There were no entries to fill the TTree with (either the input TTree was empty or no event passed after
565 // filtering). We have already created an empty TTree, now also create the branches to preserve the schema
566 if (fOutputTree->GetEntries() == 0) {
567 SetEmptyBranches(fInputTree, *fOutputTree);
568 }
569 // use AutoSave to flush TTree contents because TTree::Write writes in gDirectory, not in fDirectory
570 fOutputTree->AutoSave("flushbaskets");
571 // must destroy the TTree first, otherwise TFile will delete it too leading to a double delete
572 fOutputTree.reset();
573 fOutputFile->Close();
574
575 // Now connect the data source to the loop manager so it can be used for further processing
576 auto fullTreeName = fDirName.empty() ? fTreeName : fDirName + '/' + fTreeName;
577 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::Internal::RDF::RTTreeDS>(fullTreeName, fFileName));
578}
579
580/**
581 * \brief Create a new UntypedSnapshotTTreeHelper with a different output file name
582 *
583 * \param newName A type-erased string with the output file name
584 * \return UntypedSnapshotTTreeHelper
585 *
586 * This MakeNew implementation is tied to the cloning feature of actions
587 * of the computation graph. In particular, cloning a Snapshot node usually
588 * also involves changing the name of the output file, otherwise the cloned
589 * Snapshot would overwrite the same file.
590 */
593{
594 const std::string finalName = *reinterpret_cast<const std::string *>(newName);
595 std::vector<std::string> inputBranchNames;
596 std::vector<std::string> outputBranchNames;
597 std::vector<bool> isDefine;
598 std::vector<const std::type_info *> inputColumnTypeIDs;
599 for (const auto &bd : fBranchData) {
600 if (bd.fInputBranchName.empty())
601 break;
602 inputBranchNames.push_back(bd.fInputBranchName);
603 outputBranchNames.push_back(bd.fOutputBranchName);
604 isDefine.push_back(bd.fIsDefine);
605 inputColumnTypeIDs.push_back(bd.fInputTypeID);
606 }
607
609 fDirName,
610 fTreeName,
611 std::move(inputBranchNames),
612 std::move(outputBranchNames),
613 fOptions,
614 std::move(isDefine),
615 fOutputLoopManager,
616 fInputLoopManager,
618}
619
621 unsigned int nSlots, std::string_view filename, std::string_view dirname, std::string_view treename,
622 const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options,
624 const std::vector<const std::type_info *> &colTypeIDs)
625 : fNSlots(nSlots),
626 fOutputFiles(fNSlots),
627 fOutputTrees(fNSlots),
628 fBranchAddressesNeedReset(fNSlots, 1),
629 fInputTrees(fNSlots),
630 fFileName(filename),
631 fDirName(dirname),
632 fTreeName(treename),
633 fOptions(options),
634 fOutputLoopManager(loopManager),
635 fInputLoopManager(inputLM)
636{
638
640 fBranchData.reserve(fNSlots);
641 for (unsigned int slot = 0; slot < fNSlots; ++slot) {
642 fBranchData.emplace_back();
643 auto &thisSlot = fBranchData.back();
644 thisSlot.reserve(vbnames.size());
645 for (unsigned int i = 0; i < vbnames.size(); ++i) {
646 thisSlot.emplace_back(vbnames[i], outputBranchNames[i], isDefine[i], colTypeIDs[i]);
647 }
648 }
649}
650
651// Define special member methods here where the definition of all the data member types is available
656
658{
659 if (!fTreeName.empty() /*not moved from*/ && fOptions.fLazy && !fOutputFiles.empty() &&
660 std::all_of(fOutputFiles.begin(), fOutputFiles.end(), [](const auto &f) { return !f; }) /* never run */) {
661 const auto fileOpenMode = [&]() {
662 TString checkupdate = fOptions.fMode;
663 checkupdate.ToLower();
664 return checkupdate == "update" ? "updated" : "created";
665 }();
666 Warning("Snapshot",
667 "A lazy Snapshot action was booked but never triggered. The tree '%s' in output file '%s' was not %s. "
668 "In case it was desired instead, remember to trigger the Snapshot operation, by storing "
669 "its result in a variable and for example calling the GetValue() method on it.",
670 fTreeName.c_str(), fFileName.c_str(), fileOpenMode);
671 }
672}
673
675{
676 ::TDirectory::TContext c; // do not let tasks change the thread-local gDirectory
677 if (!fOutputFiles[slot]) {
678 // first time this thread executes something, let's create a TBufferMerger output directory
679 fOutputFiles[slot] = fMerger->GetFile();
680 }
681 TDirectory *treeDirectory = fOutputFiles[slot].get();
682 if (!fDirName.empty()) {
683 // call returnExistingDirectory=true since MT can end up making this call multiple times
684 treeDirectory = fOutputFiles[slot]->mkdir(fDirName.c_str(), "", true);
685 }
686 // re-create output tree as we need to create its branches again, with new input variables
687 // TODO we could instead create the output tree and its branches, change addresses of input variables in each task
688 fOutputTrees[slot] =
689 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/treeDirectory);
690 fOutputTrees[slot]->SetBit(TTree::kEntriesReshuffled);
691 // TODO can be removed when RDF supports interleaved TBB task execution properly, see ROOT-10269
692 fOutputTrees[slot]->SetImplicitMT(false);
693 if (fOptions.fAutoFlush)
694 fOutputTrees[slot]->SetAutoFlush(fOptions.fAutoFlush);
695 if (r) {
696 // We could be getting a task-local TTreeReader from the TTreeProcessorMT.
697 fInputTrees[slot] = r->GetTree();
698 } else if (auto treeDS = dynamic_cast<ROOT::Internal::RDF::RTTreeDS *>(fInputLoopManager->GetDataSource())) {
699 fInputTrees[slot] = treeDS->GetTree();
700 }
701 fBranchAddressesNeedReset[slot] = 1; // reset first event flag for this slot
702}
703
705{
706 if (fOutputTrees[slot]->GetEntries() > 0)
707 fOutputFiles[slot]->Write();
708 for (auto &branchData : fBranchData[slot])
709 branchData.ClearBranchPointers(); // The branch pointers will go stale below
710 // clear now to avoid concurrent destruction of output trees and input tree (which has them listed as fClones)
711 fOutputTrees[slot].reset(nullptr);
712}
713
714void ROOT::Internal::RDF::UntypedSnapshotTTreeHelperMT::Exec(unsigned int slot, const std::vector<void *> &values)
715{
716 if (fBranchAddressesNeedReset[slot] == 0) {
717 UpdateCArraysPtrs(slot, values);
718 } else {
719 SetBranches(slot, values);
720 fBranchAddressesNeedReset[slot] = 0;
721 }
722 fOutputTrees[slot]->Fill();
723 auto entries = fOutputTrees[slot]->GetEntries();
724 auto autoFlush = fOutputTrees[slot]->GetAutoFlush();
725 if ((autoFlush > 0) && (entries % autoFlush == 0))
726 fOutputFiles[slot]->Write();
727}
728
730 const std::vector<void *> &values)
731{
732 // This code deals with branches which hold C arrays of variable size. It can happen that the buffers
733 // associated to those is re-allocated. As a result the value of the pointer can change therewith
734 // leaving associated to the branch of the output tree an invalid pointer.
735 // With this code, we set the value of the pointer in the output branch anew when needed.
736 assert(values.size() == fBranchData[slot].size());
737 auto nValues = values.size();
738 for (decltype(nValues) i{}; i < nValues; i++) {
739 auto &branchData = fBranchData[slot][i];
740 if (branchData.fIsCArray) {
741 // valueAddress here points to a ROOT::RVec<std::byte> coming from RTreeUntypedArrayColumnReader. We know we
742 // need its buffer, so we cast it and extract the address of the buffer
743 auto *rawRVec = reinterpret_cast<ROOT::RVec<std::byte> *>(values[i]);
744 if (auto *data = rawRVec->data(); branchData.fBranchAddressForCArrays != data) {
745 // reset the branch address
746 branchData.fOutputBranch->SetAddress(data);
747 branchData.fBranchAddressForCArrays = data;
748 }
749 }
750 }
751}
752
754 const std::vector<void *> &values)
755{
756 // create branches in output tree
757 auto &branchData = fBranchData[slot];
758 assert(branchData.size() == values.size());
759 for (std::size_t i = 0; i < branchData.size(); i++) { // branchData can grow due to insertions
760 SetBranchesHelper(fInputTrees[slot], *fOutputTrees[slot], branchData, i, fOptions.fBasketSize, values[i]);
761 }
762
764}
765
767{
768 void *dummyValueAddress{};
769 auto &branchData = fBranchData.front();
770 for (std::size_t i = 0; i < branchData.size(); i++) { // branchData can grow due to insertions
772 }
773}
774
776{
777 const auto cs = ROOT::CompressionSettings(fOptions.fCompressionAlgorithm, fOptions.fCompressionLevel);
778 auto outFile =
779 std::unique_ptr<TFile>{TFile::Open(fFileName.c_str(), fOptions.fMode.c_str(), /*ftitle=*/fFileName.c_str(), cs)};
780 if (!outFile)
781 throw std::runtime_error("Snapshot: could not create output file " + fFileName);
782 fOutputFile = outFile.get();
783 fMerger = std::make_unique<ROOT::TBufferMerger>(std::move(outFile));
784}
785
787{
788
789 for (auto &file : fOutputFiles) {
790 if (file) {
791 file->Write();
792 file->Close();
793 }
794 }
795
796 // If there were no entries to fill the TTree with (either the input TTree was empty or no event passed after
797 // filtering), create an empty TTree in the output file and create the branches to preserve the schema
798 auto fullTreeName = fDirName.empty() ? fTreeName : fDirName + '/' + fTreeName;
799 assert(fOutputFile && "Missing output file in Snapshot finalization.");
800 if (!fOutputFile->Get(fullTreeName.c_str())) {
801
802 // First find in which directory we need to write the output TTree
803 TDirectory *treeDirectory = fOutputFile;
804 if (!fDirName.empty()) {
805 treeDirectory = fOutputFile->mkdir(fDirName.c_str(), "", true);
806 }
808
809 // Create the output TTree and create the user-requested branches
810 auto outTree =
811 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/treeDirectory);
812 TTree *inputTree{};
813 if (auto treeDS = dynamic_cast<ROOT::Internal::RDF::RTTreeDS *>(fInputLoopManager->GetDataSource()))
814 inputTree = treeDS->GetTree();
815 SetEmptyBranches(inputTree, *outTree);
816
817 fOutputFile->Write();
818 }
819
820 // flush all buffers to disk by destroying the TBufferMerger
821 fOutputFiles.clear();
822 fMerger.reset();
823
824 // Now connect the data source to the loop manager so it can be used for further processing
825 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::Internal::RDF::RTTreeDS>(fullTreeName, fFileName));
826}
827
828/**
829 * \brief Create a new UntypedSnapshotTTreeHelperMT with a different output file name
830 *
831 * \param newName A type-erased string with the output file name
832 * \return UntypedSnapshotTTreeHelperMT
833 *
834 * This MakeNew implementation is tied to the cloning feature of actions
835 * of the computation graph. In particular, cloning a Snapshot node usually
836 * also involves changing the name of the output file, otherwise the cloned
837 * Snapshot would overwrite the same file.
838 */
841{
842 const std::string finalName = *reinterpret_cast<const std::string *>(newName);
843 std::vector<std::string> inputBranchNames;
844 std::vector<std::string> outputBranchNames;
845 std::vector<bool> isDefine;
846 std::vector<const std::type_info *> inputColumnTypeIDs;
847 for (const auto &bd : fBranchData.front()) {
848 if (bd.fInputBranchName.empty())
849 break;
850 inputBranchNames.push_back(bd.fInputBranchName);
851 outputBranchNames.push_back(bd.fOutputBranchName);
852 isDefine.push_back(bd.fIsDefine);
853 inputColumnTypeIDs.push_back(bd.fInputTypeID);
854 }
855
857 finalName,
858 fDirName,
859 fTreeName,
860 std::move(inputBranchNames),
861 std::move(outputBranchNames),
862 fOptions,
863 std::move(isDefine),
864 fOutputLoopManager,
865 fInputLoopManager,
866 std::move(inputColumnTypeIDs)};
867}
868
870 unsigned int nSlots, std::string_view filename, std::string_view dirname, std::string_view ntuplename,
871 const ColumnNames_t &vfnames, const ColumnNames_t &fnames, const RSnapshotOptions &options,
873 const std::vector<const std::type_info *> &colTypeIDs)
874 : fFileName(filename),
875 fDirName(dirname),
876 fNTupleName(ntuplename),
877 fOptions(options),
878 fInputLoopManager(inputLM),
879 fOutputLoopManager(outputLM),
880 fInputFieldNames(vfnames),
881 fOutputFieldNames(ReplaceDotWithUnderscore(fnames)),
882 fNSlots(nSlots),
883 fFillContexts(nSlots),
884 fEntries(nSlots),
885 fInputColumnTypeIDs(colTypeIDs)
886{
888}
889
890// Define special member methods here where the definition of all the data member types is available
895
897{
898 if (!fNTupleName.empty() /* not moved from */ && !fOutputFile /* did not run */ && fOptions.fLazy)
899 Warning("Snapshot", "A lazy Snapshot action was booked but never triggered.");
900}
901
903{
904 auto model = ROOT::RNTupleModel::CreateBare();
905 auto nFields = fOutputFieldNames.size();
906 fFieldTokens.resize(nFields);
907 for (decltype(nFields) i = 0; i < nFields; i++) {
908 // Need to retrieve the type of every field to create as a string
909 // If the input type for a field does not have RTTI, internally we store it as the tag UseNativeDataType. When
910 // that is detected, we need to ask the data source which is the type name based on the on-disk information.
911 const auto typeName = *fInputColumnTypeIDs[i] == typeid(ROOT::Internal::RDF::UseNativeDataType)
912 ? ROOT::Internal::RDF::GetTypeNameWithOpts(*fInputLoopManager->GetDataSource(),
913 fInputFieldNames[i], fOptions.fVector2RVec)
914 : ROOT::Internal::RDF::TypeID2TypeName(*fInputColumnTypeIDs[i]);
915 model->AddField(ROOT::RFieldBase::Create(fOutputFieldNames[i], typeName).Unwrap());
916 fFieldTokens[i] = model->GetToken(fOutputFieldNames[i]);
917 }
918 model->Freeze();
919
921 writeOptions.SetCompression(fOptions.fCompressionAlgorithm, fOptions.fCompressionLevel);
922
923 fOutputFile.reset(TFile::Open(fFileName.c_str(), fOptions.fMode.c_str()));
924 if (!fOutputFile)
925 throw std::runtime_error("Snapshot: could not create output file " + fFileName);
926
927 TDirectory *outputDir = fOutputFile.get();
928 if (!fDirName.empty()) {
929 TString checkupdate = fOptions.fMode;
930 checkupdate.ToLower();
931 if (checkupdate == "update")
932 outputDir = fOutputFile->mkdir(fDirName.c_str(), "", true); // do not overwrite existing directory
933 else
934 outputDir = fOutputFile->mkdir(fDirName.c_str());
935 }
936
937 // The RNTupleParallelWriter has exclusive access to the underlying TFile, no further synchronization is needed for
938 // calls to Fill() (in Exec) and FlushCluster() (in FinalizeTask).
939 fWriter = ROOT::RNTupleParallelWriter::Append(std::move(model), fNTupleName, *outputDir, writeOptions);
940}
941
943{
944 if (!fFillContexts[slot]) {
945 fFillContexts[slot] = fWriter->CreateFillContext();
946 fEntries[slot] = fFillContexts[slot]->GetModel().CreateBareEntry();
947 }
948}
949
950void ROOT::Internal::RDF::UntypedSnapshotRNTupleHelper::Exec(unsigned int slot, const std::vector<void *> &values)
951{
952 auto &fillContext = fFillContexts[slot];
953 auto &outputEntry = fEntries[slot];
954 assert(values.size() == fFieldTokens.size());
955 for (decltype(values.size()) i = 0; i < values.size(); i++) {
956 outputEntry->BindRawPtr(fFieldTokens[i], values[i]);
957 }
958 fillContext->Fill(*outputEntry);
959}
960
962{
963 // In principle we would not need to flush a cluster here, but we want to benefit from parallelism for compression.
964 // NB: RNTupleFillContext::FlushCluster() is a nop if there is no new entry since the last flush.
965 fFillContexts[slot]->FlushCluster();
966}
967
969{
970 // First clear and destroy all entries, which were created from the RNTupleFillContexts.
971 fEntries.clear();
972 fFillContexts.clear();
973 // Then destroy the RNTupleParallelWriter and write the metadata.
974 fWriter.reset();
975 // We can now set the data source of the loop manager for the RDataFrame that is returned by the Snapshot call.
976 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::RDF::RNTupleDS>(fDirName + "/" + fNTupleName, fFileName));
977}
978
979/**
980 * Create a new UntypedSnapshotRNTupleHelper with a different output file name.
981 *
982 * \param[in] newName A type-erased string with the output file name
983 * \return UntypedSnapshotRNTupleHelper
984 *
985 * This MakeNew implementation is tied to the cloning feature of actions
986 * of the computation graph. In particular, cloning a Snapshot node usually
987 * also involves changing the name of the output file, otherwise the cloned
988 * Snapshot would overwrite the same file.
989 */
992{
993 const std::string finalName = *reinterpret_cast<const std::string *>(newName);
995 fNSlots, finalName, fDirName, fNTupleName, fInputFieldNames,
996 fOutputFieldNames, fOptions, fInputLoopManager, fOutputLoopManager, fInputColumnTypeIDs};
997}
998
999/*
1000 * ------------------------------------
1001 * Snapshot with systematic variations
1002 * ------------------------------------
1003 */
1004namespace ROOT::Internal::RDF {
1005/// An object to store an output file and a tree in one common place to share them between instances
1006/// of Snapshot with systematic uncertainties.
1008 std::unique_ptr<TFile> fFile;
1009 std::unique_ptr<TTree> fTree;
1010 std::string fDirectoryName;
1012
1013 // Bitmasks to indicate whether syst. uncertainties have been computed. Bound to TBranches, so need to be stable in
1014 // memory.
1015 struct Bitmask {
1016 std::string branchName;
1017 std::bitset<64> bitset{};
1018 std::unique_ptr<uint64_t> branchBuffer{new uint64_t{}};
1019 };
1020 std::vector<Bitmask> fBitMasks;
1021
1022 std::unordered_map<std::string, unsigned int> fBranchToVariationMapping;
1023 // The corresponding ROOT dictionary is declared in core/clingutils/src
1024 std::unordered_map<std::string, std::pair<std::string, unsigned int>> fBranchToBitmaskMapping;
1025 unsigned int fNBits = 0;
1026
1029 {
1030 if (!fBranchToBitmaskMapping.empty()) {
1031 fFile->WriteObject(&fBranchToBitmaskMapping,
1032 (std::string{"R_rdf_branchToBitmaskMapping_"} + fTree->GetName()).c_str());
1033 }
1034 if (fTree) {
1035 // use AutoSave to flush TTree contents because TTree::Write writes in gDirectory, not in fDirectory
1036 fTree->AutoSave("flushbaskets");
1037
1038 // Now connect the data source to the loop manager so it can be used for further processing
1039 std::string tree = fTree->GetName();
1040 if (!fDirectoryName.empty())
1041 tree = fDirectoryName + '/' + tree;
1042 std::string file = fFile->GetName();
1043
1044 fTree.reset();
1045 fFile.reset();
1046
1048 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::Internal::RDF::RTTreeDS>(tree, file));
1049 }
1050 }
1051 SnapshotOutputWriter(SnapshotOutputWriter const &) = delete; // Anyway deleted because of the unique_ptrs
1054 delete; // Can be done, but need to make move-from object safe to destruct
1056
1057 /// Register a branch and corresponding systematic uncertainty.
1058 /// This will create an entry in the mapping from branch names to bitmasks, so the corresponding
1059 /// column can be masked if it doesn't contain valid entries. This mapping is written next to the
1060 /// tree into the output file.
1061 void RegisterBranch(std::string const &branchName, unsigned int variationIndex)
1062 {
1063 if (auto it = fBranchToVariationMapping.find(branchName); it != fBranchToVariationMapping.end()) {
1064 if (variationIndex != it->second) {
1065 throw std::logic_error("Branch " + branchName +
1066 " is being registered with different variation index than the expected one: " +
1067 std::to_string(variationIndex));
1068 }
1069 return;
1070 }
1071
1072 // Neither branch nor systematic are known, so a new entry needs to be created
1073 fNBits = std::max(fNBits, variationIndex);
1074 const auto vectorIndex = variationIndex / 64u;
1075 const auto bitIndex = variationIndex % 64u;
1076
1077 // Create bitmask branches as long as necessary to capture the bit
1078 while (vectorIndex >= fBitMasks.size()) {
1079 std::string bitmaskBranchName =
1080 std::string{"R_rdf_mask_"} + fTree->GetName() + '_' + std::to_string(fBitMasks.size());
1082 fTree->Branch(bitmaskBranchName.c_str(), fBitMasks.back().branchBuffer.get());
1083 }
1084
1086 fBranchToBitmaskMapping[branchName] = std::make_pair(fBitMasks[vectorIndex].branchName, bitIndex);
1087 }
1088
1089 /// Clear all bits, as if none of the variations passed its filter.
1091 {
1092 for (auto &mask : fBitMasks)
1093 mask.bitset.reset();
1094 }
1095
1096 /// Set a bit signalling that the variation at `index` passed its filter.
1097 void SetMaskBit(unsigned int index)
1098 {
1099 const auto vectorIndex = index / 64;
1100 const auto bitIndex = index % 64;
1101 fBitMasks[vectorIndex].bitset.set(bitIndex, true);
1102 }
1103
1104 /// Test if any of the mask bits are set.
1105 bool MaskEmpty() const
1106 {
1107 return std::none_of(fBitMasks.begin(), fBitMasks.end(), [](Bitmask const &mask) { return mask.bitset.any(); });
1108 }
1109
1110 /// Write the current event and the bitmask to the output dataset.
1111 void Write() const
1112 {
1113 if (!fTree)
1114 throw std::runtime_error("The TTree associated to the Snapshot action doesn't exist, any more.");
1115
1116 for (auto const &mask : fBitMasks) {
1117 *mask.branchBuffer = mask.bitset.to_ullong();
1118 }
1119
1120 fTree->Fill();
1121 }
1122};
1123
1124} // namespace ROOT::Internal::RDF
1125
1127 std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames,
1128 const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector<bool> &&isDefine,
1130 const std::vector<const std::type_info *> &colTypeIDs)
1131 : fOptions(options), fInputLoopManager{inputLoopMgr}, fOutputLoopManager{outputLoopMgr}
1132{
1133 EnsureValidSnapshotTTreeOutput(fOptions, std::string(treename), std::string(filename));
1134
1136 fOutputHandle = std::make_shared<SnapshotOutputWriter>(
1137 TFile::Open(filename.data(), fOptions.fMode.c_str(), /*ftitle=*/"",
1139 if (!fOutputHandle->fFile)
1140 throw std::runtime_error(std::string{"Snapshot: could not create output file "} + std::string{filename});
1141
1142 TDirectory *outputDir = fOutputHandle->fFile.get();
1143 if (!dirname.empty()) {
1144 fOutputHandle->fDirectoryName = dirname;
1146 checkupdate.ToLower();
1147 if (checkupdate == "update")
1148 outputDir =
1149 fOutputHandle->fFile->mkdir(std::string{dirname}.c_str(), "", true); // do not overwrite existing directory
1150 else
1151 outputDir = fOutputHandle->fFile->mkdir(std::string{dirname}.c_str());
1152 }
1153
1154 fOutputHandle->fTree = std::make_unique<TTree>(std::string{treename}.c_str(), std::string{treename}.c_str(),
1155 fOptions.fSplitLevel, /*dir=*/outputDir);
1156 fOutputHandle->fOutputLoopManager = fOutputLoopManager;
1157 if (fOptions.fAutoFlush)
1158 fOutputHandle->fTree->SetAutoFlush(fOptions.fAutoFlush);
1159
1161
1162 fBranchData.reserve(vbnames.size());
1163 for (unsigned int i = 0; i < vbnames.size(); ++i) {
1164 fOutputHandle->RegisterBranch(outputBranchNames[i], 0);
1165 fBranchData.emplace_back(vbnames[i], outputBranchNames[i], isDefine[i], colTypeIDs[i]);
1166 }
1167}
1168
1169/// Register a new column as a variation of the column at `originalColumnIndex`, and clone its properties.
1170/// If a nominal column is registered here, it is written without changes, but it means that it will be masked
1171/// in case its selection cuts don't pass.
1172/// \param slot Task ID for MT runs.
1173/// \param columnIndex Index where the data of this column will be passed into the helper.
1174/// \param originalColumnIndex If the column being registered is a variation of a "nominal" column, this designates the
1175/// original.
1176/// Properties such as name and output type are cloned from the original.
1177/// \param variationName The variation that this column belongs to. If "nominal" is used, this column is considered as
1178/// the original.
1180 unsigned int columnIndex,
1181 unsigned int originalColumnIndex,
1182 unsigned int variationIndex,
1183 std::string const &variationName)
1184{
1186 fBranchData[columnIndex].fVariationIndex = variationIndex; // The base column has variations
1187 fOutputHandle->RegisterBranch(fBranchData[columnIndex].fOutputBranchName, variationIndex);
1188 } else if (columnIndex >= fBranchData.size()) {
1189 // First task, need to create branches
1190 fBranchData.resize(columnIndex + 1);
1191 auto &bd = fBranchData[columnIndex];
1192 bd = fBranchData[originalColumnIndex];
1193 std::string newOutputName = bd.fOutputBranchName + "__" + variationName;
1194 std::replace(newOutputName.begin(), newOutputName.end(), ':', '_');
1195 bd.fOutputBranchName = std::move(newOutputName);
1196 bd.fVariationIndex = variationIndex;
1197
1198 fOutputHandle->RegisterBranch(bd.fOutputBranchName, variationIndex);
1199 } else {
1200 assert(static_cast<unsigned int>(fBranchData[columnIndex].fVariationIndex) == variationIndex);
1201 }
1202}
1203
1204/// Bind all output branches to RDF columns for the given slots.
1206{
1207 // We ask the input RLoopManager if it has a TTree. We cannot rely on getting this information when constructing
1208 // this action helper, since the TTree might change e.g. when ChangeSpec is called in-between distributed tasks.
1209 if (auto treeDS = dynamic_cast<ROOT::Internal::RDF::RTTreeDS *>(fInputLoopManager->GetDataSource()))
1210 fInputTree = treeDS->GetTree();
1211
1212 // Create all output branches; and bind them to empty values
1213 for (std::size_t i = 0; i < fBranchData.size(); i++) { // fBranchData can grow due to insertions
1214 SetBranchesHelper(fInputTree, *fOutputHandle->fTree, fBranchData, i, fOptions.fBasketSize,
1215 fBranchData[i].EmptyInstance(/*pointerToPointer=*/false));
1216 }
1217
1218 AssertNoNullBranchAddresses(fBranchData);
1219}
1220
1221/// Connect all output fields to the values pointed to by `values`, fill the output dataset,
1222/// call the Fill of the output tree, and clear the mask bits that show whether a variation was reached.
1223void ROOT::Internal::RDF::SnapshotHelperWithVariations::Exec(unsigned int /*slot*/, const std::vector<void *> &values,
1224 std::vector<bool> const &filterPassed)
1225{
1226 // Rebind branch pointers to RDF values
1227 assert(fBranchData.size() == values.size());
1228 for (std::size_t i = 0; i < values.size(); i++) {
1229 const auto variationIndex = fBranchData[i].fVariationIndex;
1230 if (variationIndex < 0) {
1231 // Branch without variations
1232 SetBranchesHelper(fInputTree, *fOutputHandle->fTree, fBranchData, i, fOptions.fBasketSize, values[i]);
1233 } else if (filterPassed[variationIndex]) {
1234 // Branch with variations
1235 const bool fundamentalType = fBranchData[i].WriteValueIfFundamental(values[i]);
1236 if (!fundamentalType) {
1237 SetBranchesHelper(fInputTree, *fOutputHandle->fTree, fBranchData, i, fOptions.fBasketSize, values[i]);
1238 }
1239 fOutputHandle->SetMaskBit(variationIndex);
1240 }
1241 }
1242
1243 assert(!fOutputHandle->MaskEmpty()); // Exec should not have been called if nothing passes
1244
1245 fOutputHandle->Write();
1246 fOutputHandle->ClearMaskBits();
1247 for (auto &branchData : fBranchData) {
1248 branchData.ClearBranchContents();
1249 }
1250}
1251
1253{
1254 fOutputHandle.reset();
1255}
#define b(i)
Definition RSha256.hxx:100
#define f(i)
Definition RSha256.hxx:104
#define c(i)
Definition RSha256.hxx:101
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
Definition TError.cxx:252
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t mask
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char filename
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
static TBranch * SearchForBranch(TTree *tree, const char *name)
Definition TTreePyz.cxx:50
The head node of a RDF computation graph.
void SetDataSource(std::unique_ptr< ROOT::RDF::RDataSource > dataSource)
std::shared_ptr< SnapshotOutputWriter > fOutputHandle
SnapshotHelperWithVariations(std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &, const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector< bool > &&, ROOT::Detail::RDF::RLoopManager *outputLoopMgr, ROOT::Detail::RDF::RLoopManager *inputLoopMgr, const std::vector< const std::type_info * > &colTypeIDs)
void InitTask(TTreeReader *, unsigned int slot)
Bind all output branches to RDF columns for the given slots.
ROOT::Detail::RDF::RLoopManager * fOutputLoopManager
void Exec(unsigned int, const std::vector< void * > &values, std::vector< bool > const &filterPassed)
Connect all output fields to the values pointed to by values, fill the output dataset,...
void RegisterVariedColumn(unsigned int slot, unsigned int columnIndex, unsigned int originalColumnIndex, unsigned int varationIndex, std::string const &variationName)
Register a new column as a variation of the column at originalColumnIndex, and clone its properties.
UntypedSnapshotRNTupleHelper(unsigned int nSlots, std::string_view filename, std::string_view dirname, std::string_view ntuplename, const ColumnNames_t &vfnames, const ColumnNames_t &fnames, const RSnapshotOptions &options, ROOT::Detail::RDF::RLoopManager *inputLM, ROOT::Detail::RDF::RLoopManager *outputLM, const std::vector< const std::type_info * > &colTypeIDs)
void Exec(unsigned int slot, const std::vector< void * > &values)
UntypedSnapshotRNTupleHelper MakeNew(void *newName)
Create a new UntypedSnapshotRNTupleHelper with a different output file name.
void InitTask(TTreeReader *, unsigned int slot)
UntypedSnapshotTTreeHelperMT(unsigned int nSlots, std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector< bool > &&isDefine, ROOT::Detail::RDF::RLoopManager *loopManager, ROOT::Detail::RDF::RLoopManager *inputLM, const std::vector< const std::type_info * > &colTypeIDs)
void UpdateCArraysPtrs(unsigned int slot, const std::vector< void * > &values)
void SetEmptyBranches(TTree *inputTree, TTree &outputTree)
std::vector< std::vector< RBranchData > > fBranchData
UntypedSnapshotTTreeHelperMT MakeNew(void *newName, std::string_view="nominal")
Create a new UntypedSnapshotTTreeHelperMT with a different output file name.
void InitTask(TTreeReader *r, unsigned int slot)
void Exec(unsigned int slot, const std::vector< void * > &values)
void SetBranches(unsigned int slot, const std::vector< void * > &values)
UntypedSnapshotTTreeHelper MakeNew(void *newName, std::string_view="nominal")
Create a new UntypedSnapshotTTreeHelper with a different output file name.
void SetEmptyBranches(TTree *inputTree, TTree &outputTree)
void SetBranches(const std::vector< void * > &values)
void Exec(unsigned int, const std::vector< void * > &values)
UntypedSnapshotTTreeHelper(std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector< bool > &&isDefine, ROOT::Detail::RDF::RLoopManager *loopManager, ROOT::Detail::RDF::RLoopManager *inputLM, const std::vector< const std::type_info * > &colTypeIDs)
void UpdateCArraysPtrs(const std::vector< void * > &values)
static RResult< std::unique_ptr< RFieldBase > > Create(const std::string &fieldName, const std::string &typeName, const ROOT::RCreateFieldOptions &options, const ROOT::RNTupleDescriptor *desc, ROOT::DescriptorId_t fieldId)
Factory method to resurrect a field from the stored on-disk type information.
static std::unique_ptr< RNTupleModel > CreateBare()
Creates a "bare model", i.e. an RNTupleModel with no default entry.
static std::unique_ptr< RNTupleParallelWriter > Append(std::unique_ptr< ROOT::RNTupleModel > model, std::string_view ntupleName, TDirectory &fileOrDirectory, const ROOT::RNTupleWriteOptions &options=ROOT::RNTupleWriteOptions())
Append an RNTuple to the existing file.
Common user-tunable settings for storing RNTuples.
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:67
const_iterator begin() const
const_iterator end() const
A "std::vector"-like collection of values implementing handy operation to analyse them.
Definition RVec.hxx:1526
A Branch for the case of an object.
A TTree is a list of TBranches.
Definition TBranch.h:93
static TClass * Class()
TClassRef is used to implement a permanent reference to a TClass object.
Definition TClassRef.h:29
TClass instances represent classes, structs and namespaces in the ROOT type system.
Definition TClass.h:84
Basic data type descriptor (datatype information is obtained from CINT).
Definition TDataType.h:44
Int_t GetType() const
Definition TDataType.h:68
static TDictionary * GetDictionary(const char *name)
Retrieve the type (class, fundamental type, typedef etc) named "name".
TDirectory::TContext keeps track and restore the current directory.
Definition TDirectory.h:89
Describe directory structure in memory.
Definition TDirectory.h:45
A ROOT file is an on-disk file, usually with extension .root, that stores objects in a file-system-li...
Definition TFile.h:131
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition TFile.cxx:3764
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
Definition TLeaf.h:57
Mother of all ROOT objects.
Definition TObject.h:41
Basic string class.
Definition TString.h:138
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
Definition TTreeReader.h:46
A TTree represents a columnar dataset.
Definition TTree.h:89
@ kEntriesReshuffled
If set, signals that this TTree is the output of the processing of another TTree, and the entries are...
Definition TTree.h:297
std::vector< std::string > ReplaceDotWithUnderscore(const std::vector< std::string > &columnNames)
Replace occurrences of '.
Definition RDFUtils.cxx:397
char TypeName2ROOTTypeName(const std::string &b)
Convert type name (e.g.
Definition RDFUtils.cxx:342
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info An empty string is returned in case of failure...
Definition RDFUtils.cxx:178
std::string GetTypeNameWithOpts(const ROOT::RDF::RDataSource &ds, std::string_view colName, bool vector2RVec)
Definition RDFUtils.cxx:627
char TypeID2ROOTTypeName(const std::type_info &tid)
Definition RDFUtils.cxx:206
TBranch * CallBranchImp(TTree &tree, const char *branchname, TClass *ptrClass, void *addobj, Int_t bufsize=32000, Int_t splitlevel=99)
Definition TTree.cxx:10094
TBranch * CallBranchImpRef(TTree &tree, const char *branchname, TClass *ptrClass, EDataType datatype, void *addobj, Int_t bufsize=32000, Int_t splitlevel=99)
Definition TTree.cxx:10088
std::vector< std::string > ColumnNames_t
@ kROOTRVec
Definition ESTLType.h:46
@ kSTLvector
Definition ESTLType.h:30
int CompressionSettings(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
ROOT::ESTLType STLKind(std::string_view type)
Converts STL container name to number.
ROOT::ESTLType IsSTLCont(std::string_view type)
type : type name: vector<list<classA,allocator>,allocator> result: 0 : not stl container code of cont...
Stores empty instances of classes, so a dummy object can be written when a systematic variation doesn...
Stores variations of a fundamental type.
Stores properties of each output branch in a Snapshot.
void * EmptyInstance(bool pointerToPointer)
Return a pointer to an empty instance of the type represented by this branch.
void ClearBranchContents()
Point the branch address to an empty instance of the type represented by this branch or write null by...
std::variant< FundamentalType, EmptyDynamicType > fTypeData
const std::type_info * fInputTypeID
An object to store an output file and a tree in one common place to share them between instances of S...
void Write() const
Write the current event and the bitmask to the output dataset.
void ClearMaskBits()
Clear all bits, as if none of the variations passed its filter.
SnapshotOutputWriter(SnapshotOutputWriter const &)=delete
std::unordered_map< std::string, std::pair< std::string, unsigned int > > fBranchToBitmaskMapping
void RegisterBranch(std::string const &branchName, unsigned int variationIndex)
Register a branch and corresponding systematic uncertainty.
void SetMaskBit(unsigned int index)
Set a bit signalling that the variation at index passed its filter.
bool MaskEmpty() const
Test if any of the mask bits are set.
SnapshotOutputWriter & operator=(SnapshotOutputWriter const &)=delete
std::unordered_map< std::string, unsigned int > fBranchToVariationMapping
SnapshotOutputWriter(SnapshotOutputWriter &&) noexcept=delete
Tag to let data sources use the native data type when creating a column reader.
Definition Utils.hxx:344
A collection of options to steer the creation of the dataset on file.
int fAutoFlush
AutoFlush value for output tree.
std::string fMode
Mode of creation of output file.
ECAlgo fCompressionAlgorithm
Compression algorithm of output file.
int fSplitLevel
Split level of output tree.
int fCompressionLevel
Compression level of output file.