Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RDFSnapshotHelpers.cxx
Go to the documentation of this file.
1/**
2 \file RDFSnapshotHelpers.cxx
3 \ingroup dataframe
4 \author Enrico Guiraud, CERN
5 \author Danilo Piparo, CERN
6 \date 2016-12
7 \author Vincenzo Eduardo Padulano
8 \author Stephan Hageboeck
9 \date 2025-06
10*/
11
12/*************************************************************************
13 * Copyright (C) 1995-2025, Rene Brun and Fons Rademakers. *
14 * All rights reserved. *
15 * *
16 * For the licensing terms see $ROOTSYS/LICENSE. *
17 * For the list of contributors see $ROOTSYS/README/CREDITS. *
18 *************************************************************************/
19
21
22#include <ROOT/REntry.hxx>
23#include <ROOT/RFieldToken.hxx>
24#include <ROOT/RNTuple.hxx>
25#include <ROOT/RNTupleDS.hxx>
28#include <ROOT/RTTreeDS.hxx>
30
31#include <TBranchObject.h>
32#include <TClassEdit.h>
33#include <TDictionary.h>
34#include <TDataType.h>
35#include <TFile.h>
36#include <TLeaf.h>
37#include <TTreeReader.h>
38
39#include <algorithm>
40#include <type_traits>
41#include <utility>
42
44// Maintaining the following allows for faster vector resize:
// (std::vector can only relocate its elements by moving them during a reallocation when the element type's move
// operations cannot throw. These compile-time checks break the build if RBranchData stops being nothrow-movable.)
45static_assert(std::is_nothrow_move_assignable_v<RBranchData>);
46static_assert(std::is_nothrow_move_constructible_v<RBranchData>);
47
48namespace {
49
50void AssertNoNullBranchAddresses(const std::vector<RBranchData> &branches)
51{
52 std::vector<TBranch *> branchesWithNullAddress;
53 for (const auto &branchData : branches) {
54 if (branchData.fOutputBranch->GetAddress() == nullptr)
55 branchesWithNullAddress.push_back(branchData.fOutputBranch);
56 }
57
58 if (branchesWithNullAddress.empty())
59 return;
60
61 // otherwise build error message and throw
62 std::vector<std::string> missingBranchNames;
64 std::back_inserter(missingBranchNames), [](TBranch *b) { return b->GetName(); });
65 std::string msg = "RDataFrame::Snapshot:";
66 if (missingBranchNames.size() == 1) {
67 msg += " branch " + missingBranchNames[0] +
68 " is needed as it provides the size for one or more branches containing dynamically sized arrays, but "
69 "it is";
70 } else {
71 msg += " branches ";
72 for (const auto &bName : missingBranchNames)
73 msg += bName + ", ";
74 msg.resize(msg.size() - 2); // remove last ", "
75 msg += " are needed as they provide the size of other branches containing dynamically sized arrays, but they are";
76 }
77 msg += " not part of the set of branches that are being written out.";
78 throw std::runtime_error(msg);
79}
80
81TBranch *SearchForBranch(TTree *inputTree, const std::string &branchName)
82{
83 if (inputTree) {
84 if (auto *getBranchRes = inputTree->GetBranch(branchName.c_str()))
85 return getBranchRes;
86
87 // try harder
88 if (auto *findBranchRes = inputTree->FindBranch(branchName.c_str()))
89 return findBranchRes;
90 }
91 return nullptr;
92}
93
// Create the output branch for a column that is stored as a C-style array in the input tree, using a leaflist of
// the form "name[size]/type".
//
// Nothing is created, and `thisBranch` is returned as is, when `inputBranch` is null, when its class name denotes
// a std::vector or ROOT::RVec, when it has no first leaf, or when that leaf is neither sized by another leaf nor
// of static length greater than one.
//
// For arrays whose length is stored in another leaf, the function also makes sure that this size leaf has an
// entry in `outputBranches` and a branch in `outputTree`. That branch is created with a null address as a
// placeholder.
//
// outputTree      Tree in which the branches are created.
// outputBranches  Bookkeeping for all output branches; an entry for the size leaf may be appended.
// thisBranch      Iterator to the entry of the array column being handled.
// inputBranch     Matching branch in the input tree (may be null).
// basketSize      User-requested basket size; values <= 0 mean "take the basket size of the input branch".
// address         If non-null, treated as a pointer to a ROOT::RVec<std::byte> whose data() buffer becomes the
//                 branch address.
//
// Returns an iterator to the entry of the array column.
94std::vector<RBranchData>::iterator CreateCStyleArrayBranch(TTree &outputTree, std::vector<RBranchData> &outputBranches,
95 std::vector<RBranchData>::iterator thisBranch,
96 TBranch *inputBranch, int basketSize, void *address)
97{
98 if (!inputBranch)
99 return thisBranch;
100 const auto STLKind = TClassEdit::IsSTLCont(inputBranch->GetClassName());
101 if (STLKind == ROOT::ESTLType::kSTLvector || STLKind == ROOT::ESTLType::kROOTRVec)
102 return thisBranch;
103 // must construct the leaflist for the output branch and create the branch in the output tree
104 const auto *leaf = static_cast<TLeaf *>(inputBranch->GetListOfLeaves()->UncheckedAt(0));
105 if (!leaf)
106 return thisBranch;
107 const auto bname = leaf->GetName();
108 auto *sizeLeaf = leaf->GetLeafCount();
// For variable-size arrays this is the name of the counting leaf, for fixed-size arrays the static length as text.
// Either way it is what goes between the square brackets of the leaflist below.
109 const auto sizeLeafName = sizeLeaf ? std::string(sizeLeaf->GetName()) : std::to_string(leaf->GetLenStatic());
110
111 // We proceed only if branch is a fixed-or-variable-sized array
112 if (sizeLeaf || leaf->GetLenStatic() > 1) {
113 if (sizeLeaf) {
114 // The array branch `bname` has dynamic size stored in leaf `sizeLeafName`, so we need to ensure that it's
115 // in the output tree.
116 auto sizeLeafIt =
117 std::find_if(outputBranches.begin(), outputBranches.end(),
118 [&sizeLeafName](const RBranchData &bd) { return bd.fOutputBranchName == sizeLeafName; });
119 if (sizeLeafIt == outputBranches.end()) {
120 // The size leaf is not part of the output branches yet, so emplace an empty slot for it.
121 // This means that iterators need to be updated in case the container reallocates.
122 const auto indexBeforeEmplace = std::distance(outputBranches.begin(), thisBranch);
123 outputBranches.emplace_back("", sizeLeafName, /*isDefine=*/false, /*typeID=*/nullptr);
// NOTE(review): the embedded numbering jumps from 123 to 126, so two source lines are missing from this listing.
// Presumably they re-derive `thisBranch` from `indexBeforeEmplace` and point `sizeLeafIt` at the new entry; as
// shown here, `sizeLeafIt` would still equal end() when dereferenced below. Confirm against the repository.
126 }
127 if (!sizeLeafIt->fOutputBranch) {
128 // The size leaf was emplaced, but not initialised yet
// NOTE(review): source line 129 is missing from this listing; presumably it defines `sizeTypeStr`, the type
// code of the size leaf that is used in the leaflist below.
130 // Use original basket size for existing branches otherwise use custom basket size.
131 const auto bufSize = (basketSize > 0) ? basketSize : sizeLeaf->GetBranch()->GetBasketSize();
132 // The null branch address is a placeholder. It will be set when SetBranchesHelper is called for
133 // `sizeLeafName`
134 auto *outputBranch = outputTree.Branch(sizeLeafName.c_str(), static_cast<void *>(nullptr),
135 (sizeLeafName + '/' + sizeTypeStr).c_str(), bufSize);
136 sizeLeafIt->fOutputBranch = outputBranch;
137 }
138 }
139
140 const auto btype = leaf->GetTypeName();
// NOTE(review): source line 141 is missing from this listing; presumably it derives `rootbtype`, the leaflist
// type character for `btype`. A blank character signals that no type character could be determined.
142 if (rootbtype == ' ') {
143 Warning("Snapshot",
144 "RDataFrame::Snapshot: could not correctly construct a leaflist for C-style array in column %s. The "
145 "leaf is of type '%s'. This column will not be written out.",
146 bname, btype);
147 return thisBranch;
148 }
149
150 const auto leaflist = std::string(bname) + "[" + sizeLeafName + "]/" + rootbtype;
151 // Use original basket size for existing branches and new basket size for new branches
152 const auto bufSize = (basketSize > 0) ? basketSize : inputBranch->GetBasketSize();
153 void *addressForBranch = [address]() -> void * {
154 if (address) {
155 // Address here points to a ROOT::RVec<std::byte> coming from RTreeUntypedArrayColumnReader. We know we need
156 // its buffer, so we cast it and extract the address of the buffer
157 auto *rawRVec = reinterpret_cast<ROOT::RVec<std::byte> *>(address);
158 return rawRVec->data();
159 }
160 return nullptr;
161 }();
162 thisBranch->fOutputBranch =
163 outputTree.Branch(thisBranch->fOutputBranchName.c_str(), addressForBranch, leaflist.c_str(), bufSize);
// Carry over the title of the input branch and remember that this entry needs the C-array address handling.
164 thisBranch->fOutputBranch->SetTitle(inputBranch->GetTitle());
165 thisBranch->fIsCArray = true;
166 }
167
168 return thisBranch;
169}
170
171void SetBranchAddress(TBranch *inputBranch, RBranchData &branchData, void *valueAddress)
172{
173 const static TClassRef TBOClRef("TBranchObject");
174 if (inputBranch && inputBranch->IsA() == TBOClRef) {
175 branchData.fOutputBranch->SetAddress(reinterpret_cast<void **>(inputBranch->GetAddress()));
176 } else if (branchData.fOutputBranch->IsA() != TBranch::Class()) {
177 // This is a relatively rare case of a fixed-size array getting redefined
178 branchData.fBranchAddressForCArrays = valueAddress;
179 branchData.fOutputBranch->SetAddress(&branchData.fBranchAddressForCArrays);
180 } else {
181 void *correctAddress = [valueAddress, isCArray = branchData.fIsCArray]() -> void * {
182 if (isCArray) {
183 // Address here points to a ROOT::RVec<std::byte> coming from RTreeUntypedArrayColumnReader. We know we
184 // need its buffer, so we cast it and extract the address of the buffer
185 auto *rawRVec = reinterpret_cast<ROOT::RVec<std::byte> *>(valueAddress);
186 return rawRVec->data();
187 }
188 return valueAddress;
189 }();
190 branchData.fOutputBranch->SetAddress(correctAddress);
191 branchData.fBranchAddressForCArrays = valueAddress;
192 }
193}
194
// Create an output branch for a column of fundamental type via a leaflist "<branch name>/<type character>" and
// store it in `bd.fOutputBranch`. If no type character can be determined (blank character), a warning is
// printed and no branch is created.
// NOTE(review): the signature of this function (source line 195) and the statement defining `rootTypeChar`
// (source line 200) are missing from this listing. From the body, the function uses `outputTree`, `bd`,
// `valueAddress` and `bufSize`; confirm names and types against the repository.
196{
197 // Logic taken from
198 // TTree::BranchImpRef(
199 // const char* branchname, TClass* ptrClass, EDataType datatype, void* addobj, Int_t bufsize, Int_t splitlevel)
201 if (rootTypeChar == ' ') {
202 Warning("Snapshot",
203 "RDataFrame::Snapshot: could not correctly construct a leaflist for fundamental type in column %s. This "
204 "column will not be written out.",
205 bd.fOutputBranchName.c_str());
// bd.fOutputBranch is left untouched on this path.
206 return;
207 }
208 std::string leafList{bd.fOutputBranchName + '/' + rootTypeChar};
209 bd.fOutputBranch = outputTree.Branch(bd.fOutputBranchName.c_str(), valueAddress, leafList.c_str(), bufSize);
210}
211
212/// Ensure that the TTree with the resulting snapshot can be written to the target TFile. This means checking that the
213/// TFile can be opened in the mode specified in `opts`, deleting any existing TTrees in case
214/// `opts.fOverwriteIfExists = true`, or throwing an error otherwise.
///
/// The checks are only performed when `opts.fMode` is "update" (compared case-insensitively); for any other mode
/// the function returns immediately.
/// \throws std::invalid_argument if the file cannot be opened in update mode, or if an object named `treeName`
/// already exists in the file and `opts.fOverwriteIfExists` is false.
/// NOTE(review): the first line of the signature (source line 215) is missing from this listing.
216 const std::string &fileName)
217{
218 TString fileMode = opts.fMode;
219 fileMode.ToLower();
220 if (fileMode != "update")
221 return;
222
223 // output file opened in "update" mode: must check whether output TTree is already present in file
// The file is owned by this function only for the duration of the check.
224 std::unique_ptr<TFile> outFile{TFile::Open(fileName.c_str(), "update")};
225 if (!outFile || outFile->IsZombie())
226 throw std::invalid_argument("Snapshot: cannot open file \"" + fileName + "\" in update mode");
227
228 TObject *outTree = outFile->Get(treeName.c_str());
229 if (outTree == nullptr)
230 return;
231
232 // object called treeName is already present in the file
233 if (opts.fOverwriteIfExists) {
// A TTree is removed through TTree::Delete("all"); any other kind of object is removed by name from the file.
234 if (outTree->InheritsFrom("TTree")) {
235 static_cast<TTree *>(outTree)->Delete("all");
236 } else {
237 outFile->Delete(treeName.c_str());
238 }
239 } else {
240 const std::string msg = "Snapshot: tree \"" + treeName + "\" already present in file \"" + fileName +
241 "\". If you want to delete the original tree and write another, please set "
242 "RSnapshotOptions::fOverwriteIfExists to true.";
243 throw std::invalid_argument(msg);
244 }
245}
246
247/// Ensure that the RNTuple with the resulting snapshot can be written to the target TFile. This means checking that the
248/// TFile can be opened in the mode specified in `opts`, deleting any existing RNTuples in case
249/// `opts.fOverwriteIfExists = true`, or throwing an error otherwise.
///
/// The checks are only performed when `opts.fMode` is "update" (compared case-insensitively); for any other mode
/// the function returns immediately. An existing RNTuple named `ntupleName` is looked for first, then any other
/// object with that name.
/// \throws std::invalid_argument if the file cannot be opened in update mode, or if an object named `ntupleName`
/// already exists in the file and `opts.fOverwriteIfExists` is false.
/// NOTE(review): the first line of the signature (source line 250) is missing from this listing.
251 const std::string &fileName)
252{
253 TString fileMode = opts.fMode;
254 fileMode.ToLower();
255 if (fileMode != "update")
256 return;
257
258 // output file opened in "update" mode: must check whether output RNTuple is already present in file
259 std::unique_ptr<TFile> outFile{TFile::Open(fileName.c_str(), "update")};
260 if (!outFile || outFile->IsZombie())
261 throw std::invalid_argument("Snapshot: cannot open file \"" + fileName + "\" in update mode");
262
263 auto *outNTuple = outFile->Get<ROOT::RNTuple>(ntupleName.c_str());
264
265 if (outNTuple) {
266 if (opts.fOverwriteIfExists) {
// The ";*" suffix is presumably the name-cycle wildcard, i.e. all cycles of the object are deleted -- confirm
// against the TDirectory::Delete documentation.
267 outFile->Delete((ntupleName + ";*").c_str());
268 return;
269 } else {
270 const std::string msg = "Snapshot: RNTuple \"" + ntupleName + "\" already present in file \"" + fileName +
271 "\". If you want to delete the original ntuple and write another, please set "
272 "the 'fOverwriteIfExists' option to true in RSnapshotOptions.";
273 throw std::invalid_argument(msg);
274 }
275 }
276
277 // Also check if there is any object other than an RNTuple with the provided ntupleName.
278 TObject *outObj = outFile->Get(ntupleName.c_str());
279
280 if (!outObj)
281 return;
282
283 // An object called ntupleName is already present in the file.
284 if (opts.fOverwriteIfExists) {
// A TTree is removed through TTree::Delete("all"); any other kind of object is removed by name from the file.
285 if (auto tree = dynamic_cast<TTree *>(outObj)) {
286 tree->Delete("all");
287 } else {
288 outFile->Delete((ntupleName + ";*").c_str());
289 }
290 } else {
291 const std::string msg = "Snapshot: object \"" + ntupleName + "\" already present in file \"" + fileName +
292 "\". If you want to delete the original object and write a new RNTuple, please set "
293 "the 'fOverwriteIfExists' option to true in RSnapshotOptions.";
294 throw std::invalid_argument(msg);
295 }
296}
297
299 std::vector<ROOT::Internal::RDF::RBranchData> &allBranchData, std::size_t currentIndex,
300 int basketSize, void *valueAddress)
301{
// Create the output branch for the entry at `currentIndex` of `allBranchData` or, if that branch already exists
// and a value address is given, only (re)set its address. Fundamental types, C-style arrays and class types are
// handled separately below; any other case throws std::logic_error.
// A null `valueAddress` is used to create "hollow" branches that only record the schema.
// NOTE(review): several source lines are missing from this listing: 298 (start of the signature), 302
// (presumably the definition of `branchData` from `allBranchData` and `currentIndex`), 322, 332, 349, 353 and
// 355 (calls that create the branches). The remarks below mark where they belong.
303 auto *inputBranch = branchData->fIsDefine ? nullptr : SearchForBranch(inputTree, branchData->fInputBranchName);
304
305 if (branchData->fOutputBranch && valueAddress) {
306 // The output branch was already created, we just need to (re)set its address
307 SetBranchAddress(inputBranch, *branchData, valueAddress);
308 return;
309 }
310
311 // Respect the original bufsize and splitlevel arguments
312 // In particular, by keeping splitlevel equal to 0 if this was the case for `inputBranch`, we avoid
313 // writing garbage when unsplit objects cannot be written as split objects (e.g. in case of a polymorphic
314 // TObject branch, see https://bit.ly/2EjLMId ).
315 // A user-provided basket size value takes precedence.
316 const auto bufSize = (basketSize > 0) ? basketSize : (inputBranch ? inputBranch->GetBasketSize() : 32000);
317 const auto splitLevel = inputBranch ? inputBranch->GetSplitLevel() : 99;
318
319 auto *dictionary = TDictionary::GetDictionary(*branchData->fInputTypeID);
320 if (dynamic_cast<TDataType *>(dictionary)) {
321 // Branch of fundamental type
// NOTE(review): source line 322, which presumably creates the fundamental-type branch, is missing here.
323 return;
324 }
325
326 if (!branchData->fIsDefine) {
327 // Cases where we need a leaflist (e.g. C-style arrays)
328 // We only enter this code path if the input value does not come from a Define/Redefine. In those cases, it is
329 // not allowed to create a column of C-style array type, so that can't happen when writing the TTree. This is
330 // currently what prevents writing the wrong branch output type in a scenario where the input branch of the TTree
331 // is a C-style array and then the user is Redefining it with some other type (e.g. a ROOT::RVec).
// NOTE(review): source line 332 is missing here; presumably it calls CreateCStyleArrayBranch and takes over the
// iterator it returns, since that call may append to `allBranchData`.
333 }
334 if (branchData->fOutputBranch) {
335 // A branch was created in the previous function call
336 if (valueAddress) {
337 // valueAddress here points to a ROOT::RVec<std::byte> coming from RTreeUntypedArrayColumnReader. We know we
338 // need its buffer, so we cast it and extract the address of the buffer
339 auto *rawRVec = reinterpret_cast<ROOT::RVec<std::byte> *>(valueAddress);
340 branchData->fBranchAddressForCArrays = rawRVec->data();
341 }
342 return;
343 }
344
// NOTE(review): in the class-type section below, the first lines of two branch-creating calls (source lines
// 349 and 353) and the tail of the second one (source line 355) are missing, so the statements are incomplete.
345 if (auto *classPtr = dynamic_cast<TClass *>(dictionary)) {
346 // Case of unsplit object with polymorphic type
347 if (inputBranch && dynamic_cast<TBranchObject *>(inputBranch) && valueAddress)
348 branchData->fOutputBranch =
350 inputBranch->GetAddress(), bufSize, splitLevel);
351 // General case, with valid address
352 else if (valueAddress)
354 outputTree, branchData->fOutputBranchName.c_str(), classPtr, TDataType::GetType(*branchData->fInputTypeID),
356 // No value was passed, we're just creating a hollow branch to populate the dataset schema
357 else
358 branchData->fOutputBranch =
359 outputTree.Branch(branchData->fOutputBranchName.c_str(), classPtr->GetName(), nullptr, bufSize);
360 return;
361 }
362
363 // We are not aware of other cases
364 throw std::logic_error(
365 "RDataFrame::Snapshot: something went wrong when creating a TTree branch, please report this as a bug.");
366}
367} // namespace
368
370 const std::type_info *typeID)
371 : fInputBranchName{std::move(inputBranchName)},
372 fOutputBranchName{std::move(outputBranchName)},
373 fInputTypeID{typeID},
374 fIsDefine{isDefine}
375{
// Constructor: stores the input/output branch names, the type id and the "is Define" flag, then selects the
// alternative held in `fTypeData` depending on whether the type is a fundamental type (TDataType) or a class
// (TClass).
// NOTE(review): three source lines are missing from this listing: 369 (start of the signature), 376 (presumably
// the lookup that defines `dictionary`) and 378 (the body of the TDataType branch). Elsewhere in this file the
// constructor is also called with a null `typeID`; how that is handled is on the missing lines -- verify.
377 if (auto datatype = dynamic_cast<TDataType *>(dictionary); datatype) {
379 } else if (auto tclass = dynamic_cast<TClass *>(dictionary); tclass) {
380 fTypeData = EmptyDynamicType{tclass};
381 }
382}
383
384/// @brief Return a pointer to an empty instance of the type represented by this branch.
385/// For fundamental types, this is simply an 8-byte region of zeroes. For classes, it is an instance created with
386/// TClass::New.
387/// @param pointerToPointer Return a pointer to a pointer, so it can be used directly in TTree::SetBranchAddress().
/// The class instance is created on first use and kept for later calls.
/// NOTE(review): the signature line (source line 388) is missing from this listing.
389{
390 if (auto fundamental = std::get_if<FundamentalType>(&fTypeData); fundamental) {
391 assert(!pointerToPointer); // Not used for fundamental types
392 return fundamental->fBytes.data();
393 }
394
395 auto &dynamic = std::get<EmptyDynamicType>(fTypeData);
396 if (!dynamic.fEmptyInstance) {
397 auto *dictionary = TDictionary::GetDictionary(*fInputTypeID);
398 assert(dynamic_cast<TDataType *>(dictionary) ==
399 nullptr); // TDataType should be handled by writing into the local buffer
400
401 auto tclass = dynamic_cast<TClass *>(dictionary);
402 assert(tclass);
// The shared_ptr's deleter is the destructor function provided by the TClass.
403 dynamic.fEmptyInstance = std::shared_ptr<void>{tclass->New(), tclass->GetDestructor()};
404 }
405
406 if (pointerToPointer) {
407 // Make TTree happy (needs a pointer to pointer):
// The raw pointer is stored in a member so that the address returned here refers to storage held by this object.
408 dynamic.fRawPtrToEmptyInstance = dynamic.fEmptyInstance.get();
409 return &dynamic.fRawPtrToEmptyInstance;
410 } else {
411 return dynamic.fEmptyInstance.get();
412 }
413}
414
415/// Point the branch address to an empty instance of the type represented by this branch
416/// or write null bytes into the space used by the fundamental type.
417/// This is used in case of variations, when certain defines/actions don't execute. We
418/// nevertheless need to write something, so we point the branch to an empty instance.
/// Does nothing if no output branch has been created.
/// NOTE(review): the signature line (source line 419) is missing from this listing.
420{
421 if (!fOutputBranch)
422 return;
423
424 if (auto fundamental = std::get_if<FundamentalType>(&fTypeData); fundamental) {
// Fundamental types: only the local byte buffer is zeroed here; the branch address is not changed.
425 fundamental->fBytes.fill(std::byte{0});
426 } else {
427 // TTree expects pointer to pointer, to figure out who allocates the object:
428 fOutputBranch->SetAddress(EmptyInstance(/*pointerToPointer=*/true));
429 }
430}
431
433 std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames,
434 const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector<bool> &&isDefine,
436 const std::vector<const std::type_info *> &colTypeIDs)
437 : fFileName(filename),
438 fDirName(dirname),
439 fTreeName(treename),
440 fOptions(options),
441 fOutputLoopManager(loopManager),
442 fInputLoopManager(inputLM)
443{
// Constructor of the single-threaded TTree snapshot helper: stores file, directory and tree names, the options
// and the two loop managers, then creates one fBranchData entry per input column in `vbnames`.
// NOTE(review): four source lines are missing from this listing: 432 (start of the signature), 435 (presumably
// the `loopManager` / `inputLM` parameters), 444 and 446 (the latter presumably defines `outputBranchNames`,
// which is used below but not declared in the visible lines).
445
447 fBranchData.reserve(vbnames.size());
448 for (unsigned int i = 0; i < vbnames.size(); ++i) {
449 fBranchData.emplace_back(vbnames[i], std::move(outputBranchNames[i]), isDefine[i], colTypeIDs[i]);
450 }
451}
452
453// Define special member methods here where the definition of all the data member types is available
// NOTE(review): source lines 454-456 are missing from this listing; only the tail of one defaulted, noexcept
// member taking an rvalue reference is visible below.
457 ROOT::Internal::RDF::UntypedSnapshotTTreeHelper &&) noexcept = default;
458
// Warn if a lazy Snapshot was booked but never ran: the helper still has a tree name, no output file was ever
// opened, and the lazy option is set.
// NOTE(review): the signature line (source line 459) is missing from this listing; judging by the "not moved
// from" / "did not run" checks this is presumably the destructor -- confirm.
460{
461 if (!fTreeName.empty() /*not moved from*/ && !fOutputFile /* did not run */ && fOptions.fLazy) {
// Choose the verb for the message from the file open mode (compared case-insensitively).
462 const auto fileOpenMode = [&]() {
463 TString checkupdate = fOptions.fMode;
464 checkupdate.ToLower();
465 return checkupdate == "update" ? "updated" : "created";
466 }();
467 Warning("Snapshot",
468 "A lazy Snapshot action was booked but never triggered. The tree '%s' in output file '%s' was not %s. "
469 "In case it was desired instead, remember to trigger the Snapshot operation, by storing "
470 "its result in a variable and for example calling the GetValue() method on it.",
471 fTreeName.c_str(), fFileName.c_str(), fileOpenMode);
472 }
473}
474
// Refresh the input tree pointer from the input loop manager's data source (if it is a TTree data source) and
// request that the branch addresses be set again on the next Exec call.
// NOTE(review): the signature line (source line 475) is missing from this listing.
476{
477 // We ask the input RLoopManager if it has a TTree. We cannot rely on getting this information when constructing
478 // this action helper, since the TTree might change e.g. when ChangeSpec is called in-between distributed tasks.
479 if (auto treeDS = dynamic_cast<ROOT::Internal::RDF::RTTreeDS *>(fInputLoopManager->GetDataSource()))
480 fInputTree = treeDS->GetTree();
481 fBranchAddressesNeedReset = true;
482}
483
484void ROOT::Internal::RDF::UntypedSnapshotTTreeHelper::Exec(unsigned int, const std::vector<void *> &values)
485{
486 if (!fBranchAddressesNeedReset) {
487 UpdateCArraysPtrs(values);
488 } else {
489 SetBranches(values);
490 fBranchAddressesNeedReset = false;
491 }
492
493 fOutputTree->Fill();
494}
495
// Re-point the output branches of C-style array columns at the current data buffer of their values, but only
// for those whose buffer address differs from the one recorded in fBranchAddressForCArrays.
// NOTE(review): the signature line (source line 496) is missing from this listing; the body uses a parameter
// named `values`.
497{
498 // This code deals with branches which hold C arrays of variable size. It can happen that the buffers
499 // associated to those is re-allocated. As a result the value of the pointer can change therewith
500 // leaving associated to the branch of the output tree an invalid pointer.
501 // With this code, we set the value of the pointer in the output branch anew when needed.
502 assert(values.size() == fBranchData.size());
503 auto nValues = values.size();
504 for (decltype(nValues) i{}; i < nValues; i++) {
505 if (fBranchData[i].fIsCArray) {
506 // valueAddress here points to a ROOT::RVec<std::byte> coming from RTreeUntypedArrayColumnReader. We know we
507 // need its buffer, so we cast it and extract the address of the buffer
508 auto *rawRVec = reinterpret_cast<ROOT::RVec<std::byte> *>(values[i]);
509 if (auto *data = rawRVec->data(); fBranchData[i].fBranchAddressForCArrays != data) {
510 fBranchData[i].fOutputBranch->SetAddress(data);
511 fBranchData[i].fBranchAddressForCArrays = data;
512 }
513 }
514 }
515}
516
// Create (or re-point) the output branch of every column for the given value addresses, then verify that no
// created branch was left without an address.
// NOTE(review): the signature line (source line 517) is missing from this listing; the body uses a parameter
// named `values`.
518{
519 // create branches in output tree
520 assert(fBranchData.size() == values.size());
// NOTE(review): the loop bound is re-evaluated because SetBranchesHelper may append entries to fBranchData. If
// that happens, `i` can reach values.size() and values[i] would be read past the end of `values` -- confirm
// whether this can occur in practice and how it is meant to be handled.
521 for (std::size_t i = 0; i < fBranchData.size(); i++) { // fBranchData can grow due to insertions
522 SetBranchesHelper(fInputTree, *fOutputTree, fBranchData, i, fOptions.fBasketSize, values[i]);
523 }
524 AssertNoNullBranchAddresses(fBranchData);
525}
526
// Create all output branches without any value attached (null value address), so that the output tree carries
// the schema even though nothing is written.
// NOTE(review): the signature line (source line 527) is missing from this listing; the body uses `inputTree`
// and `outputTree`.
528{
529 void *dummyValueAddress{};
530 for (std::size_t i = 0; i < fBranchData.size(); i++) { // fBranchData can grow due to insertions
531 SetBranchesHelper(inputTree, outputTree, fBranchData, i, fOptions.fBasketSize, dummyValueAddress);
532 }
533}
534
// Open the output file with the configured mode and compression settings, create the target directory if one
// was requested, and create the (still branch-less) output tree inside it.
// Throws std::runtime_error if the output file could not be opened.
// NOTE(review): the signature line (source line 535) is missing from this listing.
536{
537 fOutputFile.reset(
538 TFile::Open(fFileName.c_str(), fOptions.fMode.c_str(), /*ftitle=*/"",
539 ROOT::CompressionSettings(fOptions.fCompressionAlgorithm, fOptions.fCompressionLevel)));
540 if (!fOutputFile)
541 throw std::runtime_error("Snapshot: could not create output file " + fFileName);
542
543 TDirectory *outputDir = fOutputFile.get();
544 if (!fDirName.empty()) {
// In "update" mode (compared case-insensitively) an already existing directory is reused.
545 TString checkupdate = fOptions.fMode;
546 checkupdate.ToLower();
547 if (checkupdate == "update")
548 outputDir = fOutputFile->mkdir(fDirName.c_str(), "", true); // do not overwrite existing directory
549 else
550 outputDir = fOutputFile->mkdir(fDirName.c_str());
551 }
552
553 fOutputTree = std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/outputDir);
554
// An fAutoFlush value of zero leaves the tree's own auto-flush setting untouched.
555 if (fOptions.fAutoFlush)
556 fOutputTree->SetAutoFlush(fOptions.fAutoFlush);
557}
558
// Finish the snapshot: make sure the output tree has its branches even if it is empty, flush it to the file,
// close the file, and hand a TTree data source for the written tree to the output loop manager.
// NOTE(review): the signature line (source line 559) is missing from this listing.
560{
561 assert(fOutputTree != nullptr);
562 assert(fOutputFile != nullptr);
563
564 // There were no entries to fill the TTree with (either the input TTree was empty or no event passed after
565 // filtering). We have already created an empty TTree, now also create the branches to preserve the schema
566 if (fOutputTree->GetEntries() == 0) {
567 SetEmptyBranches(fInputTree, *fOutputTree);
568 }
569 // use AutoSave to flush TTree contents because TTree::Write writes in gDirectory, not in fDirectory
570 fOutputTree->AutoSave("flushbaskets");
571 // must destroy the TTree first, otherwise TFile will delete it too leading to a double delete
572 fOutputTree.reset();
573 fOutputFile->Close();
574
575 // Now connect the data source to the loop manager so it can be used for further processing
// The tree is addressed as "<directory>/<tree>" when an output directory was used.
576 auto fullTreeName = fDirName.empty() ? fTreeName : fDirName + '/' + fTreeName;
577 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::Internal::RDF::RTTreeDS>(fullTreeName, fFileName));
578}
579
580/**
581 * \brief Create a new UntypedSnapshotTTreeHelper with a different output file name
582 *
583 * \param newName A type-erased string with the output file name
584 * \return UntypedSnapshotTTreeHelper
585 *
586 * This MakeNew implementation is tied to the cloning feature of actions
587 * of the computation graph. In particular, cloning a Snapshot node usually
588 * also involves changing the name of the output file, otherwise the cloned
589 * Snapshot would overwrite the same file.
590 */
// NOTE(review): the signature (source lines 591-592), the head of the return statement (source line 608) and
// its last argument (source line 617) are missing from this listing.
593{
// `newName` is interpreted as a pointer to a std::string and copied.
594 const std::string finalName = *reinterpret_cast<const std::string *>(newName);
595 std::vector<std::string> inputBranchNames;
596 std::vector<std::string> outputBranchNames;
597 std::vector<bool> isDefine;
598 std::vector<const std::type_info *> inputColumnTypeIDs;
599 for (const auto &bd : fBranchData) {
// Entries with an empty input branch name are the size-leaf slots that were appended while creating C-style
// array branches; copying stops at the first of them so that only the originally requested columns are passed on.
600 if (bd.fInputBranchName.empty())
601 break;
602 inputBranchNames.push_back(bd.fInputBranchName);
603 outputBranchNames.push_back(bd.fOutputBranchName);
604 isDefine.push_back(bd.fIsDefine);
605 inputColumnTypeIDs.push_back(bd.fInputTypeID);
606 }
607
609 fDirName,
610 fTreeName,
611 std::move(inputBranchNames),
612 std::move(outputBranchNames),
613 fOptions,
614 std::move(isDefine),
615 fOutputLoopManager,
616 fInputLoopManager,
618}
619
621 unsigned int nSlots, std::string_view filename, std::string_view dirname, std::string_view treename,
622 const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options,
624 const std::vector<const std::type_info *> &colTypeIDs)
625 : fNSlots(nSlots),
626 fOutputFiles(fNSlots),
627 fOutputTrees(fNSlots),
628 fBranchAddressesNeedReset(fNSlots, 1),
629 fInputTrees(fNSlots),
630 fFileName(filename),
631 fDirName(dirname),
632 fTreeName(treename),
633 fOptions(options),
634 fOutputLoopManager(loopManager),
635 fInputLoopManager(inputLM)
636{
// Constructor of the multi-threaded TTree snapshot helper: sizes all per-slot containers to `nSlots`, marks
// every slot as needing its branch addresses set, and builds one independent list of branch data per slot.
// NOTE(review): four source lines are missing from this listing: 620 (start of the signature), 623 (presumably
// the `isDefine`, `loopManager` and `inputLM` parameters), 637 and 639 (the latter presumably defines
// `outputBranchNames`, which is used below but not declared in the visible lines).
638
640 fBranchData.reserve(fNSlots);
641 for (unsigned int slot = 0; slot < fNSlots; ++slot) {
642 fBranchData.emplace_back();
643 auto &thisSlot = fBranchData.back();
644 thisSlot.reserve(vbnames.size());
645 for (unsigned int i = 0; i < vbnames.size(); ++i) {
// The names are copied rather than moved because they are needed again for every slot.
646 thisSlot.emplace_back(vbnames[i], outputBranchNames[i], isDefine[i], colTypeIDs[i]);
647 }
648 }
649}
650
651// Define special member methods here where the definition of all the data member types is available
656
// Warn if a lazy Snapshot was booked but never ran: the helper still has a tree name, the lazy option is set,
// and none of the per-slot output files was ever created.
// NOTE(review): the signature line (source line 657) is missing from this listing; judging by the "not moved
// from" / "never run" checks this is presumably the destructor -- confirm.
658{
659 if (!fTreeName.empty() /*not moved from*/ && fOptions.fLazy && !fOutputFiles.empty() &&
660 std::all_of(fOutputFiles.begin(), fOutputFiles.end(), [](const auto &f) { return !f; }) /* never run */) {
// Choose the verb for the message from the file open mode (compared case-insensitively).
661 const auto fileOpenMode = [&]() {
662 TString checkupdate = fOptions.fMode;
663 checkupdate.ToLower();
664 return checkupdate == "update" ? "updated" : "created";
665 }();
666 Warning("Snapshot",
667 "A lazy Snapshot action was booked but never triggered. The tree '%s' in output file '%s' was not %s. "
668 "In case it was desired instead, remember to trigger the Snapshot operation, by storing "
669 "its result in a variable and for example calling the GetValue() method on it.",
670 fTreeName.c_str(), fFileName.c_str(), fileOpenMode);
671 }
672}
673
// Per-task setup for one slot: obtain the slot's output file from the buffer merger on first use, create a fresh
// output tree in the requested directory, pick the input tree for this task, and flag the slot so that its
// branches are set up again on the next Exec call.
// NOTE(review): the signature line (source line 674) is missing from this listing; the body uses parameters
// named `r` (a reader that may be null) and `slot`.
675{
676 ::TDirectory::TContext c; // do not let tasks change the thread-local gDirectory
677 if (!fOutputFiles[slot]) {
678 // first time this thread executes something, let's create a TBufferMerger output directory
679 fOutputFiles[slot] = fMerger->GetFile();
680 }
681 TDirectory *treeDirectory = fOutputFiles[slot].get();
682 if (!fDirName.empty()) {
683 // call returnExistingDirectory=true since MT can end up making this call multiple times
684 treeDirectory = fOutputFiles[slot]->mkdir(fDirName.c_str(), "", true);
685 }
686 // re-create output tree as we need to create its branches again, with new input variables
687 // TODO we could instead create the output tree and its branches, change addresses of input variables in each task
688 fOutputTrees[slot] =
689 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/treeDirectory);
690 fOutputTrees[slot]->SetBit(TTree::kEntriesReshuffled);
691 // TODO can be removed when RDF supports interleaved TBB task execution properly, see ROOT-10269
692 fOutputTrees[slot]->SetImplicitMT(false);
693 if (fOptions.fAutoFlush)
694 fOutputTrees[slot]->SetAutoFlush(fOptions.fAutoFlush);
695 if (r) {
696 // We could be getting a task-local TTreeReader from the TTreeProcessorMT.
697 fInputTrees[slot] = r->GetTree();
698 } else if (auto treeDS = dynamic_cast<ROOT::Internal::RDF::RTTreeDS *>(fInputLoopManager->GetDataSource())) {
699 fInputTrees[slot] = treeDS->GetTree();
700 }
701 fBranchAddressesNeedReset[slot] = 1; // reset first event flag for this slot
702}
703
// Per-task teardown for one slot: write the slot's file if the tree received entries, drop the branch pointers
// kept in the slot's branch data, and destroy the slot's output tree.
// NOTE(review): the signature line (source line 704) is missing from this listing; the body uses a parameter
// named `slot`.
705{
706 if (fOutputTrees[slot]->GetEntries() > 0)
707 fOutputFiles[slot]->Write();
708 for (auto &branchData : fBranchData[slot])
709 branchData.ClearBranchPointers(); // The branch pointers will go stale below
710 // clear now to avoid concurrent destruction of output trees and input tree (which has them listed as fClones)
711 fOutputTrees[slot].reset(nullptr);
712}
713
714void ROOT::Internal::RDF::UntypedSnapshotTTreeHelperMT::Exec(unsigned int slot, const std::vector<void *> &values)
715{
716 if (fBranchAddressesNeedReset[slot] == 0) {
717 UpdateCArraysPtrs(slot, values);
718 } else {
719 SetBranches(slot, values);
720 fBranchAddressesNeedReset[slot] = 0;
721 }
722 fOutputTrees[slot]->Fill();
723 auto entries = fOutputTrees[slot]->GetEntries();
724 auto autoFlush = fOutputTrees[slot]->GetAutoFlush();
725 if ((autoFlush > 0) && (entries % autoFlush == 0))
726 fOutputFiles[slot]->Write();
727}
728
730 const std::vector<void *> &values)
731{
// Re-point the slot's output branches of C-style array columns at the current data buffer of their values, but
// only for those whose buffer address differs from the one recorded in fBranchAddressForCArrays.
// NOTE(review): the first line of the signature (source line 729) is missing from this listing; the body also
// uses a parameter named `slot`.
732 // This code deals with branches which hold C arrays of variable size. It can happen that the buffers
733 // associated to those is re-allocated. As a result the value of the pointer can change therewith
734 // leaving associated to the branch of the output tree an invalid pointer.
735 // With this code, we set the value of the pointer in the output branch anew when needed.
736 assert(values.size() == fBranchData[slot].size());
737 auto nValues = values.size();
738 for (decltype(nValues) i{}; i < nValues; i++) {
739 auto &branchData = fBranchData[slot][i];
740 if (branchData.fIsCArray) {
741 // valueAddress here points to a ROOT::RVec<std::byte> coming from RTreeUntypedArrayColumnReader. We know we
742 // need its buffer, so we cast it and extract the address of the buffer
743 auto *rawRVec = reinterpret_cast<ROOT::RVec<std::byte> *>(values[i]);
744 if (auto *data = rawRVec->data(); branchData.fBranchAddressForCArrays != data) {
745 // reset the branch address
746 branchData.fOutputBranch->SetAddress(data);
747 branchData.fBranchAddressForCArrays = data;
748 }
749 }
750 }
751}
752
754 const std::vector<void *> &values)
755{
// Create (or re-point) the output branches of one slot for the given value addresses.
// NOTE(review): the first line of the signature (source line 753) and source line 763 are missing from this
// listing. By analogy with the single-threaded helper, line 763 presumably checks the slot's branches for null
// addresses -- confirm.
756 // create branches in output tree
757 auto &branchData = fBranchData[slot];
758 assert(branchData.size() == values.size());
// NOTE(review): the loop bound is re-evaluated because SetBranchesHelper may append entries to branchData. If
// that happens, `i` can reach values.size() and values[i] would be read past the end of `values` -- confirm
// whether this can occur in practice and how it is meant to be handled.
759 for (std::size_t i = 0; i < branchData.size(); i++) { // branchData can grow due to insertions
760 SetBranchesHelper(fInputTrees[slot], *fOutputTrees[slot], branchData, i, fOptions.fBasketSize, values[i]);
761 }
762
764}
765
// Intended to create all output branches without a value attached, using the branch data of the first slot.
// NOTE(review): the signature (source line 766) and the loop body (source line 771) are missing from this
// listing. By analogy with the single-threaded helper, the loop body presumably calls SetBranchesHelper with
// `dummyValueAddress` -- confirm.
767{
768 void *dummyValueAddress{};
769 auto &branchData = fBranchData.front();
770 for (std::size_t i = 0; i < branchData.size(); i++) { // branchData can grow due to insertions
772 }
773}
774
// Open the output file with the configured mode and compression settings and hand it to a TBufferMerger, which
// provides the per-slot files used by the tasks.
// Throws std::runtime_error if the output file could not be opened.
// NOTE(review): the signature line (source line 775) is missing from this listing.
776{
777 const auto cs = ROOT::CompressionSettings(fOptions.fCompressionAlgorithm, fOptions.fCompressionLevel);
778 auto outFile =
779 std::unique_ptr<TFile>{TFile::Open(fFileName.c_str(), fOptions.fMode.c_str(), /*ftitle=*/fFileName.c_str(), cs)};
780 if (!outFile)
781 throw std::runtime_error("Snapshot: could not create output file " + fFileName);
// Keep a raw pointer to the file for later use; ownership moves into the merger on the next line.
782 fOutputFile = outFile.get();
783 fMerger = std::make_unique<ROOT::TBufferMerger>(std::move(outFile));
784}
785
// Finish the multi-threaded snapshot: write and close all per-slot files, create an empty tree with the
// requested branches if nothing was written, release the buffer merger, and hand a TTree data source for the
// written tree to the output loop manager.
// NOTE(review): the signature line (source line 786) and source line 807 are missing from this listing.
787{
788
789 for (auto &file : fOutputFiles) {
790 if (file) {
791 file->Write();
792 file->Close();
793 }
794 }
795
796 // If there were no entries to fill the TTree with (either the input TTree was empty or no event passed after
797 // filtering), create an empty TTree in the output file and create the branches to preserve the schema
798 auto fullTreeName = fDirName.empty() ? fTreeName : fDirName + '/' + fTreeName;
799 assert(fOutputFile && "Missing output file in Snapshot finalization.");
800 if (!fOutputFile->Get(fullTreeName.c_str())) {
801
802 // First find in which directory we need to write the output TTree
803 TDirectory *treeDirectory = fOutputFile;
804 if (!fDirName.empty()) {
805 treeDirectory = fOutputFile->mkdir(fDirName.c_str(), "", true);
806 }
// NOTE(review): source line 807 is missing here; its content cannot be determined from this listing.
808
809 // Create the output TTree and create the user-requested branches
810 auto outTree =
811 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/treeDirectory);
812 TTree *inputTree{};
813 if (auto treeDS = dynamic_cast<ROOT::Internal::RDF::RTTreeDS *>(fInputLoopManager->GetDataSource()))
814 inputTree = treeDS->GetTree();
815 SetEmptyBranches(inputTree, *outTree);
816
817 fOutputFile->Write();
818 }
819
820 // flush all buffers to disk by destroying the TBufferMerger
821 fOutputFiles.clear();
822 fMerger.reset();
823
824 // Now connect the data source to the loop manager so it can be used for further processing
825 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::Internal::RDF::RTTreeDS>(fullTreeName, fFileName));
826}
827
828/**
829 * \brief Create a new UntypedSnapshotTTreeHelperMT with a different output file name
830 *
831 * \param newName A type-erased string with the output file name
832 * \return UntypedSnapshotTTreeHelperMT
833 *
834 * This MakeNew implementation is tied to the cloning feature of actions
835 * of the computation graph. In particular, cloning a Snapshot node usually
836 * also involves changing the name of the output file, otherwise the cloned
837 * Snapshot would overwrite the same file.
838 */
841{
// NOTE(review): the signature (listing lines 839-840) and the start of the return statement (listing line 856)
// are missing from this extract.
// newName is type-erased: it is reinterpreted as a pointer to std::string and the string is copied.
842 const std::string finalName = *reinterpret_cast<const std::string *>(newName);
843 std::vector<std::string> inputBranchNames;
844 std::vector<std::string> outputBranchNames;
845 std::vector<bool> isDefine;
846 std::vector<const std::type_info *> inputColumnTypeIDs;
// Rebuild the constructor arguments from the first element of fBranchData.
// Collection stops at the first entry whose input branch name is empty.
847 for (const auto &bd : fBranchData.front()) {
848 if (bd.fInputBranchName.empty())
849 break;
850 inputBranchNames.push_back(bd.fInputBranchName);
851 outputBranchNames.push_back(bd.fOutputBranchName);
852 isDefine.push_back(bd.fIsDefine);
853 inputColumnTypeIDs.push_back(bd.fInputTypeID);
854 }
855
// Everything except the file name is carried over from this helper.
857 finalName,
858 fDirName,
859 fTreeName,
860 std::move(inputBranchNames),
861 std::move(outputBranchNames),
862 fOptions,
863 std::move(isDefine),
864 fOutputLoopManager,
865 fInputLoopManager,
866 std::move(inputColumnTypeIDs)};
867}
868
870 unsigned int nSlots, std::string_view filename, std::string_view dirname, std::string_view ntuplename,
871 const ColumnNames_t &vfnames, const ColumnNames_t &fnames, const RSnapshotOptions &options,
873 const std::vector<const std::type_info *> &colTypeIDs)
874 : fFileName(filename),
875 fDirName(dirname),
876 fNTupleName(ntuplename),
877 fOptions(options),
878 fInputLoopManager(inputLM),
879 fOutputLoopManager(outputLM),
880 fInputFieldNames(vfnames),
881 fOutputFieldNames(ReplaceDotWithUnderscore(fnames)),
882 fNSlots(nSlots),
883 fFillContexts(nSlots),
884 fEntries(nSlots),
885 fInputColumnTypeIDs(colTypeIDs)
886{
888}
889
890// Define special member methods here where the definition of all the data member types is available
895
897{
// NOTE(review): the signature (listing line 896) is missing from this extract; presumably the destructor of
// UntypedSnapshotRNTupleHelper -- confirm upstream.
// Warn if a lazy snapshot was never executed: the ntuple name is still set, no output file was opened,
// and the lazy option is on.
898 if (!fNTupleName.empty() /* not moved from */ && !fOutputFile /* did not run */ && fOptions.fLazy)
899 Warning("Snapshot", "A lazy Snapshot action was booked but never triggered.");
900}
901
903{
// NOTE(review): the signature (listing line 902) and the declaration of writeOptions (listing line 920) are
// missing from this extract.
// Builds the RNTuple model, configures the write options, opens the output file and creates the parallel writer.
904 auto model = ROOT::RNTupleModel::CreateBare();
905 auto nFields = fOutputFieldNames.size();
// One token per output field, stored at the same index as the field name.
906 fFieldTokens.resize(nFields);
907 for (decltype(nFields) i = 0; i < nFields; i++) {
908 // Need to retrieve the type of every field to create as a string
909 // If the input type for a field does not have RTTI, internally we store it as the tag UseNativeDataType. When
910 // that is detected, we need to ask the data source which is the type name based on the on-disk information.
911 const auto typeName = *fInputColumnTypeIDs[i] == typeid(ROOT::Internal::RDF::UseNativeDataType)
912 ? ROOT::Internal::RDF::GetTypeNameWithOpts(*fInputLoopManager->GetDataSource(),
913 fInputFieldNames[i], fOptions.fVector2RVec)
914 : ROOT::Internal::RDF::TypeID2TypeName(*fInputColumnTypeIDs[i]);
915 model->AddField(ROOT::RFieldBase::Create(fOutputFieldNames[i], typeName).Unwrap());
916 fFieldTokens[i] = model->GetToken(fOutputFieldNames[i]);
917 }
918 model->Freeze();
919
// Forward the user-provided snapshot options to the RNTuple write options.
921 writeOptions.SetCompression(fOptions.fCompressionAlgorithm, fOptions.fCompressionLevel);
922 writeOptions.SetInitialUnzippedPageSize(fOptions.fInitialUnzippedPageSize);
923 writeOptions.SetMaxUnzippedPageSize(fOptions.fMaxUnzippedPageSize);
924 writeOptions.SetApproxZippedClusterSize(fOptions.fApproxZippedClusterSize);
925 writeOptions.SetMaxUnzippedClusterSize(fOptions.fMaxUnzippedClusterSize);
926 writeOptions.SetEnablePageChecksums(fOptions.fEnablePageChecksums);
927 writeOptions.SetEnableSamePageMerging(fOptions.fEnableSamePageMerging);
928
929 fOutputFile.reset(TFile::Open(fFileName.c_str(), fOptions.fMode.c_str()));
930 if (!fOutputFile)
931 throw std::runtime_error("Snapshot: could not create output file " + fFileName);
932
// Write into the file itself unless a directory name was requested.
933 TDirectory *outputDir = fOutputFile.get();
934 if (!fDirName.empty()) {
// The mode is compared case-insensitively against "update".
935 TString checkupdate = fOptions.fMode;
936 checkupdate.ToLower();
937 if (checkupdate == "update")
938 outputDir = fOutputFile->mkdir(fDirName.c_str(), "", true); // do not overwrite existing directory
939 else
940 outputDir = fOutputFile->mkdir(fDirName.c_str());
941 }
942
943 // The RNTupleParallelWriter has exclusive access to the underlying TFile, no further synchronization is needed for
944 // calls to Fill() (in Exec) and FlushCluster() (in FinalizeTask).
945 fWriter = ROOT::RNTupleParallelWriter::Append(std::move(model), fNTupleName, *outputDir, writeOptions);
946}
947
949{
// NOTE(review): the signature (listing line 948) is missing from this extract.
// Create the fill context and its bare entry for this slot only once; later calls for the same slot are no-ops.
950 if (!fFillContexts[slot]) {
951 fFillContexts[slot] = fWriter->CreateFillContext();
952 fEntries[slot] = fFillContexts[slot]->GetModel().CreateBareEntry();
953 }
954}
955
/// Bind the column values of the current event to the slot's entry and fill it through the slot's fill context.
/// \param slot Index selecting the fill context and entry to use.
/// \param values One raw pointer per output field, in the same order as fFieldTokens (asserted below).
956void ROOT::Internal::RDF::UntypedSnapshotRNTupleHelper::Exec(unsigned int slot, const std::vector<void *> &values)
957{
958 auto &fillContext = fFillContexts[slot];
959 auto &outputEntry = fEntries[slot];
960 assert(values.size() == fFieldTokens.size());
// Point every field of the entry at the corresponding value before filling.
961 for (decltype(values.size()) i = 0; i < values.size(); i++) {
962 outputEntry->BindRawPtr(fFieldTokens[i], values[i]);
963 }
964 fillContext->Fill(*outputEntry);
965}
966
968{
// NOTE(review): the signature (listing line 967) is missing from this extract.
// Flushes the cluster of this slot's fill context.
969 // In principle we would not need to flush a cluster here, but we want to benefit from parallelism for compression.
970 // NB: RNTupleFillContext::FlushCluster() is a nop if there is no new entry since the last flush.
971 fFillContexts[slot]->FlushCluster();
972}
973
975{
// NOTE(review): the signature (listing line 974) is missing from this extract.
// Tears down entries, fill contexts and the writer (in that order), then exposes the written RNTuple as the
// data source of the output loop manager.
976 // First clear and destroy all entries, which were created from the RNTupleFillContexts.
977 fEntries.clear();
978 fFillContexts.clear();
979 // Then destroy the RNTupleParallelWriter and write the metadata.
980 fWriter.reset();
981 // We can now set the data source of the loop manager for the RDataFrame that is returned by the Snapshot call.
// NOTE(review): with an empty fDirName the name passed below becomes "/<ntuple name>" (leading slash), since
// the directory is prepended unconditionally -- confirm that RNTupleDS accepts this form.
982 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::RDF::RNTupleDS>(fDirName + "/" + fNTupleName, fFileName));
983}
984
985/**
986 * Create a new UntypedSnapshotRNTupleHelper with a different output file name.
987 *
988 * \param[in] newName A type-erased string with the output file name
989 * \return UntypedSnapshotRNTupleHelper
990 *
991 * This MakeNew implementation is tied to the cloning feature of actions
992 * of the computation graph. In particular, cloning a Snapshot node usually
993 * also involves changing the name of the output file, otherwise the cloned
994 * Snapshot would overwrite the same file.
995 */
998{
// NOTE(review): the signature (listing lines 996-997) and the start of the return statement (listing line 1000)
// are missing from this extract.
// newName is type-erased: it is reinterpreted as a pointer to std::string and the string is copied.
999 const std::string finalName = *reinterpret_cast<const std::string *>(newName);
// All remaining arguments are taken unchanged from this helper; only the file name differs.
1001 fNSlots, finalName, fDirName, fNTupleName, fInputFieldNames,
1002 fOutputFieldNames, fOptions, fInputLoopManager, fOutputLoopManager, fInputColumnTypeIDs};
1003}
1004
1005/*
1006 * ------------------------------------
1007 * Snapshot with systematic variations
1008 * ------------------------------------
1009 */
1010namespace ROOT::Internal::RDF {
1011/// An object to store an output file and a tree in one common place to share them between instances
1012/// of Snapshot with systematic uncertainties.
1014 std::unique_ptr<TFile> fFile;
1015 std::unique_ptr<TTree> fTree;
1016 std::string fDirectoryName;
1018
1019 // Bitmasks to indicate whether syst. uncertainties have been computed. Bound to TBranches, so need to be stable in
1020 // memory.
1021 struct Bitmask {
1022 std::string branchName;
1023 std::bitset<64> bitset{};
1024 std::unique_ptr<uint64_t> branchBuffer{new uint64_t{}};
1025 };
1026 std::vector<Bitmask> fBitMasks;
1027
1028 std::unordered_map<std::string, unsigned int> fBranchToVariationMapping;
1029 // The corresponding ROOT dictionary is declared in core/clingutils/src
1030 std::unordered_map<std::string, std::pair<std::string, unsigned int>> fBranchToBitmaskMapping;
1031 unsigned int fNBits = 0;
1032
1035 {
// NOTE(review): the signature (listing lines 1033-1034) is missing from this extract; presumably the
// destructor of SnapshotOutputWriter -- confirm upstream.
// Writes the branch-to-bitmask mapping (if any), flushes the tree, closes tree and file, and connects the
// written tree to the output loop manager.
// NOTE(review): fFile and fTree are dereferenced in this first branch before the `if (fTree)` check below;
// if the mapping can be non-empty while either pointer is null, this is a null dereference -- verify.
1036 if (!fBranchToBitmaskMapping.empty()) {
1037 fFile->WriteObject(&fBranchToBitmaskMapping,
1038 (std::string{"R_rdf_branchToBitmaskMapping_"} + fTree->GetName()).c_str());
1039 }
1040 if (fTree) {
1041 // use AutoSave to flush TTree contents because TTree::Write writes in gDirectory, not in fDirectory
1042 fTree->AutoSave("flushbaskets");
1043
1044 // Now connect the data source to the loop manager so it can be used for further processing
// Tree and file names are copied to local strings because both objects are destroyed just below.
1045 std::string tree = fTree->GetName();
1046 if (!fDirectoryName.empty())
1047 tree = fDirectoryName + '/' + tree;
1048 std::string file = fFile->GetName();
1049
// The tree is released before the file.
1050 fTree.reset();
1051 fFile.reset();
1052
// NOTE(review): listing line 1053 is missing from this extract.
1054 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::Internal::RDF::RTTreeDS>(tree, file));
1055 }
1056 }
1057 SnapshotOutputWriter(SnapshotOutputWriter const &) = delete; // Anyway deleted because of the unique_ptrs
1060 delete; // Can be done, but need to make move-from object safe to destruct
1062
1063 /// Register a branch and corresponding systematic uncertainty.
1064 /// This will create an entry in the mapping from branch names to bitmasks, so the corresponding
1065 /// column can be masked if it doesn't contain valid entries. This mapping is written next to the
1066 /// tree into the output file.
/// \param branchName Name of the output branch to register.
/// \param variationIndex Index of the variation; selects the mask (index / 64) and the bit (index % 64).
/// \throws std::logic_error if branchName was already registered with a different variation index.
1067 void RegisterBranch(std::string const &branchName, unsigned int variationIndex)
1068 {
// A branch that is already known is only checked for consistency; nothing new is created.
1069 if (auto it = fBranchToVariationMapping.find(branchName); it != fBranchToVariationMapping.end()) {
1070 if (variationIndex != it->second) {
1071 throw std::logic_error("Branch " + branchName +
1072 " is being registered with different variation index than the expected one: " +
1073 std::to_string(variationIndex));
1074 }
1075 return;
1076 }
1077
1078 // Neither branch nor systematic are known, so a new entry needs to be created
// fNBits keeps the largest variation index seen so far.
1079 fNBits = std::max(fNBits, variationIndex);
1080 const auto vectorIndex = variationIndex / 64u;
1081 const auto bitIndex = variationIndex % 64u;
1082
1083 // Create bitmask branches as long as necessary to capture the bit
1084 while (vectorIndex >= fBitMasks.size()) {
// Mask branches are named "R_rdf_mask_<tree name>_<running mask index>".
1085 std::string bitmaskBranchName =
1086 std::string{"R_rdf_mask_"} + fTree->GetName() + '_' + std::to_string(fBitMasks.size());
// NOTE(review): listing line 1087 is missing from this extract; presumably it appends the new Bitmask to
// fBitMasks, otherwise this loop would not terminate -- confirm upstream.
1088 fTree->Branch(bitmaskBranchName.c_str(), fBitMasks.back().branchBuffer.get());
1089 }
1090
// NOTE(review): listing line 1091 is missing from this extract.
1092 fBranchToBitmaskMapping[branchName] = std::make_pair(fBitMasks[vectorIndex].branchName, bitIndex);
1093 }
1094
1095 /// Clear all bits, as if none of the variations passed its filter.
// NOTE(review): the signature (listing line 1096) is missing from this extract.
1097 {
// Only the in-memory bitsets are reset here.
1098 for (auto &mask : fBitMasks)
1099 mask.bitset.reset();
1100 }
1101
1102 /// Set a bit signalling that the variation at `index` passed its filter.
/// \param index Variation index; bit (index % 64) of mask number (index / 64) is set.
/// \note No bounds check is done on the mask vector; assumes a mask covering `index` already exists
/// (masks are created in RegisterBranch).
1103 void SetMaskBit(unsigned int index)
1104 {
1105 const auto vectorIndex = index / 64;
1106 const auto bitIndex = index % 64;
1107 fBitMasks[vectorIndex].bitset.set(bitIndex, true);
1108 }
1109
1110 /// Test whether no mask bit is set at all.
/// \return true if every bitset in fBitMasks is all-zero (also when fBitMasks is empty), false otherwise.
1111 bool MaskEmpty() const
1112 {
1113 return std::none_of(fBitMasks.begin(), fBitMasks.end(), [](Bitmask const &mask) { return mask.bitset.any(); });
1114 }
1115
1116 /// Write the current event and the bitmask to the output dataset.
/// \throws std::runtime_error if the output tree is no longer available.
1117 void Write() const
1118 {
1119 if (!fTree)
1120 throw std::runtime_error("The TTree associated to the Snapshot action doesn't exist, any more.");
1121
// Copy each bitset into the buffer its mask branch was created with, so that Fill() stores the current bits.
1122 for (auto const &mask : fBitMasks) {
1123 *mask.branchBuffer = mask.bitset.to_ullong();
1124 }
1125
1126 fTree->Fill();
1127 }
1128};
1129
1130} // namespace ROOT::Internal::RDF
1131
1133 std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames,
1134 const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector<bool> &&isDefine,
1136 const std::vector<const std::type_info *> &colTypeIDs)
1137 : fOptions(options), fInputLoopManager{inputLoopMgr}, fOutputLoopManager{outputLoopMgr}
1138{
// NOTE(review): several lines of this constructor (listing lines 1132, 1135, 1141, 1144, 1151, 1166) are
// missing from this extract, including the declarations of checkupdate and outputBranchNames.
// Opens the output file, creates the (optional) directory and the output tree, and registers every requested
// branch with variation index 0.
1139 EnsureValidSnapshotTTreeOutput(fOptions, std::string(treename), std::string(filename));
1140
// NOTE(review): filename is a std::string_view and filename.data() is passed where a C string is expected;
// a string_view is not guaranteed to be null-terminated. The mkdir calls below go through a std::string
// instead -- consider doing the same here.
1142 fOutputHandle = std::make_shared<SnapshotOutputWriter>(
1143 TFile::Open(filename.data(), fOptions.fMode.c_str(), /*ftitle=*/"",
1145 if (!fOutputHandle->fFile)
1146 throw std::runtime_error(std::string{"Snapshot: could not create output file "} + std::string{filename});
1147
// Write into the file itself unless a directory name was requested.
1148 TDirectory *outputDir = fOutputHandle->fFile.get();
1149 if (!dirname.empty()) {
1150 fOutputHandle->fDirectoryName = dirname;
1152 checkupdate.ToLower();
1153 if (checkupdate == "update")
1154 outputDir =
1155 fOutputHandle->fFile->mkdir(std::string{dirname}.c_str(), "", true); // do not overwrite existing directory
1156 else
1157 outputDir = fOutputHandle->fFile->mkdir(std::string{dirname}.c_str());
1158 }
1159
// The tree name is used both as name and as title.
1160 fOutputHandle->fTree = std::make_unique<TTree>(std::string{treename}.c_str(), std::string{treename}.c_str(),
1161 fOptions.fSplitLevel, /*dir=*/outputDir);
1162 fOutputHandle->fOutputLoopManager = fOutputLoopManager;
// A zero fAutoFlush leaves the tree's own auto-flush setting untouched.
1163 if (fOptions.fAutoFlush)
1164 fOutputHandle->fTree->SetAutoFlush(fOptions.fAutoFlush);
1165
1167
1168 fBranchData.reserve(vbnames.size());
// Every initially requested column is registered with variation index 0.
1169 for (unsigned int i = 0; i < vbnames.size(); ++i) {
1170 fOutputHandle->RegisterBranch(outputBranchNames[i], 0);
1171 fBranchData.emplace_back(vbnames[i], outputBranchNames[i], isDefine[i], colTypeIDs[i]);
1172 }
1173}
1174
1175/// Register a new column as a variation of the column at `originalColumnIndex`, and clone its properties.
1176/// If a nominal column is registered here, it is written without changes, but it means that it will be masked
1177/// in case its selection cuts don't pass.
1178/// \param slot Task ID for MT runs.
1179/// \param columnIndex Index where the data of this column will be passed into the helper.
1180/// \param originalColumnIndex If the column being registered is a variation of a "nominal" column, this designates the
1181/// original.
1182/// Properties such as name and output type are cloned from the original.
/// \param variationIndex Index of the variation this column belongs to; stored in the branch data and
/// forwarded to SnapshotOutputWriter::RegisterBranch.
1183/// \param variationName The variation that this column belongs to. If "nominal" is used, this column is considered as
1184/// the original.
// NOTE(review): the first signature line (listing line 1185) and the opening condition of the if/else chain
// (listing line 1191) are missing from this extract.
1186 unsigned int columnIndex,
1187 unsigned int originalColumnIndex,
1188 unsigned int variationIndex,
1189 std::string const &variationName)
1190{
1192 fBranchData[columnIndex].fVariationIndex = variationIndex; // The base column has variations
1193 fOutputHandle->RegisterBranch(fBranchData[columnIndex].fOutputBranchName, variationIndex);
1194 } else if (columnIndex >= fBranchData.size()) {
1195 // First task, need to create branches
// Grow the vector so that columnIndex is valid, then start from a copy of the original column's data.
1196 fBranchData.resize(columnIndex + 1);
1197 auto &bd = fBranchData[columnIndex];
1198 bd = fBranchData[originalColumnIndex];
// The varied branch is named "<original output name>__<variation name>", with every ':' replaced by '_'.
1199 std::string newOutputName = bd.fOutputBranchName + "__" + variationName;
1200 std::replace(newOutputName.begin(), newOutputName.end(), ':', '_');
1201 bd.fOutputBranchName = std::move(newOutputName);
1202 bd.fVariationIndex = variationIndex;
1203
1204 fOutputHandle->RegisterBranch(bd.fOutputBranchName, variationIndex);
1205 } else {
// The column already has branch data: only check (in debug builds) that the variation index is unchanged.
1206 assert(static_cast<unsigned int>(fBranchData[columnIndex].fVariationIndex) == variationIndex);
1207 }
1208}
1209
1210/// Bind all output branches to RDF columns for the given slots.
// NOTE(review): the signature (listing line 1211) is missing from this extract.
1212{
1213 // We ask the input RLoopManager if it has a TTree. We cannot rely on getting this information when constructing
1214 // this action helper, since the TTree might change e.g. when ChangeSpec is called in-between distributed tasks.
// fInputTree is only updated when the input data source is an RTTreeDS.
1215 if (auto treeDS = dynamic_cast<ROOT::Internal::RDF::RTTreeDS *>(fInputLoopManager->GetDataSource()))
1216 fInputTree = treeDS->GetTree();
1217
1218 // Create all output branches; and bind them to empty values
1219 for (std::size_t i = 0; i < fBranchData.size(); i++) { // fBranchData can grow due to insertions
1220 SetBranchesHelper(fInputTree, *fOutputHandle->fTree, fBranchData, i, fOptions.fBasketSize,
1221 fBranchData[i].EmptyInstance(/*pointerToPointer=*/false));
1222 }
1223
// Checks that no output branch is left with a null address.
1224 AssertNoNullBranchAddresses(fBranchData);
1225}
1226
1227/// Connect all output fields to the values pointed to by `values`, fill the output dataset,
1228/// call the Fill of the output tree, and clear the mask bits that show whether a variation was reached.
/// \param values One raw pointer per entry of fBranchData, in the same order (asserted below).
/// \param filterPassed Indexed by variation index; a true entry means the columns of that variation are written
/// for this event.
1229void ROOT::Internal::RDF::SnapshotHelperWithVariations::Exec(unsigned int /*slot*/, const std::vector<void *> &values,
1230 std::vector<bool> const &filterPassed)
1231{
1232 // Rebind branch pointers to RDF values
1233 assert(fBranchData.size() == values.size());
1234 for (std::size_t i = 0; i < values.size(); i++) {
1235 const auto variationIndex = fBranchData[i].fVariationIndex;
// A negative variation index marks a branch that is not subject to variations.
1236 if (variationIndex < 0) {
1237 // Branch without variations
1238 SetBranchesHelper(fInputTree, *fOutputHandle->fTree, fBranchData, i, fOptions.fBasketSize, values[i]);
1239 } else if (filterPassed[variationIndex]) {
1240 // Branch with variations
// The branch is only rebound when WriteValueIfFundamental reports that it did not handle the value.
1241 const bool fundamentalType = fBranchData[i].WriteValueIfFundamental(values[i]);
1242 if (!fundamentalType) {
1243 SetBranchesHelper(fInputTree, *fOutputHandle->fTree, fBranchData, i, fOptions.fBasketSize, values[i]);
1244 }
1245 fOutputHandle->SetMaskBit(variationIndex);
1246 }
// Branches whose variation did not pass its filter are neither rebound nor flagged in the mask.
1247 }
1248
1249 assert(!fOutputHandle->MaskEmpty()); // Exec should not have been called if nothing passes
1250
// Fill the tree, then reset the mask bits and the branch contents for the next event.
1251 fOutputHandle->Write();
1252 fOutputHandle->ClearMaskBits();
1253 for (auto &branchData : fBranchData) {
1254 branchData.ClearBranchContents();
1255 }
1256}
1257
1259{
// NOTE(review): the signature (listing line 1258) is missing from this extract.
// Drop this helper's reference to the shared output writer; the writer is destroyed once the last
// shared_ptr owner releases it.
1260 fOutputHandle.reset();
1261}
#define b(i)
Definition RSha256.hxx:100
#define f(i)
Definition RSha256.hxx:104
#define c(i)
Definition RSha256.hxx:101
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
Definition TError.cxx:252
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t mask
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char filename
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
static TBranch * SearchForBranch(TTree *tree, const char *name)
Definition TTreePyz.cxx:50
The head node of a RDF computation graph.
void SetDataSource(std::unique_ptr< ROOT::RDF::RDataSource > dataSource)
std::shared_ptr< SnapshotOutputWriter > fOutputHandle
SnapshotHelperWithVariations(std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &, const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector< bool > &&, ROOT::Detail::RDF::RLoopManager *outputLoopMgr, ROOT::Detail::RDF::RLoopManager *inputLoopMgr, const std::vector< const std::type_info * > &colTypeIDs)
void InitTask(TTreeReader *, unsigned int slot)
Bind all output branches to RDF columns for the given slots.
ROOT::Detail::RDF::RLoopManager * fOutputLoopManager
void Exec(unsigned int, const std::vector< void * > &values, std::vector< bool > const &filterPassed)
Connect all output fields to the values pointed to by values, fill the output dataset,...
void RegisterVariedColumn(unsigned int slot, unsigned int columnIndex, unsigned int originalColumnIndex, unsigned int varationIndex, std::string const &variationName)
Register a new column as a variation of the column at originalColumnIndex, and clone its properties.
UntypedSnapshotRNTupleHelper(unsigned int nSlots, std::string_view filename, std::string_view dirname, std::string_view ntuplename, const ColumnNames_t &vfnames, const ColumnNames_t &fnames, const RSnapshotOptions &options, ROOT::Detail::RDF::RLoopManager *inputLM, ROOT::Detail::RDF::RLoopManager *outputLM, const std::vector< const std::type_info * > &colTypeIDs)
void Exec(unsigned int slot, const std::vector< void * > &values)
UntypedSnapshotRNTupleHelper MakeNew(void *newName)
Create a new UntypedSnapshotRNTupleHelper with a different output file name.
void InitTask(TTreeReader *, unsigned int slot)
UntypedSnapshotTTreeHelperMT(unsigned int nSlots, std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector< bool > &&isDefine, ROOT::Detail::RDF::RLoopManager *loopManager, ROOT::Detail::RDF::RLoopManager *inputLM, const std::vector< const std::type_info * > &colTypeIDs)
void UpdateCArraysPtrs(unsigned int slot, const std::vector< void * > &values)
void SetEmptyBranches(TTree *inputTree, TTree &outputTree)
std::vector< std::vector< RBranchData > > fBranchData
UntypedSnapshotTTreeHelperMT MakeNew(void *newName, std::string_view="nominal")
Create a new UntypedSnapshotTTreeHelperMT with a different output file name.
void InitTask(TTreeReader *r, unsigned int slot)
void Exec(unsigned int slot, const std::vector< void * > &values)
void SetBranches(unsigned int slot, const std::vector< void * > &values)
UntypedSnapshotTTreeHelper MakeNew(void *newName, std::string_view="nominal")
Create a new UntypedSnapshotTTreeHelper with a different output file name.
void SetEmptyBranches(TTree *inputTree, TTree &outputTree)
void SetBranches(const std::vector< void * > &values)
void Exec(unsigned int, const std::vector< void * > &values)
UntypedSnapshotTTreeHelper(std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector< bool > &&isDefine, ROOT::Detail::RDF::RLoopManager *loopManager, ROOT::Detail::RDF::RLoopManager *inputLM, const std::vector< const std::type_info * > &colTypeIDs)
void UpdateCArraysPtrs(const std::vector< void * > &values)
static RResult< std::unique_ptr< RFieldBase > > Create(const std::string &fieldName, const std::string &typeName, const ROOT::RCreateFieldOptions &options, const ROOT::RNTupleDescriptor *desc, ROOT::DescriptorId_t fieldId)
Factory method to resurrect a field from the stored on-disk type information.
static std::unique_ptr< RNTupleModel > CreateBare()
Creates a "bare model", i.e. an RNTupleModel with no default entry.
static std::unique_ptr< RNTupleParallelWriter > Append(std::unique_ptr< ROOT::RNTupleModel > model, std::string_view ntupleName, TDirectory &fileOrDirectory, const ROOT::RNTupleWriteOptions &options=ROOT::RNTupleWriteOptions())
Append an RNTuple to the existing file.
Common user-tunable settings for storing RNTuples.
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:67
const_iterator begin() const
const_iterator end() const
A "std::vector"-like collection of values implementing handy operation to analyse them.
Definition RVec.hxx:1526
A Branch for the case of an object.
A TTree is a list of TBranches.
Definition TBranch.h:93
static TClass * Class()
TClassRef is used to implement a permanent reference to a TClass object.
Definition TClassRef.h:29
TClass instances represent classes, structs and namespaces in the ROOT type system.
Definition TClass.h:84
Basic data type descriptor (datatype information is obtained from CINT).
Definition TDataType.h:44
Int_t GetType() const
Definition TDataType.h:68
static TDictionary * GetDictionary(const char *name)
Retrieve the type (class, fundamental type, typedef etc) named "name".
TDirectory::TContext keeps track and restore the current directory.
Definition TDirectory.h:89
Describe directory structure in memory.
Definition TDirectory.h:45
A ROOT file is an on-disk file, usually with extension .root, that stores objects in a file-system-li...
Definition TFile.h:131
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition TFile.cxx:3764
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
Definition TLeaf.h:57
Mother of all ROOT objects.
Definition TObject.h:41
Basic string class.
Definition TString.h:138
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
Definition TTreeReader.h:46
A TTree represents a columnar dataset.
Definition TTree.h:89
@ kEntriesReshuffled
If set, signals that this TTree is the output of the processing of another TTree, and the entries are...
Definition TTree.h:297
std::vector< std::string > ReplaceDotWithUnderscore(const std::vector< std::string > &columnNames)
Replace occurrences of '.
Definition RDFUtils.cxx:397
char TypeName2ROOTTypeName(const std::string &b)
Convert type name (e.g.
Definition RDFUtils.cxx:342
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info An empty string is returned in case of failure...
Definition RDFUtils.cxx:178
std::string GetTypeNameWithOpts(const ROOT::RDF::RDataSource &ds, std::string_view colName, bool vector2RVec)
Definition RDFUtils.cxx:627
char TypeID2ROOTTypeName(const std::type_info &tid)
Definition RDFUtils.cxx:206
TBranch * CallBranchImp(TTree &tree, const char *branchname, TClass *ptrClass, void *addobj, Int_t bufsize=32000, Int_t splitlevel=99)
Definition TTree.cxx:10132
TBranch * CallBranchImpRef(TTree &tree, const char *branchname, TClass *ptrClass, EDataType datatype, void *addobj, Int_t bufsize=32000, Int_t splitlevel=99)
Definition TTree.cxx:10126
std::vector< std::string > ColumnNames_t
@ kROOTRVec
Definition ESTLType.h:46
@ kSTLvector
Definition ESTLType.h:30
int CompressionSettings(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
ROOT::ESTLType STLKind(std::string_view type)
Converts STL container name to number.
ROOT::ESTLType IsSTLCont(std::string_view type)
type : type name: vector<list<classA,allocator>,allocator> result: 0 : not stl container code of cont...
Stores empty instances of classes, so a dummy object can be written when a systematic variation doesn...
Stores variations of a fundamental type.
Stores properties of each output branch in a Snapshot.
void * EmptyInstance(bool pointerToPointer)
Return a pointer to an empty instance of the type represented by this branch.
void ClearBranchContents()
Point the branch address to an empty instance of the type represented by this branch or write null by...
std::variant< FundamentalType, EmptyDynamicType > fTypeData
const std::type_info * fInputTypeID
An object to store an output file and a tree in one common place to share them between instances of S...
void Write() const
Write the current event and the bitmask to the output dataset.
void ClearMaskBits()
Clear all bits, as if none of the variations passed its filter.
SnapshotOutputWriter(SnapshotOutputWriter const &)=delete
std::unordered_map< std::string, std::pair< std::string, unsigned int > > fBranchToBitmaskMapping
void RegisterBranch(std::string const &branchName, unsigned int variationIndex)
Register a branch and corresponding systematic uncertainty.
void SetMaskBit(unsigned int index)
Set a bit signalling that the variation at index passed its filter.
bool MaskEmpty() const
Test if any of the mask bits are set.
SnapshotOutputWriter & operator=(SnapshotOutputWriter const &)=delete
std::unordered_map< std::string, unsigned int > fBranchToVariationMapping
SnapshotOutputWriter(SnapshotOutputWriter &&) noexcept=delete
Tag to let data sources use the native data type when creating a column reader.
Definition Utils.hxx:344
A collection of options to steer the creation of the dataset on disk through Snapshot().
int fAutoFlush
*(TTree only)* AutoFlush value for output tree
std::string fMode
Mode of creation of output file.
ECAlgo fCompressionAlgorithm
Compression algorithm of output file.
int fSplitLevel
*(TTree only)* Split level of output tree
int fCompressionLevel
Compression level of output file.