Logo ROOT  
Reference Guide
Loading...
Searching...
No Matches
SnapshotHelpers.hxx
Go to the documentation of this file.
1/**
2 \file ROOT/RDF/SnapshotHelpers.hxx
3 \author Enrico Guiraud, CERN
4 \author Danilo Piparo, CERN
5 \date 2016-12
6 \author Vincenzo Eduardo Padulano
7 \author Stephan Hageboeck
8 \date 2025-06
9*/
10
11/*************************************************************************
12 * Copyright (C) 1995-2025, Rene Brun and Fons Rademakers. *
13 * All rights reserved. *
14 * *
15 * For the licensing terms see $ROOTSYS/LICENSE. *
16 * For the list of contributors see $ROOTSYS/README/CREDITS. *
17 *************************************************************************/
18
19#ifndef RDF_SNAPSHOTHELPERS
20#define RDF_SNAPSHOTHELPERS
21
23
26#include <ROOT/RDF/Utils.hxx>
27
28#include <array>
29#include <memory>
30#include <variant>
31
32class TBranch;
33class TFile;
34
35namespace ROOT {
36class REntry;
37class RFieldToken;
40class TBufferMerger;
42} // namespace ROOT
43
44namespace ROOT::Internal::RDF {
45
/// Snapshot action helper whose writer member is a ROOT::RNTupleParallelWriter (see fWriter).
/// Only declarations are visible in this class body, apart from two small inline bodies; the remaining member
/// functions are defined elsewhere.
/// NOTE(review): the embedded source line numbers jump (52->56, 56->58, 69->72, 73->79, 91->93), so this listing
/// omits lines of the original header, including the declaration that owns the `{ ... }` body placed after
/// GetActionName(). Consult the original file before editing.
46class R__CLING_PTRCHECK(off) UntypedSnapshotRNTupleHelper final : public RActionImpl<UntypedSnapshotRNTupleHelper> {
47 std::string fFileName;
48 std::string fDirName;
49 std::string fNTupleName;
50
   // Owning handle to a TFile; presumably the file the snapshot is written to -- TODO confirm in the implementation.
51 std::unique_ptr<TFile> fOutputFile;
52
56 ColumnNames_t fInputFieldNames; // This contains the resolved aliases
58 std::unique_ptr<ROOT::RNTupleParallelWriter> fWriter;
59 std::vector<ROOT::RFieldToken> fFieldTokens;
60
   // No in-class initializer: the value has to come from the constructor (which takes `nSlots`).
   // NOTE(review): presumably fFillContexts and fEntries hold one element per slot -- TODO confirm.
61 unsigned int fNSlots;
62 std::vector<std::shared_ptr<ROOT::RNTupleFillContext>> fFillContexts;
63 std::vector<std::unique_ptr<ROOT::REntry>> fEntries;
64
65 std::vector<const std::type_info *> fInputColumnTypeIDs; // Types for the input columns
66
67public:
   /// Constructor, declared only.
   /// NOTE(review): source lines 70-71 of this parameter list are missing from the listing.
68 UntypedSnapshotRNTupleHelper(unsigned int nSlots, std::string_view filename, std::string_view dirname,
69 std::string_view ntuplename, const ColumnNames_t &vfnames, const ColumnNames_t &fnames,
72 const std::vector<const std::type_info *> &colTypeIDs);
73
79
80 void Initialize();
81
   /// \param slot Index of the calling slot.
   /// \param values Type-erased pointers; presumably one per input column, in the order of fInputColumnTypeIDs
   ///               -- TODO confirm against the implementation.
82 void Exec(unsigned int slot, const std::vector<void *> &values);
83
84 void InitTask(TTreeReader *, unsigned int slot);
85
86 void FinalizeTask(unsigned int slot);
87
88 void Finalize();
89
   /// \return The fixed action name "Snapshot".
90 std::string GetActionName() { return "Snapshot"; }
91
   // Body of a member whose signature (source line 92) is missing from this listing.
   // It returns a callback that ignores both of its arguments and does nothing.
93 {
94 return [](unsigned int, const RSampleInfo &) mutable {};
95 }
96
   /// NOTE(review): `newName` is an untyped pointer; the pointee type it is expected to have is not visible
   /// in this header -- confirm against the implementation before calling.
97 UntypedSnapshotRNTupleHelper MakeNew(void *newName);
98};
99
// NOTE(review): the embedded source line numbers jump (100->102, 104->106, 112->114, 121->123, 126->128,
// 134->136), so this listing omits lines of the original header. The openers of this struct and of the two
// nested types closed by the `};` lines below are among the missing lines; the nested type names
// (FundamentalType, EmptyDynamicType) are taken from the fTypeData declaration.
100/// Stores properties of each output branch in a Snapshot.
102 /// Stores variations of a fundamental type.
103 /// The bytes hold anything up to double or 64-bit numbers, and are cleared for every event.
104 /// This allows for binding the branches directly to these bytes.
106 static constexpr std::size_t fNBytes = 8;
    // The initializer names only the first byte; the remaining elements are value-initialized, so the
    // whole buffer starts out zeroed.
107 alignas(8) std::array<std::byte, fNBytes> fBytes{std::byte{0}}; // 8 bytes to store any fundamental type
    // Number of bytes of fBytes that WriteValueIfFundamental() copies into.
108 unsigned short fSize = 0;
    // Stores `size` in fSize. The bound `size <= fNBytes` is checked by assert only, i.e. not in builds
    // that compile asserts out.
109 FundamentalType(unsigned short size) : fSize(size) { assert(size <= fNBytes); }
110 };
111 /// Stores empty instances of classes, so a dummy object can be written when a systematic variation
112 /// doesn't pass a selection cut.
114 const TClass *fTClass = nullptr;
115 std::shared_ptr<void> fEmptyInstance = nullptr;
116 void *fRawPtrToEmptyInstance = nullptr; // Needed because TTree expects pointer to pointer
117 };
118
119 std::string fInputBranchName; // This contains resolved aliases
120 std::string fOutputBranchName;
121 const std::type_info *fInputTypeID = nullptr;
123 void *fBranchAddressForCArrays = nullptr; // Used to detect if branch addresses need to be updated
124
    // NOTE(review): the member that the following two-line comment describes (source line 127) is missing
    // from this listing.
125 // A negative index indicates no variations, 0 is for nominal, >0 marks columns that are only valid if a specific
126 // filter passed
    // Defaults to a FundamentalType of size 0, so WriteValueIfFundamental() copies zero bytes until a
    // different value is assigned.
128 std::variant<FundamentalType, EmptyDynamicType> fTypeData = FundamentalType{0};
129 bool fIsCArray = false;
130 bool fIsDefine = false;
131
132 RBranchData() = default;
133 RBranchData(std::string inputBranchName, std::string outputBranchName, bool isDefine, const std::type_info *typeID);
134
    // Body of a member whose signature (source line 135) is missing from this listing.
    // It resets fOutputBranch and fBranchAddressForCArrays to nullptr. The declaration of fOutputBranch
    // is also not visible here (presumably the missing source line 122 -- TODO confirm).
136 {
137 fOutputBranch = nullptr;
138 fBranchAddressForCArrays = nullptr;
139 }
140 void *EmptyInstance(bool pointerToPointer);
141 void ClearBranchContents();
142 /// For fundamental types represented by TDataType, fetch a value from the pointer into the local branch buffer.
143 /// If the branch holds a class type, nothing happens.
    /// \param valuePtr Source of the copy. Exactly `fSize` bytes are read from it; it is not checked for null.
144 /// \return true if the branch holds a fundamental type, false if it holds a class type.
145 bool WriteValueIfFundamental(void *valuePtr)
146 {
    // std::get_if yields nullptr when fTypeData currently holds the other alternative (EmptyDynamicType).
147 if (auto fundamentalType = std::get_if<FundamentalType>(&fTypeData); fundamentalType) {
148 std::memcpy(fundamentalType->fBytes.data(), valuePtr, fundamentalType->fSize);
149 return true;
150 }
151 return false;
152 }
153};
154
/// Snapshot action helper that owns one output TFile and one output TTree (see fOutputFile, fOutputTree).
/// Only declarations are visible in this class body, apart from two small inline bodies.
/// NOTE(review): the embedded source line numbers jump (158->160, 161->163, 164->167, 171->173, 174->180,
/// 196->198), so this listing omits lines of the original header. The declaration of fBranchAddressesNeedReset
/// (written by the callback below) and the signature owning the `{ ... }` body after GetActionName() are
/// among the missing lines.
155class R__CLING_PTRCHECK(off) UntypedSnapshotTTreeHelper final : public RActionImpl<UntypedSnapshotTTreeHelper> {
156 std::string fFileName;
157 std::string fDirName;
158 std::string fTreeName;
160 std::unique_ptr<TFile> fOutputFile;
161 std::unique_ptr<TTree> fOutputTree; // must be a ptr because TTrees are not copy/move constructible
163 TTree *fInputTree = nullptr; // Current input tree. Set at initialization time (`InitTask`)
164 std::vector<RBranchData> fBranchData; // Information for all output branches
167
168public:
    /// Constructor, declared only. `isDefine` is taken by rvalue reference.
    /// NOTE(review): source line 172 of this parameter list is missing from the listing.
169 UntypedSnapshotTTreeHelper(std::string_view filename, std::string_view dirname, std::string_view treename,
170 const ColumnNames_t &vbnames, const ColumnNames_t &bnames,
171 const RSnapshotOptions &options, std::vector<bool> &&isDefine,
173 const std::vector<const std::type_info *> &colTypeIDs);
174
180
    // Both parameters are unnamed in this declaration.
181 void InitTask(TTreeReader *, unsigned int);
182
    /// \param values Type-erased pointers; presumably one per entry of fBranchData -- TODO confirm.
183 void Exec(unsigned int, const std::vector<void *> &values);
184
185 void UpdateCArraysPtrs(const std::vector<void *> &values);
186
187 void SetBranches(const std::vector<void *> &values);
188
189 void SetEmptyBranches(TTree *inputTree, TTree &outputTree);
190
191 void Initialize();
192
193 void Finalize();
194
    /// \return The fixed action name "Snapshot".
195 std::string GetActionName() { return "Snapshot"; }
196
    // Body of a member whose signature (source line 197) is missing from this listing.
    // The returned callback ignores both arguments and sets fBranchAddressesNeedReset to true.
    // It captures `this`, so it refers to this particular object and must not be invoked after the
    // object has been destroyed.
198 {
199 return [this](unsigned int, const RSampleInfo &) mutable { fBranchAddressesNeedReset = true; };
200 }
201
    /// The second parameter is unnamed in this declaration and defaults to "nominal".
    /// NOTE(review): `newName` is an untyped pointer; its expected pointee type is not visible here.
202 UntypedSnapshotTTreeHelper MakeNew(void *newName, std::string_view /*variation*/ = "nominal");
203};
204
/// Snapshot action helper with per-slot state: vectors of output files, output trees, input trees, reset flags
/// and branch data, plus a ROOT::TBufferMerger.
/// Only declarations are visible in this class body, apart from two small inline bodies.
/// NOTE(review): the embedded source line numbers jump (222->224, 226->229, 233->235, 236->242, 260->262), so
/// this listing omits lines of the original header, including the members that belonged under the
/// "computation graph" heading and the signature owning the `{ ... }` body after GetActionName().
205class R__CLING_PTRCHECK(off) UntypedSnapshotTTreeHelperMT final : public RActionImpl<UntypedSnapshotTTreeHelperMT> {
206
207 // IMT-specific data members
208
    // No in-class initializer: the value has to come from the constructor (which takes `nSlots`).
209 unsigned int fNSlots;
210 std::unique_ptr<ROOT::TBufferMerger> fMerger; // must use a ptr because TBufferMerger is not movable
211 std::vector<std::shared_ptr<ROOT::TBufferMergerFile>> fOutputFiles;
212 std::vector<std::unique_ptr<TTree>> fOutputTrees;
213 std::vector<int> fBranchAddressesNeedReset; // vector<bool> does not allow concurrent writing of different elements
214 std::vector<TTree *> fInputTrees; // Current input trees, one per slot. Set at initialization time (`InitTask`)
215 std::vector<std::vector<RBranchData>> fBranchData; // Information for all output branches of each slot
216
217 // Attributes of the output TTree
218
219 std::string fFileName;
220 std::string fDirName;
221 std::string fTreeName;
    // NOTE(review): unlike fInputTree in the sibling helpers, this raw pointer has no in-class initializer;
    // verify that every constructor sets it.
222 TFile *fOutputFile; // Non-owning view on the output file
224
225 // Attributes related to the computation graph
226
229
230public:
    /// Constructor, declared only. `isDefine` is taken by rvalue reference.
    /// NOTE(review): source line 234 of this parameter list is missing from the listing.
231 UntypedSnapshotTTreeHelperMT(unsigned int nSlots, std::string_view filename, std::string_view dirname,
232 std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames,
233 const RSnapshotOptions &options, std::vector<bool> &&isDefine,
235 const std::vector<const std::type_info *> &colTypeIDs);
236
242
243 void InitTask(TTreeReader *r, unsigned int slot);
244
245 void FinalizeTask(unsigned int slot);
246
    /// \param slot Index of the calling slot.
    /// \param values Type-erased pointers; presumably one per entry of fBranchData[slot] -- TODO confirm.
247 void Exec(unsigned int slot, const std::vector<void *> &values);
248
249 void UpdateCArraysPtrs(unsigned int slot, const std::vector<void *> &values);
250
251 void SetBranches(unsigned int slot, const std::vector<void *> &values);
252
253 void SetEmptyBranches(TTree *inputTree, TTree &outputTree);
254
255 void Initialize();
256
257 void Finalize();
258
    /// \return The fixed action name "Snapshot".
259 std::string GetActionName() { return "Snapshot"; }
260
    // Body of a member whose signature (source line 261) is missing from this listing.
    // The returned callback writes 1 into fBranchAddressesNeedReset[slot]; the index is used through
    // operator[] without a bounds check. It captures `this`, so it must not be invoked after this
    // object has been destroyed.
262 {
263 return [this](unsigned int slot, const RSampleInfo &) mutable { fBranchAddressesNeedReset[slot] = 1; };
264 }
265
    /// The second parameter is unnamed in this declaration and defaults to "nominal".
    /// NOTE(review): `newName` is an untyped pointer; its expected pointee type is not visible here.
266 UntypedSnapshotTTreeHelperMT MakeNew(void *newName, std::string_view /*variation*/ = "nominal");
267};
268
269struct SnapshotOutputWriter;
270
271/// TTree snapshot helper with systematic variations.
/// Its action name is "SnapshotWithVariations", and its Exec() takes an extra `filterPassed` vector compared
/// to the other helpers in this header.
/// NOTE(review): the embedded source line numbers jump (273->275, 277->280, 280->282, 287->289, 290->296), so
/// this listing omits lines of the original header, including some data members and part of the constructor's
/// parameter list.
272class R__CLING_PTRCHECK(off) SnapshotHelperWithVariations
273 : public ROOT::Detail::RDF::RActionImpl<SnapshotHelperWithVariations> {
    // SnapshotOutputWriter is only forward-declared in this header, so its layout is not visible here.
275 std::shared_ptr<SnapshotOutputWriter> fOutputHandle;
276 TTree *fInputTree = nullptr; // Current input tree. Set at initialization time (`InitTask`)
277 std::vector<RBranchData> fBranchData;
280
282
283public:
    /// Constructor, declared only. The `vbnames` and `isDefine` parameters are left unnamed in this
    /// declaration; presumably they are accepted only to match the other helpers' constructor signatures
    /// -- TODO confirm against the implementation.
    /// NOTE(review): source line 288 of this parameter list is missing from the listing.
284 SnapshotHelperWithVariations(std::string_view filename, std::string_view dirname, std::string_view treename,
285 const ColumnNames_t & /*vbnames*/, const ColumnNames_t &bnames,
286 const RSnapshotOptions &options, std::vector<bool> && /*isDefine*/,
287 ROOT::Detail::RDF::RLoopManager *outputLoopMgr,
289 const std::vector<const std::type_info *> &colTypeIDs);
290
296
    /// NOTE(review): the parameter name `varationIndex` looks like a misspelling of `variationIndex`.
    /// Renaming a parameter in a declaration does not affect callers, but confirm that the definition
    /// and documentation are updated together.
297 void RegisterVariedColumn(unsigned int slot, unsigned int columnIndex, unsigned int originalColumnIndex,
298 unsigned int varationIndex, std::string const &variationName);
299
300 void InitTask(TTreeReader *, unsigned int slot);
301
    /// The slot parameter is unnamed in this declaration.
    /// \param values Type-erased pointers to the column values.
    /// \param filterPassed One flag per filter; presumably indexed consistently with the variation index
    ///                     stored per branch -- TODO confirm against the implementation.
302 void Exec(unsigned int /*slot*/, const std::vector<void *> &values, std::vector<bool> const &filterPassed);
303
304 /// Nothing to do. All initialisations run in the constructor or InitTask().
305 void Initialize() {}
306
307 void Finalize();
308
    /// \return The fixed action name "SnapshotWithVariations".
309 std::string GetActionName() { return "SnapshotWithVariations"; }
310};
311
312} // namespace ROOT::Internal::RDF
313
314#endif
ROOT::R::TRInterface & r
Definition Object.C:4
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
Base class for action helpers, see RInterface::Book() for more information.
The head node of a RDF computation graph.
std::shared_ptr< SnapshotOutputWriter > fOutputHandle
void Initialize()
Nothing to do. All initialisations run in the constructor or InitTask().
SnapshotHelperWithVariations(std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &, const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector< bool > &&, ROOT::Detail::RDF::RLoopManager *outputLoopMgr, ROOT::Detail::RDF::RLoopManager *inputLoopMgr, const std::vector< const std::type_info * > &colTypeIDs)
void InitTask(TTreeReader *, unsigned int slot)
Bind all output branches to RDF columns for the given slots.
ROOT::Detail::RDF::RLoopManager * fInputLoopManager
SnapshotHelperWithVariations(SnapshotHelperWithVariations const &) noexcept=delete
ROOT::Detail::RDF::RLoopManager * fOutputLoopManager
void Exec(unsigned int, const std::vector< void * > &values, std::vector< bool > const &filterPassed)
Connect all output fields to the values pointed to by values, fill the output dataset,...
SnapshotHelperWithVariations(SnapshotHelperWithVariations &&) noexcept=default
void RegisterVariedColumn(unsigned int slot, unsigned int columnIndex, unsigned int originalColumnIndex, unsigned int varationIndex, std::string const &variationName)
Register a new column as a variation of the column at originalColumnIndex, and clone its properties.
ROOT::RDF::SampleCallback_t GetSampleCallback() final
Override this method to register a callback that is executed before processing a new data sample ...
std::vector< std::shared_ptr< ROOT::RNTupleFillContext > > fFillContexts
UntypedSnapshotRNTupleHelper(const UntypedSnapshotRNTupleHelper &)=delete
std::unique_ptr< ROOT::RNTupleParallelWriter > fWriter
UntypedSnapshotRNTupleHelper(unsigned int nSlots, std::string_view filename, std::string_view dirname, std::string_view ntuplename, const ColumnNames_t &vfnames, const ColumnNames_t &fnames, const RSnapshotOptions &options, ROOT::Detail::RDF::RLoopManager *inputLM, ROOT::Detail::RDF::RLoopManager *outputLM, const std::vector< const std::type_info * > &colTypeIDs)
UntypedSnapshotRNTupleHelper & operator=(const UntypedSnapshotRNTupleHelper &)=delete
std::vector< std::unique_ptr< ROOT::REntry > > fEntries
std::vector< const std::type_info * > fInputColumnTypeIDs
void Exec(unsigned int slot, const std::vector< void * > &values)
ROOT::Detail::RDF::RLoopManager * fOutputLoopManager
UntypedSnapshotRNTupleHelper(UntypedSnapshotRNTupleHelper &&) noexcept
ROOT::Detail::RDF::RLoopManager * fInputLoopManager
void InitTask(TTreeReader *, unsigned int slot)
UntypedSnapshotTTreeHelperMT(unsigned int nSlots, std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector< bool > &&isDefine, ROOT::Detail::RDF::RLoopManager *loopManager, ROOT::Detail::RDF::RLoopManager *inputLM, const std::vector< const std::type_info * > &colTypeIDs)
void UpdateCArraysPtrs(unsigned int slot, const std::vector< void * > &values)
void SetEmptyBranches(TTree *inputTree, TTree &outputTree)
ROOT::Detail::RDF::RLoopManager * fInputLoopManager
UntypedSnapshotTTreeHelperMT(UntypedSnapshotTTreeHelperMT &&) noexcept
ROOT::Detail::RDF::RLoopManager * fOutputLoopManager
std::vector< std::shared_ptr< ROOT::TBufferMergerFile > > fOutputFiles
std::vector< std::vector< RBranchData > > fBranchData
ROOT::RDF::SampleCallback_t GetSampleCallback() final
Override this method to register a callback that is executed before processing a new data sample ...
void InitTask(TTreeReader *r, unsigned int slot)
UntypedSnapshotTTreeHelperMT & operator=(const UntypedSnapshotTTreeHelperMT &)=delete
void Exec(unsigned int slot, const std::vector< void * > &values)
std::vector< std::unique_ptr< TTree > > fOutputTrees
std::unique_ptr< ROOT::TBufferMerger > fMerger
void SetBranches(unsigned int slot, const std::vector< void * > &values)
UntypedSnapshotTTreeHelperMT(const UntypedSnapshotTTreeHelperMT &)=delete
ROOT::Detail::RDF::RLoopManager * fOutputLoopManager
ROOT::Detail::RDF::RLoopManager * fInputLoopManager
UntypedSnapshotTTreeHelper(UntypedSnapshotTTreeHelper &&) noexcept
void SetEmptyBranches(TTree *inputTree, TTree &outputTree)
void SetBranches(const std::vector< void * > &values)
UntypedSnapshotTTreeHelper(const UntypedSnapshotTTreeHelper &)=delete
void Exec(unsigned int, const std::vector< void * > &values)
UntypedSnapshotTTreeHelper(std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector< bool > &&isDefine, ROOT::Detail::RDF::RLoopManager *loopManager, ROOT::Detail::RDF::RLoopManager *inputLM, const std::vector< const std::type_info * > &colTypeIDs)
void UpdateCArraysPtrs(const std::vector< void * > &values)
UntypedSnapshotTTreeHelper & operator=(const UntypedSnapshotTTreeHelper &)=delete
ROOT::RDF::SampleCallback_t GetSampleCallback() final
Override this method to register a callback that is executed before processing a new data sample ...
This type represents a sample identifier, to be used in conjunction with RDataFrame features such as ...
The REntry is a collection of values in an RNTuple corresponding to a complete row in the data set.
Definition REntry.hxx:54
A field token identifies a (sub)field in an entry.
A context for filling entries (data) into clusters of an RNTuple.
A writer to fill an RNTuple from multiple contexts.
A TBufferMergerFile is similar to a TMemFile, but when data is written to it, it is appended to the T...
TBufferMerger is a class to facilitate writing data in parallel from multiple threads,...
A TTree is a list of TBranches.
Definition TBranch.h:93
TClass instances represent classes, structs and namespaces in the ROOT type system.
Definition TClass.h:84
A file, usually with extension .root, that stores data and code in the form of serialized objects in ...
Definition TFile.h:130
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
Definition TTreeReader.h:46
A TTree represents a columnar dataset.
Definition TTree.h:89
STL class.
std::function< void(unsigned int, const ROOT::RDF::RSampleInfo &)> SampleCallback_t
The type of a data-block callback, registered with an RDataFrame computation graph via e....
std::vector< std::string > ColumnNames_t
Stores empty instances of classes, so a dummy object can be written when a systematic variation doesn...
FundamentalType(unsigned short size)
RBranchData()=default
Stores variations of a fundamental type.
bool WriteValueIfFundamental(void *valuePtr)
For fundamental types represented by TDataType, fetch a value from the pointer into the local branch ...
std::variant< FundamentalType, EmptyDynamicType > fTypeData
const std::type_info * fInputTypeID
A collection of options to steer the creation of the dataset on disk through Snapshot().