45static_assert(std::is_nothrow_move_assignable_v<RBranchData>);
46static_assert(std::is_nothrow_move_constructible_v<RBranchData>);
54 if (
branchData.fOutputBranch->GetAddress() ==
nullptr)
65 std::string
msg =
"RDataFrame::Snapshot:";
68 " is needed as it provides the size for one or more branches containing dynamically sized arrays, but "
74 msg.resize(
msg.size() - 2);
75 msg +=
" are needed as they provide the size of other branches containing dynamically sized arrays, but they are";
77 msg +=
" not part of the set of branches that are being written out.";
78 throw std::runtime_error(
msg);
107 const auto bname =
leaf->GetName();
140 const auto btype =
leaf->GetTypeName();
144 "RDataFrame::Snapshot: could not correctly construct a leaflist for C-style array in column %s. The "
145 "leaf is of type '%s'. This column will not be written out.",
203 "RDataFrame::Snapshot: could not correctly construct a leaflist for fundamental type in column %s. This "
204 "column will not be written out.",
205 bd.fOutputBranchName.c_str());
216 const std::string &fileName)
226 throw std::invalid_argument(
"Snapshot: cannot open file \"" + fileName +
"\" in update mode");
233 if (
opts.fOverwriteIfExists) {
234 if (
outTree->InheritsFrom(
"TTree")) {
240 const std::string
msg =
"Snapshot: tree \"" +
treeName +
"\" already present in file \"" + fileName +
241 "\". If you want to delete the original tree and write another, please set "
242 "RSnapshotOptions::fOverwriteIfExists to true.";
243 throw std::invalid_argument(
msg);
251 const std::string &fileName)
261 throw std::invalid_argument(
"Snapshot: cannot open file \"" + fileName +
"\" in update mode");
266 if (
opts.fOverwriteIfExists) {
270 const std::string
msg =
"Snapshot: RNTuple \"" +
ntupleName +
"\" already present in file \"" + fileName +
271 "\". If you want to delete the original ntuple and write another, please set "
272 "the 'fOverwriteIfExists' option to true in RSnapshotOptions.";
273 throw std::invalid_argument(
msg);
284 if (
opts.fOverwriteIfExists) {
291 const std::string
msg =
"Snapshot: object \"" +
ntupleName +
"\" already present in file \"" + fileName +
292 "\". If you want to delete the original object and write a new RNTuple, please set "
293 "the 'fOverwriteIfExists' option to true in RSnapshotOptions.";
294 throw std::invalid_argument(
msg);
364 throw std::logic_error(
365 "RDataFrame::Snapshot: something went wrong when creating a TTree branch, please report this as a bug.");
370 const std::type_info *
typeID)
395 auto &
dynamic = std::get<EmptyDynamicType>(fTypeData);
403 dynamic.fEmptyInstance = std::shared_ptr<void>{tclass->New(), tclass->GetDestructor()};
409 return &
dynamic.fRawPtrToEmptyInstance;
411 return dynamic.fEmptyInstance.get();
428 fOutputBranch->SetAddress(EmptyInstance(
true));
436 const std::vector<const std::type_info *> &
colTypeIDs)
448 for (
unsigned int i = 0; i <
vbnames.size(); ++i) {
461 if (!fTreeName.empty() && !fOutputFile && fOptions.fLazy) {
465 return checkupdate ==
"update" ?
"updated" :
"created";
468 "A lazy Snapshot action was booked but never triggered. The tree '%s' in output file '%s' was not %s. "
469 "In case it was desired instead, remember to trigger the Snapshot operation, by storing "
470 "its result in a variable and for example calling the GetValue() method on it.",
480 fInputTree =
treeDS->GetTree();
481 fBranchAddressesNeedReset =
true;
486 if (!fBranchAddressesNeedReset) {
487 UpdateCArraysPtrs(values);
490 fBranchAddressesNeedReset =
false;
502 assert(values.size() == fBranchData.size());
505 if (fBranchData[i].fIsCArray) {
509 if (
auto *
data =
rawRVec->data(); fBranchData[i].fBranchAddressForCArrays !=
data) {
510 fBranchData[i].fOutputBranch->SetAddress(
data);
511 fBranchData[i].fBranchAddressForCArrays =
data;
520 assert(fBranchData.size() == values.size());
521 for (std::size_t i = 0; i < fBranchData.size(); i++) {
522 SetBranchesHelper(fInputTree, *fOutputTree, fBranchData, i, fOptions.fBasketSize, values[i]);
530 for (std::size_t i = 0; i < fBranchData.size(); i++) {
538 TFile::Open(fFileName.c_str(), fOptions.fMode.c_str(),
"",
541 throw std::runtime_error(
"Snapshot: could not create output file " + fFileName);
544 if (!fDirName.empty()) {
548 outputDir = fOutputFile->mkdir(fDirName.c_str(),
"",
true);
550 outputDir = fOutputFile->mkdir(fDirName.c_str());
553 fOutputTree = std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel,
outputDir);
555 if (fOptions.fAutoFlush)
556 fOutputTree->SetAutoFlush(fOptions.fAutoFlush);
561 assert(fOutputTree !=
nullptr);
562 assert(fOutputFile !=
nullptr);
566 if (fOutputTree->GetEntries() == 0) {
567 SetEmptyBranches(fInputTree, *fOutputTree);
570 fOutputTree->AutoSave(
"flushbaskets");
573 fOutputFile->Close();
576 auto fullTreeName = fDirName.empty() ? fTreeName : fDirName +
'/' + fTreeName;
577 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::Internal::RDF::RTTreeDS>(
fullTreeName, fFileName));
594 const std::string
finalName = *
reinterpret_cast<const std::string *
>(
newName);
599 for (
const auto &
bd : fBranchData) {
600 if (
bd.fInputBranchName.empty())
624 const std::vector<const std::type_info *> &
colTypeIDs)
626 fOutputFiles(fNSlots),
627 fOutputTrees(fNSlots),
628 fBranchAddressesNeedReset(fNSlots, 1),
629 fInputTrees(fNSlots),
645 for (
unsigned int i = 0; i <
vbnames.size(); ++i) {
659 if (!fTreeName.empty() && fOptions.fLazy && !fOutputFiles.empty() &&
660 std::all_of(fOutputFiles.begin(), fOutputFiles.end(), [](
const auto &
f) { return !f; }) ) {
664 return checkupdate ==
"update" ?
"updated" :
"created";
667 "A lazy Snapshot action was booked but never triggered. The tree '%s' in output file '%s' was not %s. "
668 "In case it was desired instead, remember to trigger the Snapshot operation, by storing "
669 "its result in a variable and for example calling the GetValue() method on it.",
677 if (!fOutputFiles[
slot]) {
679 fOutputFiles[
slot] = fMerger->GetFile();
682 if (!fDirName.empty()) {
689 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel,
treeDirectory);
692 fOutputTrees[
slot]->SetImplicitMT(
false);
693 if (fOptions.fAutoFlush)
694 fOutputTrees[
slot]->SetAutoFlush(fOptions.fAutoFlush);
697 fInputTrees[
slot] =
r->GetTree();
701 fBranchAddressesNeedReset[
slot] = 1;
706 if (fOutputTrees[
slot]->GetEntries() > 0)
707 fOutputFiles[
slot]->Write();
711 fOutputTrees[
slot].reset(
nullptr);
716 if (fBranchAddressesNeedReset[
slot] == 0) {
717 UpdateCArraysPtrs(
slot, values);
719 SetBranches(
slot, values);
720 fBranchAddressesNeedReset[
slot] = 0;
722 fOutputTrees[
slot]->Fill();
723 auto entries = fOutputTrees[
slot]->GetEntries();
726 fOutputFiles[
slot]->Write();
730 const std::vector<void *> &values)
736 assert(values.size() == fBranchData[
slot].size());
754 const std::vector<void *> &values)
759 for (std::size_t i = 0; i <
branchData.size(); i++) {
770 for (std::size_t i = 0; i <
branchData.size(); i++) {
779 std::unique_ptr<TFile>{
TFile::Open(fFileName.c_str(), fOptions.fMode.c_str(), fFileName.c_str(),
cs)};
781 throw std::runtime_error(
"Snapshot: could not create output file " + fFileName);
783 fMerger = std::make_unique<ROOT::TBufferMerger>(std::move(
outFile));
789 for (
auto &file : fOutputFiles) {
798 auto fullTreeName = fDirName.empty() ? fTreeName : fDirName +
'/' + fTreeName;
799 assert(fOutputFile &&
"Missing output file in Snapshot finalization.");
804 if (!fDirName.empty()) {
805 treeDirectory = fOutputFile->mkdir(fDirName.c_str(),
"",
true);
811 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel,
treeDirectory);
817 fOutputFile->Write();
821 fOutputFiles.clear();
825 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::Internal::RDF::RTTreeDS>(
fullTreeName, fFileName));
842 const std::string
finalName = *
reinterpret_cast<const std::string *
>(
newName);
847 for (
const auto &
bd : fBranchData.front()) {
848 if (
bd.fInputBranchName.empty())
873 const std::vector<const std::type_info *> &
colTypeIDs)
898 if (!fNTupleName.empty() && !fOutputFile && fOptions.fLazy)
899 Warning(
"Snapshot",
"A lazy Snapshot action was booked but never triggered.");
905 auto nFields = fOutputFieldNames.size();
913 fInputFieldNames[i], fOptions.fVector2RVec)
916 fFieldTokens[i] = model->GetToken(fOutputFieldNames[i]);
921 writeOptions.SetCompression(fOptions.fCompressionAlgorithm, fOptions.fCompressionLevel);
923 fOutputFile.reset(
TFile::Open(fFileName.c_str(), fOptions.fMode.c_str()));
925 throw std::runtime_error(
"Snapshot: could not create output file " + fFileName);
928 if (!fDirName.empty()) {
932 outputDir = fOutputFile->mkdir(fDirName.c_str(),
"",
true);
934 outputDir = fOutputFile->mkdir(fDirName.c_str());
944 if (!fFillContexts[
slot]) {
945 fFillContexts[
slot] = fWriter->CreateFillContext();
946 fEntries[
slot] = fFillContexts[
slot]->GetModel().CreateBareEntry();
954 assert(values.size() == fFieldTokens.size());
955 for (
decltype(values.size()) i = 0; i < values.size(); i++) {
956 outputEntry->BindRawPtr(fFieldTokens[i], values[i]);
965 fFillContexts[
slot]->FlushCluster();
972 fFillContexts.clear();
976 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::RDF::RNTupleDS>(fDirName +
"/" + fNTupleName, fFileName));
993 const std::string
finalName = *
reinterpret_cast<const std::string *
>(
newName);
995 fNSlots,
finalName, fDirName, fNTupleName, fInputFieldNames,
996 fOutputFieldNames, fOptions, fInputLoopManager, fOutputLoopManager, fInputColumnTypeIDs};
1032 (std::string{
"R_rdf_branchToBitmaskMapping_"} +
fTree->GetName()).c_str());
1036 fTree->AutoSave(
"flushbaskets");
1039 std::string tree =
fTree->GetName();
1042 std::string file =
fFile->GetName();
1065 throw std::logic_error(
"Branch " + branchName +
1066 " is being registered with different variation index than the expected one: " +
1080 std::string{
"R_rdf_mask_"} +
fTree->GetName() +
'_' + std::to_string(
fBitMasks.size());
1093 mask.bitset.reset();
1114 throw std::runtime_error(
"The TTree associated to the Snapshot action doesn't exist, any more.");
1117 *
mask.branchBuffer =
mask.bitset.to_ullong();
1130 const std::vector<const std::type_info *> &
colTypeIDs)
1140 throw std::runtime_error(std::string{
"Snapshot: could not create output file "} + std::string{
filename});
1163 for (
unsigned int i = 0; i <
vbnames.size(); ++i) {
1210 fInputTree =
treeDS->GetTree();
1213 for (std::size_t i = 0; i < fBranchData.size(); i++) {
1214 SetBranchesHelper(fInputTree, *fOutputHandle->fTree, fBranchData, i, fOptions.fBasketSize,
1215 fBranchData[i].EmptyInstance(
false));
1227 assert(fBranchData.size() == values.size());
1228 for (std::size_t i = 0; i < values.size(); i++) {
1232 SetBranchesHelper(fInputTree, *fOutputHandle->fTree, fBranchData, i, fOptions.fBasketSize, values[i]);
1235 const bool fundamentalType = fBranchData[i].WriteValueIfFundamental(values[i]);
1237 SetBranchesHelper(fInputTree, *fOutputHandle->fTree, fBranchData, i, fOptions.fBasketSize, values[i]);
1243 assert(!fOutputHandle->MaskEmpty());
1245 fOutputHandle->Write();
1246 fOutputHandle->ClearMaskBits();
1254 fOutputHandle.reset();
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t mask
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char filename
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
static TBranch * SearchForBranch(TTree *tree, const char *name)
The head node of a RDF computation graph.
void SetDataSource(std::unique_ptr< ROOT::RDF::RDataSource > dataSource)
std::shared_ptr< SnapshotOutputWriter > fOutputHandle
RSnapshotOptions fOptions
SnapshotHelperWithVariations(std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &, const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector< bool > &&, ROOT::Detail::RDF::RLoopManager *outputLoopMgr, ROOT::Detail::RDF::RLoopManager *inputLoopMgr, const std::vector< const std::type_info * > &colTypeIDs)
void InitTask(TTreeReader *, unsigned int slot)
Bind all output branches to RDF columns for the given slots.
std::vector< RBranchData > fBranchData
ROOT::Detail::RDF::RLoopManager * fOutputLoopManager
void Exec(unsigned int, const std::vector< void * > &values, std::vector< bool > const &filterPassed)
Connect all output fields to the values pointed to by values, fill the output dataset,...
void RegisterVariedColumn(unsigned int slot, unsigned int columnIndex, unsigned int originalColumnIndex, unsigned int varationIndex, std::string const &variationName)
Register a new column as a variation of the column at originalColumnIndex, and clone its properties.
void FinalizeTask(unsigned int slot)
RSnapshotOptions fOptions
UntypedSnapshotRNTupleHelper(unsigned int nSlots, std::string_view filename, std::string_view dirname, std::string_view ntuplename, const ColumnNames_t &vfnames, const ColumnNames_t &fnames, const RSnapshotOptions &options, ROOT::Detail::RDF::RLoopManager *inputLM, ROOT::Detail::RDF::RLoopManager *outputLM, const std::vector< const std::type_info * > &colTypeIDs)
void Exec(unsigned int slot, const std::vector< void * > &values)
UntypedSnapshotRNTupleHelper MakeNew(void *newName)
Create a new UntypedSnapshotRNTupleHelper with a different output file name.
void InitTask(TTreeReader *, unsigned int slot)
UntypedSnapshotTTreeHelperMT(unsigned int nSlots, std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector< bool > &&isDefine, ROOT::Detail::RDF::RLoopManager *loopManager, ROOT::Detail::RDF::RLoopManager *inputLM, const std::vector< const std::type_info * > &colTypeIDs)
void UpdateCArraysPtrs(unsigned int slot, const std::vector< void * > &values)
void SetEmptyBranches(TTree *inputTree, TTree &outputTree)
std::vector< std::vector< RBranchData > > fBranchData
UntypedSnapshotTTreeHelperMT MakeNew(void *newName, std::string_view="nominal")
Create a new UntypedSnapshotTTreeHelperMT with a different output file name.
RSnapshotOptions fOptions
void InitTask(TTreeReader *r, unsigned int slot)
void FinalizeTask(unsigned int slot)
void Exec(unsigned int slot, const std::vector< void * > &values)
void SetBranches(unsigned int slot, const std::vector< void * > &values)
RSnapshotOptions fOptions
std::vector< RBranchData > fBranchData
void InitTask(TTreeReader *, unsigned int)
UntypedSnapshotTTreeHelper MakeNew(void *newName, std::string_view="nominal")
Create a new UntypedSnapshotTTreeHelper with a different output file name.
void SetEmptyBranches(TTree *inputTree, TTree &outputTree)
void SetBranches(const std::vector< void * > &values)
void Exec(unsigned int, const std::vector< void * > &values)
UntypedSnapshotTTreeHelper(std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector< bool > &&isDefine, ROOT::Detail::RDF::RLoopManager *loopManager, ROOT::Detail::RDF::RLoopManager *inputLM, const std::vector< const std::type_info * > &colTypeIDs)
void UpdateCArraysPtrs(const std::vector< void * > &values)
static RResult< std::unique_ptr< RFieldBase > > Create(const std::string &fieldName, const std::string &typeName, const ROOT::RCreateFieldOptions &options, const ROOT::RNTupleDescriptor *desc, ROOT::DescriptorId_t fieldId)
Factory method to resurrect a field from the stored on-disk type information.
static std::unique_ptr< RNTupleModel > CreateBare()
Creates a "bare model", i.e. an RNTupleModel with no default entry.
static std::unique_ptr< RNTupleParallelWriter > Append(std::unique_ptr< ROOT::RNTupleModel > model, std::string_view ntupleName, TDirectory &fileOrDirectory, const ROOT::RNTupleWriteOptions &options=ROOT::RNTupleWriteOptions())
Append an RNTuple to the existing file.
Common user-tunable settings for storing RNTuples.
Representation of an RNTuple data set in a ROOT file.
const_iterator begin() const
const_iterator end() const
A "std::vector"-like collection of values implementing handy operation to analyse them.
A Branch for the case of an object.
A TTree is a list of TBranches.
TClassRef is used to implement a permanent reference to a TClass object.
TClass instances represent classes, structs and namespaces in the ROOT type system.
Basic data type descriptor (datatype information is obtained from CINT).
static TDictionary * GetDictionary(const char *name)
Retrieve the type (class, fundamental type, typedef etc) named "name".
TDirectory::TContext keeps track and restore the current directory.
Describe directory structure in memory.
A ROOT file is an on-disk file, usually with extension .root, that stores objects in a file-system-like logical structure, possibly including subdirectory hierarchies.
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
Mother of all ROOT objects.
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
A TTree represents a columnar dataset.
@ kEntriesReshuffled
If set, signals that this TTree is the output of the processing of another TTree, and the entries are reshuffled with respect to the original TTree.
std::vector< std::string > ReplaceDotWithUnderscore(const std::vector< std::string > &columnNames)
Replace occurrences of '.' with '_' in each string passed as argument.
char TypeName2ROOTTypeName(const std::string &b)
Convert type name (e.g. "Float_t") to ROOT type code (e.g. 'F'); see the TBranch documentation.
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure...
std::string GetTypeNameWithOpts(const ROOT::RDF::RDataSource &ds, std::string_view colName, bool vector2RVec)
char TypeID2ROOTTypeName(const std::type_info &tid)
TBranch * CallBranchImp(TTree &tree, const char *branchname, TClass *ptrClass, void *addobj, Int_t bufsize=32000, Int_t splitlevel=99)
TBranch * CallBranchImpRef(TTree &tree, const char *branchname, TClass *ptrClass, EDataType datatype, void *addobj, Int_t bufsize=32000, Int_t splitlevel=99)
std::vector< std::string > ColumnNames_t
int CompressionSettings(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
ROOT::ESTLType STLKind(std::string_view type)
Converts STL container name to number.
ROOT::ESTLType IsSTLCont(std::string_view type)
type : type name: vector<list<classA,allocator>,allocator> result: 0 : not stl container code of cont...
Stores empty instances of classes, so a dummy object can be written when a systematic variation doesn't pass a selection.
Stores variations of a fundamental type.
Stores properties of each output branch in a Snapshot.
void * EmptyInstance(bool pointerToPointer)
Return a pointer to an empty instance of the type represented by this branch.
void ClearBranchContents()
Point the branch address to an empty instance of the type represented by this branch or write null by...
std::variant< FundamentalType, EmptyDynamicType > fTypeData
const std::type_info * fInputTypeID
std::unique_ptr< uint64_t > branchBuffer
An object to store an output file and a tree in one common place to share them between instances of SnapshotHelperWithVariations.
void Write() const
Write the current event and the bitmask to the output dataset.
std::unique_ptr< TTree > fTree
void ClearMaskBits()
Clear all bits, as if none of the variations passed its filter.
SnapshotOutputWriter(SnapshotOutputWriter const &)=delete
RLoopManager * fOutputLoopManager
std::string fDirectoryName
std::unordered_map< std::string, std::pair< std::string, unsigned int > > fBranchToBitmaskMapping
std::unique_ptr< TFile > fFile
void RegisterBranch(std::string const &branchName, unsigned int variationIndex)
Register a branch and corresponding systematic uncertainty.
SnapshotOutputWriter(TFile *file)
void SetMaskBit(unsigned int index)
Set a bit signalling that the variation at index passed its filter.
bool MaskEmpty() const
Test if any of the mask bits are set.
SnapshotOutputWriter & operator=(SnapshotOutputWriter const &)=delete
std::unordered_map< std::string, unsigned int > fBranchToVariationMapping
SnapshotOutputWriter(SnapshotOutputWriter &&) noexcept=delete
std::vector< Bitmask > fBitMasks
Tag to let data sources use the native data type when creating a column reader.
A collection of options to steer the creation of the dataset on file.
int fAutoFlush
AutoFlush value for output tree.
std::string fMode
Mode of creation of output file.
ECAlgo fCompressionAlgorithm
Compression algorithm of output file.
int fSplitLevel
Split level of output tree.
int fCompressionLevel
Compression level of output file.