45static_assert(std::is_nothrow_move_assignable_v<RBranchData>);
46static_assert(std::is_nothrow_move_constructible_v<RBranchData>);
54 if (
branchData.fOutputBranch->GetAddress() ==
nullptr)
65 std::string
msg =
"RDataFrame::Snapshot:";
68 " is needed as it provides the size for one or more branches containing dynamically sized arrays, but "
74 msg.resize(
msg.size() - 2);
75 msg +=
" are needed as they provide the size of other branches containing dynamically sized arrays, but they are";
77 msg +=
" not part of the set of branches that are being written out.";
78 throw std::runtime_error(
msg);
107 const auto bname =
leaf->GetName();
140 const auto btype =
leaf->GetTypeName();
144 "RDataFrame::Snapshot: could not correctly construct a leaflist for C-style array in column %s. The "
145 "leaf is of type '%s'. This column will not be written out.",
203 "RDataFrame::Snapshot: could not correctly construct a leaflist for fundamental type in column %s. This "
204 "column will not be written out.",
205 bd.fOutputBranchName.c_str());
216 const std::string &fileName)
226 throw std::invalid_argument(
"Snapshot: cannot open file \"" + fileName +
"\" in update mode");
233 if (
opts.fOverwriteIfExists) {
243 const std::string
msg =
"Snapshot: object \"" +
objName +
"\" already present in file \"" + fileName +
244 "\". If you want to delete the original object and write another, please set the "
245 "'fOverwriteIfExists' option to true in RSnapshotOptions.";
246 throw std::invalid_argument(
msg);
316 throw std::logic_error(
317 "RDataFrame::Snapshot: something went wrong when creating a TTree branch, please report this as a bug.");
331 }
else if (options.
fOutputFormat == OutputFormat::kRNTuple) {
338 throw std::invalid_argument(
"RDataFrame::Snapshot: unrecognized output format");
344 const std::type_info *
typeID)
369 auto &
dynamic = std::get<EmptyDynamicType>(fTypeData);
377 dynamic.fEmptyInstance = std::shared_ptr<void>{tclass->New(), tclass->GetDestructor()};
383 return &
dynamic.fRawPtrToEmptyInstance;
385 return dynamic.fEmptyInstance.get();
402 fOutputBranch->SetAddress(EmptyInstance(
true));
410 const std::vector<const std::type_info *> &
colTypeIDs)
422 for (
unsigned int i = 0; i <
vbnames.size(); ++i) {
435 if (!fTreeName.empty() && !fOutputFile && fOptions.fLazy) {
439 return checkupdate ==
"update" ?
"updated" :
"created";
442 "A lazy Snapshot action was booked but never triggered. The tree '%s' in output file '%s' was not %s. "
443 "In case it was desired instead, remember to trigger the Snapshot operation, by storing "
444 "its result in a variable and for example calling the GetValue() method on it.",
454 fInputTree =
treeDS->GetTree();
455 fBranchAddressesNeedReset =
true;
460 if (!fBranchAddressesNeedReset) {
461 UpdateCArraysPtrs(values);
464 fBranchAddressesNeedReset =
false;
476 assert(values.size() == fBranchData.size());
479 if (fBranchData[i].fIsCArray) {
483 if (
auto *
data =
rawRVec->data(); fBranchData[i].fBranchAddressForCArrays !=
data) {
484 fBranchData[i].fOutputBranch->SetAddress(
data);
485 fBranchData[i].fBranchAddressForCArrays =
data;
494 assert(fBranchData.size() == values.size());
495 for (std::size_t i = 0; i < fBranchData.size(); i++) {
496 SetBranchesHelper(fInputTree, *fOutputTree, fBranchData, i, fOptions.fBasketSize, values[i]);
504 for (std::size_t i = 0; i < fBranchData.size(); i++) {
514 throw std::runtime_error(
"Snapshot: could not create output file " + fFileName);
517 if (!fDirName.empty()) {
521 outputDir = fOutputFile->mkdir(fDirName.c_str(),
"",
true);
523 outputDir = fOutputFile->mkdir(fDirName.c_str());
526 fOutputTree = std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel,
outputDir);
528 if (fOptions.fAutoFlush)
529 fOutputTree->SetAutoFlush(fOptions.fAutoFlush);
534 assert(fOutputTree !=
nullptr);
535 assert(fOutputFile !=
nullptr);
539 if (fOutputTree->GetEntries() == 0) {
540 SetEmptyBranches(fInputTree, *fOutputTree);
543 fOutputTree->AutoSave(
"flushbaskets");
546 fOutputFile->Close();
549 auto fullTreeName = fDirName.empty() ? fTreeName : fDirName +
'/' + fTreeName;
550 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::Internal::RDF::RTTreeDS>(
fullTreeName, fFileName));
567 const std::string
finalName = *
reinterpret_cast<const std::string *
>(
newName);
572 for (
const auto &
bd : fBranchData) {
573 if (
bd.fInputBranchName.empty())
597 const std::vector<const std::type_info *> &
colTypeIDs)
599 fOutputFiles(fNSlots),
600 fOutputTrees(fNSlots),
601 fBranchAddressesNeedReset(fNSlots, 1),
602 fInputTrees(fNSlots),
618 for (
unsigned int i = 0; i <
vbnames.size(); ++i) {
632 if (!fTreeName.empty() && fOptions.fLazy && !fOutputFiles.empty() &&
633 std::all_of(fOutputFiles.begin(), fOutputFiles.end(), [](
const auto &
f) { return !f; }) ) {
637 return checkupdate ==
"update" ?
"updated" :
"created";
640 "A lazy Snapshot action was booked but never triggered. The tree '%s' in output file '%s' was not %s. "
641 "In case it was desired instead, remember to trigger the Snapshot operation, by storing "
642 "its result in a variable and for example calling the GetValue() method on it.",
650 if (!fOutputFiles[
slot]) {
652 fOutputFiles[
slot] = fMerger->GetFile();
655 if (!fDirName.empty()) {
662 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel,
treeDirectory);
665 fOutputTrees[
slot]->SetImplicitMT(
false);
666 if (fOptions.fAutoFlush)
667 fOutputTrees[
slot]->SetAutoFlush(fOptions.fAutoFlush);
670 fInputTrees[
slot] =
r->GetTree();
674 fBranchAddressesNeedReset[
slot] = 1;
679 if (fOutputTrees[
slot]->GetEntries() > 0)
680 fOutputFiles[
slot]->Write();
684 fOutputTrees[
slot].reset(
nullptr);
689 if (fBranchAddressesNeedReset[
slot] == 0) {
690 UpdateCArraysPtrs(
slot, values);
692 SetBranches(
slot, values);
693 fBranchAddressesNeedReset[
slot] = 0;
695 fOutputTrees[
slot]->Fill();
696 auto entries = fOutputTrees[
slot]->GetEntries();
699 fOutputFiles[
slot]->Write();
703 const std::vector<void *> &values)
709 assert(values.size() == fBranchData[
slot].size());
727 const std::vector<void *> &values)
732 for (std::size_t i = 0; i <
branchData.size(); i++) {
743 for (std::size_t i = 0; i <
branchData.size(); i++) {
751 std::unique_ptr<TFile>{
TFile::Open(fFileName.c_str(), fOptions.fMode.c_str(), fFileName.c_str(),
754 throw std::runtime_error(
"Snapshot: could not create output file " + fFileName);
756 fMerger = std::make_unique<ROOT::TBufferMerger>(std::move(
outFile));
762 for (
auto &file : fOutputFiles) {
771 auto fullTreeName = fDirName.empty() ? fTreeName : fDirName +
'/' + fTreeName;
772 assert(fOutputFile &&
"Missing output file in Snapshot finalization.");
778 if (!fDirName.empty()) {
779 treeDirectory = fOutputFile->mkdir(fDirName.c_str(),
"",
true);
785 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel,
treeDirectory);
791 fOutputFile->Write();
795 fOutputFiles.clear();
799 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::Internal::RDF::RTTreeDS>(
fullTreeName, fFileName));
816 const std::string
finalName = *
reinterpret_cast<const std::string *
>(
newName);
821 for (
const auto &
bd : fBranchData.front()) {
822 if (
bd.fInputBranchName.empty())
847 const std::vector<const std::type_info *> &
colTypeIDs)
872 if (!fNTupleName.empty() && !fOutputFile && fOptions.fLazy)
873 Warning(
"Snapshot",
"A lazy Snapshot action was booked but never triggered.");
879 auto nFields = fOutputFieldNames.size();
887 fInputFieldNames[i], fOptions.fVector2RVec)
891 if (typeName.substr(0, 25) ==
"ROOT::RNTupleCardinality<") {
893 std::string
cardinalityType = typeName.substr(25, typeName.size() - 26);
895 "Column \"%s\" is a read-only \"%s\" column. It will be snapshot as its inner type \"%s\" instead.",
896 fInputFieldNames[i].c_str(), typeName.c_str(),
cardinalityType.c_str());
901 fFieldTokens[i] = model->GetToken(fOutputFieldNames[i]);
907 writeOptions.SetInitialUnzippedPageSize(fOptions.fInitialUnzippedPageSize);
908 writeOptions.SetMaxUnzippedPageSize(fOptions.fMaxUnzippedPageSize);
909 writeOptions.SetApproxZippedClusterSize(fOptions.fApproxZippedClusterSize);
910 writeOptions.SetMaxUnzippedClusterSize(fOptions.fMaxUnzippedClusterSize);
911 writeOptions.SetEnablePageChecksums(fOptions.fEnablePageChecksums);
912 writeOptions.SetEnableSamePageMerging(fOptions.fEnableSamePageMerging);
914 fOutputFile.reset(
TFile::Open(fFileName.c_str(), fOptions.fMode.c_str()));
916 throw std::runtime_error(
"Snapshot: could not create output file " + fFileName);
919 if (!fDirName.empty()) {
923 outputDir = fOutputFile->mkdir(fDirName.c_str(),
"",
true);
925 outputDir = fOutputFile->mkdir(fDirName.c_str());
935 if (!fFillContexts[
slot]) {
936 fFillContexts[
slot] = fWriter->CreateFillContext();
937 fEntries[
slot] = fFillContexts[
slot]->GetModel().CreateBareEntry();
945 assert(values.size() == fFieldTokens.size());
946 for (
decltype(values.size()) i = 0; i < values.size(); i++) {
947 outputEntry->BindRawPtr(fFieldTokens[i], values[i]);
956 fFillContexts[
slot]->FlushCluster();
963 fFillContexts.clear();
967 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::RDF::RNTupleDS>(fDirName +
"/" + fNTupleName, fFileName));
984 const std::string
finalName = *
reinterpret_cast<const std::string *
>(
newName);
986 fNSlots,
finalName, fDirName, fNTupleName, fInputFieldNames,
987 fOutputFieldNames, fOptions, fInputLoopManager, fOutputLoopManager, fInputColumnTypeIDs};
1023 (std::string{
"R_rdf_column_to_bitmask_mapping_"} +
fTree->GetName()).c_str());
1027 fTree->AutoSave(
"flushbaskets");
1030 std::string tree =
fTree->GetName();
1033 std::string file =
fFile->GetName();
1056 throw std::logic_error(
"Branch " + branchName +
1057 " is being registered with different variation index than the expected one: " +
1071 std::string{
"R_rdf_mask_"} +
fTree->GetName() +
'_' + std::to_string(
fBitMasks.size());
1084 mask.bitset.reset();
1105 throw std::runtime_error(
"The TTree associated to the Snapshot action doesn't exist, any more.");
1108 *
mask.branchBuffer =
mask.bitset.to_ullong();
1121 const std::vector<const std::type_info *> &
colTypeIDs)
1130 throw std::runtime_error(std::string{
"Snapshot: could not create output file "} + std::string{
filename});
1153 for (
unsigned int i = 0; i <
vbnames.size(); ++i) {
1200 fInputTree =
treeDS->GetTree();
1203 for (std::size_t i = 0; i < fBranchData.size(); i++) {
1204 SetBranchesHelper(fInputTree, *fOutputHandle->fTree, fBranchData, i, fOptions.fBasketSize,
1205 fBranchData[i].EmptyInstance(
false));
1217 assert(fBranchData.size() == values.size());
1218 for (std::size_t i = 0; i < values.size(); i++) {
1222 SetBranchesHelper(fInputTree, *fOutputHandle->fTree, fBranchData, i, fOptions.fBasketSize, values[i]);
1225 const bool fundamentalType = fBranchData[i].WriteValueIfFundamental(values[i]);
1227 SetBranchesHelper(fInputTree, *fOutputHandle->fTree, fBranchData, i, fOptions.fBasketSize, values[i]);
1233 assert(!fOutputHandle->MaskEmpty());
1235 fOutputHandle->Write();
1236 fOutputHandle->ClearMaskBits();
1244 fOutputHandle.reset();
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows typed iteration through a TCollection.
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t mask
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char filename
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
static TBranch * SearchForBranch(TTree *tree, const char *name)
The head node of a RDF computation graph.
void SetDataSource(std::unique_ptr< ROOT::RDF::RDataSource > dataSource)
std::shared_ptr< SnapshotOutputWriter > fOutputHandle
RSnapshotOptions fOptions
SnapshotHelperWithVariations(std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &, const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector< bool > &&, ROOT::Detail::RDF::RLoopManager *outputLoopMgr, ROOT::Detail::RDF::RLoopManager *inputLoopMgr, const std::vector< const std::type_info * > &colTypeIDs)
void InitTask(TTreeReader *, unsigned int slot)
Bind all output branches to RDF columns for the given slots.
std::vector< RBranchData > fBranchData
ROOT::Detail::RDF::RLoopManager * fOutputLoopManager
void Exec(unsigned int, const std::vector< void * > &values, std::vector< bool > const &filterPassed)
Connect all output fields to the values pointed to by values, fill the output dataset,...
void RegisterVariedColumn(unsigned int slot, unsigned int columnIndex, unsigned int originalColumnIndex, unsigned int varationIndex, std::string const &variationName)
Register a new column as a variation of the column at originalColumnIndex, and clone its properties.
void FinalizeTask(unsigned int slot)
RSnapshotOptions fOptions
UntypedSnapshotRNTupleHelper(unsigned int nSlots, std::string_view filename, std::string_view dirname, std::string_view ntuplename, const ColumnNames_t &vfnames, const ColumnNames_t &fnames, const RSnapshotOptions &options, ROOT::Detail::RDF::RLoopManager *inputLM, ROOT::Detail::RDF::RLoopManager *outputLM, const std::vector< const std::type_info * > &colTypeIDs)
void Exec(unsigned int slot, const std::vector< void * > &values)
UntypedSnapshotRNTupleHelper MakeNew(void *newName)
Create a new UntypedSnapshotRNTupleHelper with a different output file name.
void InitTask(TTreeReader *, unsigned int slot)
UntypedSnapshotTTreeHelperMT(unsigned int nSlots, std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector< bool > &&isDefine, ROOT::Detail::RDF::RLoopManager *loopManager, ROOT::Detail::RDF::RLoopManager *inputLM, const std::vector< const std::type_info * > &colTypeIDs)
void UpdateCArraysPtrs(unsigned int slot, const std::vector< void * > &values)
void SetEmptyBranches(TTree *inputTree, TTree &outputTree)
std::vector< std::vector< RBranchData > > fBranchData
UntypedSnapshotTTreeHelperMT MakeNew(void *newName, std::string_view="nominal")
Create a new UntypedSnapshotTTreeHelperMT with a different output file name.
RSnapshotOptions fOptions
void InitTask(TTreeReader *r, unsigned int slot)
void FinalizeTask(unsigned int slot)
void Exec(unsigned int slot, const std::vector< void * > &values)
void SetBranches(unsigned int slot, const std::vector< void * > &values)
RSnapshotOptions fOptions
std::vector< RBranchData > fBranchData
void InitTask(TTreeReader *, unsigned int)
UntypedSnapshotTTreeHelper MakeNew(void *newName, std::string_view="nominal")
Create a new UntypedSnapshotTTreeHelper with a different output file name.
void SetEmptyBranches(TTree *inputTree, TTree &outputTree)
void SetBranches(const std::vector< void * > &values)
void Exec(unsigned int, const std::vector< void * > &values)
UntypedSnapshotTTreeHelper(std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector< bool > &&isDefine, ROOT::Detail::RDF::RLoopManager *loopManager, ROOT::Detail::RDF::RLoopManager *inputLM, const std::vector< const std::type_info * > &colTypeIDs)
void UpdateCArraysPtrs(const std::vector< void * > &values)
static RResult< std::unique_ptr< RFieldBase > > Create(const std::string &fieldName, const std::string &typeName, const ROOT::RCreateFieldOptions &options, const ROOT::RNTupleDescriptor *desc, ROOT::DescriptorId_t fieldId)
Factory method to resurrect a field from the stored on-disk type information.
static std::unique_ptr< RNTupleModel > CreateBare()
Creates a "bare model", i.e. an RNTupleModel with no default entry.
static std::unique_ptr< RNTupleParallelWriter > Append(std::unique_ptr< ROOT::RNTupleModel > model, std::string_view ntupleName, TDirectory &fileOrDirectory, const ROOT::RNTupleWriteOptions &options=ROOT::RNTupleWriteOptions())
Append an RNTuple to the existing file.
Common user-tunable settings for storing RNTuples.
const_iterator begin() const
const_iterator end() const
A "std::vector"-like collection of values implementing handy operation to analyse them.
A Branch for the case of an object.
A TTree is a list of TBranches.
TClassRef is used to implement a permanent reference to a TClass object.
TClass instances represent classes, structs and namespaces in the ROOT type system.
Basic data type descriptor (datatype information is obtained from CINT).
static TDictionary * GetDictionary(const char *name)
Retrieve the type (class, fundamental type, typedef etc) named "name".
TDirectory::TContext keeps track and restore the current directory.
Describe directory structure in memory.
A file, usually with extension .root, that stores data and code in the form of serialized objects in ...
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
A TTree represents a columnar dataset.
@ kEntriesReshuffled
If set, signals that this TTree is the output of the processing of another TTree, and the entries are...
std::vector< std::string > ReplaceDotWithUnderscore(const std::vector< std::string > &columnNames)
Replace occurrences of '.' with '_' in each of the given column names.
char TypeName2ROOTTypeName(const std::string &b)
Convert type name (e.g. "Float_t") to the corresponding single-character ROOT type code.
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure...
std::string GetTypeNameWithOpts(const ROOT::RDF::RDataSource &ds, std::string_view colName, bool vector2RVec)
char TypeID2ROOTTypeName(const std::type_info &tid)
TBranch * CallBranchImp(TTree &tree, const char *branchname, TClass *ptrClass, void *addobj, Int_t bufsize=32000, Int_t splitlevel=99)
TBranch * CallBranchImpRef(TTree &tree, const char *branchname, TClass *ptrClass, EDataType datatype, void *addobj, Int_t bufsize=32000, Int_t splitlevel=99)
std::vector< std::string > ColumnNames_t
int CompressionSettings(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
ROOT::ESTLType STLKind(std::string_view type)
Converts STL container name to number.
ROOT::ESTLType IsSTLCont(std::string_view type)
type : type name: vector<list<classA,allocator>,allocator> result: 0 : not stl container code of cont...
Stores empty instances of classes, so a dummy object can be written when a systematic variation doesn...
Stores variations of a fundamental type.
Stores properties of each output branch in a Snapshot.
void * EmptyInstance(bool pointerToPointer)
Return a pointer to an empty instance of the type represented by this branch.
void ClearBranchContents()
Point the branch address to an empty instance of the type represented by this branch or write null by...
std::variant< FundamentalType, EmptyDynamicType > fTypeData
const std::type_info * fInputTypeID
std::unique_ptr< uint64_t > branchBuffer
An object to store an output file and a tree in one common place to share them between instances of S...
void Write() const
Write the current event and the bitmask to the output dataset.
std::unique_ptr< TTree > fTree
void ClearMaskBits()
Clear all bits, as if none of the variations passed its filter.
SnapshotOutputWriter(SnapshotOutputWriter const &)=delete
RLoopManager * fOutputLoopManager
std::string fDirectoryName
std::unordered_map< std::string, std::pair< std::string, unsigned int > > fBranchToBitmaskMapping
std::unique_ptr< TFile > fFile
void RegisterBranch(std::string const &branchName, unsigned int variationIndex)
Register a branch and corresponding systematic uncertainty.
SnapshotOutputWriter(TFile *file)
void SetMaskBit(unsigned int index)
Set a bit signalling that the variation at index passed its filter.
bool MaskEmpty() const
Test if any of the mask bits are set.
SnapshotOutputWriter & operator=(SnapshotOutputWriter const &)=delete
std::unordered_map< std::string, unsigned int > fBranchToVariationMapping
SnapshotOutputWriter(SnapshotOutputWriter &&) noexcept=delete
std::vector< Bitmask > fBitMasks
Tag to let data sources use the native data type when creating a column reader.
EValues
Note: this is only temporarily a struct and will become an enum class, hence the naming convention used.
A collection of options to steer the creation of the dataset on disk through Snapshot().
int fAutoFlush
*(TTree only)* AutoFlush value for output tree
ESnapshotOutputFormat fOutputFormat
Which data format to write to.
std::string fMode
Mode of creation of output file.
ECAlgo fCompressionAlgorithm
Compression algorithm of output file.
int fSplitLevel
*(TTree only)* Split level of output tree
int fCompressionLevel
Compression level of output file.