64 const auto bname =
leaf->GetName();
83 const auto btype =
leaf->GetTypeName();
87 "RDataFrame::Snapshot: could not correctly construct a leaflist for C-style array in column %s. The "
88 "leaf is of type '%s'. This column will not be written out.",
145 "RDataFrame::Snapshot: could not correctly construct a leaflist for fundamental type in column %s. This "
146 "column will not be written out.",
159 const std::string &fileName)
169 throw std::invalid_argument(
"Snapshot: cannot open file \"" + fileName +
"\" in update mode");
176 if (
opts.fOverwriteIfExists) {
177 if (
outTree->InheritsFrom(
"TTree")) {
183 const std::string
msg =
"Snapshot: tree \"" +
treeName +
"\" already present in file \"" + fileName +
184 "\". If you want to delete the original tree and write another, please set "
185 "RSnapshotOptions::fOverwriteIfExists to true.";
186 throw std::invalid_argument(
msg);
194 const std::string &fileName)
204 throw std::invalid_argument(
"Snapshot: cannot open file \"" + fileName +
"\" in update mode");
209 if (
opts.fOverwriteIfExists) {
213 const std::string
msg =
"Snapshot: RNTuple \"" +
ntupleName +
"\" already present in file \"" + fileName +
214 "\". If you want to delete the original ntuple and write another, please set "
215 "the 'fOverwriteIfExists' option to true in RSnapshotOptions.";
216 throw std::invalid_argument(
msg);
227 if (
opts.fOverwriteIfExists) {
234 const std::string
msg =
"Snapshot: object \"" +
ntupleName +
"\" already present in file \"" + fileName +
235 "\". If you want to delete the original object and write a new RNTuple, please set "
236 "the 'fOverwriteIfExists' option to true in RSnapshotOptions.";
237 throw std::invalid_argument(
msg);
310 throw std::logic_error(
311 "RDataFrame::Snapshot: something went wrong when creating a TTree branch, please report this as a bug.");
325 if (
auto it = std::find(fNames.begin(), fNames.end(),
name); it != fNames.end())
326 return fIsCArray[std::distance(fNames.begin(), it)];
332 if (address ==
nullptr) {
333 throw std::logic_error(
"Trying to insert a null branch address.");
335 if (std::find(fBranches.begin(), fBranches.end(), address) != fBranches.end()) {
336 throw std::logic_error(
"Trying to insert a branch address that's already present.");
338 if (std::find(fNames.begin(), fNames.end(),
name) != fNames.end()) {
339 throw std::logic_error(
"Trying to insert a branch name that's already present.");
341 fNames.emplace_back(
name);
342 fBranches.emplace_back(address);
357 [](
TBranch *
b) { return b->GetAddress() == nullptr; });
366 std::string
msg =
"RDataFrame::Snapshot:";
369 " is needed as it provides the size for one or more branches containing dynamically sized arrays, but "
375 msg.resize(
msg.size() - 2);
376 msg +=
" are needed as they provide the size of other branches containing dynamically sized arrays, but they are";
378 msg +=
" not part of the set of branches that are being written out.";
379 throw std::runtime_error(
msg);
386 const std::vector<const std::type_info *> &
colTypeIDs)
411 if (!fTreeName.empty() && !fOutputFile && fOptions.fLazy) {
415 return checkupdate ==
"update" ?
"updated" :
"created";
418 "A lazy Snapshot action was booked but never triggered. The tree '%s' in output file '%s' was not %s. "
419 "In case it was desired instead, remember to trigger the Snapshot operation, by storing "
420 "its result in a variable and for example calling the GetValue() method on it.",
430 fInputTree =
treeDS->GetTree();
431 fBranchAddressesNeedReset =
true;
436 if (!fBranchAddressesNeedReset) {
437 UpdateCArraysPtrs(values);
440 fBranchAddressesNeedReset =
false;
452 assert(values.size() == fBranches.size());
455 if (fBranches[i] && fOutputBranches.IsCArray(fOutputBranchNames[i])) {
461 fBranches[i]->SetAddress(
data);
462 fBranchAddresses[i] =
data;
473 SetBranchesHelper(fInputTree, *fOutputTree, fOutputBranches, fOptions.fBasketSize, fInputBranchNames[i],
474 fOutputBranchNames[i], *fInputColumnTypeIDs[i], values[i], fBranches[i], fBranchAddresses[i],
477 fOutputBranches.AssertNoNullBranchAddresses();
486 auto nBranches = fInputBranchNames.size();
497 TFile::Open(fFileName.c_str(), fOptions.fMode.c_str(),
"",
500 throw std::runtime_error(
"Snapshot: could not create output file " + fFileName);
503 if (!fDirName.empty()) {
507 outputDir = fOutputFile->mkdir(fDirName.c_str(),
"",
true);
509 outputDir = fOutputFile->mkdir(fDirName.c_str());
512 fOutputTree = std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel,
outputDir);
514 if (fOptions.fAutoFlush)
515 fOutputTree->SetAutoFlush(fOptions.fAutoFlush);
520 assert(fOutputTree !=
nullptr);
521 assert(fOutputFile !=
nullptr);
525 if (fOutputTree->GetEntries() == 0) {
526 SetEmptyBranches(fInputTree, *fOutputTree);
529 fOutputTree->AutoSave(
"flushbaskets");
532 fOutputFile->Close();
535 auto fullTreeName = fDirName.empty() ? fTreeName : fDirName +
'/' + fTreeName;
536 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::Internal::RDF::RTTreeDS>(
fullTreeName, fFileName));
553 const std::string
finalName = *
reinterpret_cast<const std::string *
>(
newName);
560 std::vector<bool>(fIsDefine),
563 fInputColumnTypeIDs};
570 const std::vector<const std::type_info *> &
colTypeIDs)
572 fOutputFiles(fNSlots),
573 fOutputTrees(fNSlots),
574 fBranchAddressesNeedReset(fNSlots, 1),
575 fInputTrees(fNSlots),
577 fBranchAddresses(fNSlots, std::vector<void *>(
vbnames.
size(), nullptr)),
578 fOutputBranches(fNSlots),
601 if (!fTreeName.empty() && fOptions.fLazy && !fOutputFiles.empty() &&
602 std::all_of(fOutputFiles.begin(), fOutputFiles.end(), [](
const auto &
f) { return !f; }) ) {
606 return checkupdate ==
"update" ?
"updated" :
"created";
609 "A lazy Snapshot action was booked but never triggered. The tree '%s' in output file '%s' was not %s. "
610 "In case it was desired instead, remember to trigger the Snapshot operation, by storing "
611 "its result in a variable and for example calling the GetValue() method on it.",
619 if (!fOutputFiles[
slot]) {
621 fOutputFiles[
slot] = fMerger->GetFile();
624 if (!fDirName.empty()) {
631 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel,
treeDirectory);
634 fOutputTrees[
slot]->SetImplicitMT(
false);
635 if (fOptions.fAutoFlush)
636 fOutputTrees[
slot]->SetAutoFlush(fOptions.fAutoFlush);
639 fInputTrees[
slot] =
r->GetTree();
643 fBranchAddressesNeedReset[
slot] = 1;
648 if (fOutputTrees[
slot]->GetEntries() > 0)
649 fOutputFiles[
slot]->Write();
651 fOutputTrees[
slot].reset(
nullptr);
652 fOutputBranches[
slot].Clear();
657 if (fBranchAddressesNeedReset[
slot] == 0) {
658 UpdateCArraysPtrs(
slot, values);
660 SetBranches(
slot, values);
661 fBranchAddressesNeedReset[
slot] = 0;
663 fOutputTrees[
slot]->Fill();
664 auto entries = fOutputTrees[
slot]->GetEntries();
667 fOutputFiles[
slot]->Write();
671 const std::vector<void *> &values)
677 assert(values.size() == fBranches[
slot].size());
680 if (fBranches[
slot][i] && fOutputBranches[
slot].IsCArray(fOutputBranchNames[i])) {
686 fBranches[
slot][i]->SetAddress(
data);
694 const std::vector<void *> &values)
700 fInputBranchNames[i], fOutputBranchNames[i], *fInputColumnTypeIDs[i], values[i],
701 fBranches[
slot][i], fBranchAddresses[
slot][i], fIsDefine[i]);
703 fOutputBranches[
slot].AssertNoNullBranchAddresses();
712 auto nBranches = fInputBranchNames.size();
724 std::unique_ptr<TFile>{
TFile::Open(fFileName.c_str(), fOptions.fMode.c_str(), fFileName.c_str(),
cs)};
726 throw std::runtime_error(
"Snapshot: could not create output file " + fFileName);
728 fMerger = std::make_unique<ROOT::TBufferMerger>(std::move(
outFile));
734 for (
auto &file : fOutputFiles) {
743 auto fullTreeName = fDirName.empty() ? fTreeName : fDirName +
'/' + fTreeName;
744 assert(fOutputFile &&
"Missing output file in Snapshot finalization.");
749 if (!fDirName.empty()) {
750 treeDirectory = fOutputFile->mkdir(fDirName.c_str(),
"",
true);
756 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel,
treeDirectory);
762 fOutputFile->Write();
766 fOutputFiles.clear();
770 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::Internal::RDF::RTTreeDS>(
fullTreeName, fFileName));
787 const std::string
finalName = *
reinterpret_cast<const std::string *
>(
newName);
795 std::vector<bool>(fIsDefine),
798 fInputColumnTypeIDs};
805 const std::vector<const std::type_info *> &
colTypeIDs)
809 fOutputFile(nullptr),
816 fOutputEntry(nullptr),
831 if (!fNTupleName.empty() && !fOutputLoopManager->GetDataSource() && fOptions.fLazy)
832 Warning(
"Snapshot",
"A lazy Snapshot action was booked but never triggered.");
837 assert(values.size() == fOutputFieldNames.size());
838 for (
decltype(values.size()) i = 0; i < values.size(); i++) {
839 fOutputEntry->BindRawPtr(fOutputFieldNames[i], values[i]);
847 auto nFields = fOutputFieldNames.size();
854 fInputFieldNames[i], fOptions.fVector2RVec)
858 fOutputEntry = &model->GetDefaultEntry();
861 writeOptions.SetCompression(fOptions.fCompressionAlgorithm, fOptions.fCompressionLevel);
863 fOutputFile.reset(
TFile::Open(fFileName.c_str(), fOptions.fMode.c_str()));
865 throw std::runtime_error(
"Snapshot: could not create output file " + fFileName);
868 if (!fDirName.empty()) {
872 outputDir = fOutputFile->mkdir(fDirName.c_str(),
"",
true);
874 outputDir = fOutputFile->mkdir(fDirName.c_str());
884 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::RDF::RNTupleDS>(fDirName +
"/" + fNTupleName, fFileName));
901 const std::string
finalName = *
reinterpret_cast<const std::string *
>(
newName);
903 fInputFieldNames, fOutputFieldNames, fOptions,
904 fInputLoopManager, fOutputLoopManager, std::vector<bool>(fIsDefine),
905 fInputColumnTypeIDs};
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char filename
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
static TBranch * SearchForBranch(TTree *tree, const char *name)
The head node of a RDF computation graph.
void Insert(const std::string &name, TBranch *address, bool isCArray=false)
std::vector< TBranch * > fBranches
std::vector< std::string > fNames
void AssertNoNullBranchAddresses()
bool IsCArray(const std::string &name) const
TBranch * Get(const std::string &name) const
RSnapshotOptions fOptions
void Exec(unsigned int, const std::vector< void * > &values)
UntypedSnapshotRNTupleHelper MakeNew(void *newName)
Create a new UntypedSnapshotRNTupleHelper with a different output file name.
UntypedSnapshotRNTupleHelper(std::string_view filename, std::string_view dirname, std::string_view ntuplename, const ColumnNames_t &vfnames, const ColumnNames_t &fnames, const RSnapshotOptions &options, ROOT::Detail::RDF::RLoopManager *inputLM, ROOT::Detail::RDF::RLoopManager *outputLM, std::vector< bool > &&isDefine, const std::vector< const std::type_info * > &colTypeIDs)
UntypedSnapshotTTreeHelperMT(unsigned int nSlots, std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector< bool > &&isDefine, ROOT::Detail::RDF::RLoopManager *loopManager, ROOT::Detail::RDF::RLoopManager *inputLM, const std::vector< const std::type_info * > &colTypeIDs)
void UpdateCArraysPtrs(unsigned int slot, const std::vector< void * > &values)
void SetEmptyBranches(TTree *inputTree, TTree &outputTree)
UntypedSnapshotTTreeHelperMT MakeNew(void *newName, std::string_view="nominal")
Create a new UntypedSnapshotTTreeHelperMT with a different output file name.
RSnapshotOptions fOptions
void InitTask(TTreeReader *r, unsigned int slot)
void FinalizeTask(unsigned int slot)
void Exec(unsigned int slot, const std::vector< void * > &values)
void SetBranches(unsigned int slot, const std::vector< void * > &values)
RSnapshotOptions fOptions
void InitTask(TTreeReader *, unsigned int)
UntypedSnapshotTTreeHelper MakeNew(void *newName, std::string_view="nominal")
Create a new UntypedSnapshotTTreeHelper with a different output file name.
void SetEmptyBranches(TTree *inputTree, TTree &outputTree)
void SetBranches(const std::vector< void * > &values)
void Exec(unsigned int, const std::vector< void * > &values)
UntypedSnapshotTTreeHelper(std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector< bool > &&isDefine, ROOT::Detail::RDF::RLoopManager *loopManager, ROOT::Detail::RDF::RLoopManager *inputLM, const std::vector< const std::type_info * > &colTypeIDs)
void UpdateCArraysPtrs(const std::vector< void * > &values)
static RResult< std::unique_ptr< RFieldBase > > Create(const std::string &fieldName, const std::string &typeName, const ROOT::RCreateFieldOptions &options, const ROOT::RNTupleDescriptor *desc, ROOT::DescriptorId_t fieldId)
Factory method to resurrect a field from the stored on-disk type information.
static std::unique_ptr< RNTupleModel > Create()
Common user-tunable settings for storing RNTuples.
static std::unique_ptr< RNTupleWriter > Append(std::unique_ptr< ROOT::RNTupleModel > model, std::string_view ntupleName, TDirectory &fileOrDirectory, const ROOT::RNTupleWriteOptions &options=ROOT::RNTupleWriteOptions())
Creates an RNTupleWriter that writes into an existing TFile or TDirectory, without overwriting its co...
Representation of an RNTuple data set in a ROOT file.
const_iterator begin() const
const_iterator end() const
A "std::vector"-like collection of values implementing handy operation to analyse them.
A Branch for the case of an object.
A TTree is a list of TBranches.
TClassRef is used to implement a permanent reference to a TClass object.
TClass instances represent classes, structs and namespaces in the ROOT type system.
Basic data type descriptor (datatype information is obtained from CINT).
static TDictionary * GetDictionary(const char *name)
Retrieve the type (class, fundamental type, typedef etc) named "name".
TDirectory::TContext keeps track and restore the current directory.
Describe directory structure in memory.
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
Mother of all ROOT objects.
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
A TTree represents a columnar dataset.
@ kEntriesReshuffled
If set, signals that this TTree is the output of the processing of another TTree, and the entries are...
std::vector< std::string > ReplaceDotWithUnderscore(const std::vector< std::string > &columnNames)
Replace occurrences of '.
char TypeName2ROOTTypeName(const std::string &b)
Convert type name (e.g.
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info An empty string is returned in case of failure...
std::string GetTypeNameWithOpts(const ROOT::RDF::RDataSource &ds, std::string_view colName, bool vector2RVec)
char TypeID2ROOTTypeName(const std::type_info &tid)
TBranch * CallBranchImp(TTree &tree, const char *branchname, TClass *ptrClass, void *addobj, Int_t bufsize=32000, Int_t splitlevel=99)
TBranch * CallBranchImpRef(TTree &tree, const char *branchname, TClass *ptrClass, EDataType datatype, void *addobj, Int_t bufsize=32000, Int_t splitlevel=99)
std::vector< std::string > ColumnNames_t
Namespace for new ROOT classes and functions.
int CompressionSettings(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
ROOT::ESTLType STLKind(std::string_view type)
Converts STL container name to number.
ROOT::ESTLType IsSTLCont(std::string_view type)
type : type name: vector<list<classA,allocator>,allocator> result: 0 : not stl container code of cont...
Tag to let data sources use the native data type when creating a column reader.
A collection of options to steer the creation of the dataset on file.