66 const auto bname =
leaf->GetName();
85 const auto btype =
leaf->GetTypeName();
89 "RDataFrame::Snapshot: could not correctly construct a leaflist for C-style array in column %s. The "
90 "leaf is of type '%s'. This column will not be written out.",
147 "RDataFrame::Snapshot: could not correctly construct a leaflist for fundamental type in column %s. This "
148 "column will not be written out.",
161 const std::string &fileName)
171 throw std::invalid_argument(
"Snapshot: cannot open file \"" + fileName +
"\" in update mode");
178 if (
opts.fOverwriteIfExists) {
179 if (
outTree->InheritsFrom(
"TTree")) {
185 const std::string
msg =
"Snapshot: tree \"" +
treeName +
"\" already present in file \"" + fileName +
186 "\". If you want to delete the original tree and write another, please set "
187 "RSnapshotOptions::fOverwriteIfExists to true.";
188 throw std::invalid_argument(
msg);
196 const std::string &fileName)
206 throw std::invalid_argument(
"Snapshot: cannot open file \"" + fileName +
"\" in update mode");
211 if (
opts.fOverwriteIfExists) {
215 const std::string
msg =
"Snapshot: RNTuple \"" +
ntupleName +
"\" already present in file \"" + fileName +
216 "\". If you want to delete the original ntuple and write another, please set "
217 "the 'fOverwriteIfExists' option to true in RSnapshotOptions.";
218 throw std::invalid_argument(
msg);
229 if (
opts.fOverwriteIfExists) {
236 const std::string
msg =
"Snapshot: object \"" +
ntupleName +
"\" already present in file \"" + fileName +
237 "\". If you want to delete the original object and write a new RNTuple, please set "
238 "the 'fOverwriteIfExists' option to true in RSnapshotOptions.";
239 throw std::invalid_argument(
msg);
312 throw std::logic_error(
313 "RDataFrame::Snapshot: something went wrong when creating a TTree branch, please report this as a bug.");
327 if (
auto it = std::find(fNames.begin(), fNames.end(),
name); it != fNames.end())
328 return fIsCArray[std::distance(fNames.begin(), it)];
334 if (address ==
nullptr) {
335 throw std::logic_error(
"Trying to insert a null branch address.");
337 if (std::find(fBranches.begin(), fBranches.end(), address) != fBranches.end()) {
338 throw std::logic_error(
"Trying to insert a branch address that's already present.");
340 if (std::find(fNames.begin(), fNames.end(),
name) != fNames.end()) {
341 throw std::logic_error(
"Trying to insert a branch name that's already present.");
343 fNames.emplace_back(
name);
344 fBranches.emplace_back(address);
359 [](
TBranch *
b) { return b->GetAddress() == nullptr; });
368 std::string
msg =
"RDataFrame::Snapshot:";
371 " is needed as it provides the size for one or more branches containing dynamically sized arrays, but "
377 msg.resize(
msg.size() - 2);
378 msg +=
" are needed as they provide the size of other branches containing dynamically sized arrays, but they are";
380 msg +=
" not part of the set of branches that are being written out.";
381 throw std::runtime_error(
msg);
388 const std::vector<const std::type_info *> &
colTypeIDs)
413 if (!fTreeName.empty() && !fOutputFile && fOptions.fLazy) {
417 return checkupdate ==
"update" ?
"updated" :
"created";
420 "A lazy Snapshot action was booked but never triggered. The tree '%s' in output file '%s' was not %s. "
421 "In case it was desired instead, remember to trigger the Snapshot operation, by storing "
422 "its result in a variable and for example calling the GetValue() method on it.",
432 fInputTree =
treeDS->GetTree();
433 fBranchAddressesNeedReset =
true;
438 if (!fBranchAddressesNeedReset) {
439 UpdateCArraysPtrs(values);
442 fBranchAddressesNeedReset =
false;
454 assert(values.size() == fBranches.size());
457 if (fBranches[i] && fOutputBranches.IsCArray(fOutputBranchNames[i])) {
463 fBranches[i]->SetAddress(
data);
464 fBranchAddresses[i] =
data;
475 SetBranchesHelper(fInputTree, *fOutputTree, fOutputBranches, fOptions.fBasketSize, fInputBranchNames[i],
476 fOutputBranchNames[i], *fInputColumnTypeIDs[i], values[i], fBranches[i], fBranchAddresses[i],
479 fOutputBranches.AssertNoNullBranchAddresses();
488 auto nBranches = fInputBranchNames.size();
499 TFile::Open(fFileName.c_str(), fOptions.fMode.c_str(),
"",
502 throw std::runtime_error(
"Snapshot: could not create output file " + fFileName);
505 if (!fDirName.empty()) {
509 outputDir = fOutputFile->mkdir(fDirName.c_str(),
"",
true);
511 outputDir = fOutputFile->mkdir(fDirName.c_str());
514 fOutputTree = std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel,
outputDir);
516 if (fOptions.fAutoFlush)
517 fOutputTree->SetAutoFlush(fOptions.fAutoFlush);
522 assert(fOutputTree !=
nullptr);
523 assert(fOutputFile !=
nullptr);
527 if (fOutputTree->GetEntries() == 0) {
528 SetEmptyBranches(fInputTree, *fOutputTree);
531 fOutputTree->AutoSave(
"flushbaskets");
534 fOutputFile->Close();
537 auto fullTreeName = fDirName.empty() ? fTreeName : fDirName +
'/' + fTreeName;
538 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::Internal::RDF::RTTreeDS>(
fullTreeName, fFileName));
555 const std::string
finalName = *
reinterpret_cast<const std::string *
>(
newName);
562 std::vector<bool>(fIsDefine),
565 fInputColumnTypeIDs};
572 const std::vector<const std::type_info *> &
colTypeIDs)
574 fOutputFiles(fNSlots),
575 fOutputTrees(fNSlots),
576 fBranchAddressesNeedReset(fNSlots, 1),
577 fInputTrees(fNSlots),
579 fBranchAddresses(fNSlots, std::vector<void *>(
vbnames.
size(), nullptr)),
580 fOutputBranches(fNSlots),
603 if (!fTreeName.empty() && fOptions.fLazy && !fOutputFiles.empty() &&
604 std::all_of(fOutputFiles.begin(), fOutputFiles.end(), [](
const auto &
f) { return !f; }) ) {
608 return checkupdate ==
"update" ?
"updated" :
"created";
611 "A lazy Snapshot action was booked but never triggered. The tree '%s' in output file '%s' was not %s. "
612 "In case it was desired instead, remember to trigger the Snapshot operation, by storing "
613 "its result in a variable and for example calling the GetValue() method on it.",
621 if (!fOutputFiles[
slot]) {
623 fOutputFiles[
slot] = fMerger->GetFile();
626 if (!fDirName.empty()) {
633 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel,
treeDirectory);
636 fOutputTrees[
slot]->SetImplicitMT(
false);
637 if (fOptions.fAutoFlush)
638 fOutputTrees[
slot]->SetAutoFlush(fOptions.fAutoFlush);
641 fInputTrees[
slot] =
r->GetTree();
645 fBranchAddressesNeedReset[
slot] = 1;
650 if (fOutputTrees[
slot]->GetEntries() > 0)
651 fOutputFiles[
slot]->Write();
653 fOutputTrees[
slot].reset(
nullptr);
654 fOutputBranches[
slot].Clear();
659 if (fBranchAddressesNeedReset[
slot] == 0) {
660 UpdateCArraysPtrs(
slot, values);
662 SetBranches(
slot, values);
663 fBranchAddressesNeedReset[
slot] = 0;
665 fOutputTrees[
slot]->Fill();
666 auto entries = fOutputTrees[
slot]->GetEntries();
669 fOutputFiles[
slot]->Write();
673 const std::vector<void *> &values)
679 assert(values.size() == fBranches[
slot].size());
682 if (fBranches[
slot][i] && fOutputBranches[
slot].IsCArray(fOutputBranchNames[i])) {
688 fBranches[
slot][i]->SetAddress(
data);
696 const std::vector<void *> &values)
702 fInputBranchNames[i], fOutputBranchNames[i], *fInputColumnTypeIDs[i], values[i],
703 fBranches[
slot][i], fBranchAddresses[
slot][i], fIsDefine[i]);
705 fOutputBranches[
slot].AssertNoNullBranchAddresses();
714 auto nBranches = fInputBranchNames.size();
726 std::unique_ptr<TFile>{
TFile::Open(fFileName.c_str(), fOptions.fMode.c_str(), fFileName.c_str(),
cs)};
728 throw std::runtime_error(
"Snapshot: could not create output file " + fFileName);
730 fMerger = std::make_unique<ROOT::TBufferMerger>(std::move(
outFile));
736 for (
auto &file : fOutputFiles) {
745 auto fullTreeName = fDirName.empty() ? fTreeName : fDirName +
'/' + fTreeName;
746 assert(fOutputFile &&
"Missing output file in Snapshot finalization.");
751 if (!fDirName.empty()) {
752 treeDirectory = fOutputFile->mkdir(fDirName.c_str(),
"",
true);
758 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel,
treeDirectory);
764 fOutputFile->Write();
768 fOutputFiles.clear();
772 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::Internal::RDF::RTTreeDS>(
fullTreeName, fFileName));
789 const std::string
finalName = *
reinterpret_cast<const std::string *
>(
newName);
797 std::vector<bool>(fIsDefine),
800 fInputColumnTypeIDs};
807 const std::vector<const std::type_info *> &
colTypeIDs)
832 if (!fNTupleName.empty() && !fOutputFile && fOptions.fLazy)
833 Warning(
"Snapshot",
"A lazy Snapshot action was booked but never triggered.");
839 auto nFields = fOutputFieldNames.size();
847 fInputFieldNames[i], fOptions.fVector2RVec)
850 fFieldTokens[i] = model->GetToken(fOutputFieldNames[i]);
855 writeOptions.SetCompression(fOptions.fCompressionAlgorithm, fOptions.fCompressionLevel);
857 fOutputFile.reset(
TFile::Open(fFileName.c_str(), fOptions.fMode.c_str()));
859 throw std::runtime_error(
"Snapshot: could not create output file " + fFileName);
862 if (!fDirName.empty()) {
866 outputDir = fOutputFile->mkdir(fDirName.c_str(),
"",
true);
868 outputDir = fOutputFile->mkdir(fDirName.c_str());
878 if (!fFillContexts[
slot]) {
879 fFillContexts[
slot] = fWriter->CreateFillContext();
880 fEntries[
slot] = fFillContexts[
slot]->GetModel().CreateBareEntry();
888 assert(values.size() == fFieldTokens.size());
889 for (
decltype(values.size()) i = 0; i < values.size(); i++) {
890 outputEntry->BindRawPtr(fFieldTokens[i], values[i]);
899 fFillContexts[
slot]->FlushCluster();
906 fFillContexts.clear();
910 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::RDF::RNTupleDS>(fDirName +
"/" + fNTupleName, fFileName));
927 const std::string
finalName = *
reinterpret_cast<const std::string *
>(
newName);
929 fNSlots,
finalName, fDirName, fNTupleName, fInputFieldNames,
930 fOutputFieldNames, fOptions, fInputLoopManager, fOutputLoopManager, fInputColumnTypeIDs};
size_t size(const MatrixT &matrix)
Retrieve the size of a square matrix.
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char filename
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
static TBranch * SearchForBranch(TTree *tree, const char *name)
The head node of a RDF computation graph.
static std::unique_ptr< RNTupleParallelWriter > Append(std::unique_ptr< ROOT::RNTupleModel > model, std::string_view ntupleName, TDirectory &fileOrDirectory, const ROOT::RNTupleWriteOptions &options=ROOT::RNTupleWriteOptions())
Append an ntuple to the existing file, which must not be accessed while data is filled into any created context.
void Insert(const std::string &name, TBranch *address, bool isCArray=false)
std::vector< TBranch * > fBranches
std::vector< std::string > fNames
void AssertNoNullBranchAddresses()
bool IsCArray(const std::string &name) const
TBranch * Get(const std::string &name) const
void FinalizeTask(unsigned int slot)
RSnapshotOptions fOptions
UntypedSnapshotRNTupleHelper(unsigned int nSlots, std::string_view filename, std::string_view dirname, std::string_view ntuplename, const ColumnNames_t &vfnames, const ColumnNames_t &fnames, const RSnapshotOptions &options, ROOT::Detail::RDF::RLoopManager *inputLM, ROOT::Detail::RDF::RLoopManager *outputLM, const std::vector< const std::type_info * > &colTypeIDs)
void Exec(unsigned int slot, const std::vector< void * > &values)
UntypedSnapshotRNTupleHelper MakeNew(void *newName)
Create a new UntypedSnapshotRNTupleHelper with a different output file name.
void InitTask(TTreeReader *, unsigned int slot)
UntypedSnapshotTTreeHelperMT(unsigned int nSlots, std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector< bool > &&isDefine, ROOT::Detail::RDF::RLoopManager *loopManager, ROOT::Detail::RDF::RLoopManager *inputLM, const std::vector< const std::type_info * > &colTypeIDs)
void UpdateCArraysPtrs(unsigned int slot, const std::vector< void * > &values)
void SetEmptyBranches(TTree *inputTree, TTree &outputTree)
UntypedSnapshotTTreeHelperMT MakeNew(void *newName, std::string_view="nominal")
Create a new UntypedSnapshotTTreeHelperMT with a different output file name.
RSnapshotOptions fOptions
void InitTask(TTreeReader *r, unsigned int slot)
void FinalizeTask(unsigned int slot)
void Exec(unsigned int slot, const std::vector< void * > &values)
void SetBranches(unsigned int slot, const std::vector< void * > &values)
RSnapshotOptions fOptions
void InitTask(TTreeReader *, unsigned int)
UntypedSnapshotTTreeHelper MakeNew(void *newName, std::string_view="nominal")
Create a new UntypedSnapshotTTreeHelper with a different output file name.
void SetEmptyBranches(TTree *inputTree, TTree &outputTree)
void SetBranches(const std::vector< void * > &values)
void Exec(unsigned int, const std::vector< void * > &values)
UntypedSnapshotTTreeHelper(std::string_view filename, std::string_view dirname, std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options, std::vector< bool > &&isDefine, ROOT::Detail::RDF::RLoopManager *loopManager, ROOT::Detail::RDF::RLoopManager *inputLM, const std::vector< const std::type_info * > &colTypeIDs)
void UpdateCArraysPtrs(const std::vector< void * > &values)
static RResult< std::unique_ptr< RFieldBase > > Create(const std::string &fieldName, const std::string &typeName, const ROOT::RCreateFieldOptions &options, const ROOT::RNTupleDescriptor *desc, ROOT::DescriptorId_t fieldId)
Factory method to resurrect a field from the stored on-disk type information.
static std::unique_ptr< RNTupleModel > CreateBare()
Creates a "bare model", i.e. an RNTupleModel with no default entry.
Common user-tunable settings for storing RNTuples.
Representation of an RNTuple data set in a ROOT file.
const_iterator begin() const
const_iterator end() const
A "std::vector"-like collection of values implementing handy operations to analyse them.
A Branch for the case of an object.
A TTree is a list of TBranches.
TClassRef is used to implement a permanent reference to a TClass object.
TClass instances represent classes, structs and namespaces in the ROOT type system.
Basic data type descriptor (datatype information is obtained from CINT).
static TDictionary * GetDictionary(const char *name)
Retrieve the type (class, fundamental type, typedef etc) named "name".
TDirectory::TContext keeps track of and restores the current directory.
Describe directory structure in memory.
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
A TLeaf describes individual elements of a TBranch. See TBranch structure in TTree.
Mother of all ROOT objects.
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree, TChain or TNtuple.
A TTree represents a columnar dataset.
@ kEntriesReshuffled
If set, signals that this TTree is the output of the processing of another TTree, and the entries are reshuffled with respect to the original TTree.
std::vector< std::string > ReplaceDotWithUnderscore(const std::vector< std::string > &columnNames)
Replace occurrences of '.' with '_' in each string passed as argument.
char TypeName2ROOTTypeName(const std::string &b)
Convert type name (e.g. "Float_t") to ROOT type code (e.g. 'F') -- see TBranch documentation.
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure...
std::string GetTypeNameWithOpts(const ROOT::RDF::RDataSource &ds, std::string_view colName, bool vector2RVec)
char TypeID2ROOTTypeName(const std::type_info &tid)
TBranch * CallBranchImp(TTree &tree, const char *branchname, TClass *ptrClass, void *addobj, Int_t bufsize=32000, Int_t splitlevel=99)
TBranch * CallBranchImpRef(TTree &tree, const char *branchname, TClass *ptrClass, EDataType datatype, void *addobj, Int_t bufsize=32000, Int_t splitlevel=99)
std::vector< std::string > ColumnNames_t
int CompressionSettings(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
ROOT::ESTLType STLKind(std::string_view type)
Converts STL container name to number.
ROOT::ESTLType IsSTLCont(std::string_view type)
type: the type name, e.g. vector<list<classA,allocator>,allocator>. Result: 0 if the type is not an STL container, otherwise the code of the container...
Tag to let data sources use the native data type when creating a column reader.
A collection of options to steer the creation of the dataset on file.