45 static constexpr std::uint64_t gUpdateFrequencyBytes = 100 * 1000 * 1000;
46 std::uint64_t fNbytesNext = gUpdateFrequencyBytes;
49 ~RDefaultProgressCallback()
override {}
50 void Call(std::uint64_t nbytesWritten, std::uint64_t neventsWritten)
final
53 if (nbytesWritten < fNbytesNext)
55 std::cout <<
"Wrote " << nbytesWritten / 1000 / 1000 <<
"MB, " << neventsWritten <<
" entries\n";
56 fNbytesNext += gUpdateFrequencyBytes;
57 if (nbytesWritten > fNbytesNext) {
59 fNbytesNext = nbytesWritten + gUpdateFrequencyBytes;
63 void Finish(std::uint64_t nbytesWritten, std::uint64_t neventsWritten)
final
65 std::cout <<
"Done, wrote " << nbytesWritten / 1000 / 1000 <<
"MB, " << neventsWritten <<
" entries\n";
78std::unique_ptr<ROOT::Experimental::RNTupleImporter>
80 std::string_view destFileName)
82 auto importer = std::unique_ptr<RNTupleImporter>(
new RNTupleImporter());
83 importer->fNTupleName = treeName;
84 importer->fSourceFile = std::unique_ptr<TFile>(
TFile::Open(std::string(sourceFileName).c_str()));
85 if (!importer->fSourceFile || importer->fSourceFile->IsZombie()) {
86 throw RException(
R__FAIL(
"cannot open source file " + std::string(sourceFileName)));
89 importer->fSourceTree = importer->fSourceFile->Get<
TTree>(std::string(treeName).c_str());
90 if (!importer->fSourceTree) {
91 throw RException(
R__FAIL(
"cannot read TTree " + std::string(treeName) +
" from " + std::string(sourceFileName)));
96 auto result = importer->InitDestination(destFileName);
104std::unique_ptr<ROOT::Experimental::RNTupleImporter>
107 auto importer = std::unique_ptr<RNTupleImporter>(
new RNTupleImporter());
114 importer->fNTupleName = sourceTree->
GetName();
117 importer->fSourceTree = sourceTree;
121 auto result = importer->InitDestination(destFileName);
143 std::cout <<
"Importing '" <<
f.fField->GetFieldName() <<
"' [" <<
f.fField->GetTypeName() <<
"]\n";
146 std::cout <<
"Importing (projected) '" <<
f->GetFieldName() <<
"' [" <<
f->GetTypeName() <<
"]\n";
169 const auto firstLeaf =
static_cast<TLeaf *
>(
b->GetListOfLeaves()->First());
172 const bool isLeafList =
b->GetNleaves() > 1;
173 const bool isCountLeaf = firstLeaf->IsRange();
175 if (isLeafList && isClass)
176 return R__FAIL(
"unsupported: classes in leaf list, branch " + std::string(
b->GetName()));
177 if (isLeafList && isCountLeaf)
178 return R__FAIL(
"unsupported: count leaf arrays in leaf list, branch " + std::string(
b->GetName()));
183 Int_t firstLeafCountval;
184 const bool isCString = !isLeafList && (firstLeaf->IsA() ==
TLeafC::Class()) &&
185 (!firstLeaf->GetLeafCounter(firstLeafCountval)) && (firstLeafCountval == 1);
193 c.fMaxLength = firstLeaf->GetMaximum();
194 c.fCountVal = std::make_unique<Int_t>();
196 fSourceTree->SetBranchAddress(
b->GetName(),
static_cast<void *
>(
c.fCountVal.get()));
201 std::size_t branchBufferSize = 0;
203 std::vector<std::unique_ptr<ROOT::RFieldBase>> recordItems;
206 bool isLeafCountArray =
false;
209 return R__FAIL(
"unsupported: TObject branches, branch: " + std::string(
b->GetName()));
214 Int_t countval =
l->GetLenStatic();
215 auto *countleaf =
l->GetLeafCount();
216 const bool isFixedSizeArray = !isCString && (countleaf ==
nullptr) && (countval > 1);
217 isLeafCountArray = (countleaf !=
nullptr);
222 std::string fieldName =
b->GetName();
223 std::string fieldType =
l->GetTypeName();
226 fieldName =
l->GetName();
229 fieldType =
"std::string";
232 fieldType =
b->GetClassName();
234 if (isFixedSizeArray)
235 fieldType =
"std::array<" + fieldType +
"," + std::to_string(countval) +
">";
239 std::replace(fieldName.begin(), fieldName.end(),
'.',
'_');
246 auto field = fieldOrError.Unwrap();
248 branchBufferSize =
l->GetMaximum();
249 f.fValue = std::make_unique<ROOT::RFieldBase::RValue>(field->CreateValue());
250 f.fFieldBuffer =
f.fValue->GetPtr<
void>().get();
257 branchBufferSize =
sizeof(
void *) * countval;
258 }
else if (isLeafCountArray) {
261 branchBufferSize =
l->GetOffset() + field->GetValueSize();
264 f.fField = field.get();
267 recordItems.emplace_back(std::move(field));
268 }
else if (isLeafCountArray) {
269 const std::string countleafName = countleaf->GetName();
272 R__ASSERT(
b->GetListOfLeaves()->GetEntries() == 1);
275 fModel->AddField(std::move(field));
279 if (!recordItems.empty()) {
280 auto recordField = std::make_unique<ROOT::RRecordField>(
b->GetName(), std::move(recordItems));
282 f.fField = recordField.get();
284 fModel->AddField(std::move(recordField));
289 ib.
fBranchBuffer = std::make_unique<unsigned char[]>(branchBufferSize);
293 return R__FAIL(
"unable to load class " + std::string(
b->GetClassName()) +
" for branch " +
294 std::string(
b->GetName()));
296 auto ptrBuf =
reinterpret_cast<void **
>(ib.
fBranchBuffer.get());
303 if (!isLeafCountArray && !
fImportFields.back().fFieldBuffer) {
311 int iLeafCountCollection = 0;
315 auto countLeafName = p.first;
318 c.fFieldName =
"_collection" + std::to_string(iLeafCountCollection);
319 auto recordField = std::make_unique<ROOT::RRecordField>(
"_0", std::move(
c.fLeafFields));
321 c.fRecordField =
static_cast<RRecordField *
>(collectionField->GetMutableSubfields()[0]);
322 fModel->AddField(std::move(collectionField));
325 for (
const auto leaf :
c.fRecordField->GetConstSubfields()) {
326 const auto name = leaf->GetFieldName();
327 auto projectedField =
329 fModel->AddProjectedField(std::move(projectedField), [&
name, &
c](
const std::string &fieldName) {
330 if (fieldName ==
name)
333 return c.fFieldName +
"._0." +
name;
339 std::replace(countLeafName.begin(), countLeafName.end(),
'.',
'_');
344 fModel->AddProjectedField(std::move(projectedField), [&
c](
const std::string &) {
return c.fFieldName; });
346 iLeafCountCollection++;
350 for (
auto &field :
fModel->GetMutableFieldZero()) {
359 fEntry->BindRawPtr(
f.fField->GetFieldName(),
f.fFieldBuffer);
362 fEntry->BindRawPtr<
void>(
c.fFieldName, &
c.fFieldBuffer);
377 if (lastSlash != std::string::npos) {
381 targetDir =
fDestFile->mkdir(dirName.c_str(),
"",
true);
387 if (targetDir->
FindKey(ntupleName.c_str()) !=
nullptr) {
393 std::unique_ptr<ROOT::Internal::RPageSink> sink =
394 std::make_unique<ROOT::Internal::RPageSinkFile>(ntupleName, *targetDir,
fWriteOptions);
395 sink->GetMetrics().Enable();
396 auto ctrZippedBytes = sink->GetMetrics().GetCounter(
"RPageSinkFile.szWritePayload");
399 sink = std::make_unique<ROOT::Internal::RPageSinkBuf>(std::move(sink));
414 for (
decltype(nEntries) i = 0; i < nEntries; ++i) {
418 const auto sizeOfRecord =
c.fRecordField->GetValueSize();
419 c.fFieldBuffer.resize(sizeOfRecord * (*
c.fCountVal));
421 const auto nLeafs =
c.fRecordField->GetConstSubfields().size();
422 for (std::size_t
l = 0;
l < nLeafs; ++
l) {
423 const auto offset =
c.fRecordField->GetOffsets()[
l];
424 const auto sizeOfLeaf =
c.fRecordField->GetConstSubfields()[
l]->GetValueSize();
425 const auto idxImportBranch =
c.fLeafBranchIndexes[
l];
426 for (
Int_t j = 0; j < *
c.fCountVal; ++j) {
427 memcpy(
c.fFieldBuffer.data() + j * sizeOfRecord + offset,
428 fImportBranches[idxImportBranch].fBranchBuffer.get() + (j * sizeOfLeaf), sizeOfLeaf);
439 ntplWriter->Fill(*
fEntry);
#define R__FORWARD_ERROR(res)
Short-hand to return an RResult<T> in an error state (i.e. after checking).
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
int Int_t
Signed integer 4 bytes (int).
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Used to report every ~100 MB (compressed), and at the end about the status of the import.
bool fConvertDotsInBranchNames
Whether or not dot characters in branch names should be converted to underscores.
std::unique_ptr< ROOT::REntry > fEntry
std::unique_ptr< TFile > fDestFile
std::int64_t fMaxEntries
The maximum number of entries to import. When this value is -1 (default), import all entries.
std::map< std::string, RImportLeafCountCollection > fLeafCountCollections
Maps the count leaf to the information about the corresponding untyped collection.
RNTupleImporter()=default
std::unique_ptr< ROOT::RNTupleModel > fModel
std::vector< RImportBranch > fImportBranches
ROOT::RResult< void > InitDestination(std::string_view destFileName)
static std::unique_ptr< RNTupleImporter > Create(std::string_view sourceFileName, std::string_view treeName, std::string_view destFileName)
Opens the input file for reading and the output file for writing (update).
std::unique_ptr< RProgressCallback > fProgressCallback
void Import()
Import works in two steps:
ROOT::RNTupleWriteOptions fWriteOptions
std::string fDestFileName
bool fIsQuiet
No standard output, conversely if set to false, schema information and progress is printed.
std::vector< RImportField > fImportFields
FieldModifier_t fFieldModifier
std::vector< std::unique_ptr< RImportTransformation > > fImportTransformations
The list of transformations to be performed for every entry.
ROOT::RResult< void > PrepareSchema()
Sets up the connection from TTree branches to RNTuple fields, including initialization of the memory ...
Base class for all ROOT issued exceptions.
static RResult< std::unique_ptr< RFieldBase > > Create(const std::string &fieldName, const std::string &typeName, const ROOT::RCreateFieldOptions &options, const ROOT::RNTupleDescriptor *desc, ROOT::DescriptorId_t fieldId)
Factory method to resurrect a field from the stored on-disk type information.
static std::unique_ptr< RNTupleModel > CreateBare()
Creates a "bare model", i.e. an RNTupleModel with no default entry.
The field for an untyped record.
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
static std::unique_ptr< RVectorField > CreateUntyped(std::string_view fieldName, std::unique_ptr< RFieldBase > itemField)
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Describe directory structure in memory.
virtual TKey * FindKey(const char *) const
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
const char * GetName() const override
Returns name of object.
A TTree represents a columnar dataset.
virtual void SetImplicitMT(bool enabled)
virtual TTree * GetTree() const
virtual Long64_t LoadTree(Long64_t entry)
Set current entry.
TClass * IsA() const override
std::unique_ptr< RNTupleWriter > CreateRNTupleWriter(std::unique_ptr< ROOT::RNTupleModel > model, std::unique_ptr< Internal::RPageSink > sink)
RProjectedFields & GetProjectedFieldsOfModel(RNTupleModel &model)
std::string fBranchName
Top-level branch name from the input TTree.
std::unique_ptr< unsigned char[]> fBranchBuffer
The destination of SetBranchAddress() for fBranchName.
void * fFieldBuffer
Usually points to the corresponding RImportBranch::fBranchBuffer but not always.
When the schema is set up and the import started, it needs to be reset before the next Import() call ...
Leaf count arrays require special treatment.