Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RFile.cxx
Go to the documentation of this file.
1/// \file v7/src/RFile.cxx
2/// \ingroup Base ROOT7
3/// \author Giacomo Parolini <giacomo.parolini@cern.ch>
4/// \date 2025-03-19
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8#include "ROOT/RFile.hxx"
9
10#include <ROOT/StringUtils.hxx>
11#include <ROOT/RError.hxx>
12
13#include <Byteswap.h>
14#include <TError.h>
15#include <TFile.h>
16#include <TKey.h>
17#include <TROOT.h>
18
19#include <algorithm>
20#include <cstring>
21
27
30
31static void CheckExtension(std::string_view path)
32{
33 if (ROOT::EndsWith(path, ".xml")) {
34 throw ROOT::RException(R__FAIL("ROOT::RFile doesn't support XML files."));
35 }
36
37 if (!ROOT::EndsWith(path, ".root")) {
38 R__LOG_WARNING(RFileLog()) << "ROOT::RFile only supports ROOT files. The preferred file extension is \".root\"";
39 }
40}
41
namespace {
/// Outcome of decoding a "name;cycle" string (see DecodeNameCycle).
enum class ENameCycleError {
   kNoError,       ///< Decoding succeeded (the cycle may or may not have been specified).
   kAnyCycle,      ///< The cycle was given as '*'. Not an actual error: ToString() maps it to "".
   kInvalidSyntax, ///< More than one ';', or the cycle is neither '*' nor a non-empty sequence of digits.
   kCycleTooLarge, ///< The numeric cycle does not fit into a std::int16_t.
   kNameEmpty,     ///< The string starts with ';'.
   kCOUNT          ///< Number of enumerators; keeps the table in ToString() in sync.
};

/// Result of DecodeNameCycle.
struct RNameCycleResult {
   /// The part preceding ';' (or the whole input if there is no ';').
   std::string fName;
   /// The numeric cycle; only set when a valid numeric cycle was given.
   std::optional<std::int16_t> fCycle;
   ENameCycleError fError;
};
} // namespace

/// Returns a human-readable description of `err`; the empty string for kNoError and kAnyCycle.
/// `err` must not be kCOUNT.
static const char *ToString(ENameCycleError err)
{
   static const char *const kErrorStr[] = {"", "", "invalid syntax", "cycle is too large", "name is empty"};
   static_assert(std::size(kErrorStr) == static_cast<std::size_t>(ENameCycleError::kCOUNT));
   return kErrorStr[static_cast<std::size_t>(err)];
}

/// Parses `str` as a non-negative decimal cycle number.
///
/// \param str The characters following ';' in a namecycle. Must consist of one or more ASCII digits.
/// \param out Receives the decoded cycle; left untouched unless kNoError is returned.
/// \return kNoError on success, kInvalidSyntax if `str` is empty or contains a non-digit,
///         kCycleTooLarge if the value exceeds the maximum std::int16_t.
static ENameCycleError DecodeNumericCycle(std::string_view str, std::optional<std::int16_t> &out)
{
   constexpr std::uint32_t kMaxCycle = std::numeric_limits<std::int16_t>::max();

   if (str.empty())
      return ENameCycleError::kInvalidSyntax;

   std::uint32_t res = 0;
   for (const char ch : str) {
      // Plain range check rather than isdigit(): no locale dependence and no UB on negative `char` values.
      if (ch < '0' || ch > '9')
         return ENameCycleError::kInvalidSyntax;

      // `res` is at most kMaxCycle here, so this cannot overflow 32 bits. The bound is checked *after* adding
      // the digit so that values just above the limit (e.g. 32768) are rejected as well.
      res = res * 10 + static_cast<std::uint32_t>(ch - '0');
      if (res > kMaxCycle)
         return ENameCycleError::kCycleTooLarge;
   }

   out = static_cast<std::int16_t>(res);
   return ENameCycleError::kNoError;
}

/// Splits a string of the form "name", "name;N" or "name;*" into its name and cycle.
///
/// \param nameCycleRaw The string to decode.
/// \return A result whose fError is:
///  - kNoError for "name" (fCycle unset) and for "name;N" (fCycle == N); also for an empty input,
///    in which case fName is empty;
///  - kAnyCycle for "name;*";
///  - kNameEmpty if the input starts with ';';
///  - kInvalidSyntax / kCycleTooLarge if the part after ';' is malformed (see DecodeNumericCycle)
///    or if there is more than one ';'.
/// fName is left empty for kNameEmpty and for the "more than one ';'" case.
static RNameCycleResult DecodeNameCycle(std::string_view nameCycleRaw)
{
   RNameCycleResult result{};

   if (nameCycleRaw.empty())
      return result;

   const std::size_t semicolonIdx = nameCycleRaw.find(';');

   // No cycle specified: the whole string is the name.
   if (semicolonIdx == std::string_view::npos) {
      result.fName = std::string(nameCycleRaw);
      return result;
   }

   if (semicolonIdx == 0) {
      result.fError = ENameCycleError::kNameEmpty;
      return result;
   }

   const std::string_view cycleStr = nameCycleRaw.substr(semicolonIdx + 1);

   // Verify that we have at most one ';'
   if (cycleStr.find(';') != std::string_view::npos) {
      result.fError = ENameCycleError::kInvalidSyntax;
      return result;
   }

   result.fName = std::string(nameCycleRaw.substr(0, semicolonIdx));
   if (cycleStr == "*")
      result.fError = ENameCycleError::kAnyCycle;
   else
      result.fError = DecodeNumericCycle(cycleStr, result.fCycle);

   return result;
}
115
116/// This function first validates, then normalizes the given path in place.
117///
118/// Returns an empty string if `path` is a suitable path to store an object into a RFile,
119/// otherwise returns a description of why that is not the case.
120///
121/// A valid object path must:
122/// - not be empty
123/// - not contain the character '.'
124/// - not contain ASCII control characters or whitespace characters (including tab or newline).
125/// - not contain more than RFile::kMaxPathNesting path fragments (i.e. more than RFile::kMaxPathNesting - 1 '/')
126/// - not end with a '/'
127///
128/// In addition, when *writing* an object to RFile, the character ';' is also banned.
129///
130/// Passing an invalid path to Put will cause it to throw an exception, and
131/// passing an invalid path to Get will always return nullptr.
132///
133/// If required, `path` is modified to make its hierarchy-related meaning consistent. This entails:
134/// - combining any consecutive '/' into a single one;
135/// - stripping any leading '/'.
136///
137static std::string ValidateAndNormalizePath(std::string &path)
138{
139 ////// First, validate path.
140
141 if (path.empty())
142 return "path cannot be empty";
143
144 if (path.back() == '/')
145 return "path cannot end with a '/'";
146
147 bool valid = true;
148 for (char ch : path) {
149 // Disallow control characters, tabs, newlines, whitespace and dot.
150 // NOTE: not short-circuiting or early returning to enable loop vectorization.
151 valid &= !(ch < 33 || ch == '.');
152 }
153 if (!valid)
154 return "path cannot contain control characters, whitespaces or dots";
155
156 //// Path is valid so far, normalize it.
157
158 // Strip all leading '/'
159 {
160 auto nToStrip = 0u;
161 const auto len = path.length();
162 while (nToStrip < len && path[nToStrip] == '/')
163 ++nToStrip;
164
165 if (nToStrip > 0)
166 path.erase(0, nToStrip);
167 }
168
169 // Remove duplicate consecutive '/'
170 const auto it = std::unique(path.begin(), path.end(), [](char a, char b) { return (a == '/' && b == '/'); });
171 path.erase(it, path.end());
172
173 //// After the path has been normalized, check the nesting level by counting how many slashes it contains.
174 const auto nesting = std::count(path.begin(), path.end(), '/');
176 return "pathView contains too many levels of nesting";
177
178 return "";
179}
180
181/////////////////////////////////////////////////////////////////////////////////////////////////
182
183RFile::RFile(std::unique_ptr<TFile> file) : fFile(std::move(file)) {}
184
185RFile::~RFile() = default;
186
187std::unique_ptr<RFile> RFile::Open(std::string_view path)
188{
189 CheckExtension(path);
190
191 TDirectory::TContext ctx(nullptr); // XXX: probably not thread safe?
192 auto tfile = std::unique_ptr<TFile>(TFile::Open(std::string(path).c_str(), "READ_WITHOUT_GLOBALREGISTRATION"));
193 if (!tfile || tfile->IsZombie())
194 throw ROOT::RException(R__FAIL("failed to open file " + std::string(path) + " for reading"));
195
196 auto rfile = std::unique_ptr<RFile>(new RFile(std::move(tfile)));
197 return rfile;
198}
199
200std::unique_ptr<RFile> RFile::Update(std::string_view path)
201{
202 CheckExtension(path);
203
204 TDirectory::TContext ctx(nullptr); // XXX: probably not thread safe?
205 auto tfile = std::unique_ptr<TFile>(TFile::Open(std::string(path).c_str(), "UPDATE_WITHOUT_GLOBALREGISTRATION"));
206 if (!tfile || tfile->IsZombie())
207 throw ROOT::RException(R__FAIL("failed to open file " + std::string(path) + " for updating"));
208
209 auto rfile = std::unique_ptr<RFile>(new RFile(std::move(tfile)));
210 return rfile;
211}
212
213std::unique_ptr<RFile> RFile::Recreate(std::string_view path)
214{
215 CheckExtension(path);
216
217 TDirectory::TContext ctx(nullptr); // XXX: probably not thread safe?
218 auto tfile = std::unique_ptr<TFile>(TFile::Open(std::string(path).c_str(), "RECREATE_WITHOUT_GLOBALREGISTRATION"));
219 if (!tfile || tfile->IsZombie())
220 throw ROOT::RException(R__FAIL("failed to open file " + std::string(path) + " for writing"));
221
222 auto rfile = std::unique_ptr<RFile>(new RFile(std::move(tfile)));
223 return rfile;
224}
225
226TKey *RFile::GetTKey(std::string_view path) const
227{
228 // In RFile, differently from TFile, when dealing with a path like "a/b/c", we always consider it to mean
229 // "object 'c' in subdirectory 'b' of directory 'a'". We don't try to get any other of the possible combinations,
230 // including the object called "a/b/c".
231 std::string fullPath = std::string(path);
232 char *dirName = fullPath.data();
233 char *restOfPath = strchr(dirName, '/');
234 TDirectory *dir = fFile.get();
235 while (restOfPath) {
236 // Truncate `dirName` to the position of this '/'.
237 *restOfPath = 0;
238 ++restOfPath;
239 // `restOfPath` should always be a non-empty string unless `path` ends with '/' (which it shouldn't, as we are
240 // supposed to have normalized it before calling this function).
242
243 dir = dir->GetDirectory(dirName);
244 if (!dir)
245 return nullptr;
246
249 }
250 // NOTE: after this loop `dirName` contains the base name of the object.
251
252 // Get the leaf object from the innermost directory.
253 TKey *key = dir->FindKey(dirName);
254 if (key) {
255 // For some reason, FindKey will not return nullptr if we asked for a specific cycle and that cycle
256 // doesn't exist. It will instead return any key whose cycle is *at most* the requested one.
257 // This is very confusing, so in RFile we actually return null if the requested cycle is not there.
258 RNameCycleResult res = DecodeNameCycle(dirName);
259 if (res.fError != ENameCycleError::kAnyCycle) {
260 if (res.fError != ENameCycleError::kNoError) {
261 R__LOG_ERROR(RFileLog()) << "error decoding namecycle '" << dirName << "': " << ToString(res.fError);
262 key = nullptr;
263 } else if (res.fCycle && *res.fCycle != key->GetCycle()) {
264 key = nullptr;
265 }
266 }
267 }
268 return key;
269}
270
271void *RFile::GetUntyped(std::string_view pathSV, const std::type_info &type) const
272{
273 if (!fFile)
274 throw ROOT::RException(R__FAIL("File has been closed"));
275
276 std::string path{pathSV};
277
279 if (!cls)
280 throw ROOT::RException(R__FAIL(std::string("Could not determine type of object ") + path));
281
282 if (auto err = ValidateAndNormalizePath(path); !err.empty())
283 throw RException(R__FAIL("Invalid object path '" + path + "': " + err));
284
285 TKey *key = GetTKey(path);
286 void *obj = key ? key->ReadObjectAny(cls) : nullptr;
287
288 if (obj) {
289 // Disavow any ownership on `obj`
290 if (auto autoAddFunc = cls->GetDirectoryAutoAdd(); autoAddFunc) {
291 autoAddFunc(obj, nullptr);
292 }
293 } else if (key && !GetROOT()->IsBatch()) {
294 R__LOG_WARNING(RFileLog()) << "Tried to get object '" << path << "' of type " << cls->GetName()
295 << " but that path contains an object of type " << key->GetClassName();
296 }
297
298 return obj;
299}
300
301void RFile::PutUntyped(std::string_view pathSV, const std::type_info &type, const void *obj, std::uint32_t flags)
302{
304 if (!cls)
305 throw ROOT::RException(R__FAIL(std::string("Could not determine type of object ") + std::string(pathSV)));
306
307 std::string path{pathSV};
308 if (auto err = ValidateAndNormalizePath(path); !err.empty())
309 throw RException(R__FAIL("Invalid object path '" + path + "': " + err));
310
311 if (path.find_first_of(';') != std::string_view::npos) {
312 throw RException(
313 R__FAIL("Invalid object path '" + path +
314 "': character ';' is used to specify an object cycle, which only makes sense when reading."));
315 }
316
317 if (!fFile)
318 throw ROOT::RException(R__FAIL("File has been closed"));
319
320 if (!fFile->IsWritable())
321 throw ROOT::RException(R__FAIL("File is not writable"));
322
323 // If `path` refers to a subdirectory, make sure we always write in an actual TDirectory,
324 // otherwise we may have a mix of top-level objects called "a/b/c" and actual directory
325 // structures.
326 // Sadly, TFile does nothing to prevent this and will happily write "a/b" even if there
327 // is already a directory "a" containing an object "b". We don't want that ambiguity here, so we take extra steps
328 // to ensure it doesn't happen.
329 const auto tokens = ROOT::Split(path, "/");
330 const auto FullPathUntil = [&tokens](auto idx) {
331 return ROOT::Join("/", std::span<const std::string>{tokens.data(), idx + 1});
332 };
333 TDirectory *dir = fFile.get();
334 for (auto tokIdx = 0u; tokIdx < tokens.size() - 1; ++tokIdx) {
335 // Alas, not only does mkdir not fail if the file already contains an object "a/b" and you try
336 // to create dir "a", but even when it does fail it doesn't tell you why.
337 // We obviously don't want to allow the coexistence of regular object named "a/b" and the directory
338 // named "a", so we manually check if each level of nesting doesn't exist already as a non-directory.
339 const TKey *existing = dir->GetKey(tokens[tokIdx].c_str());
340 if (existing && strcmp(existing->GetClassName(), "TDirectory") != 0 &&
341 strcmp(existing->GetClassName(), "TDirectoryFile") != 0) {
342 throw ROOT::RException(R__FAIL("error adding object '" + path + "': failed to create directory '" +
343 FullPathUntil(tokIdx) + "': name already taken by an object of type '" +
344 existing->GetClassName() + "'"));
345 }
346 dir = dir->mkdir(tokens[tokIdx].c_str(), "", true);
347 if (!dir) {
348 throw ROOT::RException(R__FAIL(std::string("failed to create directory ") + FullPathUntil(tokIdx)));
349 }
350 }
351
352 const bool allowOverwrite = (flags & kPutAllowOverwrite) != 0;
353 const bool backupCycle = (flags & kPutOverwriteKeepCycle) != 0;
354 const Option_t *writeOpts = "";
355 if (!allowOverwrite) {
356 const TKey *existing = dir->GetKey(tokens[tokens.size() - 1].c_str());
357 if (existing) {
358 throw ROOT::RException(R__FAIL(std::string("trying to overwrite object ") + path + " of type " +
359 existing->GetClassName() + " with another object of type " + cls->GetName()));
360 }
361 } else if (!backupCycle) {
362 writeOpts = "WriteDelete";
363 }
364
365 int success = dir->WriteObjectAny(obj, cls, tokens[tokens.size() - 1].c_str(), writeOpts);
366
367 if (!success) {
368 throw ROOT::RException(R__FAIL(std::string("Failed to write ") + path + " to file"));
369 }
370}
371
373{
374 return fFile->Write();
375}
376
378{
379 // NOTE: this also flushes the file internally
380 fFile.reset();
381}
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:300
static const char * ToString(ENameCycleError err)
Definition RFile.cxx:59
static ENameCycleError DecodeNumericCycle(const char *str, std::optional< std::int16_t > &out)
Definition RFile.cxx:66
static void CheckExtension(std::string_view path)
Definition RFile.cxx:31
static RNameCycleResult DecodeNameCycle(std::string_view nameCycleRaw)
Definition RFile.cxx:84
static std::string ValidateAndNormalizePath(std::string &path)
This function first validates, then normalizes the given path in place.
Definition RFile.cxx:137
#define R__LOG_WARNING(...)
Definition RLogger.hxx:358
#define R__LOG_ERROR(...)
Definition RLogger.hxx:357
#define b(i)
Definition RSha256.hxx:100
#define a(i)
Definition RSha256.hxx:99
const char Option_t
Option string (const char)
Definition RtypesCore.h:80
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t UChar_t len
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
An interface to read from, or write to, a ROOT file, as well as performing other common operations.
Definition RFile.hxx:98
void Close()
Flushes the RFile if needed and closes it, disallowing any further reading or writing.
Definition RFile.cxx:377
static constexpr int kMaxPathNesting
Definition RFile.hxx:130
static std::unique_ptr< RFile > Recreate(std::string_view path)
Opens the file for reading/writing, overwriting it if it already exists.
Definition RFile.cxx:213
std::unique_ptr< TFile > fFile
Definition RFile.hxx:104
size_t Flush()
Writes all objects and the file structure to disk.
Definition RFile.cxx:372
static std::unique_ptr< RFile > Update(std::string_view path)
Opens the file for updating, creating a new one if it doesn't exist.
Definition RFile.cxx:200
TKey * GetTKey(std::string_view path) const
Given path, returns the TKey corresponding to the object at that path (assuming the path is fully spl...
Definition RFile.cxx:226
static std::unique_ptr< RFile > Open(std::string_view path)
Opens the file for reading.
Definition RFile.cxx:187
void PutUntyped(std::string_view path, const std::type_info &type, const void *obj, std::uint32_t flags)
Writes obj to file, without taking its ownership.
Definition RFile.cxx:301
RFile(std::unique_ptr< TFile > file)
Definition RFile.cxx:183
void * GetUntyped(std::string_view path, const std::type_info &type) const
Gets object path from the file and returns an owning pointer to it.
Definition RFile.cxx:271
Base class for all ROOT issued exceptions.
Definition RError.hxx:79
A log configuration for a channel, e.g.
Definition RLogger.hxx:98
TClass instances represent classes, structs and namespaces in the ROOT type system.
Definition TClass.h:84
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition TClass.cxx:2973
TDirectory::TContext keeps track and restore the current directory.
Definition TDirectory.h:89
Describe directory structure in memory.
Definition TDirectory.h:45
virtual TDirectory * GetDirectory(const char *namecycle, Bool_t printError=false, const char *funcname="GetDirectory")
Find a directory using apath.
virtual Int_t WriteObjectAny(const void *, const char *, const char *, Option_t *="", Int_t=0)
Definition TDirectory.h:301
virtual TKey * GetKey(const char *, Short_t=9999) const
Definition TDirectory.h:222
virtual TKey * FindKey(const char *) const
Definition TDirectory.h:198
virtual TDirectory * mkdir(const char *name, const char *title="", Bool_t returnExistingDirectory=kFALSE)
Create a sub-directory "a" or a hierarchy of sub-directories "a/b/c/...".
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition TFile.cxx:3764
Book space in a file, create I/O buffers, to fill them, (un)compress them.
Definition TKey.h:28
virtual const char * GetClassName() const
Definition TKey.h:75
virtual void * ReadObjectAny(const TClass *expectedClass)
To read an object (non deriving from TObject) from the file.
Definition TKey.cxx:1025
Short_t GetCycle() const
Return cycle number associated to this key.
Definition TKey.cxx:579
ROOT::RLogChannel & RFileLog()
Definition RFile.cxx:22
bool EndsWith(std::string_view string, std::string_view suffix)
std::string Join(const std::string &sep, StringCollection_t &&strings)
Concatenate a list of strings with a separator.
std::vector< std::string > Split(std::string_view str, std::string_view delims, bool skipEmpty=false)
Splits a string at each character in delims.
TROOT * GetROOT()
Definition TROOT.cxx:477