Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
ReadSpeedCLI.cxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, David Poulton 2022
2
3/*************************************************************************
4 * Copyright (C) 1995-2022, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#include "ReadSpeedCLI.hxx"
12
13#ifdef R__USE_IMT
14#include <ROOT/TTreeProcessorMT.hxx> // for TTreeProcessorMT::SetTasksPerWorkerHint
15#endif
16
17#include <iostream>
18#include <cstring>
19
20using namespace ReadSpeed;
21
// Short usage synopsis. ParseArgs prints it when no arguments are given or when the first
// argument is -h or --help.
22const auto usageText = "Usage:\n"
23 " rootreadspeed --files fname1 [fname2 ...]\n"
24 " --trees tname1 [tname2 ...]\n"
25 " (--all-branches | --branches bname1 [bname2 ...] | --branches-regex bregex1 "
26 "[bregex2 ...])\n"
27 " [--threads nthreads]\n"
28 " [--tasks-per-worker ntasks]\n"
29 " rootreadspeed (--help|-h)\n"
30 " \n"
31 " Use -h for usage help, --help for detailed information.\n";
32
// Per-option descriptions. ParseArgs prints this block after usageText when the first argument
// is -h or --help.
33const auto argUsageText =
34 "Arguments:\n"
35 " Specifying files and trees:\n"
36 " --files fname1 [fname2...]\n"
37 " The list of root files to read from.\n"
38 "\n"
39 " --trees tname1 [tname2...]\n"
40 " The list of trees to read from the files. If only one tree is provided then it will"
41 " be used for all files. If multiple trees are specified, each tree is read from the"
42 " respective file."
43 "\n"
44 "\n"
45 " Specifying branches:\n"
46 " Branches can be specified using one of the following flags. Currently only one can be used"
47 " at a time.\n"
48 " --all-branches\n"
49 " Reads every branch from the specified files and trees."
50 "\n"
51 " --branches bname1 [bname2...]\n"
52 " Reads the branches with matching names. Will error if any of the branches are not found."
53 "\n"
54 " --branches-regex bregex1 [bregex2 ...]\n"
55 " Reads any branches with a name matching the provided regex. Will error if any provided"
56 " regex does not match at least one branch."
57 "\n"
58 "\n"
59 " Meta arguments:\n"
60 " --threads nthreads\n"
61 " The number of threads to use for file reading. Will automatically cap to the number of"
62 " available threads on the machine."
63 "\n"
64 " --tasks-per-worker ntasks\n"
65 " The number of tasks to generate for each worker thread when using multithreading.";
66
// Long-form help text. ParseArgs prints it, after usageText and argUsageText, only when the first
// argument is --help.
const auto fullUsageText =
   "Description:\n"
   " rootreadspeed is a tool used to help identify bottlenecks in root analysis programs"
   " by providing an idea of what throughput you can expect when reading ROOT files in"
   " certain configurations."
   " \n"
   " It does this by providing information about the number of bytes read from your files,"
   " how long this takes, and the different throughputs in MB/s, both in total and per thread."
   "\n"
   "\n"
   "Compressed vs Uncompressed Throughput:\n"
   " Throughput speeds are provided as compressed and uncompressed - ROOT files are usually"
   " saved in compressed format, so these will often differ. Compressed bytes is the total"
   " number of bytes read from TFiles during the readspeed test (possibly including meta-data)."
   " Uncompressed bytes is the number of bytes processed by reading the branch values in the TTree."
   " Throughput is calculated as the total number of bytes over the total runtime (including"
   " decompression time) in the uncompressed and compressed cases."
   "\n"
   "\n"
   "Interpreting results:\n"
   " \n"
   " There are three possible scenarios when using rootreadspeed, namely:"
   " \n"
   " -The 'Real Time' is significantly lower than your own analysis runtime."
   " This would imply your actual application code is dominating the runtime of your analysis,"
   " ie. your analysis logic or framework is taking up the time."
   " \n"
   // The advice below used to be printed twice in two wordings; only the more detailed one is kept.
   " The best way to decrease the runtime would be to optimize your code (or the framework's),"
   " parallelize it onto multiple threads if possible (for example with"
   " RDataFrame and EnableImplicitMT) or switch to a machine with a more performant CPU."
   " \n"
   " \n"
   " -The 'Real Time' is significantly higher than 'CPU Time / number of threads'*."
   " If the real time is higher than the CPU time per core it implies the reading of data is the"
   " bottleneck, as the CPU cores are wasting time waiting for data to arrive from your disk/drive"
   " or network connection in order to decompress it."
   " \n"
   " The best way to decrease your runtime would be transferring the data you need onto a faster"
   " storage medium (ie. a faster disk/drive such as an SSD, or connecting to a faster network"
   " for remote file access), or to use a compression algorithm with a higher compression ratio,"
   " possibly at the cost of the decompression rate."
   " \n"
   " Changing the number of threads is unlikely to help, and in fact using too many threads may"
   " degrade performance if they make requests to different regions of your local storage. "
   " \n"
   " * If no '--threads' argument was provided this is 1, otherwise it is the minimum of the value"
   " provided and the number of threads your CPU can run in parallel. It is worth noting that -"
   " on shared systems or if running other heavy applications - the number of your own threads"
   " running at any time may be lower than the limit due to demand on the CPU."
   " \n"
   " \n"
   " -The 'Real Time' is similar to 'CPU Time / number of threads'"
   " -AND 'Compressed Throughput' is lower than expected for your storage medium:"
   " This would imply that your CPU threads aren't decompressing data as fast as your storage medium"
   " can provide it, and so decompression is the bottleneck."
   " \n"
   " The best way to decrease your runtime would be to utilise a system with a faster CPU, or make"
   " use of more threads when running, or use a compression algorithm with a higher decompression rate"
   " such as LZ4, possibly at the cost of some extra file size."
   "\n"
   "\n"
   "A note on caching:\n"
   " If your data is stored on a local disk, the system may cache some/all of the file in memory after it is"
   " first read. If this is realistic of how your analysis will run - then there is no concern. However, if"
   " you expect to only read files once in a while - and as such the files are unlikely to be in the cache -"
   " consider clearing the cache before running rootreadspeed."
   " On Linux this can be done by running 'echo 3 > /proc/sys/vm/drop_caches' as a superuser"
   " or a specific file can be dropped from the cache with"
   " `dd of=<FILENAME> oflag=nocache conv=notrunc,fdatasync count=0 > /dev/null 2>&1`."
   "\n"
   "\n"
   " Known overhead of TTreeReader, RDataFrame:\n"
   " `rootreadspeed` is designed to read all data present in the specified branches, trees and files at the highest "
   " possible speed. When the application bottleneck is not in the computations performed by analysis logic, higher-level "
   " interfaces built on top of TTree such as TTreeReader and RDataFrame are known to add a significant runtime overhead "
   " with respect to the runtimes reported by `rootreadspeed` (up to a factor 2). In realistic analysis applications it has "
   " been observed that a large part of that overhead is compensated by the ability of TTreeReader and RDataFrame to read "
   " branch values selectively, based on event cuts, and this overhead will be reduced significantly when using RDataFrame "
   " in conjunction with RNTuple.";
148
150{
151 std::cout << "Thread pool size:\t\t" << r.fThreadPoolSize << '\n';
152
153 if (r.fMTSetupRealTime > 0.) {
154 std::cout << "Real time to setup MT run:\t" << r.fMTSetupRealTime << " s\n";
155 std::cout << "CPU time to setup MT run:\t" << r.fMTSetupCpuTime << " s\n";
156 }
157
158 std::cout << "Real time:\t\t\t" << r.fRealTime << " s\n";
159 std::cout << "CPU time:\t\t\t" << r.fCpuTime << " s\n";
160
161 std::cout << "Uncompressed data read:\t\t" << r.fUncompressedBytesRead << " bytes\n";
162 std::cout << "Compressed data read:\t\t" << r.fCompressedBytesRead << " bytes\n";
163
164 const unsigned int effectiveThreads = std::max(r.fThreadPoolSize, 1u);
165
166 std::cout << "Uncompressed throughput:\t" << r.fUncompressedBytesRead / r.fRealTime / 1024 / 1024 << " MB/s\n";
167 std::cout << "\t\t\t\t" << r.fUncompressedBytesRead / r.fRealTime / 1024 / 1024 / effectiveThreads
168 << " MB/s/thread for " << effectiveThreads << " threads\n";
169 std::cout << "Compressed throughput:\t\t" << r.fCompressedBytesRead / r.fRealTime / 1024 / 1024 << " MB/s\n";
170 std::cout << "\t\t\t\t" << r.fCompressedBytesRead / r.fRealTime / 1024 / 1024 / effectiveThreads
171 << " MB/s/thread for " << effectiveThreads << " threads\n\n";
172
173 const float cpuEfficiency = (r.fCpuTime / effectiveThreads) / r.fRealTime;
174
175 std::cout << "CPU Efficiency: \t\t" << (cpuEfficiency * 100) << "%\n";
176 std::cout << "Reading data is ";
177 if (cpuEfficiency > 0.80f) {
178 std::cout << "likely CPU bound (decompression).\n";
179 } else if (cpuEfficiency < 0.50f) {
180 std::cout << "likely I/O bound.\n";
181 } else {
182 std::cout << "likely balanced (more threads may help though).\n";
183 }
184 std::cout << "For details run with the --help command.\n";
185}
186
187Args ReadSpeed::ParseArgs(const std::vector<std::string> &args)
188{
189 // Print help message and exit if "--help"
190 const auto argsProvided = args.size() >= 2;
191 const auto helpUsed = argsProvided && (args[1] == "--help" || args[1] == "-h");
192 const auto longHelpUsed = argsProvided && args[1] == "--help";
193
194 if (!argsProvided || helpUsed) {
195 std::cout << usageText;
196 if (helpUsed)
197 std::cout << "\n" << argUsageText;
198 if (longHelpUsed)
199 std::cout << "\n\n" << fullUsageText;
200 std::cout << std::endl;
201
202 return {};
203 }
204
205 Data d;
206 unsigned int nThreads = 0;
207
208 enum class EArgState { kNone, kTrees, kFiles, kBranches, kThreads, kTasksPerWorkerHint } argState = EArgState::kNone;
209 enum class EBranchState { kNone, kRegular, kRegex, kAll } branchState = EBranchState::kNone;
210 const auto branchOptionsErrMsg =
211 "Options --all-branches, --branches, and --branches-regex are mutually exclusive. You can use only one.\n";
212
213 for (size_t i = 1; i < args.size(); ++i) {
214 const auto &arg = args[i];
215
216 if (arg == "--trees") {
217 argState = EArgState::kTrees;
218 } else if (arg == "--files") {
219 argState = EArgState::kFiles;
220 } else if (arg == "--all-branches") {
221 argState = EArgState::kNone;
222 if (branchState != EBranchState::kNone && branchState != EBranchState::kAll) {
223 std::cerr << branchOptionsErrMsg;
224 return {};
225 }
226 branchState = EBranchState::kAll;
227 d.fUseRegex = true;
228 d.fBranchNames = {".*"};
229 } else if (arg == "--branches") {
230 argState = EArgState::kBranches;
231 if (branchState != EBranchState::kNone && branchState != EBranchState::kRegular) {
232 std::cerr << branchOptionsErrMsg;
233 return {};
234 }
235 branchState = EBranchState::kRegular;
236 } else if (arg == "--branches-regex") {
237 argState = EArgState::kBranches;
238 if (branchState != EBranchState::kNone && branchState != EBranchState::kRegex) {
239 std::cerr << branchOptionsErrMsg;
240 return {};
241 }
242 branchState = EBranchState::kRegex;
243 d.fUseRegex = true;
244 } else if (arg == "--threads") {
245 argState = EArgState::kThreads;
246 } else if (arg == "--tasks-per-worker") {
247 argState = EArgState::kTasksPerWorkerHint;
248 } else if (arg[0] == '-') {
249 std::cerr << "Unrecognized option '" << arg << "'\n";
250 return {};
251 } else {
252 switch (argState) {
253 case EArgState::kTrees: d.fTreeNames.emplace_back(arg); break;
254 case EArgState::kFiles: d.fFileNames.emplace_back(arg); break;
255 case EArgState::kBranches: d.fBranchNames.emplace_back(arg); break;
256 case EArgState::kThreads:
257 nThreads = std::stoi(arg);
258 argState = EArgState::kNone;
259 break;
260 case EArgState::kTasksPerWorkerHint:
261#ifdef R__USE_IMT
263 argState = EArgState::kNone;
264#else
265 std::cerr << "ROOT was built without implicit multi-threading (IMT) support. The --tasks-per-worker option "
266 "will be ignored.\n";
267#endif
268 break;
269 default: std::cerr << "Unrecognized option '" << arg << "'\n"; return {};
270 }
271 }
272 }
273
274 return Args{std::move(d), nThreads, branchState == EBranchState::kAll, /*fShouldRun=*/true};
275}
276
277Args ReadSpeed::ParseArgs(int argc, char **argv)
278{
279 std::vector<std::string> args;
280 args.reserve(argc);
281
282 for (int i = 0; i < argc; ++i) {
283 args.emplace_back(argv[i]);
284 }
285
286 return ParseArgs(args);
287}
const Handle_t kNone
Definition GuiTypes.h:88
#define d(i)
Definition RSha256.hxx:102
const auto fullUsageText
const auto usageText
const auto argUsageText
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
static void SetTasksPerWorkerHint(unsigned int m)
Set the hint for the desired number of tasks created per worker.
void PrintThroughput(const Result &r)
Args ParseArgs(const std::vector< std::string > &args)
bool fUseRegex
If the branch names should use regex matching.
Definition ReadSpeed.hxx:30