Logo ROOT   6.16/01
Reference Guide
TSimpleAnalysis.cxx
Go to the documentation of this file.
1// @(#)root/treeplayer:$Id$
2// Author: Luca Giommi 22/08/16
3
4/*************************************************************************
5 * Copyright (C) 1995-2016, Rene Brun and Fons Rademakers. *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12#include "TSimpleAnalysis.h"
13
14#include "TFile.h"
15#include "TChain.h"
16#include "TChainElement.h"
17#include "TH1.h"
18#include "TError.h"
19#include "TKey.h"
20#ifdef R__USE_IMT
22#endif
23#include "TROOT.h"
24
25#include <string>
26#include <fstream>
27#include <vector>
28#include <map>
29#include <iostream>
30
31/** \class TSimpleAnalysis
32
33A TSimpleAnalysis object creates histograms from a TChain. These histograms
34are stored to an output file. The histogrammed (TTreeFormula) expressions,
35their cuts, the input and output files are configured through a simple config
36file that allows comments starting with '#'.
37Here an example of configuration file:
38
39~~~ {.cpp}
40# This is an example of configuration file
41file_output.root #the output file in which histograms are stored
42
43# The next line has the name of the tree of the input data. It is
44# optional if there is exactly one tree in the first input file.
45ntuple #name of the input tree
46
47# The lines of the next block correspond to .root input files that
48# contain the tree
49hsimple1.root #first .root input file
50hsimple2.root #second .root input file
51
52# The next block is composed by lines that allow to configure the
53# histograms. They have the following syntax:
54# NAME = EXPRESSION if CUT
55# which corresponds to chain->Draw("EXPRESSION >> NAME", "CUT")
56# i.e. it will create a histogram called NAME and store it in
57# file_output.root.
58# "if CUT" is optional
59hpx=px if px<-3 #first histogram
60hpxpy=px:py #second histogram
61
62# End of the configuration file
63~~~
64
65It is possible to use the script rootdrawtree that allows to use the class
66just in command line through the bash shell.
67*/
68
69////////////////////////////////////////////////////////////////////////////////
70/// Delete comments, leading and trailing white spaces in a string.
71///
72/// param[in] line - line read from the input file
73
74static void DeleteCommentsAndSpaces(std::string& line)
75{
76 // Delete comments
77 std::size_t comment = line.find('#');
78 line = line.substr(0, comment);
79 // Delete leading spaces
80 std::size_t firstNotSpace = line.find_first_not_of(" \t");
81 if (firstNotSpace != std::string::npos)
82 line = line.substr(firstNotSpace);
83 else {
84 line.clear();
85 return;
86 }
87 // Delete trailing spaces
88 std::size_t lastNotSpace = line.find_last_not_of(" \t");
89 if (lastNotSpace != std::string::npos)
90 line = line.substr(0, lastNotSpace + 1);
91}
92
93////////////////////////////////////////////////////////////////////////////////
94/// Handle the expression lines of the input file in order to pass the
95/// elements to the members of the object.
96///
97/// param[in] line - TTreeFormula expression, either read form the configuration
98/// file or passed as expression to the constructor
99
100std::string TSimpleAnalysis::HandleExpressionConfig(const std::string& line)
101{
102 static const std::string kCutIntr = " if ";
103
104 std::size_t equal = line.find("=");
105 if (equal == std::string::npos)
106 return "Error: missing '='";
107
108 // Set the histName value
109 std::string histName = line.substr(0, equal);
110 DeleteCommentsAndSpaces(histName);
111 if (histName.empty())
112 return "Error: no histName found";
113
114 //Set the histExpression value
115 std::size_t cutPos = line.find(kCutIntr, equal);
116 std::string histExpression;
117 if (cutPos == std::string::npos)
118 histExpression = line.substr(equal + 1);
119 else
120 histExpression = line.substr(equal + 1, cutPos - equal - 1);
121 DeleteCommentsAndSpaces(histExpression);
122 if (histExpression.empty())
123 return "Error: no expression found";
124
125 // Set the histCut value
126 std::string histCut;
127 if (cutPos != std::string::npos) {
128 histCut = line.substr(cutPos + kCutIntr.size());
130 if (histCut.empty())
131 return "Error: missing cut expression after 'if'";
132 }
133 else
134 histCut = "";
135
136 // Set the map that contains the histName, histExpressions and histCut values
137 auto check = fHists.insert(std::make_pair((const std::string&)histName,
138 std::make_pair(histExpression, histCut)));
139
140 // Check if there are histograms with the same name
141 if (!check.second)
142 return "Duplicate histogram name";
143 return "";
144}
145
146////////////////////////////////////////////////////////////////////////////////
147/// Constructor for the case of command line parsing arguments. It sets the members
148/// of the object.
149///
150/// \param[in] output - name of the output file
151/// \param[in] inputFiles - name of the input .root files
152/// \param[in] expressions - what is shown in the histograms
153/// \param[in] treeName - name of the tree
154/// \throws std::runtime_error in case of ill-formed expressions
155
157 const std::vector<std::string>& inputFiles,
158 const std::vector<std::string>& expressions,
159 const std::string& treeName = ""):
160 fInputFiles(inputFiles), fOutputFile(output), fTreeName(treeName)
161{
162 for (const std::string& expr: expressions) {
163 std::string errMessage = HandleExpressionConfig(expr);
164 if (!errMessage.empty())
165 throw std::runtime_error(errMessage + " in " + expr);
166 }
167}
168
169////////////////////////////////////////////////////////////////////////////////
170/// Extract the name of the tree from the first input file when the tree name
171/// isn't in the configuration file. Returns the name of the tree.
172
173static std::string ExtractTreeName(std::string& firstInputFile)
174{
175 std::string treeName = "";
176 std::unique_ptr<TFile> inputFile{TFile::Open(firstInputFile.c_str())};
177
178 // Loop over all the keys inside the first input file
179 for (TObject* keyAsObj : *inputFile->GetListOfKeys()) {
180 TKey* key = static_cast<TKey*>(keyAsObj);
181 TClass* clObj = TClass::GetClass(key->GetClassName());
182 if (!clObj)
183 continue;
184 // If the key is related to and object that inherits from TTree::Class we
185 // set treeName with the name of this key if treeName is empty, otherwise
186 // error occurs
187 if (clObj->InheritsFrom(TTree::Class())) {
188 if (treeName.empty())
189 treeName = key->GetName();
190 else {
191 ::Error("TSimpleAnalysis::Analyze", "Multiple trees inside %s", firstInputFile.c_str());
192 return "";
193 }
194 }
195 }
196 // If treeName is yet empty, error occurs
197 if (treeName.empty()) {
198 ::Error("TSimpleAnalysis::Analyze", "No tree inside %s", firstInputFile.c_str());
199 return "";
200 }
201 return treeName;
202}
203
204////////////////////////////////////////////////////////////////////////////////
205/// Returns true if there are no errors in TChain::LoadTree()
206
207static bool CheckChainLoadResult(TChain* chain)
208{
209 // Possible return values of TChain::LoadTree()
210 static const char* errors[] {
211 "all good", // 0
212 "empty chain", // -1
213 "invalid entry number", // -2
214 "cannot open the file", // -3
215 "missing tree", // -4
216 "internal error" // -5
217 };
218
219 bool ret = true;
220 TObjArray *fileElements = chain->GetListOfFiles();
221 TIter next(fileElements);
222 while (TChainElement* chEl = (TChainElement*)next()) {
223 if (chEl->GetLoadResult() < 0) {
224 ::Error("TSimpleAnalysis::Run", "Load failure in file %s: %s",
225 chEl->GetTitle(), errors[-(chEl->GetLoadResult())]);
226 ret = false;
227 }
228 }
229 return ret;
230}
231
232////////////////////////////////////////////////////////////////////////////////
233/// Disambiguate tree name from first input file and set up fTreeName if it is
234/// empty
235
237{
238 // Disambiguate tree name from first input file:
239 // just try to open it, if that works it's an input file.
240 if (!fTreeName.empty()) {
241 // Silence possible error message from TFile constructor if this is a tree name.
242 int oldLevel = gErrorIgnoreLevel;
244 if (TFile* probe = TFile::Open(fTreeName.c_str())) {
245 if (!probe->IsZombie()) {
246 fInputFiles.insert(fInputFiles.begin(), fTreeName);
247 fTreeName.clear();
248 }
249 delete probe;
250 }
251 gErrorIgnoreLevel = oldLevel;
252 }
253 // If fTreeName is empty we try to find the name of the tree through reading
254 // of the first input file
255 if (fTreeName.empty())
257 if (fTreeName.empty()) // No tree name found
258 return false;
259 return true;
260}
261
262////////////////////////////////////////////////////////////////////////////////
263/// Execute all the TChain::Draw() as configured and stores the output histograms.
264/// Returns true if the analysis succeeds.
265
267{
268 if (!SetTreeName())
269 return false;
270
271 // Create the output file and check if it fails
272 TFile ofile(fOutputFile.c_str(), "RECREATE");
273 if (ofile.IsZombie()) {
274 ::Error("TSimpleAnalysis::Run", "Impossible to create %s", fOutputFile.c_str());
275 return false;
276 }
277
278 // Store the histograms into a vector
279 auto generateHisto = [&](const std::pair<TChain*, TDirectory*>& job) {
280 TChain* chain = job.first;
281 TDirectory* taskDir = job.second;
282 taskDir->cd();
283 std::vector<TH1F *> vPtrHisto(fHists.size());
284 // Index for a correct set up of vPtrHisto
285 int i = 0;
286
287 // Loop over all the histograms
288 for (const auto &histo : fHists) {
289 const std::string& expr = histo.second.first;
290 const std::string& histoName = histo.first;
291 const std::string& cut = histo.second.second;
292
293 chain->Draw((expr + ">>" + histoName).c_str(), cut.c_str(), "goff");
294 TH1F *ptrHisto = (TH1F*)taskDir->Get(histoName.c_str());
295
296 // Check if there are errors inside the chain
297 if (!CheckChainLoadResult(chain))
298 return std::vector<TH1F *>();
299
300 vPtrHisto[i] = ptrHisto;
301 ++i;
302 }
303 return vPtrHisto;
304 };
305
306#if 0
307 // The MT version is currently disabled because reading emulated objects
308 // triggers a lock for every object read. This in turn increases the run
309 // time way beyond the serial case.
310
311
313 ROOT::TThreadExecutor pool(8);
314
315 // Do the chain of the fInputFiles
316 std::vector<std::pair<TChain*, TDirectory*>> vChains;
317 for (size_t i = 0; i < fInputFiles.size(); ++i){
318 const std::string& inputfile = fInputFiles[i];
319 TChain *ch;
320 ch = new TChain(fTreeName.c_str());
321 ch->Add(inputfile.c_str());
322
323 // Create task-specific TDirectory, so avoid parallel tasks to interfere
324 // in gDirectory with histogram registration.
325 TDirectory* taskDir = gROOT->mkdir(TString::Format("TSimpleAnalysis_taskDir_%d", (int)i));
326
327 vChains.emplace_back(std::make_pair(ch, taskDir));
328 }
329
330 auto vFileswHists = pool.Map(generateHisto, vChains);
331
332 // If a file does not exist, one of the vFileswHists
333 // will be a vector of length 0. Detect that.
334 for (auto&& histsOfJob: vFileswHists) {
335 if (histsOfJob.empty())
336 return false;
337 }
338
339 // Merge the results. Initialize the result with the first task's results,
340 // then add the other tasks.
341 std::vector<TH1F *> vPtrHisto{vFileswHists[0]};
342 ofile.cd();
343 for (unsigned j = 0; j < fHists.size(); j++) {
344 for (unsigned i = 1; i < vFileswHists.size(); i++) {
345 if (!vFileswHists[i][j]) {
346 // ignore that sum histogram:
347 delete vPtrHisto[j];
348 vPtrHisto[j] = nullptr;
349 continue;
350 }
351 if (vPtrHisto[j])
352 vPtrHisto[j]->Add(vFileswHists[i][j]);
353 }
354 if (vPtrHisto[j])
355 vPtrHisto[j]->Write();
356 }
357 return true;
358
359#else
360
361 // Do the chain of the fInputFiles
362 TChain* chain = new TChain(fTreeName.c_str());
363 for (const std::string& inputfile: fInputFiles)
364 chain->Add(inputfile.c_str());
365
366 // Generate histograms
367 auto vHisto = generateHisto({chain, gDirectory});
368 if (vHisto.empty())
369 return false;
370 ofile.cd();
371 // Store the histograms
372 for (auto histo: vHisto) {
373 if (histo)
374 histo->Write();
375 }
376 return true;
377
378#endif
379}
380
381////////////////////////////////////////////////////////////////////////////////
382/// Returns false if not a tree name, otherwise sets the name of the tree.
383///
384/// param[in] line - line read from the input file
385
387{
388 if (line.find("=") == std::string::npos) {
389 fInputFiles.push_back(line);
390 return true;
391 }
392 return false; // It's an expression
393}
394
395////////////////////////////////////////////////////////////////////////////////
396/// Skip subsequent empty lines read from fIn and returns the next not empty line.
397///
398/// param[in] numbLine - number of the input file line
399
400std::string TSimpleAnalysis::GetLine(int& numbLine)
401{
402 std::string notEmptyLine;
403
404 do {
405 getline(fIn, notEmptyLine);
406 DeleteCommentsAndSpaces(notEmptyLine);
407 numbLine++;
408 } while (fIn && notEmptyLine.empty());
409
410 return notEmptyLine;
411}
412
413////////////////////////////////////////////////////////////////////////////////
414/// This function has the aim of setting the arguments read from the input file.
415
417{
418 int readingSection = kReadingOutput;
419 std::string line;
420 int numbLine = 0;
421
422 // Error if the input file does not exist
423 fIn.open(fConfigFile);
424 if (!fIn) {
425 ::Error("TSimpleAnalysis::Configure", "File %s not found", fConfigFile.c_str());
426 return false;
427 }
428
429 while (!fIn.eof()) {
430 line = GetLine(numbLine);
431 if (line.empty()) // It can happen if fIn.eof()
432 continue;
433 std::string errMessage;
434
435 switch (readingSection) {
436
437 // Set the name of the output file
438 case kReadingOutput:
440 readingSection++;
441 break;
442
443 // Set the name of the tree
444 case kReadingTreeName:
445 fTreeName = line;
446 readingSection++;
447 break;
448
449 // Set the input files
450 case kReadingInput:
452 // Not an input file name; try to parse as an expression
453 errMessage = HandleExpressionConfig(line);
454 readingSection = kReadingExpressions;
455 }
456 break;
457
458 // Set the expressions
460 errMessage = HandleExpressionConfig(line);
461 break;
462 }
463
464 // Report any errors if occur during the configuration proceedings
465 if (!errMessage.empty()) {
466 ::Error("TSimpleAnalysis::Configure", "%s in %s:%d", errMessage.c_str(),
467 fConfigFile.c_str(), numbLine);
468 return false;
469 }
470 } // while (!fIn.eof())
471 return true;
472}
473
474////////////////////////////////////////////////////////////////////////////////
475/// Function that allows to create the TSimpleAnalysis object and execute its
476/// Configure and Analyze functions.
477///
478/// param[in] configurationFile - name of the input file used to create the TSimpleAnalysis object
479
480bool RunSimpleAnalysis (const char* configurationFile) {
481 TSimpleAnalysis obj(configurationFile);
482 if (!obj.Configure())
483 return false;
484 if (!obj.Run())
485 return false;
486 return true; // Return true only if Configure() and Run() functions were performed correctly
487}
void Class()
Definition: Class.C:29
#define gDirectory
Definition: TDirectory.h:213
const Int_t kFatal
Definition: TError.h:42
void Error(const char *location, const char *msgfmt,...)
R__EXTERN Int_t gErrorIgnoreLevel
Definition: TError.h:105
#define gROOT
Definition: TROOT.h:410
static void DeleteCommentsAndSpaces(std::string &line)
Delete comments, leading and trailing white spaces in a string.
static bool CheckChainLoadResult(TChain *chain)
Returns true if there are no errors in TChain::LoadTree()
bool RunSimpleAnalysis(const char *configurationFile)
Function that allows to create the TSimpleAnalysis object and execute its Configure and Analyze funct...
static std::string ExtractTreeName(std::string &firstInputFile)
Extract the name of the tree from the first input file when the tree name isn't in the configuration ...
This class provides a simple interface to execute the same task multiple times in parallel,...
auto Map(F func, unsigned nTimes) -> std::vector< typename std::result_of< F()>::type >
Execute func (with no arguments) nTimes in parallel.
A TChainElement describes a component of a TChain.
Definition: TChainElement.h:28
A chain is a collection of files containing TTree objects.
Definition: TChain.h:33
virtual Long64_t Draw(const char *varexp, const TCut &selection, Option_t *option="", Long64_t nentries=kMaxEntries, Long64_t firstentry=0)
Draw expression varexp for selected entries.
Definition: TChain.cxx:778
TObjArray * GetListOfFiles() const
Definition: TChain.h:107
virtual Int_t Add(TChain *chain)
Add all files referenced by the passed chain to this chain.
Definition: TChain.cxx:222
The ROOT global object gROOT contains a list of all defined classes.
Definition: TClass.h:75
Bool_t InheritsFrom(const char *cl) const
Return kTRUE if this class inherits from a class with name "classname".
Definition: TClass.cxx:4720
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition: TClass.cxx:2885
virtual Bool_t cd(const char *path=0)
Change current directory to "this" directory.
Describe directory structure in memory.
Definition: TDirectory.h:34
virtual TObject * Get(const char *namecycle)
Return pointer to object identified by namecycle.
Definition: TDirectory.cxx:805
virtual Bool_t cd(const char *path=0)
Change current directory to "this" directory.
Definition: TDirectory.cxx:497
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition: TFile.h:48
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseGeneralPurpose, Int_t netopt=0)
Create / open a file.
Definition: TFile.cxx:3975
1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:571
Book space in a file, create I/O buffers, to fill them, (un)compress them.
Definition: TKey.h:24
virtual const char * GetClassName() const
Definition: TKey.h:71
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
An array of TObjects.
Definition: TObjArray.h:37
Mother of all ROOT objects.
Definition: TObject.h:37
R__ALWAYS_INLINE Bool_t IsZombie() const
Definition: TObject.h:134
A TSimpleAnalysis object creates histograms from a TChain.
std::string fConfigFile
Name of the configuration file.
std::vector< std::string > fInputFiles
.root input files
std::string GetLine(int &numbLine)
Skip subsequent empty lines read from fIn and returns the next not empty line.
@ kReadingInput
Reading the name of the .root input files.
@ kReadingOutput
Reading the name of the output file.
@ kReadingTreeName
Reading the name of the tree.
@ kReadingExpressions
Reading the expressions.
std::string HandleExpressionConfig(const std::string &line)
Handle the expression lines of the input file in order to pass the elements to the members of the obj...
std::ifstream fIn
Stream for the input file.
TSimpleAnalysis(const std::string &file)
bool Run()
Execute all the TChain::Draw() as configured and stores the output histograms.
std::string fOutputFile
Output file in which are stored the histograms.
bool HandleInputFileNameConfig(const std::string &line)
Returns false if not a tree name, otherwise sets the name of the tree.
bool SetTreeName()
Disambiguate tree name from first input file and set up fTreeName if it is empty.
bool Configure()
This function has the aim of setting the arguments read from the input file.
std::map< std::string, std::pair< std::string, std::string > > fHists
std::string fTreeName
Name of the input tree.
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition: TString.cxx:2286
TLine * line
void EnableThreadSafety()
Enables the global mutex to make ROOT thread safe/aware.
Definition: TROOT.cxx:545