Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
TSimpleAnalysis.cxx
Go to the documentation of this file.
1// @(#)root/treeplayer:$Id$
2// Author: Luca Giommi 22/08/16
3
4/*************************************************************************
5 * Copyright (C) 1995-2016, Rene Brun and Fons Rademakers. *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12#include "TSimpleAnalysis.h"
13
14#include "TFile.h"
15#include "TChain.h"
16#include "TChainElement.h"
17#include "TH1.h"
18#include "TError.h"
19#include "TKey.h"
20#ifdef R__USE_IMT
22#endif
23#include "TROOT.h"
24
25#include <string>
26#include <vector>
27#include <map>
28
29/** \class TSimpleAnalysis
30
31A TSimpleAnalysis object creates histograms from a TChain. These histograms
32are stored to an output file. The histogrammed (TTreeFormula) expressions,
33their cuts, the input and output files are configured through a simple config
34file that allows comments starting with '#'.
35Here is an example of a configuration file:
36
37~~~ {.cpp}
38# This is an example of configuration file
39file_output.root #the output file in which histograms are stored
40
41# The next line has the name of the tree of the input data. It is
42# optional if there is exactly one tree in the first input file.
43ntuple #name of the input tree
44
45# The lines of the next block correspond to .root input files that
46# contain the tree
47hsimple1.root #first .root input file
48hsimple2.root #second .root input file
49
50# The next block is composed by lines that allow to configure the
51# histograms. They have the following syntax:
52# NAME = EXPRESSION if CUT
53# which corresponds to chain->Draw("EXPRESSION >> NAME", "CUT")
54# i.e. it will create a histogram called NAME and store it in
55# file_output.root.
56# "if CUT" is optional
57hpx=px if px<-3 #first histogram
58hpxpy=px:py #second histogram
59
60# End of the configuration file
61~~~
62
63The script rootdrawtree makes it possible to use this class directly from
64the command line.
65*/
66
////////////////////////////////////////////////////////////////////////////////
/// Delete comments, leading and trailing white spaces in a string.
///
/// Everything from the first '#' up to the end of the string is treated as a
/// comment and removed. Blanks, tabs, carriage returns and line feeds are then
/// stripped from both ends, so that configuration files written with CRLF line
/// endings do not leave a stray carriage return attached to a file or tree
/// name. A string holding nothing but white space and/or a comment ends up
/// empty.
///
/// \param[in,out] line - line read from the input file; modified in place

static void DeleteCommentsAndSpaces(std::string& line)
{
   // Characters considered as white space at either end of the line
   static const char* const kWhiteSpace = " \t\r\n";

   // Delete comments
   const std::size_t comment = line.find('#');
   if (comment != std::string::npos)
      line.erase(comment);

   // Nothing but white space left: the line is empty
   const std::size_t firstNotSpace = line.find_first_not_of(kWhiteSpace);
   if (firstNotSpace == std::string::npos) {
      line.clear();
      return;
   }

   // Delete trailing spaces first so that firstNotSpace stays valid,
   // then delete leading spaces
   const std::size_t lastNotSpace = line.find_last_not_of(kWhiteSpace);
   line.erase(lastNotSpace + 1);
   line.erase(0, firstNotSpace);
}
90
91////////////////////////////////////////////////////////////////////////////////
92/// Handle the expression lines of the input file in order to pass the
93/// elements to the members of the object.
94///
95/// param[in] line - TTreeFormula expression, either read form the configuration
96/// file or passed as expression to the constructor
97
98std::string TSimpleAnalysis::HandleExpressionConfig(const std::string& line)
99{
100 static const std::string kCutIntr = " if ";
101
102 std::size_t equal = line.find("=");
103 if (equal == std::string::npos)
104 return "Error: missing '='";
105
106 // Set the histName value
107 std::string histName = line.substr(0, equal);
108 DeleteCommentsAndSpaces(histName);
109 if (histName.empty())
110 return "Error: no histName found";
111
112 //Set the histExpression value
113 std::size_t cutPos = line.find(kCutIntr, equal);
114 std::string histExpression;
115 if (cutPos == std::string::npos)
116 histExpression = line.substr(equal + 1);
117 else
118 histExpression = line.substr(equal + 1, cutPos - equal - 1);
119 DeleteCommentsAndSpaces(histExpression);
120 if (histExpression.empty())
121 return "Error: no expression found";
122
123 // Set the histCut value
124 std::string histCut;
125 if (cutPos != std::string::npos) {
126 histCut = line.substr(cutPos + kCutIntr.size());
128 if (histCut.empty())
129 return "Error: missing cut expression after 'if'";
130 }
131 else
132 histCut = "";
133
134 // Set the map that contains the histName, histExpressions and histCut values
135 auto check = fHists.insert(std::make_pair((const std::string&)histName,
136 std::make_pair(histExpression, histCut)));
137
138 // Check if there are histograms with the same name
139 if (!check.second)
140 return "Duplicate histogram name";
141 return "";
142}
143
144////////////////////////////////////////////////////////////////////////////////
145/// Constructor for the case of command line parsing arguments. It sets the members
146/// of the object.
147///
148/// \param[in] output - name of the output file
149/// \param[in] inputFiles - name of the input .root files
150/// \param[in] expressions - what is shown in the histograms
151/// \param[in] treeName - name of the tree
152/// \throws std::runtime_error in case of ill-formed expressions
153
155 const std::vector<std::string>& inputFiles,
156 const std::vector<std::string>& expressions,
157 const std::string& treeName = ""):
158 fInputFiles(inputFiles), fOutputFile(output), fTreeName(treeName)
159{
160 for (const std::string& expr: expressions) {
161 std::string errMessage = HandleExpressionConfig(expr);
162 if (!errMessage.empty())
163 throw std::runtime_error(errMessage + " in " + expr);
164 }
165}
166
167////////////////////////////////////////////////////////////////////////////////
168/// Extract the name of the tree from the first input file when the tree name
169/// isn't in the configuration file. Returns the name of the tree.
170
171static std::string ExtractTreeName(std::string& firstInputFile)
172{
173 std::string treeName = "";
174 std::unique_ptr<TFile> inputFile{TFile::Open(firstInputFile.c_str())};
175
176 // Loop over all the keys inside the first input file
177 for (TObject* keyAsObj : *inputFile->GetListOfKeys()) {
178 TKey* key = static_cast<TKey*>(keyAsObj);
179 TClass* clObj = TClass::GetClass(key->GetClassName());
180 if (!clObj)
181 continue;
182 // If the key is related to and object that inherits from TTree::Class we
183 // set treeName with the name of this key if treeName is empty, otherwise
184 // error occurs
185 if (clObj->InheritsFrom(TTree::Class())) {
186 if (treeName.empty())
187 treeName = key->GetName();
188 else {
189 ::Error("TSimpleAnalysis::Analyze", "Multiple trees inside %s", firstInputFile.c_str());
190 return "";
191 }
192 }
193 }
194 // If treeName is yet empty, error occurs
195 if (treeName.empty()) {
196 ::Error("TSimpleAnalysis::Analyze", "No tree inside %s", firstInputFile.c_str());
197 return "";
198 }
199 return treeName;
200}
201
202////////////////////////////////////////////////////////////////////////////////
203/// Returns true if there are no errors in TChain::LoadTree()
204
205static bool CheckChainLoadResult(TChain* chain)
206{
207 // Possible return values of TChain::LoadTree()
208 static const char* errors[] {
209 "all good", // 0
210 "empty chain", // -1
211 "invalid entry number", // -2
212 "cannot open the file", // -3
213 "missing tree", // -4
214 "internal error" // -5
215 };
216
217 bool ret = true;
218 TObjArray *fileElements = chain->GetListOfFiles();
219 TIter next(fileElements);
220 while (TChainElement* chEl = (TChainElement*)next()) {
221 if (chEl->GetLoadResult() < 0) {
222 ::Error("TSimpleAnalysis::Run", "Load failure in file %s: %s",
223 chEl->GetTitle(), errors[-(chEl->GetLoadResult())]);
224 ret = false;
225 }
226 }
227 return ret;
228}
229
230////////////////////////////////////////////////////////////////////////////////
231/// Disambiguate tree name from first input file and set up fTreeName if it is
232/// empty
233
235{
236 // Disambiguate tree name from first input file:
237 // just try to open it, if that works it's an input file.
238 if (!fTreeName.empty()) {
239 // Silence possible error message from TFile constructor if this is a tree name.
240 int oldLevel = gErrorIgnoreLevel;
242 if (TFile* probe = TFile::Open(fTreeName.c_str())) {
243 if (!probe->IsZombie()) {
244 fInputFiles.insert(fInputFiles.begin(), fTreeName);
245 fTreeName.clear();
246 }
247 delete probe;
248 }
249 gErrorIgnoreLevel = oldLevel;
250 }
251 // If fTreeName is empty we try to find the name of the tree through reading
252 // of the first input file
253 if (fTreeName.empty() && !fInputFiles.empty())
255 if (fTreeName.empty()) // No tree name found
256 return false;
257 return true;
258}
259
260////////////////////////////////////////////////////////////////////////////////
261/// Execute all the TChain::Draw() as configured and stores the output histograms.
262/// Returns true if the analysis succeeds.
263
265{
266 if (!SetTreeName())
267 return false;
268
269 // Create the output file and check if it fails
270 TFile ofile(fOutputFile.c_str(), "RECREATE");
271 if (ofile.IsZombie()) {
272 ::Error("TSimpleAnalysis::Run", "Impossible to create %s", fOutputFile.c_str());
273 return false;
274 }
275
276 // Store the histograms into a vector
277 auto generateHisto = [&](const std::pair<TChain*, TDirectory*>& job) {
278 TChain* chain = job.first;
279 TDirectory* taskDir = job.second;
280 taskDir->cd();
281 std::vector<TH1F *> vPtrHisto(fHists.size());
282 // Index for a correct set up of vPtrHisto
283 int i = 0;
284
285 // Loop over all the histograms
286 for (const auto &histo : fHists) {
287 const std::string& expr = histo.second.first;
288 const std::string& histoName = histo.first;
289 const std::string& cut = histo.second.second;
290
291 chain->Draw((expr + ">>" + histoName).c_str(), cut.c_str(), "goff");
292 TH1F *ptrHisto = (TH1F*)taskDir->Get(histoName.c_str());
293
294 // Check if there are errors inside the chain
295 if (!CheckChainLoadResult(chain))
296 return std::vector<TH1F *>();
297
298 vPtrHisto[i] = ptrHisto;
299 ++i;
300 }
301 return vPtrHisto;
302 };
303
304#if 0
305 // The MT version is currently disabled because reading emulated objects
306 // triggers a lock for every object read. This in turn increases the run
307 // time way beyond the serial case.
308
309
311 ROOT::TThreadExecutor pool(8);
312
313 // Do the chain of the fInputFiles
314 std::vector<std::pair<TChain*, TDirectory*>> vChains;
315 for (size_t i = 0; i < fInputFiles.size(); ++i){
316 const std::string& inputfile = fInputFiles[i];
317 TChain *ch;
318 ch = new TChain(fTreeName.c_str());
319 ch->Add(inputfile.c_str());
320
321 // Create task-specific TDirectory, so avoid parallel tasks to interfere
322 // in gDirectory with histogram registration.
323 TDirectory* taskDir = gROOT->mkdir(TString::Format("TSimpleAnalysis_taskDir_%d", (int)i));
324
325 vChains.emplace_back(std::make_pair(ch, taskDir));
326 }
327
328 auto vFileswHists = pool.Map(generateHisto, vChains);
329
330 // If a file does not exist, one of the vFileswHists
331 // will be a vector of length 0. Detect that.
332 for (auto&& histsOfJob: vFileswHists) {
333 if (histsOfJob.empty())
334 return false;
335 }
336
337 // Merge the results. Initialize the result with the first task's results,
338 // then add the other tasks.
339 std::vector<TH1F *> vPtrHisto{vFileswHists[0]};
340 ofile.cd();
341 for (unsigned j = 0; j < fHists.size(); j++) {
342 for (unsigned i = 1; i < vFileswHists.size(); i++) {
343 if (!vFileswHists[i][j]) {
344 // ignore that sum histogram:
345 delete vPtrHisto[j];
346 vPtrHisto[j] = nullptr;
347 continue;
348 }
349 if (vPtrHisto[j])
350 vPtrHisto[j]->Add(vFileswHists[i][j]);
351 }
352 if (vPtrHisto[j])
353 vPtrHisto[j]->Write();
354 }
355 return true;
356
357#else
358
359 // Do the chain of the fInputFiles
360 TChain* chain = new TChain(fTreeName.c_str());
361 for (const std::string& inputfile: fInputFiles)
362 chain->Add(inputfile.c_str());
363
364 // Generate histograms
365 auto vHisto = generateHisto({chain, gDirectory});
366 if (vHisto.empty())
367 return false;
368 ofile.cd();
369 // Store the histograms
370 for (auto histo: vHisto) {
371 if (histo)
372 histo->Write();
373 }
374 return true;
375
376#endif
377}
378
379////////////////////////////////////////////////////////////////////////////////
380/// Returns false if not a tree name, otherwise sets the name of the tree.
381///
382/// param[in] line - line read from the input file
383
385{
386 if (line.find("=") == std::string::npos) {
387 fInputFiles.push_back(line);
388 return true;
389 }
390 return false; // It's an expression
391}
392
393////////////////////////////////////////////////////////////////////////////////
394/// Skip subsequent empty lines read from fIn and returns the next not empty line.
395///
396/// param[in] numbLine - number of the input file line
397
398std::string TSimpleAnalysis::GetLine(int& numbLine)
399{
400 std::string notEmptyLine;
401
402 do {
403 getline(fIn, notEmptyLine);
404 DeleteCommentsAndSpaces(notEmptyLine);
405 numbLine++;
406 } while (fIn && notEmptyLine.empty());
407
408 return notEmptyLine;
409}
410
411////////////////////////////////////////////////////////////////////////////////
412/// This function has the aim of setting the arguments read from the input file.
413
415{
416 int readingSection = kReadingOutput;
417 std::string line;
418 int numbLine = 0;
419
420 // Error if the input file does not exist
421 fIn.open(fConfigFile);
422 if (!fIn) {
423 ::Error("TSimpleAnalysis::Configure", "File %s not found", fConfigFile.c_str());
424 return false;
425 }
426
427 while (!fIn.eof()) {
428 line = GetLine(numbLine);
429 if (line.empty()) // It can happen if fIn.eof()
430 continue;
431 std::string errMessage;
432
433 switch (readingSection) {
434
435 // Set the name of the output file
436 case kReadingOutput:
438 readingSection++;
439 break;
440
441 // Set the name of the tree
442 case kReadingTreeName:
443 fTreeName = line;
444 readingSection++;
445 break;
446
447 // Set the input files
448 case kReadingInput:
450 // Not an input file name; try to parse as an expression
451 errMessage = HandleExpressionConfig(line);
452 readingSection = kReadingExpressions;
453 }
454 break;
455
456 // Set the expressions
458 errMessage = HandleExpressionConfig(line);
459 break;
460 }
461
462 // Report any errors if occur during the configuration proceedings
463 if (!errMessage.empty()) {
464 ::Error("TSimpleAnalysis::Configure", "%s in %s:%d", errMessage.c_str(),
465 fConfigFile.c_str(), numbLine);
466 return false;
467 }
468 } // while (!fIn.eof())
469 return true;
470}
471
472////////////////////////////////////////////////////////////////////////////////
473/// Function that allows to create the TSimpleAnalysis object and execute its
474/// Configure and Analyze functions.
475///
476/// param[in] configurationFile - name of the input file used to create the TSimpleAnalysis object
477
478bool RunSimpleAnalysis (const char* configurationFile) {
479 TSimpleAnalysis obj(configurationFile);
480 if (!obj.Configure())
481 return false;
482 if (!obj.Run())
483 return false;
484 return true; // Return true only if Configure() and Run() functions were performed correctly
485}
#define gDirectory
Definition TDirectory.h:386
const Int_t kFatal
Definition TError.cxx:41
void Error(const char *location, const char *msgfmt,...)
Use this function in case an error occurred.
Definition TError.cxx:197
Int_t gErrorIgnoreLevel
Definition TError.cxx:43
#define gROOT
Definition TROOT.h:405
static void DeleteCommentsAndSpaces(std::string &line)
Delete comments, leading and trailing white spaces in a string.
static bool CheckChainLoadResult(TChain *chain)
Returns true if there are no errors in TChain::LoadTree()
bool RunSimpleAnalysis(const char *configurationFile)
Function that allows to create the TSimpleAnalysis object and execute its Configure and Analyze funct...
static std::string ExtractTreeName(std::string &firstInputFile)
Extract the name of the tree from the first input file when the tree name isn't in the configuration ...
This class provides a simple interface to execute the same task multiple times in parallel threads,...
auto Map(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> std::vector< InvokeResult_t< F > >
Execute a function nTimes in parallel, dividing the execution in nChunks and providing a result per c...
A TChainElement describes a component of a TChain.
A chain is a collection of files containing TTree objects.
Definition TChain.h:33
virtual Long64_t Draw(const char *varexp, const TCut &selection, Option_t *option="", Long64_t nentries=kMaxEntries, Long64_t firstentry=0)
Draw expression varexp for selected entries.
Definition TChain.cxx:812
TObjArray * GetListOfFiles() const
Definition TChain.h:111
virtual Int_t Add(TChain *chain)
Add all files referenced by the passed chain to this chain.
Definition TChain.cxx:218
TClass instances represent classes, structs and namespaces in the ROOT type system.
Definition TClass.h:81
Bool_t InheritsFrom(const char *cl) const override
Return kTRUE if this class inherits from a class with name "classname".
Definition TClass.cxx:4874
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition TClass.cxx:2968
Bool_t cd() override
Change current directory to "this" directory.
Describe directory structure in memory.
Definition TDirectory.h:45
virtual TObject * Get(const char *namecycle)
Return pointer to object identified by namecycle.
virtual Bool_t cd()
Change current directory to "this" directory.
virtual TDirectory * mkdir(const char *name, const char *title="", Bool_t returnExistingDirectory=kFALSE)
Create a sub-directory "a" or a hierarchy of sub-directories "a/b/c/...".
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition TFile.h:51
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition TFile.cxx:4053
1-D histogram with a float per channel (see TH1 documentation)}
Definition TH1.h:577
Book space in a file, create I/O buffers, to fill them, (un)compress them.
Definition TKey.h:28
virtual const char * GetClassName() const
Definition TKey.h:75
const char * GetName() const override
Returns name of object.
Definition TNamed.h:47
An array of TObjects.
Definition TObjArray.h:31
Mother of all ROOT objects.
Definition TObject.h:41
R__ALWAYS_INLINE Bool_t IsZombie() const
Definition TObject.h:153
A TSimpleAnalysis object creates histograms from a TChain.
std::string fConfigFile
Name of the configuration file.
std::vector< std::string > fInputFiles
.root input files
std::string GetLine(int &numbLine)
Skip subsequent empty lines read from fIn and returns the next not empty line.
@ kReadingInput
Reading the name of the .root input files.
@ kReadingOutput
Reading the name of the output file.
@ kReadingTreeName
Reading the name of the tree.
@ kReadingExpressions
Reading the expressions.
std::string HandleExpressionConfig(const std::string &line)
Handle the expression lines of the input file in order to pass the elements to the members of the obj...
std::ifstream fIn
Stream for the input file.
TSimpleAnalysis(const std::string &file)
bool Run()
Execute all the TChain::Draw() as configured and stores the output histograms.
std::string fOutputFile
Output file in which are stored the histograms.
bool HandleInputFileNameConfig(const std::string &line)
Returns false if not a tree name, otherwise sets the name of the tree.
bool SetTreeName()
Disambiguate tree name from first input file and set up fTreeName if it is empty.
bool Configure()
This function has the aim of setting the arguments read from the input file.
std::map< std::string, std::pair< std::string, std::string > > fHists
The map contains in the first part the names of the histograms written in the output file,...
std::string fTreeName
Name of the input tree.
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition TString.cxx:2356
static TClass * Class()
TLine * line
void EnableThreadSafety()
Enable support for multi-threading within the ROOT code in particular, enables the global mutex to ma...
Definition TROOT.cxx:493
static void output()