Logo ROOT   6.12/07
Reference Guide
TSimpleAnalysis.cxx
Go to the documentation of this file.
1 // @(#)root/treeplayer:$Id$
2 // Author: Luca Giommi 22/08/16
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2016, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 #include "TSimpleAnalysis.h"
13 
14 #include "TFile.h"
15 #include "TChain.h"
16 #include "TChainElement.h"
17 #include "TH1.h"
18 #include "TError.h"
19 #include "TKey.h"
20 #ifdef R__USE_IMT
21 #include "ROOT/TThreadExecutor.hxx"
22 #endif
23 #include "TROOT.h"
24 
25 #include <string>
26 #include <fstream>
27 #include <vector>
28 #include <map>
29 #include <iostream>
30 
31 /** \class TSimpleAnalysis
32 
33 A TSimpleAnalysis object creates histograms from a TChain. These histograms
34 are stored to an output file. The histogrammed (TTreeFormula) expressions,
35 their cuts, the input and output files are configured through a simple config
36 file that allows comments starting with '#'.
37 Here an example of configuration file:
38 
39 ~~~ {.cpp}
40 # This is an example of configuration file
41 file_output.root #the output file in which histograms are stored
42 
43 # The next line has the name of the tree of the input data. It is
44 # optional if there is exactly one tree in the first input file.
45 ntuple #name of the input tree
46 
47 # The lines of the next block correspond to .root input files that
48 # contain the tree
49 hsimple1.root #first .root input file
50 hsimple2.root #second .root input file
51 
52 # The next block is composed by lines that allow to configure the
53 # histograms. They have the following syntax:
54 # NAME = EXPRESSION if CUT
55 # which corresponds to chain->Draw("EXPRESSION >> NAME", "CUT")
56 # i.e. it will create a histogram called NAME and store it in
57 # file_output.root.
58 # "if CUT" is optional
59 hpx=px if px<-3 #first histogram
60 hpxpy=px:py #second histogram
61 
62 # End of the configuration file
63 ~~~
64 
65 It is possible to use the script rootdrawtree that allows to use the class
66 just in command line through the bash shell.
67 */
68 
69 ////////////////////////////////////////////////////////////////////////////////
70 /// Delete comments, leading and trailing white spaces in a string.
71 ///
72 /// param[in] line - line read from the input file
73 
74 static void DeleteCommentsAndSpaces(std::string& line)
75 {
76  // Delete comments
77  std::size_t comment = line.find('#');
78  line = line.substr(0, comment);
79  // Delete leading spaces
80  std::size_t firstNotSpace = line.find_first_not_of(" \t");
81  if (firstNotSpace != std::string::npos)
82  line = line.substr(firstNotSpace);
83  else {
84  line.clear();
85  return;
86  }
87  // Delete trailing spaces
88  std::size_t lastNotSpace = line.find_last_not_of(" \t");
89  if (lastNotSpace != std::string::npos)
90  line = line.substr(0, lastNotSpace + 1);
91 }
92 
93 ////////////////////////////////////////////////////////////////////////////////
94 /// Handle the expression lines of the input file in order to pass the
95 /// elements to the members of the object.
96 ///
97 /// param[in] line - TTreeFormula expression, either read form the configuration
98 /// file or passed as expression to the constructor
99 
100 std::string TSimpleAnalysis::HandleExpressionConfig(const std::string& line)
101 {
102  static const std::string kCutIntr = " if ";
103 
104  std::size_t equal = line.find("=");
105  if (equal == std::string::npos)
106  return "Error: missing '='";
107 
108  // Set the histName value
109  std::string histName = line.substr(0, equal);
110  DeleteCommentsAndSpaces(histName);
111  if (histName.empty())
112  return "Error: no histName found";
113 
114  //Set the histExpression value
115  std::size_t cutPos = line.find(kCutIntr, equal);
116  std::string histExpression;
117  if (cutPos == std::string::npos)
118  histExpression = line.substr(equal + 1);
119  else
120  histExpression = line.substr(equal + 1, cutPos - equal - 1);
121  DeleteCommentsAndSpaces(histExpression);
122  if (histExpression.empty())
123  return "Error: no expression found";
124 
125  // Set the histCut value
126  std::string histCut;
127  if (cutPos != std::string::npos) {
128  histCut = line.substr(cutPos + kCutIntr.size());
129  DeleteCommentsAndSpaces(histCut);
130  if (histCut.empty())
131  return "Error: missing cut expression after 'if'";
132  }
133  else
134  histCut = "";
135 
136  // Set the map that contains the histName, histExpressions and histCut values
137  auto check = fHists.insert(std::make_pair((const std::string&)histName,
138  std::make_pair(histExpression, histCut)));
139 
140  // Check if there are histograms with the same name
141  if (!check.second)
142  return "Duplicate histogram name";
143  return "";
144 }
145 
146 ////////////////////////////////////////////////////////////////////////////////
147 /// Constructor for the case of command line parsing arguments. It sets the members
148 /// of the object.
149 ///
150 /// \param[in] output - name of the output file
151 /// \param[in] inputFiles - name of the input .root files
152 /// \param[in] expressions - what is shown in the histograms
153 /// \param[in] treeName - name of the tree
154 /// \throws std::runtime_error in case of ill-formed expressions
155 
157  const std::vector<std::string>& inputFiles,
158  const std::vector<std::string>& expressions,
159  const std::string& treeName = ""):
160  fInputFiles(inputFiles), fOutputFile(output), fTreeName(treeName)
161 {
162  for (const std::string& expr: expressions) {
163  std::string errMessage = HandleExpressionConfig(expr);
164  if (!errMessage.empty())
165  throw std::runtime_error(errMessage + " in " + expr);
166  }
167 }
168 
169 ////////////////////////////////////////////////////////////////////////////////
170 /// Extract the name of the tree from the first input file when the tree name
171 /// isn't in the configuration file. Returns the name of the tree.
172 
173 static std::string ExtractTreeName(std::string& firstInputFile)
174 {
175  std::string treeName = "";
176  std::unique_ptr<TFile> inputFile{TFile::Open(firstInputFile.c_str())};
177 
178  // Loop over all the keys inside the first input file
179  for (TObject* keyAsObj : *inputFile->GetListOfKeys()) {
180  TKey* key = static_cast<TKey*>(keyAsObj);
181  TClass* clObj = TClass::GetClass(key->GetClassName());
182  if (!clObj)
183  continue;
184  // If the key is related to and object that inherits from TTree::Class we
185  // set treeName with the name of this key if treeName is empty, otherwise
186  // error occurs
187  if (clObj->InheritsFrom(TTree::Class())) {
188  if (treeName.empty())
189  treeName = key->GetName();
190  else {
191  ::Error("TSimpleAnalysis::Analyze", "Multiple trees inside %s", firstInputFile.c_str());
192  return "";
193  }
194  }
195  }
196  // If treeName is yet empty, error occurs
197  if (treeName.empty()) {
198  ::Error("TSimpleAnalysis::Analyze", "No tree inside %s", firstInputFile.c_str());
199  return "";
200  }
201  return treeName;
202 }
203 
204 ////////////////////////////////////////////////////////////////////////////////
205 /// Returns true if there are no errors in TChain::LoadTree()
206 
207 static bool CheckChainLoadResult(TChain* chain)
208 {
209  // Possible return values of TChain::LoadTree()
210  static const char* errors[] {
211  "all good", // 0
212  "empty chain", // -1
213  "invalid entry number", // -2
214  "cannot open the file", // -3
215  "missing tree", // -4
216  "internal error" // -5
217  };
218 
219  bool ret = true;
220  TObjArray *fileElements = chain->GetListOfFiles();
221  TIter next(fileElements);
222  while (TChainElement* chEl = (TChainElement*)next()) {
223  if (chEl->GetLoadResult() < 0) {
224  ::Error("TSimpleAnalysis::Run", "Load failure in file %s: %s",
225  chEl->GetTitle(), errors[-(chEl->GetLoadResult())]);
226  ret = false;
227  }
228  }
229  return ret;
230 }
231 
232 ////////////////////////////////////////////////////////////////////////////////
233 /// Disambiguate tree name from first input file and set up fTreeName if it is
234 /// empty
235 
237 {
238  // Disambiguate tree name from first input file:
239  // just try to open it, if that works it's an input file.
240  if (!fTreeName.empty()) {
241  // Silence possible error message from TFile constructor if this is a tree name.
242  int oldLevel = gErrorIgnoreLevel;
244  if (TFile* probe = TFile::Open(fTreeName.c_str())) {
245  if (!probe->IsZombie()) {
246  fInputFiles.insert(fInputFiles.begin(), fTreeName);
247  fTreeName.clear();
248  }
249  delete probe;
250  }
251  gErrorIgnoreLevel = oldLevel;
252  }
253  // If fTreeName is empty we try to find the name of the tree through reading
254  // of the first input file
255  if (fTreeName.empty())
257  if (fTreeName.empty()) // No tree name found
258  return false;
259  return true;
260 }
261 
262 ////////////////////////////////////////////////////////////////////////////////
263 /// Execute all the TChain::Draw() as configured and stores the output histograms.
264 /// Returns true if the analysis succeeds.
265 
267 {
268  if (!SetTreeName())
269  return false;
270 
271  // Create the output file and check if it fails
272  TFile ofile(fOutputFile.c_str(), "RECREATE");
273  if (ofile.IsZombie()) {
274  ::Error("TSimpleAnalysis::Run", "Impossible to create %s", fOutputFile.c_str());
275  return false;
276  }
277 
278  // Store the histograms into a vector
279  auto generateHisto = [&](const std::pair<TChain*, TDirectory*>& job) {
280  TChain* chain = job.first;
281  TDirectory* taskDir = job.second;
282  taskDir->cd();
283  std::vector<TH1F *> vPtrHisto(fHists.size());
284  // Index for a correct set up of vPtrHisto
285  int i = 0;
286 
287  // Loop over all the histograms
288  for (const auto &histo : fHists) {
289  const std::string& expr = histo.second.first;
290  const std::string& histoName = histo.first;
291  const std::string& cut = histo.second.second;
292 
293  chain->Draw((expr + ">>" + histoName).c_str(), cut.c_str(), "goff");
294  TH1F *ptrHisto = (TH1F*)taskDir->Get(histoName.c_str());
295 
296  // Check if there are errors inside the chain
297  if (!CheckChainLoadResult(chain))
298  return std::vector<TH1F *>();
299 
300  vPtrHisto[i] = ptrHisto;
301  ++i;
302  }
303  return vPtrHisto;
304  };
305 
306 #if 0
307  // The MT version is currently disabled because reading emulated objects
308  // triggers a lock for every object read. This in turn increases the run
309  // time way beyond the serial case.
310 
311 
313  ROOT::TThreadExecutor pool(8);
314 
315  // Do the chain of the fInputFiles
316  std::vector<std::pair<TChain*, TDirectory*>> vChains;
317  for (size_t i = 0; i < fInputFiles.size(); ++i){
318  const std::string& inputfile = fInputFiles[i];
319  TChain *ch;
320  ch = new TChain(fTreeName.c_str());
321  ch->Add(inputfile.c_str());
322 
323  // Create task-specific TDirectory, so avoid parallel tasks to interfere
324  // in gDirectory with histogram registration.
325  TDirectory* taskDir = gROOT->mkdir(TString::Format("TSimpleAnalysis_taskDir_%d", (int)i));
326 
327  vChains.emplace_back(std::make_pair(ch, taskDir));
328  }
329 
330  auto vFileswHists = pool.Map(generateHisto, vChains);
331 
332  // If a file does not exist, one of the vFileswHists
333  // will be a vector of length 0. Detect that.
334  for (auto&& histsOfJob: vFileswHists) {
335  if (histsOfJob.empty())
336  return false;
337  }
338 
339  // Merge the results. Initialize the result with the first task's results,
340  // then add the other tasks.
341  std::vector<TH1F *> vPtrHisto{vFileswHists[0]};
342  ofile.cd();
343  for (unsigned j = 0; j < fHists.size(); j++) {
344  for (unsigned i = 1; i < vFileswHists.size(); i++) {
345  if (!vFileswHists[i][j]) {
346  // ignore that sum histogram:
347  delete vPtrHisto[j];
348  vPtrHisto[j] = nullptr;
349  continue;
350  }
351  if (vPtrHisto[j])
352  vPtrHisto[j]->Add(vFileswHists[i][j]);
353  }
354  if (vPtrHisto[j])
355  vPtrHisto[j]->Write();
356  }
357  return true;
358 
359 #else
360 
361  // Do the chain of the fInputFiles
362  TChain* chain = new TChain(fTreeName.c_str());
363  for (const std::string& inputfile: fInputFiles)
364  chain->Add(inputfile.c_str());
365 
366  // Generate histograms
367  auto vHisto = generateHisto({chain, gDirectory});
368  if (vHisto.empty())
369  return false;
370  ofile.cd();
371  // Store the histograms
372  for (auto histo: vHisto) {
373  if (histo)
374  histo->Write();
375  }
376  return true;
377 
378 #endif
379 }
380 
381 ////////////////////////////////////////////////////////////////////////////////
382 /// Returns false if not a tree name, otherwise sets the name of the tree.
383 ///
384 /// param[in] line - line read from the input file
385 
387 {
388  if (line.find("=") == std::string::npos) {
389  fInputFiles.push_back(line);
390  return true;
391  }
392  return false; // It's an expression
393 }
394 
395 ////////////////////////////////////////////////////////////////////////////////
396 /// Skip subsequent empty lines read from fIn and returns the next not empty line.
397 ///
398 /// param[in] numbLine - number of the input file line
399 
400 std::string TSimpleAnalysis::GetLine(int& numbLine)
401 {
402  std::string notEmptyLine;
403 
404  do {
405  getline(fIn, notEmptyLine);
406  DeleteCommentsAndSpaces(notEmptyLine);
407  numbLine++;
408  } while (fIn && notEmptyLine.empty());
409 
410  return notEmptyLine;
411 }
412 
413 ////////////////////////////////////////////////////////////////////////////////
414 /// This function has the aim of setting the arguments read from the input file.
415 
417 {
418  int readingSection = kReadingOutput;
419  std::string line;
420  int numbLine = 0;
421 
422  // Error if the input file does not exist
423  fIn.open(fConfigFile);
424  if (!fIn) {
425  ::Error("TSimpleAnalysis::Configure", "File %s not found", fConfigFile.c_str());
426  return false;
427  }
428 
429  while (!fIn.eof()) {
430  line = GetLine(numbLine);
431  if (line.empty()) // It can happen if fIn.eof()
432  continue;
433  std::string errMessage;
434 
435  switch (readingSection) {
436 
437  // Set the name of the output file
438  case kReadingOutput:
439  fOutputFile = line;
440  readingSection++;
441  break;
442 
443  // Set the name of the tree
444  case kReadingTreeName:
445  fTreeName = line;
446  readingSection++;
447  break;
448 
449  // Set the input files
450  case kReadingInput:
451  if (!HandleInputFileNameConfig(line)) {
452  // Not an input file name; try to parse as an expression
453  errMessage = HandleExpressionConfig(line);
454  readingSection = kReadingExpressions;
455  }
456  break;
457 
458  // Set the expressions
459  case kReadingExpressions:
460  errMessage = HandleExpressionConfig(line);
461  break;
462  }
463 
464  // Report any errors if occur during the configuration proceedings
465  if (!errMessage.empty()) {
466  ::Error("TSimpleAnalysis::Configure", "%s in %s:%d", errMessage.c_str(),
467  fConfigFile.c_str(), numbLine);
468  return false;
469  }
470  } // while (!fIn.eof())
471  return true;
472 }
473 
474 ////////////////////////////////////////////////////////////////////////////////
475 /// Function that allows to create the TSimpleAnalysis object and execute its
476 /// Configure and Analyze functions.
477 ///
478 /// param[in] configurationFile - name of the input file used to create the TSimpleAnalysis object
479 
480 bool RunSimpleAnalysis (const char* configurationFile) {
481  TSimpleAnalysis obj(configurationFile);
482  if (!obj.Configure())
483  return false;
484  if (!obj.Run())
485  return false;
486  return true; // Return true only if Configure() and Run() functions were performed correctly
487 }
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
std::string fConfigFile
Name of the configuration file.
std::map< std::string, std::pair< std::string, std::string > > fHists
An array of TObjects.
Definition: TObjArray.h:37
bool Run()
Execute all the TChain::Draw() as configured and stores the output histograms.
R__EXTERN Int_t gErrorIgnoreLevel
Definition: TError.h:105
A TSimpleAnalysis object creates histograms from a TChain.
virtual TObject * Get(const char *namecycle)
Return pointer to object identified by namecycle.
Definition: TDirectory.cxx:805
TLine * line
std::string HandleExpressionConfig(const std::string &line)
Handle the expression lines of the input file in order to pass the elements to the members of the obj...
bool SetTreeName()
Disambiguate tree name from first input file and set up fTreeName if it is empty. ...
bool RunSimpleAnalysis(const char *configurationFile)
Function that allows to create the TSimpleAnalysis object and execute its Configure and Analyze funct...
virtual const char * GetClassName() const
Definition: TKey.h:71
static bool CheckChainLoadResult(TChain *chain)
Returns true if there are no errors in TChain::LoadTree()
bool Configure()
This function has the aim of setting the arguments read from the input file.
Reading the name of the .root input files.
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:46
#define gROOT
Definition: TROOT.h:402
1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:567
A TChainElement describes a component of a TChain.
Definition: TChainElement.h:28
std::string GetLine(int &numbLine)
Skip subsequent empty lines read from fIn and returns the next not empty line.
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=1, Int_t netopt=0)
Create / open a file.
Definition: TFile.cxx:3950
TObjArray * GetListOfFiles() const
Definition: TChain.h:107
virtual Long64_t Draw(const char *varexp, const TCut &selection, Option_t *option="", Long64_t nentries=kMaxEntries, Long64_t firstentry=0)
Draw expression varexp for selected entries.
Definition: TChain.cxx:765
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString...
Definition: TString.cxx:2365
void Class()
Definition: Class.C:29
const Int_t kFatal
Definition: TError.h:42
std::string fTreeName
Name of the input tree.
Book space in a file, create I/O buffers, to fill them, (un)compress them.
Definition: TKey.h:24
void Error(const char *location, const char *msgfmt,...)
This class provides a simple interface to execute the same task multiple times in parallel...
std::ifstream fIn
Stream for the input file.
The ROOT global object gROOT contains a list of all defined classes.
Definition: TClass.h:75
Bool_t InheritsFrom(const char *cl) const
Return kTRUE if this class inherits from a class with name "classname".
Definition: TClass.cxx:4688
Reading the name of the tree.
bool HandleInputFileNameConfig(const std::string &line)
Returns false if not a tree name, otherwise sets the name of the tree.
void EnableThreadSafety()
Enables the global mutex to make ROOT thread safe/aware.
Definition: TROOT.cxx:528
static std::string ExtractTreeName(std::string &firstInputFile)
Extract the name of the tree from the first input file when the tree name isn&#39;t in the configuration ...
Describe directory structure in memory.
Definition: TDirectory.h:34
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition: TClass.cxx:2887
std::vector< std::string > fInputFiles
.root input files
Mother of all ROOT objects.
Definition: TObject.h:37
virtual Bool_t cd(const char *path=0)
Change current directory to "this" directory.
Definition: TDirectory.cxx:497
auto Map(F func, unsigned nTimes) -> std::vector< typename std::result_of< F()>::type >
Execute func (with no arguments) nTimes in parallel.
A chain is a collection of files containing TTree objects.
Definition: TChain.h:33
static void DeleteCommentsAndSpaces(std::string &line)
Delete comments, leading and trailing white spaces in a string.
Reading the name of the output file.
TSimpleAnalysis(const std::string &file)
#define gDirectory
Definition: TDirectory.h:213
std::string fOutputFile
Output file in which are stored the histograms.
virtual Int_t Add(TChain *chain)
Add all files referenced by the passed chain to this chain.
Definition: TChain.cxx:221