Logo ROOT   6.10/09
Reference Guide
RooStreamParser.cxx
Go to the documentation of this file.
1 /*****************************************************************************
2  * Project: RooFit *
3  * Package: RooFitCore *
4  * @(#)root/roofitcore:$Id$
5  * Authors: *
6  * WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu *
7  * DK, David Kirkby, UC Irvine, dkirkby@uci.edu *
8  * *
9  * Copyright (c) 2000-2005, Regents of the University of California *
10  * and Stanford University. All rights reserved. *
11  * *
12  * Redistribution and use in source and binary forms, *
13  * with or without modification, are permitted according to the terms *
14  * listed in LICENSE (http://roofit.sourceforge.net/license.txt) *
15  *****************************************************************************/
16 
17 //////////////////////////////////////////////////////////////////////////////
18 //
19 // RooStreamParser is a utility class to parse istreams into tokens and optionally
20 // convert them into basic types (double,int,string)
21 //
22 // The general tokenizing philosophy is that there are two kinds of tokens: value
23 // and punctuation. The former are variable length, the latter always
24 // one character. A token is terminated if one of the following conditions
25 // occur
26 // - space character found (' ',tab,newline)
27 // - change of token type (value -> punctuation or vv)
28 // - end of fixed-length token (punctuation only)
29 // - start or end of quoted string
30 //
31 // The parser is aware of floating point notation and will assign leading
32 // minus signs, decimal points etc to a value token when this is obvious
33 // from the context. The definition of what is punctuation can be redefined.
34 //
35 
36 
37 #include "RooFit.h"
38 
39 #include "Riostream.h"
40 #include "Riostream.h"
41 #include <stdlib.h>
42 #include <ctype.h>
43 
44 #ifndef _WIN32
45 #include <strings.h>
46 #endif
47 
48 #include "RooStreamParser.h"
49 #include "RooMsgService.h"
50 #include "RooNumber.h"
51 
52 
53 using namespace std;
54 
56 
57 
58 ////////////////////////////////////////////////////////////////////////////////
59 /// Construct parser on given input stream
60 
62  _is(&is), _atEOL(kFALSE), _atEOF(kFALSE), _prefix(""), _punct("()[]<>|/\\:?.,=+-&^%$#@!`~")
63 {
64 }
65 
66 
67 ////////////////////////////////////////////////////////////////////////////////
68 /// Construct parser on given input stream. Use given errorPrefix to
69 /// prefix any parsing error messages
70 
71 RooStreamParser::RooStreamParser(istream& is, const TString& errorPrefix) :
72  _is(&is), _atEOL(kFALSE), _atEOF(kFALSE), _prefix(errorPrefix), _punct("()[]<>|/\\:?.,=+-&^%$#@!`~")
73 {
74 }
75 
76 
77 
78 ////////////////////////////////////////////////////////////////////////////////
79 /// Destructor
80 
82 {
83 }
84 
85 
86 
87 ////////////////////////////////////////////////////////////////////////////////
88 /// If true, parser is at end of line in stream
89 
91 {
92  Int_t nc(_is->peek()) ;
93  return (nc=='\n'||nc==-1) ;
94 }
95 
96 
97 
98 ////////////////////////////////////////////////////////////////////////////////
99 /// Change list of characters interpreted as punctuation
100 
101 void RooStreamParser::setPunctuation(const TString& punct)
102 {
103  _punct = punct ;
104 }
105 
106 
107 
108 ////////////////////////////////////////////////////////////////////////////////
109 /// Check if given char is considered punctuation
110 
112 {
113  const char* punct = _punct.Data() ;
114  for (int i=0 ; i<_punct.Length() ; i++)
115  if (punct[i] == c) {
116  return kTRUE ;
117  }
118  return kFALSE ;
119 }
120 
121 
122 
123 ////////////////////////////////////////////////////////////////////////////////
124 /// Read one token separated by any of the known punctuation characters
125 /// This function recognizes and handles comment lines in the istream (those
126 /// starting with '#', quoted strings ("") the content of which is not tokenized
127 /// and '+-.' characters that are part of a floating point numbers and are exempt
128 /// from being interpreted as a token separator in case '+-.' are defined as
129 /// token separators.
130 
132 {
133  // Smart tokenizer. Absorb white space and token must be either punctuation or alphanum
134  Bool_t first(kTRUE), quotedString(kFALSE), lineCont(kFALSE) ;
135  char buffer[10240], c(0), cnext='\0', cprev=' ' ;
136  Bool_t haveINF(kFALSE) ;
137  Int_t bufptr(0) ;
138 
139  // Check for end of file
140  if (_is->eof() || _is->fail()) {
141  _atEOF = kTRUE ;
142  return TString("") ;
143  }
144 
145  //Ignore leading newline
146  if (_is->peek()=='\n') {
147  _is->get(c) ;
148 
149  // If new line starts with #, zap it
150  while (_is->peek()=='#') {
151  zapToEnd(kFALSE) ;
152  _is->get(c) ; // absorb newline
153  }
154  }
155 
156  while(1) {
157  // Buffer overflow protection
158  if (bufptr>=10239) {
159  oocoutW((TObject*)0,InputArguments) << "RooStreamParser::readToken: token length exceeds buffer capacity, terminating token early" << endl ;
160  break ;
161  }
162 
163  // Read next char
164  _is->get(c) ;
165 
166 
167 
168  // Terminate at EOF, EOL or trouble
169  if (_is->eof() || _is->fail() || c=='\n') break ;
170 
171  // Terminate as SPACE, unless we haven't seen any non-SPACE yet
172  if (isspace(c)) {
173  if (first)
174  continue ;
175  else
176  if (!quotedString) {
177  break ;
178  }
179  }
180 
181  // If '-' or '/' see what the next character is
182  if (c == '.' || c=='-' || c=='+' || c=='/' || c=='\\') {
183  _is->get(cnext) ;
184 
185 
186  if (cnext=='I' || cnext=='i') {
187  char tmp1,tmp2 ;
188  _is->get(tmp1) ;
189  _is->get(tmp2) ;
190  _is->putback(tmp2) ;
191  _is->putback(tmp1) ;
192  haveINF = ((cnext=='I' && tmp1 == 'N' && tmp2 == 'F') || (cnext=='i' && tmp1 == 'n' && tmp2 == 'f')) ;
193  } else {
194  haveINF = kFALSE ;
195  }
196 
197  _is->putback(cnext) ;
198  }
199 
200 
201  // Check for line continuation marker
202  if (c=='\\' && cnext=='\\') {
203  // Kill rest of line including endline marker
204  zapToEnd(kFALSE) ;
205  _is->get(c) ;
206  lineCont=kTRUE ;
207  break ;
208  }
209 
210  // Stop if begin of comments is encountered
211  if (c=='/' && cnext=='/') {
212  zapToEnd(kFALSE) ;
213  break ;
214  }
215 
216  // Special handling of quoted strings
217  if (c=='"') {
218  if (first) {
219  quotedString=kTRUE ;
220  } else if (!quotedString) {
221  // Terminate current token. Next token will be quoted string
222  _is->putback('"') ;
223  break ;
224  }
225  }
226 
227  if (!quotedString) {
228  // Decide if next char is punctuation (exempt - and . that are part of floating point numbers, or +/- preceding INF)
229  if (isPunctChar(c) && !(c=='.' && (isdigit(cnext)||isdigit(cprev)))
230  && !((c=='-'||c=='+') && isdigit(cnext) && cprev=='e')
231  && (!first || !((c=='-'||c=='+') && (isdigit(cnext)||cnext=='.'||haveINF)))) {
232 
233  if (first) {
234  // Make this a one-char punctuation token
235  buffer[bufptr++]=c ;
236  break ;
237  } else {
238  // Put back punct. char and terminate current alphanum token
239  _is->putback(c) ;
240  break ;
241  }
242  }
243  } else {
244  // Inside quoted string conventional tokenizing rules do not apply
245 
246  // Terminate token on closing quote
247  if (c=='"' && !first) {
248  buffer[bufptr++]=c ;
249  quotedString=kFALSE ;
250  break ;
251  }
252  }
253 
254  // Store in buffer
255  buffer[bufptr++]=c ;
256  first=kFALSE ;
257  cprev=c ;
258  }
259 
260  if (_is->eof() || _is->bad()) {
261  _atEOF = kTRUE ;
262  }
263 
264  // Check if closing quote was encountered
265  if (quotedString) {
266  oocoutW((TObject*)0,InputArguments) << "RooStreamParser::readToken: closing quote (\") missing" << endl ;
267  }
268 
269  // Absorb trailing white space or absorb rest of line if // is encountered
270  if (c=='\n') {
271  if (!lineCont) {
272  _is->putback(c) ;
273  }
274  } else {
275  c = _is->peek() ;
276 
277  while ((isspace(c) || c=='/') && c != '\n') {
278  if (c=='/') {
279  _is->get(c) ;
280  if (_is->peek()=='/') {
281  zapToEnd(kFALSE) ;
282  } else {
283  _is->putback('/') ;
284  }
285  break ;
286  } else {
287  _is->get(c) ;
288  c = _is->peek() ;
289  }
290  }
291  }
292 
293  // If no token was read line is continued, return first token on next line
294  if (bufptr==0 && lineCont) {
295  return readToken() ;
296  }
297 
298  // Zero terminate buffer and convert to TString
299  buffer[bufptr]=0 ;
300  return TString(buffer) ;
301 }
302 
303 
304 
305 ////////////////////////////////////////////////////////////////////////////////
306 /// Read an entire line from the stream and return as TString
307 /// This method recognizes the use of '\\' in the istream
308 /// as line continuation token.
309 
311 {
312  char c,buffer[10240] ;
313  Int_t nfree(10239) ;
314 
315  if (_is->peek()=='\n') _is->get(c) ;
316 
317  // Read till end of line
318  _is->getline(buffer,nfree,'\n') ;
319 
320  // Look for eventual continuation line sequence
321  char *pcontseq = strstr(buffer,"\\\\") ;
322  if (pcontseq) nfree -= (pcontseq-buffer) ;
323  while(pcontseq) {
324  _is->getline(pcontseq,nfree,'\n') ;
325 
326  char* nextpcontseq = strstr(pcontseq,"\\\\") ;
327  if (nextpcontseq) nfree -= (nextpcontseq-pcontseq) ;
328  pcontseq = nextpcontseq ;
329  }
330 
331  // Chop eventual comments
332  char *pcomment = strstr(buffer,"//") ;
333  if (pcomment) *pcomment=0 ;
334 
335  // Chop leading and trailing space
336  char *pstart=buffer ;
337  while (isspace(*pstart)) {
338  pstart++ ;
339  }
340  char *pend=buffer+strlen(buffer)-1 ;
341  if (pend>pstart)
342  while (isspace(*pend)) { *pend--=0 ; }
343 
344  if (_is->eof() || _is->fail()) {
345  _atEOF = kTRUE ;
346  }
347 
348  // Convert to TString
349  return TString(pstart) ;
350 }
351 
352 
353 
354 ////////////////////////////////////////////////////////////////////////////////
355 /// Eat all characters up to the end of the current line; the terminating
356 /// newline is put back and left in the stream. If inclContLines is kTRUE, all
357 /// continuation lines marked by the '\\' token are zapped as well
358 
360 {
361  // Skip over everything until the end of the current line
362  if (_is->peek()!='\n') {
363 
364  char buffer[10240] ;
365  Int_t nfree(10239) ;
366 
367  // Read till end of line
368  _is->getline(buffer,nfree,'\n') ;
369 
370  if (inclContLines) {
371  // Look for eventual continuation line sequence
372  char *pcontseq = strstr(buffer,"\\\\") ;
373  if (pcontseq) nfree -= (pcontseq-buffer) ;
374  while(pcontseq) {
375  _is->getline(pcontseq,nfree,'\n') ;
376 
377  char* nextpcontseq = strstr(pcontseq,"\\\\") ;
378  if (nextpcontseq) nfree -= (nextpcontseq-pcontseq) ;
379  pcontseq = nextpcontseq ;
380  }
381  }
382 
383  // Put back newline character in stream buffer
384  _is->putback('\n') ;
385  }
386 }
387 
388 
389 
390 ////////////////////////////////////////////////////////////////////////////////
391 /// Read the next token and compare it to the given 'expected' token. Returns kTRUE if the token differs from 'expected' (i.e. on error) and kFALSE if it is identical.
392 
393 Bool_t RooStreamParser::expectToken(const TString& expected, Bool_t zapOnError)
394 {
395  TString token(readToken()) ;
396 
397  Bool_t error=token.CompareTo(expected) ;
398  if (error && !_prefix.IsNull()) {
399  oocoutW((TObject*)0,InputArguments) << _prefix << ": parse error, expected '"
400  << expected << "'" << ", got '" << token << "'" << endl ;
401  if (zapOnError) zapToEnd(kTRUE) ;
402  }
403  return error ;
404 }
405 
406 
407 
408 ////////////////////////////////////////////////////////////////////////////////
409 /// Read the next token and convert it to a Double_t. Returns true
410 /// if an error occurred in reading or conversion
411 
413 {
414  TString token(readToken()) ;
415  if (token.IsNull()) return kTRUE ;
416  return convertToDouble(token,value) ;
417 
418 }
419 
420 
421 
422 ////////////////////////////////////////////////////////////////////////////////
423 /// Convert given string to a double. Return true if the conversion fails.
424 
425 Bool_t RooStreamParser::convertToDouble(const TString& token, Double_t& value)
426 {
427  char* endptr = 0;
428  const char* data=token.Data() ;
429 
430  // Handle +/- infinity cases, (token is guaranteed to be >1 char long)
431  if (!strcasecmp(data,"inf") || !strcasecmp(data+1,"inf")) {
432  value = (data[0]=='-') ? -RooNumber::infinity() : RooNumber::infinity() ;
433  return kFALSE ;
434  }
435 
436  value = strtod(data,&endptr) ;
437  Bool_t error = (endptr-data!=token.Length()) ;
438 
439  if (error && !_prefix.IsNull()) {
440  oocoutE((TObject*)0,InputArguments) << _prefix << ": parse error, cannot convert '"
441  << token << "'" << " to double precision" << endl ;
442  }
443  return error ;
444 }
445 
446 
447 
448 ////////////////////////////////////////////////////////////////////////////////
449 /// Read a token and convert it to an Int_t. Returns true
450 /// if an error occurred in reading or conversion
451 
453 {
454  TString token(readToken()) ;
455  if (token.IsNull()) return kTRUE ;
456  return convertToInteger(token,value) ;
457 }
458 
459 
460 
461 ////////////////////////////////////////////////////////////////////////////////
462 /// Convert given string to an Int_t. Returns true if an error
463 /// occurred in conversion
464 
465 Bool_t RooStreamParser::convertToInteger(const TString& token, Int_t& value)
466 {
467  char* endptr = 0;
468  const char* data=token.Data() ;
469  value = strtol(data,&endptr,10) ;
470  Bool_t error = (endptr-data!=token.Length()) ;
471 
472  if (error && !_prefix.IsNull()) {
473  oocoutE((TObject*)0,InputArguments)<< _prefix << ": parse error, cannot convert '"
474  << token << "'" << " to integer" << endl ;
475  }
476  return error ;
477 }
478 
479 
480 
481 ////////////////////////////////////////////////////////////////////////////////
482 /// Read a string token. Returns true if an error occurred in reading
483 /// or conversion. If the read token is enclosed in quotation
484 /// marks those are stripped in the returned value
485 
486 Bool_t RooStreamParser::readString(TString& value, Bool_t /*zapOnError*/)
487 {
488  TString token(readToken()) ;
489  if (token.IsNull()) return kTRUE ;
490  return convertToString(token,value) ;
491 }
492 
493 
494 
495 ////////////////////////////////////////////////////////////////////////////////
496 /// Convert given token to a string (i.e. remove eventual quotation marks)
497 
498 Bool_t RooStreamParser::convertToString(const TString& token, TString& string)
499 {
500  // Transport to buffer
501  char buffer[10240],*ptr ;
502  strncpy(buffer,token.Data(),10239) ;
503  if (token.Length()>=10239) {
504  oocoutW((TObject*)0,InputArguments) << "RooStreamParser::convertToString: token length exceeds 1023, truncated" << endl ;
505  buffer[10239]=0 ;
506  }
507  int len = strlen(buffer) ;
508 
509  // Remove trailing quote if any
510  if ((len) && (buffer[len-1]=='"'))
511  buffer[len-1]=0 ;
512 
513  // Skip leading quote, if present
514  ptr=(buffer[0]=='"') ? buffer+1 : buffer ;
515 
516  string = ptr ;
517  return kFALSE ;
518 }
Bool_t isPunctChar(char c) const
Check if given char is considered punctuation.
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
STL namespace.
#define oocoutE(o, a)
Definition: RooMsgService.h:47
Bool_t convertToDouble(const TString &token, Double_t &value)
Convert given string to a double. Return true if the conversion fails.
TString readToken()
Read one token separated by any of the know punctuation characters This function recognizes and handl...
Bool_t convertToInteger(const TString &token, Int_t &value)
Convert given string to an Int_t.
static Double_t infinity()
Return internal infinity representation.
Definition: RooNumber.cxx:49
void zapToEnd(Bool_t inclContLines=kFALSE)
Eat all characters up to and including then end of the current line.
RooStreamParser(std::istream &is)
Construct parser on given input stream.
Bool_t readDouble(Double_t &value, Bool_t zapOnError=kFALSE)
Read the next token and convert it to a Double_t.
TString readLine()
Read an entire line from the stream and return as TString This method recognizes the use of &#39;\&#39; in th...
Bool_t expectToken(const TString &expected, Bool_t zapOnError=kFALSE)
Read the next token and return kTRUE if it is identical to the given &#39;expected&#39; token.
Bool_t readString(TString &value, Bool_t zapOnError=kFALSE)
Read a string token.
const Bool_t kFALSE
Definition: RtypesCore.h:92
#define ClassImp(name)
Definition: Rtypes.h:336
double Double_t
Definition: RtypesCore.h:55
Bool_t convertToString(const TString &token, TString &string)
Convert given token to a string (i.e. remove eventual quotation marks)
void setPunctuation(const TString &punct)
Change list of characters interpreted as punctuation.
std::istream * _is
#define oocoutW(o, a)
Definition: RooMsgService.h:46
Mother of all ROOT objects.
Definition: TObject.h:37
virtual ~RooStreamParser()
Destructor.
Bool_t readInteger(Int_t &value, Bool_t zapOnError=kFALSE)
Read a token and convert it to an Int_t.
Definition: first.py:1
Bool_t atEOL()
If true, parser is at end of line in stream.
const Bool_t kTRUE
Definition: RtypesCore.h:91