Logo ROOT   6.12/07
Reference Guide
RooStreamParser.cxx
Go to the documentation of this file.
1 /*****************************************************************************
2  * Project: RooFit *
3  * Package: RooFitCore *
4  * @(#)root/roofitcore:$Id$
5  * Authors: *
6  * WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu *
7  * DK, David Kirkby, UC Irvine, dkirkby@uci.edu *
8  * *
9  * Copyright (c) 2000-2005, Regents of the University of California *
10  * and Stanford University. All rights reserved. *
11  * *
12  * Redistribution and use in source and binary forms, *
13  * with or without modification, are permitted according to the terms *
14  * listed in LICENSE (http://roofit.sourceforge.net/license.txt) *
15  *****************************************************************************/
16 
17 //////////////////////////////////////////////////////////////////////////////
18 //
19 // RooStreamParser is a utility class to parse istreams into tokens and optionally
20 // convert them into basic types (double,int,string)
21 //
22 // The general tokenizing philosophy is that there are two kinds of tokens: value
23 // and punctuation. The former are variable length, the latter always
24 // one character. A token is terminated if one of the following conditions
25 // occur
26 // - space character found (' ',tab,newline)
27 // - change of token type (value -> punctuation or vv)
28 // - end of fixed-length token (punctuation only)
29 // - start or end of quoted string
30 //
31 // The parser is aware of floating point notation and will assign leading
32 // minus signs, decimal points etc to a value token when this is obvious
33 // from the context. The definition of what is punctuation can be redefined.
34 //
35 
36 
#include "RooFit.h"

#include "Riostream.h"
#include "Riostream.h"
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <limits>

#ifndef _WIN32
#include <strings.h>
#endif

#include "RooStreamParser.h"
#include "RooMsgService.h"
#include "RooNumber.h"
51 
52 
53 using namespace std;
54 
56 
57 
58 ////////////////////////////////////////////////////////////////////////////////
59 /// Construct parser on given input stream
60 
62  _is(&is), _atEOL(kFALSE), _atEOF(kFALSE), _prefix(""), _punct("()[]<>|/\\:?.,=+-&^%$#@!`~")
63 {
64 }
65 
66 
67 ////////////////////////////////////////////////////////////////////////////////
68 /// Construct parser on given input stream. Use given errorPrefix to
69 /// prefix any parsing error messages
70 
71 RooStreamParser::RooStreamParser(istream& is, const TString& errorPrefix) :
72  _is(&is), _atEOL(kFALSE), _atEOF(kFALSE), _prefix(errorPrefix), _punct("()[]<>|/\\:?.,=+-&^%$#@!`~")
73 {
74 }
75 
76 
77 
78 ////////////////////////////////////////////////////////////////////////////////
79 /// Destructor
80 
82 {
83 }
84 
85 
86 
87 ////////////////////////////////////////////////////////////////////////////////
88 /// If true, parser is at end of line in stream
89 
91 {
92  Int_t nc(_is->peek()) ;
93  return (nc=='\n'||nc==-1) ;
94 }
95 
96 
97 
98 ////////////////////////////////////////////////////////////////////////////////
99 /// Change list of characters interpreted as punctuation
100 
101 void RooStreamParser::setPunctuation(const TString& punct)
102 {
103  _punct = punct ;
104 }
105 
106 
107 
108 ////////////////////////////////////////////////////////////////////////////////
109 /// Check if given char is considered punctuation
110 
112 {
113  const char* punct = _punct.Data() ;
114  for (int i=0 ; i<_punct.Length() ; i++)
115  if (punct[i] == c) {
116  return kTRUE ;
117  }
118  return kFALSE ;
119 }
120 
121 
122 
123 ////////////////////////////////////////////////////////////////////////////////
124 /// Read one token separated by any of the know punctuation characters
125 /// This function recognizes and handles comment lines in the istream (those
126 /// starting with '#', quoted strings ("") the content of which is not tokenized
127 /// and '+-.' characters that are part of a floating point numbers and are exempt
128 /// from being interpreted as a token separator in case '+-.' are defined as
129 /// token separators.
130 
132 {
133  // Smart tokenizer. Absorb white space and token must be either punctuation or alphanum
134  Bool_t first(kTRUE), quotedString(kFALSE), lineCont(kFALSE) ;
135  char buffer[64000], c(0), cnext = '\0', cprev = ' ';
136  Bool_t haveINF(kFALSE) ;
137  Int_t bufptr(0) ;
138 
139  // Check for end of file
140  if (_is->eof() || _is->fail()) {
141  _atEOF = kTRUE ;
142  return TString("") ;
143  }
144 
145  //Ignore leading newline
146  if (_is->peek()=='\n') {
147  _is->get(c) ;
148 
149  // If new line starts with #, zap it
150  while (_is->peek()=='#') {
151  zapToEnd(kFALSE) ;
152  _is->get(c) ; // absorb newline
153  }
154  }
155 
156  while(1) {
157  // Buffer overflow protection
158  if (bufptr >= 63999) {
160  << "RooStreamParser::readToken: token length exceeds buffer capacity, terminating token early" << endl;
161  break;
162  }
163 
164  // Read next char
165  _is->get(c) ;
166 
167 
168 
169  // Terminate at EOF, EOL or trouble
170  if (_is->eof() || _is->fail() || c=='\n') break ;
171 
172  // Terminate as SPACE, unless we haven't seen any non-SPACE yet
173  if (isspace(c)) {
174  if (first)
175  continue ;
176  else
177  if (!quotedString) {
178  break ;
179  }
180  }
181 
182  // If '-' or '/' see what the next character is
183  if (c == '.' || c=='-' || c=='+' || c=='/' || c=='\\') {
184  _is->get(cnext) ;
185 
186 
187  if (cnext=='I' || cnext=='i') {
188  char tmp1,tmp2 ;
189  _is->get(tmp1) ;
190  _is->get(tmp2) ;
191  _is->putback(tmp2) ;
192  _is->putback(tmp1) ;
193  haveINF = ((cnext=='I' && tmp1 == 'N' && tmp2 == 'F') || (cnext=='i' && tmp1 == 'n' && tmp2 == 'f')) ;
194  } else {
195  haveINF = kFALSE ;
196  }
197 
198  _is->putback(cnext) ;
199  }
200 
201 
202  // Check for line continuation marker
203  if (c=='\\' && cnext=='\\') {
204  // Kill rest of line including endline marker
205  zapToEnd(kFALSE) ;
206  _is->get(c) ;
207  lineCont=kTRUE ;
208  break ;
209  }
210 
211  // Stop if begin of comments is encountered
212  if (c=='/' && cnext=='/') {
213  zapToEnd(kFALSE) ;
214  break ;
215  }
216 
217  // Special handling of quoted strings
218  if (c=='"') {
219  if (first) {
220  quotedString=kTRUE ;
221  } else if (!quotedString) {
222  // Terminate current token. Next token will be quoted string
223  _is->putback('"') ;
224  break ;
225  }
226  }
227 
228  if (!quotedString) {
229  // Decide if next char is punctuation (exempt - and . that are part of floating point numbers, or +/- preceding INF)
230  if (isPunctChar(c) && !(c=='.' && (isdigit(cnext)||isdigit(cprev)))
231  && !((c=='-'||c=='+') && isdigit(cnext) && cprev=='e')
232  && (!first || !((c=='-'||c=='+') && (isdigit(cnext)||cnext=='.'||haveINF)))) {
233 
234  if (first) {
235  // Make this a one-char punctuation token
236  buffer[bufptr++]=c ;
237  break ;
238  } else {
239  // Put back punct. char and terminate current alphanum token
240  _is->putback(c) ;
241  break ;
242  }
243  }
244  } else {
245  // Inside quoted string conventional tokenizing rules do not apply
246 
247  // Terminate token on closing quote
248  if (c=='"' && !first) {
249  buffer[bufptr++]=c ;
250  quotedString=kFALSE ;
251  break ;
252  }
253  }
254 
255  // Store in buffer
256  buffer[bufptr++]=c ;
257  first=kFALSE ;
258  cprev=c ;
259  }
260 
261  if (_is->eof() || _is->bad()) {
262  _atEOF = kTRUE ;
263  }
264 
265  // Check if closing quote was encountered
266  if (quotedString) {
267  oocoutW((TObject*)0,InputArguments) << "RooStreamParser::readToken: closing quote (\") missing" << endl ;
268  }
269 
270  // Absorb trailing white space or absorb rest of line if // is encountered
271  if (c=='\n') {
272  if (!lineCont) {
273  _is->putback(c) ;
274  }
275  } else {
276  c = _is->peek() ;
277 
278  while ((isspace(c) || c=='/') && c != '\n') {
279  if (c=='/') {
280  _is->get(c) ;
281  if (_is->peek()=='/') {
282  zapToEnd(kFALSE) ;
283  } else {
284  _is->putback('/') ;
285  }
286  break ;
287  } else {
288  _is->get(c) ;
289  c = _is->peek() ;
290  }
291  }
292  }
293 
294  // If no token was read line is continued, return first token on next line
295  if (bufptr==0 && lineCont) {
296  return readToken() ;
297  }
298 
299  // Zero terminate buffer and convert to TString
300  buffer[bufptr]=0 ;
301  return TString(buffer) ;
302 }
303 
304 
305 
306 ////////////////////////////////////////////////////////////////////////////////
307 /// Read an entire line from the stream and return as TString
308 /// This method recognizes the use of '\\' in the istream
309 /// as line continuation token.
310 
312 {
313  char c, buffer[64000];
314  Int_t nfree(63999);
315 
316  if (_is->peek() == '\n')
317  _is->get(c);
318 
319  // Read till end of line
320  _is->getline(buffer, nfree, '\n');
321 
322  // Look for eventual continuation line sequence
323  char *pcontseq = strstr(buffer, "\\\\");
324  if (pcontseq)
325  nfree -= (pcontseq - buffer);
326  while (pcontseq) {
327  _is->getline(pcontseq, nfree, '\n');
328 
329  char *nextpcontseq = strstr(pcontseq, "\\\\");
330  if (nextpcontseq)
331  nfree -= (nextpcontseq - pcontseq);
332  pcontseq = nextpcontseq;
333  }
334 
335  // Chop eventual comments
336  char *pcomment = strstr(buffer,"//") ;
337  if (pcomment) *pcomment=0 ;
338 
339  // Chop leading and trailing space
340  char *pstart=buffer ;
341  while (isspace(*pstart)) {
342  pstart++ ;
343  }
344  char *pend=buffer+strlen(buffer)-1 ;
345  if (pend>pstart)
346  while (isspace(*pend)) { *pend--=0 ; }
347 
348  if (_is->eof() || _is->fail()) {
349  _atEOF = kTRUE ;
350  }
351 
352  // Convert to TString
353  return TString(pstart) ;
354 }
355 
356 
357 
358 ////////////////////////////////////////////////////////////////////////////////
359 /// Eat all characters up to and including then end of the
360 /// current line. If inclContLines is kTRUE, all continuation lines
361 /// marked by the '\\' token are zapped as well
362 
364 {
365  // Skip over everything until the end of the current line
366  if (_is->peek()!='\n') {
367 
368  char buffer[64000];
369  Int_t nfree(63999);
370 
371  // Read till end of line
372  _is->getline(buffer, nfree, '\n');
373 
374  if (inclContLines) {
375  // Look for eventual continuation line sequence
376  char *pcontseq = strstr(buffer, "\\\\");
377  if (pcontseq)
378  nfree -= (pcontseq - buffer);
379  while (pcontseq) {
380  _is->getline(pcontseq, nfree, '\n');
381 
382  char *nextpcontseq = strstr(pcontseq, "\\\\");
383  if (nextpcontseq)
384  nfree -= (nextpcontseq - pcontseq);
385  pcontseq = nextpcontseq;
386  }
387  }
388 
389  // Put back newline character in stream buffer
390  _is->putback('\n') ;
391  }
392 }
393 
394 
395 
396 ////////////////////////////////////////////////////////////////////////////////
397 /// Read the next token and return kTRUE if it is identical to the given 'expected' token.
398 
399 Bool_t RooStreamParser::expectToken(const TString& expected, Bool_t zapOnError)
400 {
401  TString token(readToken()) ;
402 
403  Bool_t error=token.CompareTo(expected) ;
404  if (error && !_prefix.IsNull()) {
405  oocoutW((TObject*)0,InputArguments) << _prefix << ": parse error, expected '"
406  << expected << "'" << ", got '" << token << "'" << endl ;
407  if (zapOnError) zapToEnd(kTRUE) ;
408  }
409  return error ;
410 }
411 
412 
413 
414 ////////////////////////////////////////////////////////////////////////////////
415 /// Read the next token and convert it to a Double_t. Returns true
416 /// if an error occurred in reading or conversion
417 
419 {
420  TString token(readToken()) ;
421  if (token.IsNull()) return kTRUE ;
422  return convertToDouble(token,value) ;
423 
424 }
425 
426 
427 
428 ////////////////////////////////////////////////////////////////////////////////
429 /// Convert given string to a double. Return true if the conversion fails.
430 
431 Bool_t RooStreamParser::convertToDouble(const TString& token, Double_t& value)
432 {
433  char* endptr = 0;
434  const char* data=token.Data() ;
435 
436  // Handle +/- infinity cases, (token is guaranteed to be >1 char long)
437  if (!strcasecmp(data,"inf") || !strcasecmp(data+1,"inf")) {
438  value = (data[0]=='-') ? -RooNumber::infinity() : RooNumber::infinity() ;
439  return kFALSE ;
440  }
441 
442  value = strtod(data,&endptr) ;
443  Bool_t error = (endptr-data!=token.Length()) ;
444 
445  if (error && !_prefix.IsNull()) {
446  oocoutE((TObject*)0,InputArguments) << _prefix << ": parse error, cannot convert '"
447  << token << "'" << " to double precision" << endl ;
448  }
449  return error ;
450 }
451 
452 
453 
454 ////////////////////////////////////////////////////////////////////////////////
455 /// Read a token and convert it to an Int_t. Returns true
456 /// if an error occurred in reading or conversion
457 
459 {
460  TString token(readToken()) ;
461  if (token.IsNull()) return kTRUE ;
462  return convertToInteger(token,value) ;
463 }
464 
465 
466 
467 ////////////////////////////////////////////////////////////////////////////////
468 /// Convert given string to an Int_t. Returns true if an error
469 /// occurred in conversion
470 
471 Bool_t RooStreamParser::convertToInteger(const TString& token, Int_t& value)
472 {
473  char* endptr = 0;
474  const char* data=token.Data() ;
475  value = strtol(data,&endptr,10) ;
476  Bool_t error = (endptr-data!=token.Length()) ;
477 
478  if (error && !_prefix.IsNull()) {
479  oocoutE((TObject*)0,InputArguments)<< _prefix << ": parse error, cannot convert '"
480  << token << "'" << " to integer" << endl ;
481  }
482  return error ;
483 }
484 
485 
486 
487 ////////////////////////////////////////////////////////////////////////////////
488 /// Read a string token. Returns true if an error occurred in reading
489 /// or conversion. If a the read token is enclosed in quotation
490 /// marks those are stripped in the returned value
491 
492 Bool_t RooStreamParser::readString(TString& value, Bool_t /*zapOnError*/)
493 {
494  TString token(readToken()) ;
495  if (token.IsNull()) return kTRUE ;
496  return convertToString(token,value) ;
497 }
498 
499 
500 
501 ////////////////////////////////////////////////////////////////////////////////
502 /// Convert given token to a string (i.e. remove eventual quotation marks)
503 
504 Bool_t RooStreamParser::convertToString(const TString& token, TString& string)
505 {
506  // Transport to buffer
507  char buffer[64000], *ptr;
508  strncpy(buffer, token.Data(), 63999);
509  if (token.Length() >= 63999) {
510  oocoutW((TObject *)0, InputArguments) << "RooStreamParser::convertToString: token length exceeds 63999, truncated"
511  << endl;
512  buffer[63999] = 0;
513  }
514  int len = strlen(buffer) ;
515 
516  // Remove trailing quote if any
517  if ((len) && (buffer[len-1]=='"'))
518  buffer[len-1]=0 ;
519 
520  // Skip leading quote, if present
521  ptr=(buffer[0]=='"') ? buffer+1 : buffer ;
522 
523  string = ptr ;
524  return kFALSE ;
525 }
Bool_t isPunctChar(char c) const
Check if given char is considered punctuation.
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
STL namespace.
#define oocoutE(o, a)
Definition: RooMsgService.h:47
Bool_t convertToDouble(const TString &token, Double_t &value)
Convert given string to a double. Return true if the conversion fails.
TString readToken()
Read one token separated by any of the known punctuation characters. This function recognizes and handl...
Bool_t convertToInteger(const TString &token, Int_t &value)
Convert given string to an Int_t.
static Double_t infinity()
Return internal infinity representation.
Definition: RooNumber.cxx:49
void zapToEnd(Bool_t inclContLines=kFALSE)
Eat all characters up to and including the end of the current line.
RooStreamParser(std::istream &is)
Construct parser on given input stream.
Bool_t readDouble(Double_t &value, Bool_t zapOnError=kFALSE)
Read the next token and convert it to a Double_t.
TString readLine()
Read an entire line from the stream and return as TString. This method recognizes the use of '\\' in th...
Bool_t expectToken(const TString &expected, Bool_t zapOnError=kFALSE)
Read the next token and compare it with the given 'expected' token. Returns kTRUE if the token differs (parse error), kFALSE if it is identical.
Bool_t readString(TString &value, Bool_t zapOnError=kFALSE)
Read a string token.
const Bool_t kFALSE
Definition: RtypesCore.h:88
#define ClassImp(name)
Definition: Rtypes.h:359
double Double_t
Definition: RtypesCore.h:55
Bool_t convertToString(const TString &token, TString &string)
Convert given token to a string (i.e. remove eventual quotation marks)
void setPunctuation(const TString &punct)
Change list of characters interpreted as punctuation.
std::istream * _is
#define oocoutW(o, a)
Definition: RooMsgService.h:46
Mother of all ROOT objects.
Definition: TObject.h:37
virtual ~RooStreamParser()
Destructor.
Bool_t readInteger(Int_t &value, Bool_t zapOnError=kFALSE)
Read a token and convert it to an Int_t.
Definition: first.py:1
Bool_t atEOL()
If true, parser is at end of line in stream.
const Bool_t kTRUE
Definition: RtypesCore.h:87