Logo ROOT  
Reference Guide
RooStreamParser.cxx
Go to the documentation of this file.
1/*****************************************************************************
2 * Project: RooFit *
3 * Package: RooFitCore *
4 * @(#)root/roofitcore:$Id$
5 * Authors: *
6 * WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu *
7 * DK, David Kirkby, UC Irvine, dkirkby@uci.edu *
8 * *
9 * Copyright (c) 2000-2005, Regents of the University of California *
10 * and Stanford University. All rights reserved. *
11 * *
12 * Redistribution and use in source and binary forms, *
13 * with or without modification, are permitted according to the terms *
14 * listed in LICENSE (http://roofit.sourceforge.net/license.txt) *
15 *****************************************************************************/
16
17//////////////////////////////////////////////////////////////////////////////
18//
19// RooStreamParser is a utility class to parse istreams into tokens and optionally
20// convert them into basic types (double,int,string)
21//
22// The general tokenizing philosophy is that there are two kinds of tokens: value
23// and punctuation. The former are variable length, the latter always
24// one character. A token is terminated when one of the following conditions
25// occurs:
26// - space character found (' ',tab,newline)
27// - change of token type (value -> punctuation or vv)
28// - end of fixed-length token (punctuation only)
29// - start or end of quoted string
30//
31// The parser is aware of floating point notation and will assign leading
32// minus signs, decimal points etc to a value token when this is obvious
33// from the context. The definition of what is punctuation can be redefined.
34//
35
36
37#include "RooFit.h"
38
39#include "Riostream.h"
40#include "Riostream.h"
41#include <stdlib.h>
42
43#ifndef _WIN32
44#include <strings.h>
45#endif
46
47#include "RooStreamParser.h"
48#include "RooMsgService.h"
49#include "RooNumber.h"
50
51
52using namespace std;
53
55
56
57////////////////////////////////////////////////////////////////////////////////
58/// Construct parser on given input stream
59
61 _is(&is), _atEOL(kFALSE), _atEOF(kFALSE), _prefix(""), _punct("()[]<>|/\\:?.,=+-&^%$#@!`~")
62{
63}
64
65
66////////////////////////////////////////////////////////////////////////////////
67/// Construct parser on given input stream. Use given errorPrefix to
68/// prefix any parsing error messages
69
70RooStreamParser::RooStreamParser(istream& is, const TString& errorPrefix) :
71 _is(&is), _atEOL(kFALSE), _atEOF(kFALSE), _prefix(errorPrefix), _punct("()[]<>|/\\:?.,=+-&^%$#@!`~")
72{
73}
74
75
76
77////////////////////////////////////////////////////////////////////////////////
78/// Destructor
79
81{
82}
83
84
85
86////////////////////////////////////////////////////////////////////////////////
87/// If true, parser is at end of line in stream
88
90{
91 Int_t nc(_is->peek()) ;
92 return (nc=='\n'||nc==-1) ;
93}
94
95
96
97////////////////////////////////////////////////////////////////////////////////
98/// Change list of characters interpreted as punctuation
99
101{
102 _punct = punct ;
103}
104
105
106
107////////////////////////////////////////////////////////////////////////////////
108/// Check if given char is considered punctuation
109
111{
112 const char* punct = _punct.Data() ;
113 for (int i=0 ; i<_punct.Length() ; i++)
114 if (punct[i] == c) {
115 return kTRUE ;
116 }
117 return kFALSE ;
118}
119
120
121
122////////////////////////////////////////////////////////////////////////////////
123/// Read one token, delimited by white space or by any of the known punctuation characters.
124/// This function recognizes and handles comment lines in the istream (those
125/// starting with '#'), quoted strings ("") the content of which is not tokenized,
126/// and '+-.' characters that are part of a floating point number and are exempt
127/// from being interpreted as a token separator in case '+-.' are defined as
128/// token separators.
///
/// Returns the token as a TString. An empty TString is returned (and _atEOF is
/// set) when the stream is already at eof or in a failed state on entry.
/// A quoted-string token is returned with its quote characters included.
/// A token is cut off once 63999 characters have been buffered.
/// A newline that terminates the token is put back into the stream unless a
/// line continuation (two consecutive backslashes) was seen; if a continuation
/// was seen before any token character, the first token of the next line is
/// returned instead.
129
// NOTE(review): the function signature line is absent from this listing; the
// symbol index of this page gives it as `TString readToken()` - confirm.
131{
132 // Smart tokenizer. Absorb white space and token must be either punctuation or alphanum
133 Bool_t first(kTRUE), quotedString(kFALSE), lineCont(kFALSE) ;
134 char buffer[64000], c(0), cnext = '\0', cprev = ' ';
135 Bool_t haveINF(kFALSE) ;
136 Int_t bufptr(0) ;
137
138 // Check for end of file
139 if (_is->eof() || _is->fail()) {
140 _atEOF = kTRUE ;
141 return TString("") ;
142 }
143
144 // Ignore leading newline
145 if (_is->peek()=='\n') {
146 _is->get(c) ;
147
148 // If new line starts with #, zap it
149 while (_is->peek()=='#') {
// NOTE(review): a statement appears to be missing from this listing here
// (presumably the call that discards the rest of the '#' line) - confirm.
151 _is->get(c) ; // absorb newline
152 }
153 }
154
155 while(1) {
156 // Buffer overflow protection: leave room for the terminating zero
157 if (bufptr >= 63999) {
// NOTE(review): the start of the following statement is missing from this
// listing (presumably the warning-stream macro that the `<<` applies to) - confirm.
159 << "RooStreamParser::readToken: token length exceeds buffer capacity, terminating token early" << endl;
160 break;
161 }
162
163 // Read next char
164 _is->get(c) ;
165
166
167
168 // Terminate at EOF, EOL or trouble
169 if (_is->eof() || _is->fail() || c=='\n') break ;
170
171 // Terminate at SPACE, unless we haven't seen any non-SPACE yet
// (white space inside a quoted string does not terminate the token)
172 if (isspace(c)) {
173 if (first)
174 continue ;
175 else
176 if (!quotedString) {
177 break ;
178 }
179 }
180
181 // If '.', '-', '+', '/' or '\' see what the next character is
182 if (c == '.' || c=='-' || c=='+' || c=='/' || c=='\\') {
183 _is->get(cnext) ;
184
185
// Look two further characters ahead to recognise "INF"/"inf" after the sign
186 if (cnext=='I' || cnext=='i') {
187 char tmp1,tmp2 ;
188 _is->get(tmp1) ;
189 _is->get(tmp2) ;
190 _is->putback(tmp2) ;
191 _is->putback(tmp1) ;
192 haveINF = ((cnext=='I' && tmp1 == 'N' && tmp2 == 'F') || (cnext=='i' && tmp1 == 'n' && tmp2 == 'f')) ;
193 } else {
194 haveINF = kFALSE ;
195 }
196
// Look-ahead only: return the peeked character to the stream
197 _is->putback(cnext) ;
198 }
199
200
201 // Check for line continuation marker (two consecutive backslashes)
202 if (c=='\\' && cnext=='\\') {
203 // Kill rest of line including endline marker
// NOTE(review): a statement appears to be missing from this listing here
// (presumably the call that discards the rest of the line) - confirm.
205 _is->get(c) ;
206 lineCont=kTRUE ;
207 break ;
208 }
209
210 // Stop if begin of comments is encountered
211 if (c=='/' && cnext=='/') {
// NOTE(review): a statement appears to be missing from this listing here
// (presumably the call that discards the rest of the comment line) - confirm.
213 break ;
214 }
215
216 // Special handling of quoted strings
217 if (c=='"') {
218 if (first) {
219 quotedString=kTRUE ;
220 } else if (!quotedString) {
221 // Terminate current token. Next token will be quoted string
222 _is->putback('"') ;
223 break ;
224 }
225 }
226
227 if (!quotedString) {
228 // Decide if next char is punctuation (exempt - and . that are part of floating point numbers, or +/- preceding INF)
229 if (isPunctChar(c) && !(c=='.' && (isdigit(cnext)||isdigit(cprev)))
230 && !((c=='-'||c=='+') && isdigit(cnext) && cprev=='e')
231 && (!first || !((c=='-'||c=='+') && (isdigit(cnext)||cnext=='.'||haveINF)))) {
232
233 if (first) {
234 // Make this a one-char punctuation token
235 buffer[bufptr++]=c ;
236 break ;
237 } else {
238 // Put back punct. char and terminate current alphanum token
239 _is->putback(c) ;
240 break ;
241 }
242 }
243 } else {
244 // Inside quoted string conventional tokenizing rules do not apply
245
246 // Terminate token on closing quote (the quote itself is kept in the token)
247 if (c=='"' && !first) {
248 buffer[bufptr++]=c ;
249 quotedString=kFALSE ;
250 break ;
251 }
252 }
253
254 // Store in buffer
255 buffer[bufptr++]=c ;
256 first=kFALSE ;
257 cprev=c ;
258 }
259
260 if (_is->eof() || _is->bad()) {
261 _atEOF = kTRUE ;
262 }
263
264 // Check if closing quote was encountered
265 if (quotedString) {
266 oocoutW((TObject*)0,InputArguments) << "RooStreamParser::readToken: closing quote (\") missing" << endl ;
267 }
268
269 // Absorb trailing white space or absorb rest of line if // is encountered
270 if (c=='\n') {
// Leave the newline in the stream, except after a line continuation
271 if (!lineCont) {
272 _is->putback(c) ;
273 }
274 } else {
275 c = _is->peek() ;
276
277 while ((isspace(c) || c=='/') && c != '\n') {
278 if (c=='/') {
279 _is->get(c) ;
280 if (_is->peek()=='/') {
// NOTE(review): a statement appears to be missing from this listing here
// (presumably the call that discards the rest of the comment line) - confirm.
282 } else {
// A single '/' is not a comment: return it to the stream
283 _is->putback('/') ;
284 }
285 break ;
286 } else {
287 _is->get(c) ;
288 c = _is->peek() ;
289 }
290 }
291 }
292
293 // If no token was read and the line is continued, return first token on next line
294 if (bufptr==0 && lineCont) {
295 return readToken() ;
296 }
297
298 // Zero terminate buffer and convert to TString
299 buffer[bufptr]=0 ;
300 return TString(buffer) ;
301}
302
303
304
305////////////////////////////////////////////////////////////////////////////////
306/// Read an entire line from the stream and return as TString
307/// This method recognizes the use of '\\' in the istream
308/// as line continuation token.
309
311{
312 char c, buffer[64000];
313 Int_t nfree(63999);
314
315 if (_is->peek() == '\n')
316 _is->get(c);
317
318 // Read till end of line
319 _is->getline(buffer, nfree, '\n');
320
321 // Look for eventual continuation line sequence
322 char *pcontseq = strstr(buffer, "\\\\");
323 if (pcontseq)
324 nfree -= (pcontseq - buffer);
325 while (pcontseq) {
326 _is->getline(pcontseq, nfree, '\n');
327
328 char *nextpcontseq = strstr(pcontseq, "\\\\");
329 if (nextpcontseq)
330 nfree -= (nextpcontseq - pcontseq);
331 pcontseq = nextpcontseq;
332 }
333
334 // Chop eventual comments
335 char *pcomment = strstr(buffer,"//") ;
336 if (pcomment) *pcomment=0 ;
337
338 // Chop leading and trailing space
339 char *pstart=buffer ;
340 while (isspace(*pstart)) {
341 pstart++ ;
342 }
343 char *pend=buffer+strlen(buffer)-1 ;
344 if (pend>pstart)
345 while (isspace(*pend)) { *pend--=0 ; }
346
347 if (_is->eof() || _is->fail()) {
348 _atEOF = kTRUE ;
349 }
350
351 // Convert to TString
352 return TString(pstart) ;
353}
354
355
356
357////////////////////////////////////////////////////////////////////////////////
358/// Eat all characters up to and including then end of the
359/// current line. If inclContLines is kTRUE, all continuation lines
360/// marked by the '\\' token are zapped as well
361
363{
364 // Skip over everything until the end of the current line
365 if (_is->peek()!='\n') {
366
367 char buffer[64000];
368 Int_t nfree(63999);
369
370 // Read till end of line
371 _is->getline(buffer, nfree, '\n');
372
373 if (inclContLines) {
374 // Look for eventual continuation line sequence
375 char *pcontseq = strstr(buffer, "\\\\");
376 if (pcontseq)
377 nfree -= (pcontseq - buffer);
378 while (pcontseq) {
379 _is->getline(pcontseq, nfree, '\n');
380
381 char *nextpcontseq = strstr(pcontseq, "\\\\");
382 if (nextpcontseq)
383 nfree -= (nextpcontseq - pcontseq);
384 pcontseq = nextpcontseq;
385 }
386 }
387
388 // Put back newline character in stream buffer
389 _is->putback('\n') ;
390 }
391}
392
393
394
395////////////////////////////////////////////////////////////////////////////////
396/// Read the next token and return kTRUE if it is identical to the given 'expected' token.
397
399{
400 TString token(readToken()) ;
401
402 Bool_t error=token.CompareTo(expected) ;
403 if (error && !_prefix.IsNull()) {
404 oocoutW((TObject*)0,InputArguments) << _prefix << ": parse error, expected '"
405 << expected << "'" << ", got '" << token << "'" << endl ;
406 if (zapOnError) zapToEnd(kTRUE) ;
407 }
408 return error ;
409}
410
411
412
413////////////////////////////////////////////////////////////////////////////////
414/// Read the next token and convert it to a Double_t. Returns true
415/// if an error occurred in reading or conversion
416
418{
419 TString token(readToken()) ;
420 if (token.IsNull()) return kTRUE ;
421 return convertToDouble(token,value) ;
422
423}
424
425
426
427////////////////////////////////////////////////////////////////////////////////
428/// Convert given string to a double. Return true if the conversion fails.
429
431{
432 char* endptr = 0;
433 const char* data=token.Data() ;
434
435 // Handle +/- infinity cases, (token is guaranteed to be >1 char long)
436 if (!strcasecmp(data,"inf") || !strcasecmp(data+1,"inf")) {
437 value = (data[0]=='-') ? -RooNumber::infinity() : RooNumber::infinity() ;
438 return kFALSE ;
439 }
440
441 value = strtod(data,&endptr) ;
442 Bool_t error = (endptr-data!=token.Length()) ;
443
444 if (error && !_prefix.IsNull()) {
445 oocoutE((TObject*)0,InputArguments) << _prefix << ": parse error, cannot convert '"
446 << token << "'" << " to double precision" << endl ;
447 }
448 return error ;
449}
450
451
452
453////////////////////////////////////////////////////////////////////////////////
454/// Read a token and convert it to an Int_t. Returns true
455/// if an error occurred in reading or conversion
456
458{
459 TString token(readToken()) ;
460 if (token.IsNull()) return kTRUE ;
461 return convertToInteger(token,value) ;
462}
463
464
465
466////////////////////////////////////////////////////////////////////////////////
467/// Convert given string to an Int_t. Returns true if an error
468/// occurred in conversion
469
471{
472 char* endptr = 0;
473 const char* data=token.Data() ;
474 value = strtol(data,&endptr,10) ;
475 Bool_t error = (endptr-data!=token.Length()) ;
476
477 if (error && !_prefix.IsNull()) {
478 oocoutE((TObject*)0,InputArguments)<< _prefix << ": parse error, cannot convert '"
479 << token << "'" << " to integer" << endl ;
480 }
481 return error ;
482}
483
484
485
486////////////////////////////////////////////////////////////////////////////////
487/// Read a string token. Returns true if an error occurred in reading
488/// or conversion. If a the read token is enclosed in quotation
489/// marks those are stripped in the returned value
490
492{
493 TString token(readToken()) ;
494 if (token.IsNull()) return kTRUE ;
495 return convertToString(token,value) ;
496}
497
498
499
500////////////////////////////////////////////////////////////////////////////////
501/// Convert given token to a string (i.e. remove eventual quotation marks)
502
504{
505 // Transport to buffer
506 char buffer[64000], *ptr;
507 strncpy(buffer, token.Data(), 63999);
508 if (token.Length() >= 63999) {
509 oocoutW((TObject *)0, InputArguments) << "RooStreamParser::convertToString: token length exceeds 63999, truncated"
510 << endl;
511 buffer[63999] = 0;
512 }
513 int len = strlen(buffer) ;
514
515 // Remove trailing quote if any
516 if ((len) && (buffer[len-1]=='"'))
517 buffer[len-1]=0 ;
518
519 // Skip leading quote, if present
520 ptr=(buffer[0]=='"') ? buffer+1 : buffer ;
521
522 string = ptr ;
523 return kFALSE ;
524}
#define c(i)
Definition: RSha256.hxx:101
#define oocoutW(o, a)
Definition: RooMsgService.h:48
#define oocoutE(o, a)
Definition: RooMsgService.h:49
int Int_t
Definition: RtypesCore.h:41
const Bool_t kFALSE
Definition: RtypesCore.h:88
bool Bool_t
Definition: RtypesCore.h:59
double Double_t
Definition: RtypesCore.h:55
const Bool_t kTRUE
Definition: RtypesCore.h:87
#define ClassImp(name)
Definition: Rtypes.h:365
static Double_t infinity()
Return internal infinity representation.
Definition: RooNumber.cxx:49
Bool_t isPunctChar(char c) const
Check if given char is considered punctuation.
Bool_t atEOL()
If true, parser is at end of line in stream.
Bool_t readInteger(Int_t &value, Bool_t zapOnError=kFALSE)
Read a token and convert it to an Int_t.
Bool_t readDouble(Double_t &value, Bool_t zapOnError=kFALSE)
Read the next token and convert it to a Double_t.
Bool_t convertToDouble(const TString &token, Double_t &value)
Convert given string to a double. Return true if the conversion fails.
Bool_t readString(TString &value, Bool_t zapOnError=kFALSE)
Read a string token.
void setPunctuation(const TString &punct)
Change list of characters interpreted as punctuation.
void zapToEnd(Bool_t inclContLines=kFALSE)
Eat all characters up to and including the end of the current line.
Bool_t expectToken(const TString &expected, Bool_t zapOnError=kFALSE)
Read the next token and return kTRUE if it differs from the given 'expected' token (i.e. on error).
virtual ~RooStreamParser()
Destructor.
std::istream * _is
Bool_t convertToInteger(const TString &token, Int_t &value)
Convert given string to an Int_t.
TString readLine()
Read an entire line from the stream and return as TString This method recognizes the use of '\' in th...
TString readToken()
Read one token separated by any of the know punctuation characters This function recognizes and handl...
Bool_t convertToString(const TString &token, TString &string)
Convert given token to a string (i.e. remove eventual quotation marks)
RooStreamParser(std::istream &is)
Construct parser on given input stream.
Mother of all ROOT objects.
Definition: TObject.h:37
Basic string class.
Definition: TString.h:131
Ssiz_t Length() const
Definition: TString.h:405
int CompareTo(const char *cs, ECaseCompare cmp=kExact) const
Compare a string to char *cs2.
Definition: TString.cxx:418
const char * Data() const
Definition: TString.h:364
Bool_t IsNull() const
Definition: TString.h:402
@ InputArguments
Definition: RooGlobalFunc.h:68
Definition: first.py:1