Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RooStreamParser.cxx
Go to the documentation of this file.
1/*****************************************************************************
2 * Project: RooFit *
3 * Package: RooFitCore *
4 * @(#)root/roofitcore:$Id$
5 * Authors: *
6 * WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu *
7 * DK, David Kirkby, UC Irvine, dkirkby@uci.edu *
8 * *
9 * Copyright (c) 2000-2005, Regents of the University of California *
10 * and Stanford University. All rights reserved. *
11 * *
12 * Redistribution and use in source and binary forms, *
13 * with or without modification, are permitted according to the terms *
14 * listed in LICENSE (http://roofit.sourceforge.net/license.txt) *
15 *****************************************************************************/
16
17//////////////////////////////////////////////////////////////////////////////
18//
19// RooStreamParser is a utility class to parse istreams into tokens and optionally
20// convert them into basic types (double,int,string)
21//
22// The general tokenizing philosophy is that there are two kinds of tokens: value
23// and punctuation. The former are variable length, the latter always
24// one character. A token is terminated if one of the following conditions
25// occurs
26// - space character found (' ',tab,newline)
27// - change of token type (value -> punctuation or vv)
28// - end of fixed-length token (punctuation only)
29// - start or end of quoted string
30//
31// The parser is aware of floating point notation and will assign leading
32// minus signs, decimal points etc to a value token when this is obvious
33// from the context. The definition of what is punctuation can be redefined.
34//
35
36
37#include "Riostream.h"
38#include <cstdlib>
39
40#ifndef _WIN32
41#include <strings.h>
42#endif
43
44#include "RooStreamParser.h"
45#include "RooMsgService.h"
46#include "RooNumber.h"
47
48
49using std::istream, std::endl;
50
52
53
54////////////////////////////////////////////////////////////////////////////////
55/// Construct parser on given input stream
56
58 _is(&is), _atEOL(false), _atEOF(false), _prefix(""), _punct("()[]<>|/\\:?.,=+-&^%$#@!`~")
59{
60}
61
62
63////////////////////////////////////////////////////////////////////////////////
64/// Construct parser on given input stream. Use given errorPrefix to
65/// prefix any parsing error messages
66
67RooStreamParser::RooStreamParser(istream& is, const TString& errorPrefix) :
68 _is(&is), _atEOL(false), _atEOF(false), _prefix(errorPrefix), _punct("()[]<>|/\\:?.,=+-&^%$#@!`~")
69{
70}
71
72////////////////////////////////////////////////////////////////////////////////
73/// If true, parser is at end of line in stream
74
76{
77 Int_t nc(_is->peek()) ;
78 return (nc=='\n'||nc==-1) ;
79}
80
81
82
83////////////////////////////////////////////////////////////////////////////////
84/// Change list of characters interpreted as punctuation
85
87{
88 _punct = punct ;
89}
90
91
92
93////////////////////////////////////////////////////////////////////////////////
94/// Check if given char is considered punctuation
95
97{
98 const char* punct = _punct.Data() ;
99 for (int i = 0; i < _punct.Length(); i++) {
100 if (punct[i] == c) {
101 return true;
102 }
103 }
104 return false ;
105}
106
107
108
109////////////////////////////////////////////////////////////////////////////////
110/// Read one token separated by any of the know punctuation characters
111/// This function recognizes and handles comment lines in the istream (those
112/// starting with '#', quoted strings ("") the content of which is not tokenized
113/// and '+-.' characters that are part of a floating point numbers and are exempt
114/// from being interpreted as a token separator in case '+-.' are defined as
115/// token separators.
116
118{
119 // Smart tokenizer. Absorb white space and token must be either punctuation or alphanum
120 bool first(true);
121 bool quotedString(false);
122 bool lineCont(false);
123 char buffer[64000];
124 char c(0);
125 char cnext = '\0';
126 char cprev = ' ';
127 bool haveINF(false) ;
128 Int_t bufptr(0) ;
129
130 // Check for end of file
131 if (_is->eof() || _is->fail()) {
132 _atEOF = true ;
133 return TString("") ;
134 }
135
136 //Ignore leading newline
137 if (_is->peek()=='\n') {
138 _is->get(c) ;
139
140 // If new line starts with #, zap it
141 while (_is->peek()=='#') {
142 zapToEnd(false) ;
143 _is->get(c) ; // absorb newline
144 }
145 }
146
147 while(true) {
148 // Buffer overflow protection
149 if (bufptr >= 63999) {
150 oocoutW(nullptr, InputArguments)
151 << "RooStreamParser::readToken: token length exceeds buffer capacity, terminating token early" << endl;
152 break;
153 }
154
155 // Read next char
156 _is->get(c) ;
157
158
159
160 // Terminate at EOF, EOL or trouble
161 if (_is->eof() || _is->fail() || c=='\n') break ;
162
163 // Terminate as SPACE, unless we haven't seen any non-SPACE yet
164 if (isspace(c)) {
165 if (first) {
166 continue;
167 } else if (!quotedString) {
168 break;
169 }
170 }
171
172 // If '-' or '/' see what the next character is
173 if (c == '.' || c=='-' || c=='+' || c=='/' || c=='\\') {
174 _is->get(cnext) ;
175
176
177 if (cnext=='I' || cnext=='i') {
178 char tmp1;
179 char tmp2;
180 _is->get(tmp1);
181 _is->get(tmp2);
182 _is->putback(tmp2);
183 _is->putback(tmp1);
184 haveINF = ((cnext == 'I' && tmp1 == 'N' && tmp2 == 'F') || (cnext == 'i' && tmp1 == 'n' && tmp2 == 'f'));
185 } else {
186 haveINF = false ;
187 }
188
189 _is->putback(cnext) ;
190 }
191
192
193 // Check for line continuation marker
194 if (c=='\\' && cnext=='\\') {
195 // Kill rest of line including endline marker
196 zapToEnd(false) ;
197 _is->get(c) ;
198 lineCont=true ;
199 break ;
200 }
201
202 // Stop if begin of comments is encountered
203 if (c=='/' && cnext=='/') {
204 zapToEnd(false) ;
205 break ;
206 }
207
208 // Special handling of quoted strings
209 if (c=='"') {
210 if (first) {
211 quotedString=true ;
212 } else if (!quotedString) {
213 // Terminate current token. Next token will be quoted string
214 _is->putback('"') ;
215 break ;
216 }
217 }
218
219 if (!quotedString) {
220 // Decide if next char is punctuation (exempt - and . that are part of floating point numbers, or +/- preceding INF)
221 if (isPunctChar(c) && !(c=='.' && (isdigit(cnext)||isdigit(cprev)))
222 && !((c=='-'||c=='+') && isdigit(cnext) && (cprev == 'e' || cprev == 'E'))
223 && (!first || !((c=='-'||c=='+') && (isdigit(cnext)||cnext=='.'||haveINF)))) {
224
225 if (first) {
226 // Make this a one-char punctuation token
227 buffer[bufptr++]=c ;
228 break ;
229 } else {
230 // Put back punct. char and terminate current alphanum token
231 _is->putback(c) ;
232 break ;
233 }
234 }
235 } else {
236 // Inside quoted string conventional tokenizing rules do not apply
237
238 // Terminate token on closing quote
239 if (c=='"' && !first) {
240 buffer[bufptr++]=c ;
241 quotedString=false ;
242 break ;
243 }
244 }
245
246 // Store in buffer
247 buffer[bufptr++]=c ;
248 first=false ;
249 cprev=c ;
250 }
251
252 if (_is->eof() || _is->bad()) {
253 _atEOF = true ;
254 }
255
256 // Check if closing quote was encountered
257 if (quotedString) {
258 oocoutW(nullptr,InputArguments) << "RooStreamParser::readToken: closing quote (\") missing" << endl ;
259 }
260
261 // Absorb trailing white space or absorb rest of line if // is encountered
262 if (c=='\n') {
263 if (!lineCont) {
264 _is->putback(c) ;
265 }
266 } else {
267 c = _is->peek() ;
268
269 while ((isspace(c) || c=='/') && c != '\n') {
270 if (c=='/') {
271 _is->get(c) ;
272 if (_is->peek()=='/') {
273 zapToEnd(false) ;
274 } else {
275 _is->putback('/') ;
276 }
277 break ;
278 } else {
279 _is->get(c) ;
280 c = _is->peek() ;
281 }
282 }
283 }
284
285 // If no token was read line is continued, return first token on next line
286 if (bufptr==0 && lineCont) {
287 return readToken() ;
288 }
289
290 // Zero terminate buffer and convert to TString
291 buffer[bufptr]=0 ;
292 return TString(buffer) ;
293}
294
295
296
297////////////////////////////////////////////////////////////////////////////////
298/// Read an entire line from the stream and return as TString
299/// This method recognizes the use of '\\' in the istream
300/// as line continuation token.
301
303{
304 char c;
305 char buffer[64000];
306 Int_t nfree(63999);
307
308 if (_is->peek() == '\n')
309 _is->get(c);
310
311 // Read till end of line
312 _is->getline(buffer, nfree, '\n');
313
314 // Look for eventual continuation line sequence
315 char *pcontseq = strstr(buffer, "\\\\");
316 if (pcontseq)
317 nfree -= (pcontseq - buffer);
318 while (pcontseq) {
319 _is->getline(pcontseq, nfree, '\n');
320
321 char *nextpcontseq = strstr(pcontseq, "\\\\");
322 if (nextpcontseq)
323 nfree -= (nextpcontseq - pcontseq);
324 pcontseq = nextpcontseq;
325 }
326
327 // Chop eventual comments
328 char *pcomment = strstr(buffer, "//");
329 if (pcomment)
330 *pcomment = 0;
331
332 // Chop leading and trailing space
333 char *pstart = buffer;
334 while (isspace(*pstart)) {
335 pstart++;
336 }
337 char *pend = buffer + strlen(buffer) - 1;
338 if (pend > pstart) {
339 while (isspace(*pend)) {
340 *pend-- = 0;
341 }
342 }
343
344 if (_is->eof() || _is->fail()) {
345 _atEOF = true;
346 }
347
348 // Convert to TString
349 return TString(pstart);
350}
351
352
353
354////////////////////////////////////////////////////////////////////////////////
355/// Eat all characters up to and including then end of the
356/// current line. If inclContLines is true, all continuation lines
357/// marked by the '\\' token are zapped as well
358
359void RooStreamParser::zapToEnd(bool inclContLines)
360{
361 // Skip over everything until the end of the current line
362 if (_is->peek()!='\n') {
363
364 char buffer[64000];
365 Int_t nfree(63999);
366
367 // Read till end of line
368 _is->getline(buffer, nfree, '\n');
369
370 if (inclContLines) {
371 // Look for eventual continuation line sequence
372 char *pcontseq = strstr(buffer, "\\\\");
373 if (pcontseq)
374 nfree -= (pcontseq - buffer);
375 while (pcontseq) {
376 _is->getline(pcontseq, nfree, '\n');
377
378 char *nextpcontseq = strstr(pcontseq, "\\\\");
379 if (nextpcontseq)
380 nfree -= (nextpcontseq - pcontseq);
381 pcontseq = nextpcontseq;
382 }
383 }
384
385 // Put back newline character in stream buffer
386 _is->putback('\n') ;
387 }
388}
389
390
391
392////////////////////////////////////////////////////////////////////////////////
393/// Read the next token and return true if it is identical to the given 'expected' token.
394
395bool RooStreamParser::expectToken(const TString& expected, bool zapOnError)
396{
397 TString token(readToken()) ;
398
399 bool error=token.CompareTo(expected) ;
400 if (error && !_prefix.IsNull()) {
401 oocoutW(nullptr,InputArguments) << _prefix << ": parse error, expected '"
402 << expected << "'" << ", got '" << token << "'" << endl ;
403 if (zapOnError) zapToEnd(true) ;
404 }
405 return error ;
406}
407
408
409
410////////////////////////////////////////////////////////////////////////////////
411/// Read the next token and convert it to a double. Returns true
412/// if an error occurred in reading or conversion
413
414bool RooStreamParser::readDouble(double& value, bool /*zapOnError*/)
415{
416 TString token(readToken()) ;
417 if (token.IsNull()) return true ;
418 return convertToDouble(token,value) ;
419
420}
421
422
423
424////////////////////////////////////////////////////////////////////////////////
425/// Convert given string to a double. Return true if the conversion fails.
426
428{
429 char *endptr = nullptr;
430 const char *data = token.Data();
431
432 // Handle +/- infinity cases, (token is guaranteed to be >1 char long)
433 if (!strcasecmp(data, "inf") || !strcasecmp(data + 1, "inf")) {
434 value = (data[0] == '-') ? -RooNumber::infinity() : RooNumber::infinity();
435 return false;
436 }
437
438 value = strtod(data, &endptr);
439 bool error = (endptr - data != token.Length());
440
441 if (error && !_prefix.IsNull()) {
442 oocoutE(nullptr, InputArguments) << _prefix << ": parse error, cannot convert '" << token << "'"
443 << " to double precision" << endl;
444 }
445 return error;
446}
447
448
449
450////////////////////////////////////////////////////////////////////////////////
451/// Read a token and convert it to an Int_t. Returns true
452/// if an error occurred in reading or conversion
453
454bool RooStreamParser::readInteger(Int_t& value, bool /*zapOnError*/)
455{
456 TString token(readToken()) ;
457 if (token.IsNull()) return true ;
458 return convertToInteger(token,value) ;
459}
460
461
462
463////////////////////////////////////////////////////////////////////////////////
464/// Convert given string to an Int_t. Returns true if an error
465/// occurred in conversion
466
468{
469 char* endptr = nullptr;
470 const char* data=token.Data() ;
471 value = strtol(data,&endptr,10) ;
472 bool error = (endptr-data!=token.Length()) ;
473
474 if (error && !_prefix.IsNull()) {
475 oocoutE(nullptr,InputArguments)<< _prefix << ": parse error, cannot convert '"
476 << token << "'" << " to integer" << endl ;
477 }
478 return error ;
479}
480
481
482
483////////////////////////////////////////////////////////////////////////////////
484/// Read a string token. Returns true if an error occurred in reading
485/// or conversion. If a the read token is enclosed in quotation
486/// marks those are stripped in the returned value
487
488bool RooStreamParser::readString(TString& value, bool /*zapOnError*/)
489{
490 TString token(readToken()) ;
491 if (token.IsNull()) return true ;
492 return convertToString(token,value) ;
493}
494
495
496
497////////////////////////////////////////////////////////////////////////////////
498/// Convert given token to a string (i.e. remove eventual quotation marks)
499
501{
502 // Transport to buffer
503 char buffer[64000];
504 char *ptr;
505 strncpy(buffer, token.Data(), 63999);
506 if (token.Length() >= 63999) {
507 oocoutW(nullptr, InputArguments) << "RooStreamParser::convertToString: token length exceeds 63999, truncated"
508 << endl;
509 buffer[63999] = 0;
510 }
511 int len = strlen(buffer) ;
512
513 // Remove trailing quote if any
514 if ((len) && (buffer[len-1]=='"'))
515 buffer[len-1]=0 ;
516
517 // Skip leading quote, if present
518 ptr=(buffer[0]=='"') ? buffer+1 : buffer ;
519
520 string = ptr ;
521 return false ;
522}
#define c(i)
Definition RSha256.hxx:101
#define oocoutW(o, a)
#define oocoutE(o, a)
#define ClassImp(name)
Definition Rtypes.h:382
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t UChar_t len
static constexpr double infinity()
Return internal infinity representation.
Definition RooNumber.h:25
bool convertToInteger(const TString &token, Int_t &value)
Convert given string to an Int_t.
bool readString(TString &value, bool zapOnError=false)
Read a string token.
void setPunctuation(const TString &punct)
Change list of characters interpreted as punctuation.
bool isPunctChar(char c) const
Check if given char is considered punctuation.
bool expectToken(const TString &expected, bool zapOnError=false)
Read the next token and return true if it is identical to the given 'expected' token.
bool convertToDouble(const TString &token, double &value)
Convert given string to a double. Return true if the conversion fails.
bool atEOL()
If true, parser is at end of line in stream.
std::istream * _is
TString readLine()
Read an entire line from the stream and return as TString This method recognizes the use of '\' in th...
bool readDouble(double &value, bool zapOnError=false)
Read the next token and convert it to a double.
TString readToken()
Read one token separated by any of the know punctuation characters This function recognizes and handl...
bool readInteger(Int_t &value, bool zapOnError=false)
Read a token and convert it to an Int_t.
void zapToEnd(bool inclContLines=false)
Eat all characters up to and including then end of the current line.
bool convertToString(const TString &token, TString &string)
Convert given token to a string (i.e. remove eventual quotation marks)
RooStreamParser(std::istream &is)
Construct parser on given input stream.
Basic string class.
Definition TString.h:139
Ssiz_t Length() const
Definition TString.h:417
int CompareTo(const char *cs, ECaseCompare cmp=kExact) const
Compare a string to char *cs2.
Definition TString.cxx:457
const char * Data() const
Definition TString.h:376
Bool_t IsNull() const
Definition TString.h:414