Logo ROOT  
Reference Guide
 
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
Loading...
Searching...
No Matches
TRegexp.cxx
Go to the documentation of this file.
1// @(#)root/base:$Id$
2// Author: Fons Rademakers 04/08/95
3
4/*************************************************************************
5 * Copyright (C) 1995-2000, Rene Brun and Fons Rademakers. *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12/** \class TRegexp
13\ingroup Base
14
15Regular expression class.
16
17~~~ {.cpp}
18 '^' // start-of-line anchor
19 '$' // end-of-line anchor
20 '.' // matches any character
21 '[' // start a character class
22 ']' // end a character class
23 '^' // negates character class if 1st character
24 '*' // Kleene closure (matches 0 or more)
25 '+' // Positive closure (1 or more)
26 '?' // Optional closure (0 or 1)
27~~~
28Note that the '|' operator (union) is not supported, nor are
29parentheses (grouping). Therefore "a|b" does not match "a".
30
Standard classes like [:alnum:], [:alpha:], etc. are not supported,
only explicit character classes such as [a-zA-Z], [^\ntf] and so on.
33
Warning: The preferred way to use regular expressions is via std::regex;
for example, the Index() functions of this class may return incorrect results.
36*/
37
38#include "TRegexp.h"
39#include "TString.h"
40#include "TError.h"
41#include "ThreadLocalStorage.h"
42
43const unsigned TRegexp::fgMaxpat = 2048;
44
45
47
48////////////////////////////////////////////////////////////////////////////////
49/// Create a regular expression from the input string. If wildcard is
50/// true then the input string will first be interpreted as a wildcard
51/// expression by MakeWildcard(), and the result then interpreted as a
52/// regular expression.
53
55{
56 if (wildcard)
58 else
59 GenPattern(re);
60}
61
62////////////////////////////////////////////////////////////////////////////////
63/// Create a regular expression from a TString.
64
66{
67 GenPattern(re.Data());
68}
69
70////////////////////////////////////////////////////////////////////////////////
71/// Copy ctor.
72
74{
76}
77
78////////////////////////////////////////////////////////////////////////////////
79/// Destructor.
80
82{
83 delete [] fPattern;
84}
85
86////////////////////////////////////////////////////////////////////////////////
87/// Assignment operator.
88
90{
91 if (this != &r) {
92 delete [] fPattern;
94 }
95 return *this;
96}
97
98////////////////////////////////////////////////////////////////////////////////
99/// Assignment operator taking a char* and assigning it to a regexp.
100
102{
103 delete [] fPattern;
104 GenPattern(str);
105 return *this;
106}
107
108////////////////////////////////////////////////////////////////////////////////
109/// Assignment operator taking a TString.
110
112{
113 delete [] fPattern;
114 GenPattern(str.Data());
115 return *this;
116}
117
118////////////////////////////////////////////////////////////////////////////////
119/// Generate the regular expression pattern.
120
121void TRegexp::GenPattern(const char *str)
122{
124 int error = ::Makepat(str, fPattern, fgMaxpat);
125 fStat = (error < 3) ? (EStatVal) error : kToolong;
126}
127
128////////////////////////////////////////////////////////////////////////////////
129/// Copy the regular expression pattern.
130
132{
134 memcpy(fPattern, r.fPattern, fgMaxpat * sizeof(Pattern_t));
135 fStat = r.fStat;
136}
137
138////////////////////////////////////////////////////////////////////////////////
139/// This routine transforms a wildcarding regular expression into
140/// a general regular expression used for pattern matching.
141/// When using wildcards the regular expression is assumed to be
142/// preceded by a "^" (BOL) and terminated by a "$" (EOL). Also, all
143/// "*"'s and "?"'s (closures) are assumed to be preceded by a "." (i.e. any
144/// character, except "/"'s) and all .'s are escaped (so *.ps is different
145/// from *.eps). The special treatment of "/" allows the easy matching of
146/// pathnames, e.g. "*.root" will match "aap.root", but not "pipo/aap.root".
147/// +s are escaped as well.
148
149const char *TRegexp::MakeWildcard(const char *re)
150{
151 TTHREAD_TLS_ARRAY(char,fgMaxpat,buf);
152 char *s = buf;
153 if (!re) return "";
154 int len = strlen(re);
155 int slen = 0;
156
157 if (!len) return "";
158
159 for (int i = 0; i < len; i++) {
160 if ((unsigned)slen > fgMaxpat - 10) {
161 Error("MakeWildcard", "regexp too large");
162 break;
163 }
164 if (i == 0 && re[i] != '^') {
165 *s++ = '^';
166 slen++;
167 }
168 if (re[i] == '*') {
169#ifndef R__WIN32
170 //const char *wc = "[a-zA-Z0-9-+_\.,: []<>]";
171 const char *wc = "[^/]";
172#else
173 //const char *wc = "[a-zA-Z0-9-+_., []<>]";
174 const char *wc = "[^\/:]";
175#endif
176 strcpy(s, wc);
177 s += strlen(wc);
178 slen += strlen(wc);
179 }
180 if (re[i] == '.' || re[i] == '+') {
181 *s++ = '\';
182 slen++;
183 }
184 if (re[i] == '?') {
185#ifndef R__WIN32
186 //const char *wc = "[a-zA-Z0-9-+_\.,: []<>]";
187 const char *wc = "[^/]";
188#else
189 //const char *wc = "[a-zA-Z0-9-+_., []<>]";
190 const char *wc = "[^\/:]";
191#endif
192 strcpy(s, wc);
193 s += strlen(wc);
194 slen += strlen(wc);
195 } else {
196 *s++ = re[i];
197 slen++;
198 }
199 if (i == len-1 && re[i] != '$') {
200 *s++ = '$';
201 slen++;
202 }
203 }
204 *s = '\0';
205 return buf;
206}
207
208////////////////////////////////////////////////////////////////////////////////
209/// Find the first occurrence of the regexp in string and return the
210/// position, or -1 if there is no match. Len is length of the matched
211/// string and i is the offset at which the matching should start.
212/// Please, see the Warning in the class documentation above.
213
214Ssiz_t TRegexp::Index(const TString& string, Ssiz_t* len, Ssiz_t i) const
215{
216 if (fStat != kOK)
217 Error("TRegexp::Index", "Bad Regular Expression");
218
219 const char* startp;
220 const char* s = string.Data();
221 Ssiz_t slen = string.Length();
222 if (slen < i) return kNPOS;
223 const char* endp = ::Matchs(s+i, slen-i, fPattern, &startp);
224 if (endp) {
225 *len = endp - startp;
226 return startp - s;
227 } else {
228 *len = 0;
229 return kNPOS;
230 }
231}
232
233////////////////////////////////////////////////////////////////////////////////
234/// Check status of regexp.
235
237{
238 EStatVal temp = fStat;
239 fStat = kOK;
240 return temp;
241}
242
243////////////////////////////////////////////////////////////////////////////////
244// //
245// TString member functions, put here so the linker will include //
246// them only if regular expressions are used. //
247// //
248////////////////////////////////////////////////////////////////////////////////
249
250////////////////////////////////////////////////////////////////////////////////
251/// Find the first occurrence of the regexp in string and return the
252/// position, or -1 if there is no match. Start is the offset at which
253/// the search should start.
254/// Please, see the Warning in the class documentation above.
255
257{
258 Ssiz_t len;
259 return r.Index(*this, &len, start); // len not used
260}
261
262////////////////////////////////////////////////////////////////////////////////
263/// Find the first occurrence of the regexp in string and return the
264/// position, or -1 if there is no match. Extent is length of the matched
265/// string and start is the offset at which the matching should start.
266/// Please, see the Warning in the class documentation above.
267
269{
270 return r.Index(*this, extent, start);
271}
272
273////////////////////////////////////////////////////////////////////////////////
274/// Return the substring found by applying the regexp starting at start.
275/// Please, see the Warning in the class documentation above.
276
278{
279 Ssiz_t len = 0;
280 Ssiz_t begin = Index(r, &len, start);
281 return TSubString(*this, begin, len);
282}
283
284////////////////////////////////////////////////////////////////////////////////
285/// Return the substring found by applying the regexp.
286
288{
289 return (*this)(r,0);
290}
291
292////////////////////////////////////////////////////////////////////////////////
293/// Search for tokens delimited by regular expression 'delim' (default " ")
294/// in this string; search starts at 'from' and the token is returned in 'tok'.
295/// Returns in 'from' the next position after the delimiter.
296/// Returns kTRUE if a token is found, kFALSE if not or if some inconsistency
297/// occurred.
298/// This method allows to loop over tokens in this way:
299/// ~~~ {.cpp}
300/// TString myl = "tok1 tok2|tok3";
301/// TString tok;
302/// Ssiz_t from = 0;
303/// while (myl.Tokenize(tok, from, "[ |]")) {
304/// // Analyse tok
305/// ...
306/// }
307/// ~~~
/// This is more convenient than the other Tokenize method when the tokens do
/// not need to be saved.
310
311Bool_t TString::Tokenize(TString &tok, Ssiz_t &from, const char *delim) const
312{
313 Bool_t found = kFALSE;
314
315 // Reset the token
316 tok = "";
317
318 // Make sure inputs make sense
319 Int_t len = Length();
320 if (len <= 0 || from > (len - 1) || from < 0)
321 return found;
322
323 // Ensure backward compatibility to allow one or more times the delimiting character
325 if(rdelim.Length() == 1) {
326 rdelim = "[" + rdelim + "]+";
327 }
329
330 // Find delimiter
331 Int_t ext = 0;
332 Int_t pos = Index(rg, &ext, from);
333
334 // Assign to token
335 if (pos == kNPOS || pos > from) {
336 Ssiz_t last = (pos != kNPOS) ? (pos - 1) : len;
337 tok = (*this)(from, last-from+1);
338 }
339 found = kTRUE;
340
341 // Update start-of-search index
342 from = pos + ext;
343 if (pos == kNPOS) {
344 from = pos;
345 if (tok.IsNull()) {
346 // Empty, last token
347 found = kFALSE;
348 }
349 }
350 // Make sure that 'from' has a meaningful value
351 from = (from < len) ? from : len;
352
353 // Done
354 return found;
355}
unsigned short Pattern_t
Definition Match.h:24
int Makepat(const char *, Pattern_t *, int)
Make a pattern template from the string pointed to by exp.
Definition Match.cxx:129
const char * Matchs(const char *, size_t len, const Pattern_t *, const char **)
Match a string with a pattern.
Definition Match.cxx:220
constexpr Bool_t kFALSE
Definition RtypesCore.h:94
constexpr Ssiz_t kNPOS
Definition RtypesCore.h:117
constexpr Bool_t kTRUE
Definition RtypesCore.h:93
#define ClassImp(name)
Definition Rtypes.h:374
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
void Error(const char *location, const char *msgfmt,...)
Use this function in case an error occurred.
Definition TError.cxx:185
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t UChar_t len
Regular expression class.
Definition TRegexp.h:31
Pattern_t * fPattern
Definition TRegexp.h:37
EStatVal Status()
Check status of regexp.
Definition TRegexp.cxx:236
EStatVal fStat
Definition TRegexp.h:38
void CopyPattern(const TRegexp &re)
Copy the regular expression pattern.
Definition TRegexp.cxx:131
static const unsigned fgMaxpat
Definition TRegexp.h:39
virtual ~TRegexp()
Destructor.
Definition TRegexp.cxx:81
TRegexp & operator=(const TRegexp &re)
Assignment operator.
Definition TRegexp.cxx:89
const char * MakeWildcard(const char *re)
This routine transforms a wildcarding regular expression into a general regular expression used for p...
Definition TRegexp.cxx:149
Ssiz_t Index(const TString &str, Ssiz_t *len, Ssiz_t start=0) const
Find the first occurrence of the regexp in string and return the position, or -1 if there is no match...
Definition TRegexp.cxx:214
@ kToolong
Definition TRegexp.h:34
@ kOK
Definition TRegexp.h:34
TRegexp(const char *re, Bool_t wildcard=kFALSE)
Create a regular expression from the input string.
Definition TRegexp.cxx:54
void GenPattern(const char *re)
Generate the regular expression pattern.
Definition TRegexp.cxx:121
Basic string class.
Definition TString.h:139
Ssiz_t Length() const
Definition TString.h:417
friend class TSubString
Definition TString.h:142
char & operator()(Ssiz_t i)
Definition TString.h:724
static constexpr Ssiz_t kNPOS
Definition TString.h:278
TObjArray * Tokenize(const TString &delim) const
This function is used to isolate sequential tokens in a TString.
Definition TString.cxx:2264
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition TString.h:651
A zero length substring is legal.
Definition TString.h:85