Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
TPRegexp.cxx
Go to the documentation of this file.
1// @(#)root/base:$Id$
2// Author: Eddy Offermann 24/06/05
3
4/*************************************************************************
5 * Copyright (C) 1995-2005, Rene Brun and Fons Rademakers. *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12/** \class TPRegexp
13\ingroup Base
14
15C++ Wrapper for the "Perl Compatible Regular Expressions" library
16 The PCRE lib can be found at: http://www.pcre.org/
17
18Extensive documentation about Regular expressions in Perl can be
19found at : http://perldoc.perl.org/perlre.html
20*/
21
22#include <iostream>
23#include "TPRegexp.h"
24#include "TObjArray.h"
25#include "TObjString.h"
26#include "TError.h"
27
28#ifdef R__WIN32
29#define PCRE_STATIC
30#endif
31#include <pcre.h>
32
33#include <vector>
34#include <stdexcept>
35
36struct PCREPriv_t {
37 pcre *fPCRE;
38 pcre_extra *fPCREExtra;
39
40 PCREPriv_t() { fPCRE = nullptr; fPCREExtra = nullptr; }
41};
42
43
45
47
48////////////////////////////////////////////////////////////////////////////////
49/// Default ctor.
50
52{
53 fPriv = new PCREPriv_t;
54 fPCREOpts = 0;
55}
56
57////////////////////////////////////////////////////////////////////////////////
58/// Create and initialize with pat.
59
61{
62 fPattern = pat;
63 fPriv = new PCREPriv_t;
64 fPCREOpts = 0;
65}
66
67////////////////////////////////////////////////////////////////////////////////
68/// Copy ctor.
69
71{
73 fPriv = new PCREPriv_t;
75}
76
77////////////////////////////////////////////////////////////////////////////////
78/// Cleanup.
79
81{
82 if (fPriv->fPCRE)
83 pcre_free(fPriv->fPCRE);
84 if (fPriv->fPCREExtra)
85 pcre_free(fPriv->fPCREExtra);
86 delete fPriv;
87}
88
89////////////////////////////////////////////////////////////////////////////////
90/// Assignment operator.
91
93{
94 if (this != &p) {
96 if (fPriv->fPCRE)
97 pcre_free(fPriv->fPCRE);
98 fPriv->fPCRE = nullptr;
99 if (fPriv->fPCREExtra)
100 pcre_free(fPriv->fPCREExtra);
101 fPriv->fPCREExtra = nullptr;
103 }
104 return *this;
105}
106
107////////////////////////////////////////////////////////////////////////////////
108/// Translate Perl modifier flags into pcre flags.
109/// The supported modStr characters are: g, i, m, o, s, x, and the
110/// special d for debug. The meaning of the letters is:
111/// - m
112/// Treat string as multiple lines. That is, change "^" and "$" from
113/// matching the start or end of the string to matching the start or
114/// end of any line anywhere within the string.
115/// - s
116/// Treat string as single line. That is, change "." to match any
117/// character whatsoever, even a newline, which normally it would not match.
118/// Used together, as /ms, they let the "." match any character whatsoever,
119/// while still allowing "^" and "$" to match, respectively, just after and
120/// just before newlines within the string.
121/// - i
122/// Do case-insensitive pattern matching.
123/// - x
124/// Extend your pattern's legibility by permitting whitespace and comments.
125/// - p
126/// Preserve the string matched such that ${^PREMATCH}, ${^MATCH},
127/// and ${^POSTMATCH} are available for use after matching. (Perl only: not accepted by ParseMods(), which reports it as an illegal modifier.)
128/// - g and c
129/// Global matching, and keep the Current position after failed matching.
130/// Unlike i, m, s and x, these two flags affect the way the regex is used
131/// rather than the regex itself. See Using regular expressions in Perl in
132/// perlretut for further explanation of the g and c modifiers. (Only g is accepted by ParseMods(); c is reported as an illegal modifier.)
133/// For more detail see: http://perldoc.perl.org/perlre.html#Modifiers.
134
136{
137 UInt_t opts = 0;
138
139 if (modStr.Length() <= 0)
140 return fPCREOpts;
141
142 //translate perl flags into pcre flags
143 const char *m = modStr;
144 while (*m) {
145 switch (*m) {
146 case 'g':
147 opts |= kPCRE_GLOBAL;
148 break;
149 case 'i':
150 opts |= PCRE_CASELESS;
151 break;
152 case 'm':
153 opts |= PCRE_MULTILINE;
154 break;
155 case 'o':
156 opts |= kPCRE_OPTIMIZE;
157 break;
158 case 's':
159 opts |= PCRE_DOTALL;
160 break;
161 case 'x':
162 opts |= PCRE_EXTENDED;
163 break;
164 case 'd': // special flag to enable debug printing (not Perl compat.)
165 opts |= kPCRE_DEBUG_MSGS;
166 break;
167 default:
168 Error("ParseMods", "illegal pattern modifier: %c", *m);
169 opts = 0;
170 }
171 ++m;
172 }
173 return opts;
174}
175
176////////////////////////////////////////////////////////////////////////////////
177/// Return PCRE modifier options as string.
178/// For meaning of mods see ParseMods().
179
181{
182 TString ret;
183
184 if (fPCREOpts & kPCRE_GLOBAL) ret += 'g';
185 if (fPCREOpts & PCRE_CASELESS) ret += 'i';
186 if (fPCREOpts & PCRE_MULTILINE) ret += 'm';
187 if (fPCREOpts & PCRE_DOTALL) ret += 's';
188 if (fPCREOpts & PCRE_EXTENDED) ret += 'x';
189 if (fPCREOpts & kPCRE_OPTIMIZE) ret += 'o';
190 if (fPCREOpts & kPCRE_DEBUG_MSGS) ret += 'd';
191
192 return ret;
193}
194
195////////////////////////////////////////////////////////////////////////////////
196/// Compile the fPattern.
197
199{
200 if (fPriv->fPCRE)
201 pcre_free(fPriv->fPCRE);
202
204 Info("Compile", "PREGEX compiling %s", fPattern.Data());
205
206 const char *errstr;
207 Int_t patIndex;
208 fPriv->fPCRE = pcre_compile(fPattern.Data(), fPCREOpts & kPCRE_INTMASK,
209 &errstr, &patIndex, nullptr);
210
211 if (!fPriv->fPCRE) {
213 throw std::runtime_error
214 (TString::Format("TPRegexp::Compile() compilation of TPRegexp(%s) failed at: %d because %s",
215 fPattern.Data(), patIndex, errstr).Data());
216 } else {
217 Error("Compile", "compilation of TPRegexp(%s) failed at: %d because %s",
218 fPattern.Data(), patIndex, errstr);
219 return;
220 }
221 }
222
224 Optimize();
225}
226
227////////////////////////////////////////////////////////////////////////////////
228/// Send the pattern through the optimizer.
229
231{
232 if (fPriv->fPCREExtra)
233 pcre_free(fPriv->fPCREExtra);
234
236 Info("Optimize", "PREGEX studying %s", fPattern.Data());
237
238 const char *errstr;
239 // pcre_study allows less options - see pcre_internal.h PUBLIC_STUDY_OPTIONS.
240 fPriv->fPCREExtra = pcre_study(fPriv->fPCRE, 0, &errstr);
241
242 if (!fPriv->fPCREExtra && errstr) {
243 Error("Optimize", "Optimization of TPRegexp(%s) failed: %s",
244 fPattern.Data(), errstr);
245 }
246}
247
248////////////////////////////////////////////////////////////////////////////////
249/// Returns the number of expanded '$' constructs.
250
252 const TString &replacePattern,
253 Int_t *offVec, Int_t nrMatch) const
254{
255 Int_t nrSubs = 0;
256 const char *p = replacePattern;
257
258 Int_t state = 0;
259 Int_t subnum = 0;
260 while (state != -1) {
261 switch (state) {
262 case 0:
263 if (!*p) {
264 state = -1;
265 break;
266 }
267 if (*p == '$') {
268 state = 1;
269 subnum = 0;
270 if (p[1] == '&') {
271 p++;
272 if (isdigit(p[1]))
273 p++;
274 } else if (!isdigit(p[1])) {
275 Error("ReplaceSubs", "badly formed replacement pattern: %s",
276 replacePattern.Data());
277 }
278 } else
279 final += *p;
280 break;
281 case 1:
282 if (isdigit(*p)) {
283 subnum *= 10;
284 subnum += (*p)-'0';
285 } else {
287 Info("ReplaceSubs", "PREGEX appending substr #%d", subnum);
288 if (subnum < 0 || subnum > nrMatch-1) {
289 Error("ReplaceSubs","bad string number: %d",subnum);
290 } else {
291 const TString subStr = s(offVec[2*subnum],offVec[2*subnum+1]-offVec[2*subnum]);
292 final += subStr;
293 nrSubs++;
294 }
295 state = 0;
296 continue; // send char to start state
297 }
298 }
299 p++;
300 }
301 return nrSubs;
302}
303
304////////////////////////////////////////////////////////////////////////////////
305/// Perform the actual matching - protected method.
306
308 Int_t nMaxMatch, TArrayI *pos) const
309{
310 Int_t *offVec = new Int_t[3*nMaxMatch];
311 // pcre_exec allows less options - see pcre_internal.h PUBLIC_EXEC_OPTIONS.
312 Int_t nrMatch = pcre_exec(fPriv->fPCRE, fPriv->fPCREExtra, s.Data(),
313 s.Length(), start, 0,
314 offVec, 3*nMaxMatch);
315
316 if (nrMatch == PCRE_ERROR_NOMATCH)
317 nrMatch = 0;
318 else if (nrMatch <= 0) {
319 Error("Match","pcre_exec error = %d", nrMatch);
320 delete [] offVec;
321 return 0;
322 }
323
324 if (pos)
325 pos->Set(2*nrMatch, offVec);
326 delete [] offVec;
327
328 return nrMatch;
329}
330
331////////////////////////////////////////////////////////////////////////////////
332/// The number of matches is returned, this equals the full match +
333/// sub-pattern matches.
334/// nMaxMatch is the maximum allowed number of matches.
335/// pos contains the string indices of the matches. Its usage is
336/// shown in the routine MatchS.
337/// For meaning of mods see ParseMods().
338
339Int_t TPRegexp::Match(const TString &s, const TString &mods, Int_t start,
340 Int_t nMaxMatch, TArrayI *pos)
341{
342 UInt_t opts = ParseMods(mods);
343
344 if (!fPriv->fPCRE || opts != fPCREOpts) {
345 fPCREOpts = opts;
346 Compile();
347 }
348
349 return MatchInternal(s, start, nMaxMatch, pos);
350}
351
352
353////////////////////////////////////////////////////////////////////////////////
354/// Returns a TObjArray of matched substrings as TObjString's.
355/// The TObjArray is owner of the objects and must be deleted by the user.
356/// The first entry is the full matched pattern, followed by the sub-patterns.
357/// If a pattern was not matched, it will return an empty substring:
358/// ~~~ {.cpp}
359/// TObjArray *subStrL = TPRegexp("(a|(z))(bc)").MatchS("abc");
360/// for (Int_t i = 0; i < subStrL->GetLast()+1; i++) {
361/// const TString subStr = ((TObjString *)subStrL->At(i))->GetString();
362/// std::cout << "\"" << subStr << "\" ";
363/// }
364/// std::cout << std::endl;
365/// ~~~
366/// produces: "abc" "a" "" "bc"
367///
368/// For meaning of mods see ParseMods().
369
371 Int_t start, Int_t nMaxMatch)
372{
373 TArrayI pos;
374 Int_t nrMatch = Match(s, mods, start, nMaxMatch, &pos);
375
376 TObjArray *subStrL = new TObjArray();
377 subStrL->SetOwner();
378
379 for (Int_t i = 0; i < nrMatch; i++) {
380 Int_t startp = pos[2*i];
381 Int_t stopp = pos[2*i+1];
382 if (startp >= 0 && stopp >= 0) {
383 const TString subStr = s(pos[2*i], pos[2*i+1]-pos[2*i]);
384 subStrL->Add(new TObjString(subStr));
385 } else
386 subStrL->Add(new TObjString());
387 }
388
389 return subStrL;
390}
391
392////////////////////////////////////////////////////////////////////////////////
393/// Perform pattern substitution with optional back-ref replacement
394/// - protected method.
395
397 Int_t start, Int_t nMaxMatch,
398 Bool_t doDollarSubst) const
399{
400 Int_t *offVec = new Int_t[3*nMaxMatch];
401
402 TString fin;
403 Int_t nrSubs = 0;
404 Int_t offset = start;
405 Int_t last = 0;
406
407 while (kTRUE) {
408
409 // find next matching subs
410 // pcre_exec allows less options - see pcre_internal.h PUBLIC_EXEC_OPTIONS.
411 Int_t nrMatch = pcre_exec(fPriv->fPCRE, fPriv->fPCREExtra, s.Data(),
412 s.Length(), offset, 0,
413 offVec, 3*nMaxMatch);
414
415 if (nrMatch == PCRE_ERROR_NOMATCH) {
416 break;
417 } else if (nrMatch <= 0) {
418 Error("Substitute", "pcre_exec error = %d", nrMatch);
419 break;
420 }
421
422 // append anything previously unmatched, but not substituted
423 if (last <= offVec[0]) {
424 fin += s(last,offVec[0]-last);
425 last = offVec[1];
426 }
427
428 // replace stuff in s
429 if (doDollarSubst) {
430 ReplaceSubs(s, fin, replacePattern, offVec, nrMatch);
431 } else {
432 fin += replacePattern;
433 }
434 ++nrSubs;
435
436 // if global gotta check match at every pos
437 if (!(fPCREOpts & kPCRE_GLOBAL))
438 break;
439
440 if (offVec[0] != offVec[1]) {
441 offset = offVec[1];
442 } else {
443 // matched empty string
444 if (offVec[1] == s.Length()) break;
445 offset = offVec[1]+1;
446 }
447 }
448
449 delete [] offVec;
450
451 fin += s(last,s.Length()-last);
452 s = fin;
453
454 return nrSubs;
455}
456
457////////////////////////////////////////////////////////////////////////////////
458/// Substitute replaces the string s by a new string in which matching
459/// patterns are replaced by the replacePattern string. The number of
460/// substitutions are returned.
461/// ~~~ {.cpp}
462/// TString s("aap noot mies");
463/// const Int_t nrSub = TPRegexp("(\\w*) noot (\\w*)").Substitute(s,"$2 noot $1");
464/// std::cout << nrSub << " \"" << s << "\"" <<std::endl;
465/// ~~~
466/// produces: 1 "mies noot aap"
467///
468/// For meaning of mods see ParseMods().
469
470Int_t TPRegexp::Substitute(TString &s, const TString &replacePattern,
471 const TString &mods, Int_t start, Int_t nMaxMatch)
472{
473 UInt_t opts = ParseMods(mods);
474
475 if (!fPriv->fPCRE || opts != fPCREOpts) {
476 fPCREOpts = opts;
477 Compile();
478 }
479
480 return SubstituteInternal(s, replacePattern, start, nMaxMatch, kTRUE);
481}
482
483
484////////////////////////////////////////////////////////////////////////////////
485/// Returns true if underlying PCRE structure has been successfully
486/// generated via regexp compilation.
487
489{
490 return fPriv->fPCRE != nullptr;
491}
492
493////////////////////////////////////////////////////////////////////////////////
494/// Get value of static flag controlling whether exception should be thrown upon an
495/// error during regular expression compilation by the PCRE engine.
496
498{
500}
501
502////////////////////////////////////////////////////////////////////////////////
503/// Set static flag controlling whether exception should be thrown upon an
504/// error during regular expression compilation by the PCRE engine.
505
507{
508 fgThrowAtCompileError = throwp;
509}
510
511////////////////////////////////////////////////////////////////////////////////
512// //
513// TString member functions, put here so the linker will include //
514// them only if regular expressions are used. //
515// //
516////////////////////////////////////////////////////////////////////////////////
517
518////////////////////////////////////////////////////////////////////////////////
519/// Find the first occurrence of the regexp in string and return the position.
520/// Start is the offset at which the search should start.
521
523{
524 TArrayI pos;
525 Int_t nrMatch = r.Match(*this,"",start,10,&pos);
526 if (nrMatch > 0)
527 return pos[0];
528 else
529 return -1;
530}
531
532////////////////////////////////////////////////////////////////////////////////
533/// Find the first occurrence of the regexp in string and return the position.
534/// Extent is length of the matched string and start is the offset at which
535/// the matching should start.
536
538{
539 TArrayI pos;
540 const Int_t nrMatch = r.Match(*this,"",start,10,&pos);
541 if (nrMatch > 0) {
542 *extent = pos[1]-pos[0];
543 return pos[0];
544 } else {
545 *extent = 0;
546 return -1;
547 }
548}
549
550////////////////////////////////////////////////////////////////////////////////
551/// Return the substring found by applying the regexp starting at start.
552
554{
555 Ssiz_t len;
556 Ssiz_t begin = Index(r, &len, start);
557 return TSubString(*this, begin, len);
558}
559
560////////////////////////////////////////////////////////////////////////////////
561/// Return the substring found by applying the regexp.
562
564{
565 return (*this)(r, 0);
566}
567
568
569/** \class TPMERegexp
570
571Wrapper for PCRE library (Perl Compatible Regular Expressions).
572Based on PME - PCRE Made Easy by Zachary Hansen.
573
574Supports main Perl operations using regular expressions (Match,
575Substitute and Split). To retrieve the results one can simply use
576operator[] returning a TString.
577
578See $ROOTSYS/tutorials/regexp_pme.C for examples.
579*/
580
582
583////////////////////////////////////////////////////////////////////////////////
584/// Default constructor. This regexp will match an empty string.
585
587 TPRegexp(),
588 fNMaxMatches(10),
589 fNMatches(0),
590 fAddressOfLastString(nullptr),
591 fLastGlobalPosition(0)
592{
593 Compile();
594}
595
596////////////////////////////////////////////////////////////////////////////////
597/// Constructor.
598///
599/// \param[in] s string to compile into regular expression
600/// \param[in] opts perl-style character flags to be set on TPME object
601/// \param[in] nMatchMax maximum number of matches
602
603TPMERegexp::TPMERegexp(const TString& s, const TString& opts, Int_t nMatchMax) :
604 TPRegexp(s),
605 fNMaxMatches(nMatchMax),
606 fNMatches(0),
607 fAddressOfLastString(nullptr),
608 fLastGlobalPosition(0)
609{
610 fPCREOpts = ParseMods(opts);
611 Compile();
612}
613
614////////////////////////////////////////////////////////////////////////////////
615/// Constructor.
616///
617/// \param[in] s string to compile into regular expression
618/// \param[in] opts PCRE-style option flags to be set on TPME object
619/// \param[in] nMatchMax maximum number of matches
620
621TPMERegexp::TPMERegexp(const TString& s, UInt_t opts, Int_t nMatchMax) :
622 TPRegexp(s),
623 fNMaxMatches(nMatchMax),
624 fNMatches(0),
625 fAddressOfLastString(nullptr),
626 fLastGlobalPosition(0)
627{
628 fPCREOpts = opts;
629 Compile();
630}
631
632////////////////////////////////////////////////////////////////////////////////
633/// Copy constructor.
634/// Only PCRE specifics are copied, not last-match or global-match
635/// information.
636
638 TPRegexp(r),
639 fNMaxMatches(r.fNMaxMatches),
640 fNMatches(0),
641 fAddressOfLastString(nullptr),
642 fLastGlobalPosition(0)
643{
644 Compile();
645}
646
647////////////////////////////////////////////////////////////////////////////////
648/// Reset the pattern and options.
649/// If 'nMatchMax' other than -1 (the default) is passed, it is also set.
650
651void TPMERegexp::Reset(const TString& s, const TString& opts, Int_t nMatchMax)
652{
653 Reset(s, ParseMods(opts), nMatchMax);
654}
655
656////////////////////////////////////////////////////////////////////////////////
657/// Reset the pattern and options.
658/// If 'nMatchMax' other than -1 (the default) is passed, it is also set.
659
660void TPMERegexp::Reset(const TString& s, UInt_t opts, Int_t nMatchMax)
661{
662 fPattern = s;
663 fPCREOpts = opts;
664 Compile();
665
666 if (nMatchMax != -1)
667 fNMatches = nMatchMax;
668 fNMatches = 0;
670}
671
672////////////////////////////////////////////////////////////////////////////////
673/// Copy global-match state from 're' so that this regexp can continue
674/// parsing the string from where 're' left off.
675///
676/// Alternatively, GetGlobalPosition() can be used to retrieve the
677/// last match position so that it can be passed to Match().
678///
679/// Ideally, as it is done in PERL, the last match position would be
680/// stored in the TString itself.
681
683{
686}
687
688////////////////////////////////////////////////////////////////////////////////
689/// Reset state of global match.
690/// This happens automatically when a new string is passed for matching.
691/// But be careful, as the address of last TString object is used
692/// to make this decision.
693
695{
697}
698
699////////////////////////////////////////////////////////////////////////////////
700/// Runs a match on s against the regex 'this' was created with.
701///
702/// \param[in] s string to match against
703/// \param[in] start offset at which to start matching
704/// \return number of matches found
705
707{
708 // If we got a new string, reset the global position counter.
709 if (fAddressOfLastString != (void*) &s) {
711 }
712
713 if (fPCREOpts & kPCRE_GLOBAL) {
714 start += fLastGlobalPosition;
715 }
716
717 //fprintf(stderr, "string: '%s' length: %d offset: %d\n", s.Data(), s.length(), offset);
719
720 //fprintf(stderr, "MatchInternal_exec result = %d\n", fNMatches);
721
723 fAddressOfLastString = (void*) &s;
724
725 if (fPCREOpts & kPCRE_GLOBAL) {
726 if (fNMatches == PCRE_ERROR_NOMATCH) {
727 // fprintf(stderr, "TPME RESETTING: reset for no match\n");
728 fLastGlobalPosition = 0; // reset the position for next match (perl does this)
729 } else if (fNMatches > 0) {
730 // fprintf(stderr, "TPME RESETTING: setting to %d\n", marks[0].second);
731 fLastGlobalPosition = fMarkers[1]; // set to the end of the match
732 } else {
733 // fprintf(stderr, "TPME RESETTING: reset for no unknown\n");
735 }
736 }
737
738 return fNMatches;
739}
740
741////////////////////////////////////////////////////////////////////////////////
742/// Splits into at most maxfields. If maxfields is unspecified or
743/// 0, trailing empty matches are discarded. If maxfields is
744/// positive, no more than maxfields fields will be returned and
745/// trailing empty matches are preserved. If maxfields is negative,
746/// all fields (including trailing empty ones) are returned. This
747/// *should* be the same as the perl behaviour.
748///
749/// If pattern produces sub-matches, these are also stored in
750/// the result.
751///
752/// A pattern matching the null string will split the value of EXPR
753/// into separate characters at each point it matches that way.
754///
755/// \param[in] s string to split
756/// \param[in] maxfields maximum number of fields to be split out. 0 means
757/// split all fields, but discard any trailing empty bits.
758/// Negative means split all fields and keep trailing empty bits.
759/// Positive means keep up to N fields including any empty fields
760/// less than N. Anything remaining is in the last field.
761/// \return number of fields found
762
764{
765 typedef std::pair<int, int> MarkerLoc_t;
766 typedef std::vector<MarkerLoc_t> MarkerLocVec_t;
767
768 // stores the marks for the split
769 MarkerLocVec_t oMarks;
770
771 // this is a list of current trailing empty matches if maxfields is
772 // unspecified or 0. If there is stuff in it and a non-empty match
773 // is found, then everything in here is pushed into oMarks and then
774 // the new match is pushed on. If the end of the string is reached
775 // and there are empty matches in here, they are discarded.
776 MarkerLocVec_t oCurrentTrailingEmpties;
777
778 Int_t nOffset = 0;
779 Int_t nMatchesFound = 0;
780
781 // while we are still finding matches and maxfields is 0 or negative
782 // (meaning we get all matches), or we haven't gotten to the number
783 // of specified matches
784 Int_t matchRes;
785 while ((matchRes = Match(s, nOffset)) &&
786 ((maxfields < 1) || nMatchesFound < maxfields)) {
787 ++nMatchesFound;
788
789 if (fMarkers[1] - fMarkers[0] == 0) {
790 oMarks.push_back(MarkerLoc_t(nOffset, nOffset + 1));
791 ++nOffset;
792 if (nOffset >= s.Length())
793 break;
794 else
795 continue;
796 }
797
798 // match can be empty
799 if (nOffset != fMarkers[0]) {
800 if (!oCurrentTrailingEmpties.empty()) {
801 oMarks.insert(oMarks.end(),
802 oCurrentTrailingEmpties.begin(),
803 oCurrentTrailingEmpties.end());
804 oCurrentTrailingEmpties.clear();
805 }
806 oMarks.push_back(MarkerLoc_t(nOffset, fMarkers[0]));
807 } else {
808 // empty match
809 if (maxfields == 0) {
810 // store for possible later inclusion
811 oCurrentTrailingEmpties.push_back(MarkerLoc_t(nOffset, nOffset));
812 } else {
813 oMarks.push_back(MarkerLoc_t(nOffset, nOffset));
814 }
815 }
816
817 nOffset = fMarkers[1];
818
819 if (matchRes > 1) {
820 for (Int_t i = 1; i < matchRes; ++i)
821 oMarks.push_back(MarkerLoc_t(fMarkers[2*i], fMarkers[2*i + 1]));
822 }
823 }
824
825
826 // if there were no matches found, push the whole thing on
827 if (nMatchesFound == 0) {
828 oMarks.push_back(MarkerLoc_t(0, s.Length()));
829 }
830 // if we ran out of matches, then append the rest of the string
831 // onto the end of the last split field
832 else if (maxfields > 0 && nMatchesFound >= maxfields) {
833 oMarks[oMarks.size() - 1].second = s.Length();
834 }
835 // else we have to add another entry for the end of the string
836 else {
837 Bool_t last_empty = (nOffset == s.Length());
838 if (!last_empty || maxfields < 0) {
839 if (!oCurrentTrailingEmpties.empty()) {
840 oMarks.insert(oMarks.end(),
841 oCurrentTrailingEmpties.begin(),
842 oCurrentTrailingEmpties.end());
843 }
844 oMarks.push_back(MarkerLoc_t(nOffset, s.Length()));
845 }
846 }
847
848 fNMatches = oMarks.size();
850 for (Int_t i = 0; i < fNMatches; ++i) {
851 fMarkers[2*i] = oMarks[i].first;
852 fMarkers[2*i + 1] = oMarks[i].second;
853 }
854
855 // fprintf(stderr, "match returning %d\n", fNMatches);
856 return fNMatches;
857}
858
859////////////////////////////////////////////////////////////////////////////////
860/// Substitute matching part of s with r, dollar back-ref
861/// substitution is performed if doDollarSubst is true (default).
862/// Returns the number of substitutions made.
863///
864/// After the substitution, another pass is made over the resulting
865/// string and the following special tokens are interpreted:
866/// - `\l` lowercase next char,
867/// - `\u` uppercase next char,
868/// - `\L` lowercase till `\E`,
869/// - `\U` uppercase till `\E`, and
870/// - `\E` end case modification.
871
873{
874 Int_t cnt = SubstituteInternal(s, r, 0, fNMaxMatches, doDollarSubst);
875
876 TString ret;
877 Int_t state = 0;
878 Ssiz_t pos = 0, len = s.Length();
879 const Char_t *data = s.Data();
880 while (pos < len) {
881 Char_t c = data[pos];
882 if (c == '\\') {
883 c = data[pos+1]; // Rely on string-data being null-terminated.
884 switch (c) {
885 case 0 : ret += '\\'; break;
886 case 'l': state = 1; break;
887 case 'u': state = 2; break;
888 case 'L': state = 3; break;
889 case 'U': state = 4; break;
890 case 'E': state = 0; break;
891 default : ret += '\\'; ret += c; break;
892 }
893 pos += 2;
894 } else {
895 switch (state) {
896 case 0: ret += c; break;
897 case 1: ret += (Char_t) tolower(c); state = 0; break;
898 case 2: ret += (Char_t) toupper(c); state = 0; break;
899 case 3: ret += (Char_t) tolower(c); break;
900 case 4: ret += (Char_t) toupper(c); break;
901 default: Error("TPMERegexp::Substitute", "invalid state.");
902 }
903 ++pos;
904 }
905 }
906
907 s = ret;
908
909 return cnt;
910}
911
912////////////////////////////////////////////////////////////////////////////////
913/// Returns the sub-string from the internal fMarkers vector.
914/// Requires having run match or split first.
915
917{
918 if (index >= fNMatches)
919 return "";
920
921 Int_t begin = fMarkers[2*index];
922 Int_t end = fMarkers[2*index + 1];
923 return fLastStringMatched(begin, end-begin);
924}
925
926////////////////////////////////////////////////////////////////////////////////
927/// Print the regular expression and modifier options.
928/// If 'option' contains "all", prints also last string match and
929/// match results.
930
932{
933 TString opt = option;
934 opt.ToLower();
935
936 Printf("Regexp='%s', Opts='%s'", fPattern.Data(), GetModifiers().Data());
937 if (opt.Contains("all")) {
938 Printf(" last string='%s'", fLastStringMatched.Data());
939 Printf(" number of matches = %d", fNMatches);
940 for (Int_t i=0; i<fNMatches; ++i)
941 Printf(" %d - %s", i, operator[](i).Data());
942 }
943}
944
945
946/** \class TStringToken
947Provides iteration through tokens of a given string.
948
949 - fFullStr stores the string to be split. It is never modified.
950 - fSplitRe is the perl-re that is used to separate the tokens.
951 - fReturnVoid if true, empty strings will be returned.
952
953Current token is stored in the TString base-class.
954During construction no match is done, use NextToken() to get the first
955and all subsequent tokens.
956*/
957
959
960////////////////////////////////////////////////////////////////////////////////
961/// Constructor.
962
963TStringToken::TStringToken(const TString& fullStr, const TString& splitRe, Bool_t retVoid) :
964 fFullStr (fullStr),
965 fSplitRe (splitRe),
966 fReturnVoid (retVoid),
967 fPos (0)
968{
969}
970
971////////////////////////////////////////////////////////////////////////////////
972/// Get the next token, it is stored in this TString.
973/// Returns true if new token is available, false otherwise.
974
976{
977 TArrayI x;
978 while (fPos < fFullStr.Length()) {
979 if (fSplitRe.Match(fFullStr, "", fPos, 2, &x)) {
981 fPos = x[1];
982 } else {
984 fPos = fFullStr.Length() + 1;
985 }
986 if (Length() || fReturnVoid)
987 return kTRUE;
988 }
989
990 // Special case: void-strings are requested and the full-string
991 // ends with the separator. Thus we return another empty string.
992 if (fPos == fFullStr.Length() && fReturnVoid) {
994 fPos = fFullStr.Length() + 1;
995 return kTRUE;
996 }
997
998 return kFALSE;
999}
ROOT::R::TRInterface & r
Definition Object.C:4
#define c(i)
Definition RSha256.hxx:101
char Char_t
Definition RtypesCore.h:37
const Bool_t kFALSE
Definition RtypesCore.h:101
bool Bool_t
Definition RtypesCore.h:63
const Bool_t kTRUE
Definition RtypesCore.h:100
const char Option_t
Definition RtypesCore.h:66
#define ClassImp(name)
Definition Rtypes.h:364
void Info(const char *location, const char *msgfmt,...)
Use this function for informational messages.
Definition TError.cxx:220
void Error(const char *location, const char *msgfmt,...)
Use this function in case an error occurred.
Definition TError.cxx:187
void Printf(const char *fmt,...)
Array of integers (32 bits per element).
Definition TArrayI.h:27
void Set(Int_t n)
Set size of this array to n ints.
Definition TArrayI.cxx:105
virtual void SetOwner(Bool_t enable=kTRUE)
Set whether this collection is the owner (enable==true) of its content.
An array of TObjects.
Definition TObjArray.h:31
void Add(TObject *obj)
Definition TObjArray.h:68
Collectable string class.
Definition TObjString.h:28
Wrapper for PCRE library (Perl Compatible Regular Expressions).
Definition TPRegexp.h:97
Int_t fLastGlobalPosition
Definition TPRegexp.h:110
void ResetGlobalState()
Reset state of global match.
Definition TPRegexp.cxx:694
Int_t fNMatches
Definition TPRegexp.h:104
void * fAddressOfLastString
Definition TPRegexp.h:108
virtual void Print(Option_t *option="")
Print the regular expression and modifier options.
Definition TPRegexp.cxx:931
Int_t Split(const TString &s, Int_t maxfields=0)
Splits into at most maxfields.
Definition TPRegexp.cxx:763
TPMERegexp()
Default constructor. This regexp will match an empty string.
Definition TPRegexp.cxx:586
Int_t Substitute(TString &s, const TString &r, Bool_t doDollarSubst=kTRUE)
Substitute matching part of s with r, dollar back-ref substitution is performed if doDollarSubst is t...
Definition TPRegexp.cxx:872
TString operator[](Int_t)
Returns the sub-string from the internal fMarkers vector.
Definition TPRegexp.cxx:916
Int_t Match(const TString &s, UInt_t start=0)
Runs a match on s against the regex 'this' was created with.
Definition TPRegexp.cxx:706
void Reset(const TString &s, const TString &opts="", Int_t nMatchMax=-1)
Reset the pattern and options.
Definition TPRegexp.cxx:651
Int_t fNMaxMatches
Definition TPRegexp.h:103
TArrayI fMarkers
Definition TPRegexp.h:105
TString fLastStringMatched
Definition TPRegexp.h:107
void AssignGlobalState(const TPMERegexp &re)
Copy global-match state from 're; so that this regexp can continue parsing the string from where 're'...
Definition TPRegexp.cxx:682
TPRegexp()
Default ctor.
Definition TPRegexp.cxx:51
void Compile()
Compile the fPattern.
Definition TPRegexp.cxx:198
Int_t SubstituteInternal(TString &s, const TString &replace, Int_t start, Int_t nMaxMatch0, Bool_t doDollarSubst) const
Perform pattern substitution with optional back-ref replacement.
Definition TPRegexp.cxx:396
Bool_t IsValid() const
Returns true if underlying PCRE structure has been successfully generated via regexp compilation.
Definition TPRegexp.cxx:488
TString fPattern
Definition TPRegexp.h:46
TPRegexp & operator=(const TPRegexp &p)
Assignment operator.
Definition TPRegexp.cxx:92
UInt_t ParseMods(const TString &mods) const
Translate Perl modifier flags into pcre flags.
Definition TPRegexp.cxx:135
UInt_t fPCREOpts
Definition TPRegexp.h:48
PCREPriv_t * fPriv
Definition TPRegexp.h:47
Int_t Match(const TString &s, const TString &mods="", Int_t start=0, Int_t nMaxMatch=10, TArrayI *pos=0)
The number of matches is returned, this equals the full match + sub-pattern matches.
Definition TPRegexp.cxx:339
Int_t MatchInternal(const TString &s, Int_t start, Int_t nMaxMatch, TArrayI *pos=0) const
Perform the actual matching - protected method.
Definition TPRegexp.cxx:307
TObjArray * MatchS(const TString &s, const TString &mods="", Int_t start=0, Int_t nMaxMatch=10)
Returns a TObjArray of matched substrings as TObjString's.
Definition TPRegexp.cxx:370
static Bool_t fgThrowAtCompileError
Definition TPRegexp.h:50
Int_t ReplaceSubs(const TString &s, TString &final, const TString &replacePattern, Int_t *ovec, Int_t nmatch) const
Returns the number of expanded '$' constructs.
Definition TPRegexp.cxx:251
virtual ~TPRegexp()
Cleanup.
Definition TPRegexp.cxx:80
Int_t Substitute(TString &s, const TString &replace, const TString &mods="", Int_t start=0, Int_t nMatchMax=10)
Substitute replaces the string s by a new string in which matching patterns are replaced by the repla...
Definition TPRegexp.cxx:470
TString GetModifiers() const
Return PCRE modifier options as string.
Definition TPRegexp.cxx:180
static Bool_t GetThrowAtCompileError()
Get value of static flag controlling whether exception should be thrown upon an error during regular ...
Definition TPRegexp.cxx:497
void Optimize()
Send the pattern through the optimizer.
Definition TPRegexp.cxx:230
@ kPCRE_GLOBAL
Definition TPRegexp.h:40
@ kPCRE_OPTIMIZE
Definition TPRegexp.h:41
@ kPCRE_DEBUG_MSGS
Definition TPRegexp.h:42
@ kPCRE_INTMASK
Definition TPRegexp.h:43
static void SetThrowAtCompileError(Bool_t throwp)
Set static flag controlling whether exception should be thrown upon an error during regular expressio...
Definition TPRegexp.cxx:506
Provides iteration through tokens of a given string.
Definition TPRegexp.h:143
TStringToken(const TString &fullStr, const TString &splitRe, Bool_t retVoid=kFALSE)
Constructor.
Definition TPRegexp.cxx:963
TPRegexp fSplitRe
Definition TPRegexp.h:147
const TString fFullStr
Definition TPRegexp.h:146
Bool_t NextToken()
Get the next token, it is stored in this TString.
Definition TPRegexp.cxx:975
Bool_t fReturnVoid
Definition TPRegexp.h:148
Basic string class.
Definition TString.h:136
Ssiz_t Length() const
Definition TString.h:410
friend class TSubString
Definition TString.h:139
char & operator()(Ssiz_t i)
Definition TString.h:714
void ToLower()
Change string to lower-case.
Definition TString.cxx:1150
const char * Data() const
Definition TString.h:369
TString & operator=(char s)
Assign character c to TString.
Definition TString.cxx:287
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition TString.cxx:2336
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition TString.h:624
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition TString.h:639
A zero length substring is legal.
Definition TString.h:82
Double_t x[n]
Definition legend1.C:17
pcre_extra * fPCREExtra
Definition TPRegexp.cxx:38
pcre * fPCRE
Definition TPRegexp.cxx:37
auto * m
Definition textangle.C:8