// @(#)root/base:$Name: $:$Id: TPRegexp.cxx,v 1.1 2005/12/02 16:17:48 rdm Exp $
// Author: Eddy Offermann 24/06/05
/*************************************************************************
* Copyright (C) 1995-2005, Rene Brun and Fons Rademakers. *
* All rights reserved. *
* *
* For the licensing terms see $ROOTSYS/LICENSE. *
* For the list of contributors see $ROOTSYS/README/CREDITS. *
*************************************************************************/
//////////////////////////////////////////////////////////////////////////
// //
// TPRegexp //
// //
// C++ Wrapper for the "Perl Compatible Regular Expressions" library //
// The PCRE lib can be found at: //
// http://www.pcre.org/ //
// //
// Extensive documentation about Regular expressions in Perl can be //
// found at : //
// http://perldoc.perl.org/perlre.html //
// //
//////////////////////////////////////////////////////////////////////////
#include "Riostream.h"
#include "TPRegexp.h"
#include "TObjArray.h"
#include "TObjString.h"
#include "TError.h"
#include <pcre.h>
struct PCREPriv_t {
pcre *fPCRE;
pcre_extra *fPCREExtra;
PCREPriv_t() { fPCRE = 0; fPCREExtra = 0; }
};
ClassImp(TPRegexp)
//______________________________________________________________________________
TPRegexp::TPRegexp()
{
// Default ctor.
fPriv = new PCREPriv_t;
fPCREOpts = 0;
}
//______________________________________________________________________________
TPRegexp::TPRegexp(const TString &pat)
{
// Create and initialize with pat.
fPattern = pat;
fPriv = new PCREPriv_t;
fPCREOpts = 0;
}
//______________________________________________________________________________
TPRegexp::TPRegexp(const TPRegexp &p)
{
// Copy ctor.
fPattern = p.fPattern;
fPriv = new PCREPriv_t;
fPCREOpts = p.fPCREOpts;
}
//______________________________________________________________________________
TPRegexp::~TPRegexp()
{
// Cleanup.
if (fPriv->fPCRE)
pcre_free(fPriv->fPCRE);
if (fPriv->fPCREExtra)
pcre_free(fPriv->fPCREExtra);
delete fPriv;
}
//______________________________________________________________________________
TPRegexp &TPRegexp::operator=(const TPRegexp &p)
{
// Assignement operator.
if (this != &p) {
fPattern = p.fPattern;
if (fPriv->fPCRE)
pcre_free(fPriv->fPCRE);
fPriv->fPCRE = 0;
if (fPriv->fPCREExtra)
pcre_free(fPriv->fPCREExtra);
fPriv->fPCREExtra = 0;
fPCREOpts = p.fPCREOpts;
}
return *this;
}
//______________________________________________________________________________
UInt_t TPRegexp::ParseMods(const TString &modStr) const
{
// Translate Perl modifier flags into pcre flags.
UInt_t opts = 0;
if (modStr.Length() <= 0)
return fPCREOpts;
//translate perl flags into pcre flags
const char *m = modStr;
while (*m) {
switch (*m) {
case 'g':
opts |= kPCRE_GLOBAL;
break;
case 'i':
opts |= PCRE_CASELESS;
break;
case 'm':
opts |= PCRE_MULTILINE;
break;
case 'o':
opts |= kPCRE_OPTIMIZE;
break;
case 's':
opts |= PCRE_DOTALL;
break;
case 'x':
opts |= PCRE_EXTENDED;
break;
case 'd': // special flag to enable debug printing (not Perl compat.)
opts |= kPCRE_DEBUG_MSGS;
break;
default:
Error("ParseMods", "illegal pattern modifier: %c", *m);
opts = 0;
}
++m;
}
return opts;
}
//______________________________________________________________________________
void TPRegexp::Compile()
{
// Compile the fPattern.
if (fPriv->fPCRE)
pcre_free(fPriv->fPCRE);
if (fPCREOpts & kPCRE_DEBUG_MSGS)
Info("Compile", "PREGEX compiling %s", fPattern.Data());
const char *errstr;
Int_t patIndex;
fPriv->fPCRE = pcre_compile(fPattern.Data(), fPCREOpts & kPCRE_INTMASK,
&errstr, &patIndex, 0);
if (!fPriv->fPCRE) {
Error("Compile", "compilation of TPRegexp(%s) failed at: %d because %s",
fPattern.Data(), patIndex, errstr);
}
if (fPriv->fPCREExtra || (fPCREOpts & kPCRE_OPTIMIZE))
Optimize();
}
//______________________________________________________________________________
void TPRegexp::Optimize()
{
// Send the pattern through the optimizer.
if (fPriv->fPCREExtra)
pcre_free(fPriv->fPCREExtra);
if (fPCREOpts & kPCRE_DEBUG_MSGS)
Info("Optimize", "PREGEX studying %s", fPattern.Data());
const char *errstr;
fPriv->fPCREExtra = pcre_study(fPriv->fPCRE, fPCREOpts & kPCRE_INTMASK, &errstr);
if (!fPriv->fPCREExtra && errstr) {
Error("Optimize", "Optimization of TPRegexp(%s) failed: %s",
fPattern.Data(), errstr);
}
}
//______________________________________________________________________________
Int_t TPRegexp::ReplaceSubs(const TString &s, TString &final,
const TString &replacePattern,
Int_t *offVec, Int_t nrMatch) const
{
// Return the number of substitutions.
Int_t nrSubs = 0;
const char *p = replacePattern;
Int_t state = 0;
Int_t subnum = 0;
while (state != -1) {
switch (state) {
case 0:
if (!*p) {
state = -1;
break;
}
if (*p == '$') {
state = 1;
subnum = 0;
if (p[1] == '&') {
p++;
if (isdigit(p[1]))
p++;
} else if (!isdigit(p[1])) {
Error("ReplaceSubs", "badly formed replacement pattern: %s",
replacePattern.Data());
}
} else
final += *p;
break;
case 1:
if (isdigit(*p)) {
subnum *= 10;
subnum += (*p)-'0';
} else {
if (fPCREOpts & kPCRE_DEBUG_MSGS)
Info("ReplaceSubs", "PREGEX appending substr #%d", subnum);
if (subnum < 0 || subnum > nrMatch-1) {
Error("ReplaceSubs","bad string number :%d",subnum);
}
const TString subStr = s(offVec[2*subnum],offVec[2*subnum+1]-offVec[2*subnum]);
final += subStr;
nrSubs++;
state = 0;
continue; // send char to start state
}
}
p++;
}
return nrSubs;
}
//______________________________________________________________________________
Int_t TPRegexp::Match(const TString &s, const TString &mods, Int_t start,
Int_t nMaxMatch, TArrayI *pos)
{
// The number of matches is returned, this equals the full match +
// sub-pattern matches.
// nMaxmatch is the maximum allowed number of matches.
// pos contains the string indices of the matches. Its usage is
// shown in the routine MatchS.
UInt_t opts = ParseMods(mods);
if (!fPriv->fPCRE || opts != fPCREOpts) {
fPCREOpts = opts;
Compile();
}
Int_t *offVec = new Int_t[nMaxMatch];
Int_t nrMatch = pcre_exec(fPriv->fPCRE, fPriv->fPCREExtra, s.Data(),
s.Length(), start, fPCREOpts & kPCRE_INTMASK,
offVec, nMaxMatch);
if (nrMatch == PCRE_ERROR_NOMATCH)
nrMatch = 0;
else if (nrMatch <= 0) {
Error("Match","pcre_exec error = %d", nrMatch);
delete [] offVec;
return 0;
}
if (pos)
pos->Adopt(2*nrMatch, offVec);
else
delete [] offVec;
return nrMatch;
}
//______________________________________________________________________________
TObjArray *TPRegexp::MatchS(const TString &s, const TString &mods,
Int_t start, Int_t nMaxMatch)
{
// Returns a TObjArray of matched substrings as TObjString's.
// The TObjArray is owner of the objects. The first entry is the full
// matched pattern, followed by the subpatterns.
// If a pattern was not matched, it will return an empty substring:
//
// TObjArray *subStrL = TPRegexp("(a|(z))(bc)").MatchS("abc");
// for (Int_t i = 0; i < subStrL->GetLast()+1; i++) {
// const TString subStr = ((TObjString *)subStrL->At(i))->GetString();
// cout << "\"" << subStr << "\" ";
// }
// cout << subStr << endl;
//
// produces: "abc" "a" "" "bc"
TArrayI pos;
Int_t nrMatch = Match(s, mods, start, nMaxMatch, &pos);
TObjArray *subStrL = new TObjArray();
subStrL->SetOwner();
for (Int_t i = 0; i < nrMatch; i++) {
Int_t start = pos[2*i];
Int_t stop = pos[2*i+1];
if (start >= 0 && stop >= 0) {
const TString subStr = s(pos[2*i], pos[2*i+1]-pos[2*i]);
subStrL->Add(new TObjString(subStr));
} else
subStrL->Add(new TObjString());
}
return subStrL;
}
//______________________________________________________________________________
Int_t TPRegexp::Substitute(TString &s, const TString &replacePattern,
const TString &mods, Int_t start, Int_t nMaxMatch)
{
// Substitute replaces the string s by a new string in which matching
// patterns are replaced by the replacePattern string. The number of
// substitutions are returned.
//
// TString s("aap noot mies");
// const Int_t nrSub = TPRegexp("(\\w*) noot (\\w*)").Substitute(s,"$2 noot $1");
// cout << nrSub << " \"" << s << "\"" <<endl;
//
// produces: 2 "mies noot aap"
UInt_t opts = ParseMods(mods);
Int_t nrSubs = 0;
TString final;
if (!fPriv->fPCRE || opts != fPCREOpts) {
fPCREOpts = opts;
Compile();
}
Int_t *offVec = new Int_t[nMaxMatch];
Int_t offset = start;
Int_t last = 0;
while (kTRUE) {
// find next matching subs
Int_t nrMatch = pcre_exec(fPriv->fPCRE, fPriv->fPCREExtra, s.Data(),
s.Length(), offset, fPCREOpts & kPCRE_INTMASK,
offVec, nMaxMatch);
if (nrMatch == PCRE_ERROR_NOMATCH) {
nrMatch = 0;
break;
} else if (nrMatch <= 0)
Error("Substitute", "pcre_exec error = %d", nrMatch);
// append anything previously unmatched, but not substituted
if (last <= offVec[0]) {
final += s(last,offVec[0]-last);
last = offVec[1];
}
// replace stuff in s
nrSubs += ReplaceSubs(s, final, replacePattern, offVec, nrMatch);
// if global gotta check match at every pos
if (!(fPCREOpts & kPCRE_GLOBAL))
break;
if (offVec[0] != offVec[1])
offset = offVec[1];
else {
// matched empty string
if (offVec[1] == s.Length())
break;
offset = offVec[1]+1;
}
}
delete [] offVec;
final += s(last,s.Length()-last);
s = final;
return nrSubs;
}
//////////////////////////////////////////////////////////////////////////
// //
// TString member functions, put here so the linker will include //
// them only if regular expressions are used. //
// //
//////////////////////////////////////////////////////////////////////////
//______________________________________________________________________________
Ssiz_t TString::Index(TPRegexp& r, Ssiz_t start) const
{
// Find the first occurance of the regexp in string and return the position.
// Start is the offset at which the search should start.
TArrayI pos;
Int_t nrMatch = r.Match(*this,"",start,30,&pos);
if (nrMatch > 0)
return pos[0];
else
return -1;
}
//______________________________________________________________________________
Ssiz_t TString::Index(TPRegexp& r, Ssiz_t* extent, Ssiz_t start) const
{
// Find the first occurance of the regexp in string and return the position.
// Extent is length of the matched string and start is the offset at which
// the matching should start.
TArrayI pos;
const Int_t nrMatch = r.Match(*this,"",start,30,&pos);
if (nrMatch > 0) {
*extent = pos[1]-pos[0];
return pos[0];
} else {
*extent = 0;
return -1;
}
}
//______________________________________________________________________________
TSubString TString::operator()(TPRegexp& r, Ssiz_t start)
{
// Return the substring found by applying the regexp starting at start.
Ssiz_t len;
Ssiz_t begin = Index(r, &len, start);
return TSubString(*this, begin, len);
}
//______________________________________________________________________________
TSubString TString::operator()(TPRegexp& r)
{
// Return the substring found by applying the regexp.
return (*this)(r,0);
}
//______________________________________________________________________________
TSubString TString::operator()(TPRegexp& r, Ssiz_t start) const
{
// Return the substring found by applying the regexp starting at start.
Ssiz_t len;
Ssiz_t begin = Index(r, &len, start);
return TSubString(*this, begin, len);
}
//______________________________________________________________________________
TSubString TString::operator()(TPRegexp& r) const
{
// Return the substring found by applying the regexp.
return (*this)(r, 0);
}
ROOT page - Class index - Class Hierarchy - Top of the page
This page has been automatically generated. If you have any comments or suggestions about the page layout send a mail to ROOT support, or contact the developers with any questions or problems regarding ROOT.