// ROOT logo
// @(#)root/base:$Id: TRegexp.cxx 40918 2011-09-18 16:58:07Z rdm $
// Author: Fons Rademakers   04/08/95

/*************************************************************************
 * Copyright (C) 1995-2000, Rene Brun and Fons Rademakers.               *
 * All rights reserved.                                                  *
 *                                                                       *
 * For the licensing terms see $ROOTSYS/LICENSE.                         *
 * For the list of contributors see $ROOTSYS/README/CREDITS.             *
 *************************************************************************/

//////////////////////////////////////////////////////////////////////////
//                                                                      //
// TRegexp                                                              //
//                                                                      //
// Regular expression class.                                            //
//                                                                      //
//   '^'             // start-of-line anchor                            //
//   '$'             // end-of-line anchor                              //
//   '.'             // matches any character                           //
//   '['             // start a character class                         //
//   ']'             // end a character class                           //
//   '^'             // negates character class if 1st character        //
//   '*'             // Kleene closure (matches 0 or more)              //
//   '+'             // Positive closure (1 or more)                    //
//   '?'             // Optional closure (0 or 1)                       //
//                                                                      //
//   Note that the '|' operator (union) is not supported, nor are       //
//   parentheses (grouping). Therefore "a|b" does not match "a".        //
//                                                                      //
//   Standard classes like [:alnum:], [:alpha:], etc. are not supported,//
//   only [a-zA-Z], [^ntf] and so on.                                   //
//                                                                      //
//////////////////////////////////////////////////////////////////////////

#include "TRegexp.h"
#include "TString.h"
#include "TError.h"

// Capacity, in Pattern_t elements, of every compiled pattern buffer that
// GenPattern() and CopyPattern() allocate; it is also the size of the static
// character buffer used by MakeWildcard().
const unsigned TRegexp::fgMaxpat = 2048;


// ClassImp is a ROOT macro supplied by headers outside this file
// (presumably class-implementation bookkeeping for TRegexp -- confirm).
ClassImp(TRegexp)

//______________________________________________________________________________
TRegexp::TRegexp(const char *re, Bool_t wildcard)
{
   // Build a regular expression from a C string. When wildcard is true the
   // string is first run through MakeWildcard() and the translated text is
   // what gets compiled; otherwise the string is compiled as given.

   const char *expr = wildcard ? MakeWildcard(re) : re;
   GenPattern(expr);
}

//______________________________________________________________________________
TRegexp::TRegexp(const TString& re)
{
   // Build a regular expression from the character data held by a TString.
   // The text is compiled as given (no wildcard translation).

   const char *expr = re.Data();
   GenPattern(expr);
}

//______________________________________________________________________________
TRegexp::TRegexp(const TRegexp& r)
{
   // Copy constructor: allocates a private pattern buffer and duplicates
   // both the compiled pattern and the status of r (see CopyPattern()).

   this->CopyPattern(r);
}

//______________________________________________________________________________
TRegexp::~TRegexp()
{
   // Destructor: releases the compiled pattern buffer owned by this object.

   delete [] this->fPattern;
}

//______________________________________________________________________________
TRegexp& TRegexp::operator=(const TRegexp& r)
{
   // Assignment operator. Replaces the compiled pattern and status of this
   // object by a copy of those of r. Self-assignment is a no-op.
   //
   // The new buffer is allocated before the old one is released: should the
   // allocation inside CopyPattern() throw, fPattern still points to the
   // previous (valid) buffer instead of to freed memory.

   if (this != &r) {
      Pattern_t *old = fPattern;
      CopyPattern(r);
      delete [] old;
   }
   return *this;
}

//______________________________________________________________________________
TRegexp& TRegexp::operator=(const char *str)
{
   // Assignment operator taking a char*: compiles str as a regular
   // expression and makes it the pattern of this object. The status of the
   // compilation is stored in fStat by GenPattern().
   //
   // The new buffer is allocated before the old one is released: should the
   // allocation inside GenPattern() throw, fPattern still points to the
   // previous (valid) buffer instead of to freed memory.

   Pattern_t *old = fPattern;
   GenPattern(str);
   delete [] old;
   return *this;
}

//______________________________________________________________________________
TRegexp& TRegexp::operator=(const TString &str)
{
   // Assignment operator taking a TString: compiles the string's character
   // data as a regular expression and makes it the pattern of this object.
   //
   // The new buffer is allocated before the old one is released: should the
   // allocation inside GenPattern() throw, fPattern still points to the
   // previous (valid) buffer instead of to freed memory.

   Pattern_t *old = fPattern;
   GenPattern(str.Data());
   delete [] old;
   return *this;
}

//______________________________________________________________________________
void TRegexp::GenPattern(const char *str)
{
   // Generate the regular expression pattern.

   fPattern = new Pattern_t[fgMaxpat];
   int error = ::Makepat(str, fPattern, fgMaxpat);
   fStat = (error < 3) ? (EStatVal) error : kToolong;
}

//______________________________________________________________________________
void TRegexp::CopyPattern(const TRegexp& r)
{
   // Copy the regular expression pattern.

   fPattern = new Pattern_t[fgMaxpat];
   memcpy(fPattern, r.fPattern, fgMaxpat * sizeof(Pattern_t));
   fStat = r.fStat;
}

//______________________________________________________________________________
const char *TRegexp::MakeWildcard(const char *re)
{
   // Translate a wildcard expression into the regular expression syntax
   // compiled by this class. The translation works as follows:
   //   - a "^" is put in front unless re already starts with one, and a "$"
   //     is appended unless re already ends with one;
   //   - "*" becomes "<class>*" and "?" becomes "<class>", where <class> is
   //     "[^/]" (or "[^\/:]" when R__WIN32 is defined), so wildcards do not
   //     cross these separator characters: "*.root" matches "aap.root" but
   //     not "pipo/aap.root";
   //   - "." is escaped with a backslash (so *.ps is different from *.eps);
   //   - every other character is copied unchanged.
   // A null or empty re yields "". If the result would not fit, an error is
   // reported and the translation is cut short (without the trailing "$").
   // The result lives in a static buffer that is overwritten by each call.

   static char buf[fgMaxpat];

   if (!re) return "";
   const int len = strlen(re);
   if (len == 0) return "";

   // Character class substituted for the "any character" part of a wildcard.
#ifndef R__WIN32
   //const char *wc = "[a-zA-Z0-9-+_\\.,: []<>]";
   const char *wc = "[^/]";
#else
   //const char *wc = "[a-zA-Z0-9-+_., []<>]";
   const char *wc = "[^\\/:]";
#endif
   const int wclen = strlen(wc);

   char *out = buf;

   // Begin-of-line anchor, unless supplied by the caller.
   if (re[0] != '^')
      *out++ = '^';

   int i = 0;
   for (; i < len; i++) {
      // Keep enough head room for the longest possible single expansion.
      if ((unsigned)(out - buf) > fgMaxpat - 10) {
         Error("MakeWildcard", "regexp too large");
         break;
      }

      const char c = re[i];
      switch (c) {
         case '*':
            // any number of non-separator characters
            strcpy(out, wc);
            out += wclen;
            *out++ = c;
            break;
         case '.':
            // literal dot
            *out++ = '\\';
            *out++ = c;
            break;
         case '?':
            // exactly one non-separator character
            strcpy(out, wc);
            out += wclen;
            break;
         default:
            *out++ = c;
            break;
      }
   }

   // End-of-line anchor, only when the whole input was consumed and the
   // caller did not supply one.
   if (i == len && re[len-1] != '$')
      *out++ = '$';

   *out = '\0';
   return buf;
}

//______________________________________________________________________________
Ssiz_t TRegexp::Index(const TString& string, Ssiz_t* len, Ssiz_t i) const
{
   // Find the first occurrence of the regexp in string and return its
   // position, or kNPOS if there is no match. On return *len holds the
   // length of the matched text (0 when nothing matched). i is the offset
   // in string at which the search starts; an offset that is negative or
   // beyond the end of the string yields kNPOS with *len set to 0.
   //
   // If the regexp status is not kOK an error is reported, but the match
   // is still attempted with whatever pattern is stored.

   if (fStat != kOK)
      Error("TRegexp::Index", "Bad Regular Expression");

   const char* startp;
   const char* s = string.Data();
   Ssiz_t slen = string.Length();

   // Reject offsets outside the string before forming s+i, and always
   // define *len so callers never read an unset length.
   if (i < 0 || slen < i) {
      *len = 0;
      return kNPOS;
   }

   const char* endp = ::Matchs(s+i, slen-i, fPattern, &startp);
   if (endp) {
      *len = endp - startp;
      return startp - s;
   } else {
      *len = 0;
      return kNPOS;
   }
}

//______________________________________________________________________________
TRegexp::EStatVal TRegexp::Status()
{
   // Return the current status of the regexp and reset the stored status
   // to kOK, so a second call reports kOK.

   const EStatVal previous = fStat;
   fStat = kOK;
   return previous;
}

//////////////////////////////////////////////////////////////////////////
//                                                                      //
// TString member functions, put here so the linker will include        //
// them only if regular expressions are used.                           //
//                                                                      //
//////////////////////////////////////////////////////////////////////////

//______________________________________________________________________________
Ssiz_t TString::Index(const TRegexp& r, Ssiz_t start) const
{
   // Return the position of the first occurrence of the regexp in this
   // string, or -1 if there is no match. The search begins at offset start.

   Ssiz_t ignoredLen;   // match length is required by the callee, unused here
   const Ssiz_t pos = r.Index(*this, &ignoredLen, start);
   return pos;
}

//______________________________________________________________________________
Ssiz_t TString::Index(const TRegexp& r, Ssiz_t* extent, Ssiz_t start) const
{
   // Return the position of the first occurrence of the regexp in this
   // string, or -1 if there is no match. The length of the matched text is
   // written to *extent by TRegexp::Index(); the search begins at offset
   // start.

   const Ssiz_t pos = r.Index(*this, extent, start);
   return pos;
}

//______________________________________________________________________________
TSubString TString::operator()(const TRegexp& r, Ssiz_t start) const
{
   // Return the substring matched by the regexp, searching from offset
   // start. The substring is built from the position and length reported
   // by Index(); when there is no match the position is kNPOS.

   // Initialised because TRegexp::Index() can return without writing the
   // length (start beyond the end of the string); the value would then be
   // read uninitialised below.
   Ssiz_t len = 0;
   Ssiz_t begin = Index(r, &len, start);
   return TSubString(*this, begin, len);
}

//______________________________________________________________________________
TSubString TString::operator()(const TRegexp& r) const
{
   // Return the substring matched by the regexp, searching from the
   // beginning of this string.

   const Ssiz_t start = 0;
   return (*this)(r, start);
}

//__________________________________________________________________________________
Bool_t TString::Tokenize(TString &tok, Ssiz_t &from, const char *delim) const
{
   // Search for tokens delimited by regular expression 'delim' (default " ")
   // in this string; the search starts at 'from' and the token is returned
   // in 'tok'. Consecutive delimiters are skipped, so an empty token is
   // never returned.
   // On return 'from' holds the position right after the delimiter that
   // ended the token (capped at the string length), or kNPOS when no
   // further delimiter was found.
   // Returns kTRUE if a token is found, kFALSE if not or if the inputs are
   // inconsistent (empty string, 'from' negative or past the last character).
   // This method allows looping over tokens in this way:
   //
   //    TString myl = "tok1 tok2|tok3";
   //    TString tok;
   //    Ssiz_t from = 0;
   //    while (myl.Tokenize(tok, from, "[ |]")) {
   //       // Analyse tok
   //       ...
   //    }
   //
   // which is more convenient than the other Tokenize method when saving
   // the tokens is not needed.
   //
   // A delimiter expression that matches the empty string at the current
   // position (e.g. "x*") cannot advance the search; in that case the
   // remainder of the string is returned as the last token.

   Bool_t found = kFALSE;

   // Reset the token
   tok = "";

   // Make sure inputs make sense
   Int_t len = Length();
   if (len <= 0 || from > (len - 1) || from < 0)
      return found;

   TRegexp rg(delim);

   while (tok.IsNull()) {
      // Find delimiter
      Int_t ext = 0;
      Int_t pos = Index(rg, &ext, from);

      // A zero-width match right at 'from' would leave both 'from' and
      // 'tok' unchanged and the loop would never end: handle it as
      // "no delimiter found".
      if (pos == from && ext <= 0)
         pos = kNPOS;

      // Assign to token: everything between 'from' and the delimiter, or
      // the rest of the string when there is no delimiter
      if (pos == kNPOS || pos > from) {
         Ssiz_t last = (pos != kNPOS) ? (pos - 1) : len;
         tok = (*this)(from, last-from+1);
      }
      found = kTRUE;

      // Update start-of-search index
      from = pos + ext;
      if (pos == kNPOS) {
         from = pos;
         if (tok.IsNull()) {
            // Empty, last token
            found = kFALSE;
            break;
         }
      }
   }
   // Make sure that 'from' has a meaningful value
   from = (from < len) ? from : len;

   // Done
   return found;
}
// (source-browser line-index residue "TRegexp.cxx:1" ... "TRegexp.cxx:344" removed)