ROOT  6.07/01
Reference Guide
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
regexp.C
Go to the documentation of this file.
1 #include "Riostream.h"
2 #include "TString.h"
3 #include "TPRegexp.h"
4 #include "TClonesArray.h"
5 #include "TObjString.h"
6 
7 //-------------------------------------------------------------------------------------------
8 //
9 // A regular expression, often called a pattern, is an expression that describes a set of
10 // strings. They are usually used to give a concise description of a set, without having to
11 // list all elements.
12 // The Unix utilities like sed and grep make extensive use of regular expressions. Scripting
13 // languages like Perl have regular expression engines built directly into their syntax .
14 //
15 // Extensive documentation about Regular expressions in Perl can be
16 // found at :
17 // http://perldoc.perl.org/perlre.html
18 //
19 // ROOT has this capability through the use of the P(erl) C(ompatible) R(egular) E(xpression)
20 // - library, PCRE, see http://www.pcre.org
21 //
22 // Its functionality can be accessed through the TPRegexp and TString classes.
23 // Note that in patterns taken from Perl every backslash character has to be written as
24 // two backslashes in the C/C++ string literal.
25 //
26 // This macro shows several ways to use the Match/Substitute capabilities of the
27 // TPRegexp class. It can be run as follows:
28 // .x regexp.C
29 //
30 // Author: Eddy Offermann
31 //
32 //-------------------------------------------------------------------------------------------
33 
34 void regexp()
35 {
36  // Substitute example :
37  // Find a word that starts with "peper" and ends with "koek" .
38 
39  TString s1("lekkere pepernotenkoek");
40  TPRegexp r1("\\bpeper(\\w+)koek\\b");
41 
42  // Note that the TString class gives access to some of the simpler TPRegexp functionality .
43  // The following command returns the fully matched string .
44  cout << s1(r1) << endl;
45 
46  // In the "Substitute" command, keep the middle part (indicated in the regexp by "(\\w+)"
47  // and the substitute string by "$1") and sandwich it between "wal" and "boom" .
48  r1.Substitute(s1,"wal$1boom");
49  cout << s1 << endl;
50 
51  // Substitute example :
52  // Swap first two words in a string
53 
54  TString s2("one two three");
55  TPRegexp("^([^ ]+) +([^ ]+)").Substitute(s2,"$2 $1");
56  cout << s2 << endl;
57 
58  // Substitute example :
59  // $1, $2, and so on, in the substitute string are equivalent to whatever the corresponding set
60  // of parentheses match in the regexp string, counting opening parentheses from left to right .
61  // In the following example, we are trying to catch a date MMDDYYYY in a string and rearrange
62  // it to DDMMYYY . "(\\d{1,2}) matches only 1 or 2 digits etc .
63 
64  TString s3("on 09/24/1959 the world stood still");
65  TPRegexp("\\b(\\d{1,2})/(\\d{1,2})/(\\d{4})\\b").Substitute(s3,"$2-$1-$3");
66  cout << s3 << endl;
67 
68  // Match Example :
69  // The following example shows how to extract a protocol and port number from an URL string .
70  // Note again the parentheses in the regexp string : "(\\w+)" requires a non-empty
71  // alphanumeric string while "(\\d+)" wants a pure digital string .
72  // The matched substrings together with the full matched string are returned in a
73  // TObjArray . The first entry is the full string while next entries are the substrings
74  // in the order as listed in the regexp string .
75  //
76  // Note that there is also a Match(..) command that returns the positions of the
77  // substrings in the input string .
78 
79  TString s4("http://fink.sourceforge.net:8080/index/readme.html");
80  TObjArray *subStrL = TPRegexp("^(\\w+)://[^/]+:(\\d+)/$").MatchS(s4);
81  const Int_t nrSubStr = subStrL->GetLast()+1;
82  if (nrSubStr > 2) {
83  const TString proto = ((TObjString *)subStrL->At(1))->GetString();
84  const TString port = ((TObjString *)subStrL->At(2))->GetString();
85  cout << "protocol: " << proto << " port: " << port << endl;
86  }
87 
88  // Match Example :
89  // This example returns kTRUE if the email address is valid . For that it has to fulfill the following
90  // criteria:
91  // 1) It should be of the form string1@string2 . The "^" and "$" ensure that we compare the complete
92  // email string
93  // 2) ([\\w-\\.]+) :
94  // string1 is only allowed to be composed out of the alphanumeric characters, "-" and "." .
95  // The "+" ensures that string1 can not be empty .
96  // 3) string2 is matched against three different parts :
97  // a. ((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([\\w-]+\\.)+)) :
98  // This regular expression ensures that EITHER the string starts with "[" followed by three groups
99  // of numbers, separated by "." , where each group has 1 to 3 numbers, OR alphanumeric strings,
100  // possibly containing "-" characters, seperated by "." .
101  // b. ([a-zA-Z]{2,4}|[0-9]{1,3}) :
102  // This part contains EITHER 2 to 4 alpha characters OR 1 to 3 numbers
103  // c. (\\]?) :
104  // At most one "]" character .
105 
106  TString s5("fons.rademakers@cern.ch");
107  TPRegexp r5("^([\\w-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([\\w-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$");
108  cout << "Check if the email address \"" << s5 << "\" is valid: " << (r5.MatchB(s5) ? "TRUE" : "FALSE") << endl;
109 
110  // Substitute Example with pattern modifier :
111  // Like in Perl, Substitute/Match commands accept modifier arguments . For instance a "g" modifier causes to
112  // match the regexp globally . In the example below, all words starting and ending with the character "n"
113  // are replaced by the word neutrino .
114 
115  TString s6("neutron proton electron neutron");
116  TPRegexp("(n\\w+n)").Substitute(s6,"neutrino","g");
117  cout << s6 << endl;
118 }
An array of TObjects.
Definition: TObjArray.h:39
Bool_t MatchB(const TString &s, const TString &mods="", Int_t start=0, Int_t nMaxMatch=10)
Definition: TPRegexp.h:84
Collectable string class.
Definition: TObjString.h:32
Int_t GetLast() const
Return index of last object in array.
Definition: TObjArray.cxx:528
void regexp()
Definition: regexp.C:34
TObjArray * MatchS(const TString &s, const TString &mods="", Int_t start=0, Int_t nMaxMatch=10)
Returns a TObjArray of matched substrings as TObjString's.
Definition: TPRegexp.cxx:366
Basic string class.
Definition: TString.h:137
int Int_t
Definition: RtypesCore.h:41
Int_t Substitute(TString &s, const TString &replace, const TString &mods="", Int_t start=0, Int_t nMatchMax=10)
Substitute replaces the string s by a new string in which matching patterns are replaced by the repla...
Definition: TPRegexp.cxx:468
TSocket * s1
Definition: hserv2.C:36
unsigned int r1[N_CITIES]
Definition: simanTSP.cxx:321
Double_t r5
Definition: parallelcoord.C:13
TH1F * s2
Definition: threadsh2.C:15
TObject * At(Int_t idx) const
Definition: TObjArray.h:167