33#
44# re-compatible interface for the sre matching engine
55#
6- # Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
6+ # Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved.
77#
88# This version of the SRE library can be redistributed under CNRI's
99# Python 1.6 license. For any other use, please contact Secret Labs
1414# other compatibility work.
1515#
1616
17- # FIXME: change all FIXME's to XXX ;-)
18-
1917import sre_compile
2018import sre_parse
2119
# public symbols
# __all__ is the list of names that "from <this module> import *" binds.
# The T, TEMPLATE and DEBUG flags defined further down are not listed here.
__all__ = ["match", "search", "sub", "subn", "split", "findall",
           "compile", "purge", "template", "escape", "I", "L", "M", "S", "X",
           "U", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE",
           "UNICODE", "error"]

# version string of this module
__version__ = "2.1b2"
27+
28+ # this module works under 1.5.2 and later. don't use string methods
2229import string
2330
# flags
# Each flag is bound twice, to a one-letter alias and to a long name; both
# names refer to the same SRE_FLAG_* value taken from sre_compile.
I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE # ignore case
L = LOCALE = sre_compile.SRE_FLAG_LOCALE # assume current 8-bit locale
U = UNICODE = sre_compile.SRE_FLAG_UNICODE # assume unicode locale
M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE # make anchors look for newline
S = DOTALL = sre_compile.SRE_FLAG_DOTALL # make dot match newline
X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE # ignore whitespace and comments

# sre extensions (experimental, don't rely on these)
# DEBUG has a long name only; there is no one-letter alias for it.
T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE # disable backtracking
DEBUG = sre_compile.SRE_FLAG_DEBUG # dump pattern after compilation

# sre exception
# re-exported so callers can catch it through this module
error = sre_compile.error
3745
3846# --------------------------------------------------------------------
3947# public interface
4048
41- # FIXME: add docstrings
42-
def match(pattern, string, flags=0):
    """Apply the pattern at the start of the string.

    The pattern is compiled with the given flags and its match()
    method is called on the string.  The result is a match object,
    or None if no match was found."""
    compiled = _compile(pattern, flags)
    return compiled.match(string)
4553
def search(pattern, string, flags=0):
    """Scan through the string looking for a match to the pattern.

    The pattern is compiled with the given flags and its search()
    method is called on the string.  The result is a match object,
    or None if no match was found."""
    compiled = _compile(pattern, flags)
    return compiled.search(string)
4858
def sub(pattern, repl, string, count=0, flags=0):
    """Return the string obtained by replacing the leftmost
    non-overlapping occurrences of the pattern in string by the
    replacement repl.

    count is forwarded unchanged to the compiled pattern's sub()
    method.  flags is handed to the pattern compiler in the same way
    as match() and search() do; it defaults to 0, the value that was
    previously hard-coded, so existing callers are unaffected."""
    # compile with the caller's flags instead of always using 0, so
    # this helper agrees with match()/search()/compile()
    return _compile(pattern, flags).sub(repl, string, count)
5164
def subn(pattern, repl, string, count=0, flags=0):
    """Return a 2-tuple containing (new_string, number).

    new_string is the string obtained by replacing the leftmost
    non-overlapping occurrences of the pattern in the source
    string by the replacement repl.  number is the number of
    substitutions that were made.

    count is forwarded unchanged to the compiled pattern's subn()
    method.  flags is handed to the pattern compiler in the same way
    as match() and search() do; it defaults to 0, the value that was
    previously hard-coded, so existing callers are unaffected."""
    # compile with the caller's flags instead of always using 0, so
    # this helper agrees with match()/search()/compile()
    return _compile(pattern, flags).subn(repl, string, count)
5472
def split(pattern, string, maxsplit=0, flags=0):
    """Split the source string by the occurrences of the pattern,
    returning a list containing the resulting substrings.

    maxsplit is forwarded unchanged to the compiled pattern's split()
    method.  flags is handed to the pattern compiler in the same way
    as match() and search() do; it defaults to 0, the value that was
    previously hard-coded, so existing callers are unaffected."""
    # compile with the caller's flags instead of always using 0, so
    # this helper agrees with match()/search()/compile()
    return _compile(pattern, flags).split(string, maxsplit)
5777
def findall(pattern, string, maxsplit=0):
    """Return a list of all non-overlapping matches in the string.

    When the pattern contains one or more groups, the result is a
    list of groups instead; with more than one group it is a list
    of tuples.

    Empty matches are included in the result."""
    # NOTE(review): the third parameter is called maxsplit but is passed
    # straight through as the second positional argument of the compiled
    # pattern's findall(); the name looks copied from split().  Confirm
    # what findall() does with it before relying on a non-zero value.
    compiled = _compile(pattern, 0)
    return compiled.findall(string, maxsplit)
6087
def compile(pattern, flags=0):
    """Compile a regular expression pattern with the given flags and
    return the resulting pattern object."""
    compiled = _compile(pattern, flags)
    return compiled
6391
def purge():
    """Clear the regular expression caches.

    Both the compiled-pattern cache and the compiled-replacement
    cache are emptied."""
    for cache in (_cache, _cache_repl):
        cache.clear()
6696
def template(pattern, flags=0):
    """Compile a template pattern, returning a pattern object.

    Works like compile(), except that the T (TEMPLATE) flag is
    always or'ed into the given flags."""
    template_flags = flags | T
    return _compile(pattern, template_flags)
69100
70101def escape (pattern ):
102+ "Escape all non-alphanumeric characters in pattern."
71103s = list (pattern )
72104for i in range (len (pattern )):
73105c = pattern [i ]
@@ -82,6 +114,8 @@ def escape(pattern):
82114# internals
83115
84116_cache = {}
117+ _cache_repl = {}
118+
85119_MAXCACHE = 100
86120
87121def _join (seq ,sep ):
@@ -105,6 +139,21 @@ def _compile(*key):
105139_cache [key ]= p
106140return p
107141
142+ def _compile_repl (* key ):
143+ # internal: compile replacement pattern
144+ p = _cache_repl .get (key )
145+ if p is not None :
146+ return p
147+ repl ,pattern = key
148+ try :
149+ p = sre_parse .parse_template (repl ,pattern )
150+ except error ,v :
151+ raise error ,v # invalid expression
152+ if len (_cache_repl )>= _MAXCACHE :
153+ _cache_repl .clear ()
154+ _cache_repl [key ]= p
155+ return p
156+
108157def _expand (pattern ,match ,template ):
109158# internal: match.expand implementation hook
110159template = sre_parse .parse_template (template ,pattern )
@@ -119,7 +168,7 @@ def _subn(pattern, template, string, count=0):
119168if callable (template ):
120169filter = template
121170else :
122- template = sre_parse . parse_template (template ,pattern )
171+ template = _compile_repl (template ,pattern )
123172def filter (match ,template = template ):
124173return sre_parse .expand_template (template ,match )
125174n = i = 0
@@ -158,7 +207,7 @@ def _split(pattern, string, maxsplit=0):
158207continue
159208append (string [i :b ])
160209if g and b != e :
161- extend (m .groups ())
210+ extend (list ( m .groups () ))
162211i = e
163212n = n + 1
164213append (string [i :])
@@ -204,7 +253,7 @@ def scan(self, string):
204253break
205254action = self .lexicon [m .lastindex ][1 ]
206255if callable (action ):
207- self .match = match
256+ self .match = m
208257action = action (self ,m .group ())
209258if action is not None :
210259append (action )