summaryrefslogtreecommitdiff
path: root/Lib/re.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/re.py')
-rw-r--r--Lib/re.py50
1 files changed, 29 insertions, 21 deletions
diff --git a/Lib/re.py b/Lib/re.py
index 309afef125..b9106061c8 100644
--- a/Lib/re.py
+++ b/Lib/re.py
@@ -69,8 +69,11 @@ resulting RE will match the second character.
In string patterns without the ASCII flag, it will match the whole
range of Unicode digits.
\D Matches any non-digit character; equivalent to [^\d].
- \s Matches any whitespace character; equivalent to [ \t\n\r\f\v].
- \S Matches any non-whitespace character; equiv. to [^ \t\n\r\f\v].
+ \s Matches any whitespace character; equivalent to [ \t\n\r\f\v] in
+ bytes patterns or string patterns with the ASCII flag.
+ In string patterns without the ASCII flag, it will match the whole
+ range of Unicode whitespace characters.
+ \S Matches any non-whitespace character; equivalent to [^\s].
\w Matches any alphanumeric character; equivalent to [a-zA-Z0-9_]
in bytes patterns or string patterns with the ASCII flag.
In string patterns without the ASCII flag, it will match the
@@ -118,6 +121,7 @@ This module also defines an exception 'error'.
import sys
import sre_compile
import sre_parse
+import functools
# public symbols
__all__ = [ "match", "search", "sub", "subn", "split", "findall",
@@ -178,14 +182,19 @@ def subn(pattern, repl, string, count=0, flags=0):
def split(pattern, string, maxsplit=0, flags=0):
"""Split the source string by the occurrences of the pattern,
- returning a list containing the resulting substrings."""
+ returning a list containing the resulting substrings. If
+ capturing parentheses are used in pattern, then the text of all
+ groups in the pattern are also returned as part of the resulting
+ list. If maxsplit is nonzero, at most maxsplit splits occur,
+ and the remainder of the string is returned as the final element
+ of the list."""
return _compile(pattern, flags).split(string, maxsplit)
def findall(pattern, string, flags=0):
"""Return a list of all non-overlapping matches in the string.
- If one or more groups are present in the pattern, return a
- list of groups; this will be a list of tuples if the pattern
+ If one or more capturing groups are present in the pattern, return
+ a list of groups; this will be a list of tuples if the pattern
has more than one group.
Empty matches are included in the result."""
@@ -205,7 +214,7 @@ def compile(pattern, flags=0):
return _compile(pattern, flags)
def purge():
- "Clear the regular expression cache"
+ "Clear the regular expression caches"
_cache.clear()
_cache_repl.clear()
@@ -253,15 +262,14 @@ _cache_repl = {}
_pattern_type = type(sre_compile.compile("", 0))
-_MAXCACHE = 100
+_MAXCACHE = 512
-def _compile(*key):
+def _compile(pattern, flags):
# internal: compile pattern
- cachekey = (type(key[0]),) + key
- p = _cache.get(cachekey)
- if p is not None:
- return p
- pattern, flags = key
+ try:
+ return _cache[type(pattern), pattern, flags]
+ except KeyError:
+ pass
if isinstance(pattern, _pattern_type):
if flags:
raise ValueError(
@@ -272,19 +280,19 @@ def _compile(*key):
p = sre_compile.compile(pattern, flags)
if len(_cache) >= _MAXCACHE:
_cache.clear()
- _cache[cachekey] = p
+ _cache[type(pattern), pattern, flags] = p
return p
-def _compile_repl(*key):
+def _compile_repl(repl, pattern):
# internal: compile replacement pattern
- p = _cache_repl.get(key)
- if p is not None:
- return p
- repl, pattern = key
+ try:
+ return _cache_repl[repl, pattern]
+ except KeyError:
+ pass
p = sre_parse.parse_template(repl, pattern)
if len(_cache_repl) >= _MAXCACHE:
_cache_repl.clear()
- _cache_repl[key] = p
+ _cache_repl[repl, pattern] = p
return p
def _expand(pattern, match, template):
@@ -342,7 +350,7 @@ class Scanner:
if i == j:
break
action = self.lexicon[m.lastindex-1][1]
- if hasattr(action, "__call__"):
+ if callable(action):
self.match = m
action = action(self, m.group())
if action is not None: