diff options
Diffstat (limited to 'Lib/re.py')
-rw-r--r-- | Lib/re.py | 50 |
1 files changed, 29 insertions, 21 deletions
@@ -69,8 +69,11 @@ resulting RE will match the second character. In string patterns without the ASCII flag, it will match the whole range of Unicode digits. \D Matches any non-digit character; equivalent to [^\d]. - \s Matches any whitespace character; equivalent to [ \t\n\r\f\v]. - \S Matches any non-whitespace character; equiv. to [^ \t\n\r\f\v]. + \s Matches any whitespace character; equivalent to [ \t\n\r\f\v] in + bytes patterns or string patterns with the ASCII flag. + In string patterns without the ASCII flag, it will match the whole + range of Unicode whitespace characters. + \S Matches any non-whitespace character; equivalent to [^\s]. \w Matches any alphanumeric character; equivalent to [a-zA-Z0-9_] in bytes patterns or string patterns with the ASCII flag. In string patterns without the ASCII flag, it will match the @@ -118,6 +121,7 @@ This module also defines an exception 'error'. import sys import sre_compile import sre_parse +import functools # public symbols __all__ = [ "match", "search", "sub", "subn", "split", "findall", @@ -178,14 +182,19 @@ def subn(pattern, repl, string, count=0, flags=0): def split(pattern, string, maxsplit=0, flags=0): """Split the source string by the occurrences of the pattern, - returning a list containing the resulting substrings.""" + returning a list containing the resulting substrings. If + capturing parentheses are used in pattern, then the text of all + groups in the pattern are also returned as part of the resulting + list. If maxsplit is nonzero, at most maxsplit splits occur, + and the remainder of the string is returned as the final element + of the list.""" return _compile(pattern, flags).split(string, maxsplit) def findall(pattern, string, flags=0): """Return a list of all non-overlapping matches in the string. - If one or more groups are present in the pattern, return a - list of groups; this will be a list of tuples if the pattern + If one or more capturing groups are present in the pattern, return + a list of groups; this will be a list of tuples if the pattern has more than one group. Empty matches are included in the result.""" @@ -205,7 +214,7 @@ def compile(pattern, flags=0): return _compile(pattern, flags) def purge(): - "Clear the regular expression cache" + "Clear the regular expression caches" _cache.clear() _cache_repl.clear() @@ -253,15 +262,14 @@ _cache_repl = {} _pattern_type = type(sre_compile.compile("", 0)) -_MAXCACHE = 100 +_MAXCACHE = 512 -def _compile(*key): +def _compile(pattern, flags): # internal: compile pattern - cachekey = (type(key[0]),) + key - p = _cache.get(cachekey) - if p is not None: - return p - pattern, flags = key + try: + return _cache[type(pattern), pattern, flags] + except KeyError: + pass if isinstance(pattern, _pattern_type): if flags: raise ValueError( @@ -272,19 +280,19 @@ def _compile(*key): p = sre_compile.compile(pattern, flags) if len(_cache) >= _MAXCACHE: _cache.clear() - _cache[cachekey] = p + _cache[type(pattern), pattern, flags] = p return p -def _compile_repl(*key): +def _compile_repl(repl, pattern): # internal: compile replacement pattern - p = _cache_repl.get(key) - if p is not None: - return p - repl, pattern = key + try: + return _cache_repl[repl, pattern] + except KeyError: + pass p = sre_parse.parse_template(repl, pattern) if len(_cache_repl) >= _MAXCACHE: _cache_repl.clear() - _cache_repl[key] = p + _cache_repl[repl, pattern] = p return p def _expand(pattern, match, template): @@ -342,7 +350,7 @@ class Scanner: if i == j: break action = self.lexicon[m.lastindex-1][1] - if hasattr(action, "__call__"): + if callable(action): self.match = m action = action(self, m.group()) if action is not None: |