diff options
author | Antoine Pitrou <solipsis@pitrou.net> | 2008-08-19 17:56:33 +0000 |
---|---|---|
committer | Antoine Pitrou <solipsis@pitrou.net> | 2008-08-19 17:56:33 +0000 |
commit | fd036451bf0e0ade8783e21df801abf7be96d020 (patch) | |
tree | e70ff65a9e641d8e790bc091f0dc2507baf344ca /Lib/sre_parse.py | |
parent | 3ad7ba10a20827b24d4b1aa9dd49474db8affbdd (diff) | |
download | cpython-git-fd036451bf0e0ade8783e21df801abf7be96d020.tar.gz |
#2834: Change re module semantics, so that str and bytes mixing is forbidden,
and str (unicode) patterns get full unicode matching by default. The re.ASCII
flag is also introduced to ask for ASCII matching instead.
Diffstat (limited to 'Lib/sre_parse.py')
-rw-r--r-- | Lib/sre_parse.py | 14 |
1 files changed, 14 insertions, 0 deletions
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index ffa8902b7b..9d6e631ef1 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -64,6 +64,7 @@ FLAGS = {
     "s": SRE_FLAG_DOTALL,
     "x": SRE_FLAG_VERBOSE,
     # extensions
+    "a": SRE_FLAG_ASCII,
     "t": SRE_FLAG_TEMPLATE,
     "u": SRE_FLAG_UNICODE,
 }
@@ -672,6 +673,18 @@ def _parse(source, state):
 
     return subpattern
 
+def fix_flags(src, flags):
+    # Check and fix flags according to the type of pattern (str or bytes)
+    if isinstance(src, str):
+        if not flags & SRE_FLAG_ASCII:
+            flags |= SRE_FLAG_UNICODE
+        elif flags & SRE_FLAG_UNICODE:
+            raise ValueError("ASCII and UNICODE flags are incompatible")
+    else:
+        if flags & SRE_FLAG_UNICODE:
+            raise ValueError("can't use UNICODE flag with a bytes pattern")
+    return flags
+
 def parse(str, flags=0, pattern=None):
     # parse 're' pattern into list of (opcode, argument) tuples
 
@@ -683,6 +696,7 @@ def parse(str, flags=0, pattern=None):
     pattern.str = str
 
     p = _parse_sub(source, pattern, 0)
+    p.pattern.flags = fix_flags(str, p.pattern.flags)
 
     tail = source.get()
     if tail == ")":