summary | refs | log | tree | commit | diff
path: root/Lib/sre_parse.py
diff options
context:
space:
mode:
author: Antoine Pitrou <solipsis@pitrou.net> 2008-08-19 17:56:33 +0000
committer: Antoine Pitrou <solipsis@pitrou.net> 2008-08-19 17:56:33 +0000
commit: fd036451bf0e0ade8783e21df801abf7be96d020 (patch)
tree: e70ff65a9e641d8e790bc091f0dc2507baf344ca /Lib/sre_parse.py
parent: 3ad7ba10a20827b24d4b1aa9dd49474db8affbdd (diff)
download: cpython-git-fd036451bf0e0ade8783e21df801abf7be96d020.tar.gz
#2834: Change re module semantics, so that str and bytes mixing is forbidden,
and str (unicode) patterns get full unicode matching by default. The re.ASCII flag is also introduced to ask for ASCII matching instead.
Diffstat (limited to 'Lib/sre_parse.py')
-rw-r--r-- Lib/sre_parse.py 14
1 files changed, 14 insertions, 0 deletions
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index ffa8902b7b..9d6e631ef1 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -64,6 +64,7 @@ FLAGS = {
"s": SRE_FLAG_DOTALL,
"x": SRE_FLAG_VERBOSE,
# extensions
+ "a": SRE_FLAG_ASCII,
"t": SRE_FLAG_TEMPLATE,
"u": SRE_FLAG_UNICODE,
}
@@ -672,6 +673,18 @@ def _parse(source, state):
return subpattern
def fix_flags(src, flags):
    """Check and fix flags according to the type of pattern (str or bytes).

    src is the pattern being parsed; flags is the integer flag mask
    collected for it.  Returns the (possibly adjusted) flag mask.

    For a str pattern, SRE_FLAG_UNICODE is switched on unless
    SRE_FLAG_ASCII was requested.  Any pattern that is not a str is
    handled as a bytes pattern and its flags are returned unchanged.

    Raises ValueError if a str pattern carries both SRE_FLAG_ASCII and
    SRE_FLAG_UNICODE, or if a non-str pattern carries SRE_FLAG_UNICODE.
    """
    unicode_requested = flags & SRE_FLAG_UNICODE

    if not isinstance(src, str):
        # bytes pattern: unicode matching cannot be requested
        if unicode_requested:
            raise ValueError("can't use UNICODE flag with a bytes pattern")
        return flags

    if not flags & SRE_FLAG_ASCII:
        # str pattern without an explicit ASCII request: unicode matching
        return flags | SRE_FLAG_UNICODE

    # str pattern with ASCII requested: UNICODE must not be set as well
    if unicode_requested:
        raise ValueError("ASCII and UNICODE flags are incompatible")
    return flags
+
def parse(str, flags=0, pattern=None):
# parse 're' pattern into list of (opcode, argument) tuples
@@ -683,6 +696,7 @@ def parse(str, flags=0, pattern=None):
pattern.str = str
p = _parse_sub(source, pattern, 0)
+ p.pattern.flags = fix_flags(str, p.pattern.flags)
tail = source.get()
if tail == ")":