diff options
author | Antoine Pitrou <solipsis@pitrou.net> | 2008-08-19 17:56:33 +0000 |
---|---|---|
committer | Antoine Pitrou <solipsis@pitrou.net> | 2008-08-19 17:56:33 +0000 |
commit | fd036451bf0e0ade8783e21df801abf7be96d020 (patch) | |
tree | e70ff65a9e641d8e790bc091f0dc2507baf344ca /Lib/tokenize.py | |
parent | 3ad7ba10a20827b24d4b1aa9dd49474db8affbdd (diff) | |
download | cpython-git-fd036451bf0e0ade8783e21df801abf7be96d020.tar.gz |
#2834: Change re module semantics, so that str and bytes mixing is forbidden,
and str (unicode) patterns get full unicode matching by default. The re.ASCII
flag is also introduced to ask for ASCII matching instead.
Diffstat (limited to 'Lib/tokenize.py')
-rw-r--r-- | Lib/tokenize.py | 14 |
1 files changed, 8 insertions, 6 deletions
diff --git a/Lib/tokenize.py b/Lib/tokenize.py index 31366deb1f..ec5a79a645 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -47,21 +47,23 @@ def group(*choices): return '(' + '|'.join(choices) + ')' def any(*choices): return group(*choices) + '*' def maybe(*choices): return group(*choices) + '?' +# Note: we use unicode matching for names ("\w") but ascii matching for +# number literals. Whitespace = r'[ \f\t]*' Comment = r'#[^\r\n]*' Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment) Name = r'[a-zA-Z_]\w*' -Hexnumber = r'0[xX][\da-fA-F]+' +Hexnumber = r'0[xX][0-9a-fA-F]+' Binnumber = r'0[bB][01]+' Octnumber = r'0[oO][0-7]+' -Decnumber = r'(?:0+|[1-9]\d*)' +Decnumber = r'(?:0+|[1-9][0-9]*)' Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber) -Exponent = r'[eE][-+]?\d+' -Pointfloat = group(r'\d+\.\d*', r'\.\d+') + maybe(Exponent) -Expfloat = r'\d+' + Exponent +Exponent = r'[eE][-+]?[0-9]+' +Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent) +Expfloat = r'[0-9]+' + Exponent Floatnumber = group(Pointfloat, Expfloat) -Imagnumber = group(r'\d+[jJ]', Floatnumber + r'[jJ]') +Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]') Number = group(Imagnumber, Floatnumber, Intnumber) # Tail end of ' string. |