diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2014-10-31 12:36:56 +0200 |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2014-10-31 12:36:56 +0200 |
commit | 4b8f8949b43715f1b0f0ef77e15e19c180ccc195 (patch) | |
tree | bebc1eda94d11692278f03c41c683b5b8ca815dd /Modules/_sre.c | |
parent | 455de40a6e99ad7548e6061733f9c5dae2327e83 (diff) | |
download | cpython-git-4b8f8949b43715f1b0f0ef77e15e19c180ccc195.tar.gz |
Issue #17381: Fixed handling of case-insensitive ranges in regular expressions.
Added new opcode RANGE_IGNORE.
Diffstat (limited to 'Modules/_sre.c')
-rw-r--r-- | Modules/_sre.c | 28 |
1 file changed, 25 insertions, 3 deletions
```diff
diff --git a/Modules/_sre.c b/Modules/_sre.c
index 0dc5212e45..63778f4e6b 100644
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -113,6 +113,11 @@ static unsigned int sre_lower(unsigned int ch)
     return ((ch) < 128 ? Py_TOLOWER(ch) : ch);
 }
 
+static unsigned int sre_upper(unsigned int ch)
+{
+    return ((ch) < 128 ? Py_TOUPPER(ch) : ch);
+}
+
 /* locale-specific character predicates */
 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
  * warnings when c's type supports only numbers < N+1 */
@@ -124,6 +129,11 @@ static unsigned int sre_lower_locale(unsigned int ch)
     return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch);
 }
 
+static unsigned int sre_upper_locale(unsigned int ch)
+{
+    return ((ch) < 256 ? (unsigned int)toupper((ch)) : ch);
+}
+
 /* unicode-specific character predicates */
 
 #define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
@@ -137,6 +147,11 @@ static unsigned int sre_lower_unicode(unsigned int ch)
     return (unsigned int) Py_UNICODE_TOLOWER(ch);
 }
 
+static unsigned int sre_upper_unicode(unsigned int ch)
+{
+    return (unsigned int) Py_UNICODE_TOUPPER(ch);
+}
+
 LOCAL(int)
 sre_category(SRE_CODE category, unsigned int ch)
 {
@@ -377,12 +392,18 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
     state->pos = start;
     state->endpos = end;
 
-    if (pattern->flags & SRE_FLAG_LOCALE)
+    if (pattern->flags & SRE_FLAG_LOCALE) {
         state->lower = sre_lower_locale;
-    else if (pattern->flags & SRE_FLAG_UNICODE)
+        state->upper = sre_upper_locale;
+    }
+    else if (pattern->flags & SRE_FLAG_UNICODE) {
         state->lower = sre_lower_unicode;
-    else
+        state->upper = sre_upper_unicode;
+    }
+    else {
         state->lower = sre_lower;
+        state->upper = sre_upper;
+    }
 
     return string;
   err:
@@ -1567,6 +1588,7 @@ _validate_charset(SRE_CODE *code, SRE_CODE *end)
             break;
 
         case SRE_OP_RANGE:
+        case SRE_OP_RANGE_IGNORE:
             GET_ARG;
             GET_ARG;
             break;
```