summary | refs | log | tree | commit | diff
path: root/Modules/_sre.c
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com> 2014-10-31 12:36:56 +0200
committer: Serhiy Storchaka <storchaka@gmail.com> 2014-10-31 12:36:56 +0200
commit: 4b8f8949b43715f1b0f0ef77e15e19c180ccc195 (patch)
tree: bebc1eda94d11692278f03c41c683b5b8ca815dd /Modules/_sre.c
parent: 455de40a6e99ad7548e6061733f9c5dae2327e83 (diff)
download: cpython-git-4b8f8949b43715f1b0f0ef77e15e19c180ccc195.tar.gz
Issue #17381: Fixed handling of case-insensitive ranges in regular expressions.
Added new opcode RANGE_IGNORE.
Diffstat (limited to 'Modules/_sre.c')
-rw-r--r-- Modules/_sre.c 28
1 files changed, 25 insertions, 3 deletions
diff --git a/Modules/_sre.c b/Modules/_sre.c
index 0dc5212e45..63778f4e6b 100644
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -113,6 +113,11 @@ static unsigned int sre_lower(unsigned int ch)
return ((ch) < 128 ? Py_TOLOWER(ch) : ch);
}
+static unsigned int sre_upper(unsigned int ch) /* upper-case counterpart of sre_lower; state_init installs it as state->upper when neither SRE_FLAG_LOCALE nor SRE_FLAG_UNICODE is set */
+{
+ return ((ch) < 128 ? Py_TOUPPER(ch) : ch); /* only values below 128 go through Py_TOUPPER; anything else is returned unchanged */
+}
+
/* locale-specific character predicates */
/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
* warnings when c's type supports only numbers < N+1 */
@@ -124,6 +129,11 @@ static unsigned int sre_lower_locale(unsigned int ch)
return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch);
}
+static unsigned int sre_upper_locale(unsigned int ch) /* upper-case counterpart of sre_lower_locale; state_init installs it as state->upper for SRE_FLAG_LOCALE patterns */
+{
+ return ((ch) < 256 ? (unsigned int)toupper((ch)) : ch); /* toupper() is only called for values 0..255; larger values are returned unchanged */
+}
+
/* unicode-specific character predicates */
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
@@ -137,6 +147,11 @@ static unsigned int sre_lower_unicode(unsigned int ch)
return (unsigned int) Py_UNICODE_TOLOWER(ch);
}
+static unsigned int sre_upper_unicode(unsigned int ch) /* upper-case counterpart of sre_lower_unicode; state_init installs it as state->upper for SRE_FLAG_UNICODE patterns */
+{
+ return (unsigned int) Py_UNICODE_TOUPPER(ch); /* no range guard here: every value is passed to Py_UNICODE_TOUPPER and the result is cast back to unsigned int */
+}
+
LOCAL(int)
sre_category(SRE_CODE category, unsigned int ch)
{
@@ -377,12 +392,18 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
state->pos = start;
state->endpos = end;
- if (pattern->flags & SRE_FLAG_LOCALE)
+ if (pattern->flags & SRE_FLAG_LOCALE) {
state->lower = sre_lower_locale;
- else if (pattern->flags & SRE_FLAG_UNICODE)
+ state->upper = sre_upper_locale;
+ }
+ else if (pattern->flags & SRE_FLAG_UNICODE) {
state->lower = sre_lower_unicode;
- else
+ state->upper = sre_upper_unicode;
+ }
+ else {
state->lower = sre_lower;
+ state->upper = sre_upper;
+ }
return string;
err:
@@ -1567,6 +1588,7 @@ _validate_charset(SRE_CODE *code, SRE_CODE *end)
break;
case SRE_OP_RANGE:
+ case SRE_OP_RANGE_IGNORE:
GET_ARG;
GET_ARG;
break;