summary | refs | log | tree | commit | diff
path: root/Modules
diff options
context:
space:
mode:
author Antoine Pitrou <solipsis@pitrou.net> 2008-08-19 17:56:33 +0000
committer Antoine Pitrou <solipsis@pitrou.net> 2008-08-19 17:56:33 +0000
commit fd036451bf0e0ade8783e21df801abf7be96d020 (patch)
tree e70ff65a9e641d8e790bc091f0dc2507baf344ca /Modules
parent 3ad7ba10a20827b24d4b1aa9dd49474db8affbdd (diff)
download cpython-git-fd036451bf0e0ade8783e21df801abf7be96d020.tar.gz
#2834: Change re module semantics, so that str and bytes mixing is forbidden,
and str (unicode) patterns get full unicode matching by default. The re.ASCII flag is also introduced to ask for ASCII matching instead.
Diffstat (limited to 'Modules')
-rw-r--r-- Modules/_sre.c 27
-rw-r--r-- Modules/sre.h 1
2 files changed, 25 insertions, 3 deletions
diff --git a/Modules/_sre.c b/Modules/_sre.c
index 64fc5132c8..2a54d8e603 100644
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -1691,7 +1691,7 @@ getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize)
/* get pointer to string buffer */
view.len = -1;
buffer = Py_TYPE(string)->tp_as_buffer;
- if (!buffer || !buffer->bf_getbuffer ||
+ if (!buffer || !buffer->bf_getbuffer ||
(*buffer->bf_getbuffer)(string, &view, PyBUF_SIMPLE) < 0) {
PyErr_SetString(PyExc_TypeError, "expected string or buffer");
return NULL;
@@ -1717,7 +1717,7 @@ getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize)
if (PyBytes_Check(string) || bytes == size)
charsize = 1;
#if defined(HAVE_UNICODE)
- else if (bytes == (Py_ssize_t) (size * sizeof(Py_UNICODE)))
+ else if (bytes == (Py_ssize_t) (size * sizeof(Py_UNICODE)))
charsize = sizeof(Py_UNICODE);
#endif
else {
@@ -1729,7 +1729,7 @@ getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize)
*p_charsize = charsize;
if (ptr == NULL) {
- PyErr_SetString(PyExc_ValueError,
+ PyErr_SetString(PyExc_ValueError,
"Buffer is NULL");
}
return ptr;
@@ -1754,6 +1754,17 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
if (!ptr)
return NULL;
+ if (charsize == 1 && pattern->charsize > 1) {
+ PyErr_SetString(PyExc_TypeError,
+ "can't use a string pattern on a bytes-like object");
+ return NULL;
+ }
+ if (charsize > 1 && pattern->charsize == 1) {
+ PyErr_SetString(PyExc_TypeError,
+ "can't use a bytes pattern on a string-like object");
+ return NULL;
+ }
+
/* adjust boundaries */
if (start < 0)
start = 0;
@@ -2682,6 +2693,16 @@ _compile(PyObject* self_, PyObject* args)
return NULL;
}
+ if (pattern == Py_None)
+ self->charsize = -1;
+ else {
+ Py_ssize_t p_length;
+ if (!getstring(pattern, &p_length, &self->charsize)) {
+ PyObject_DEL(self);
+ return NULL;
+ }
+ }
+
Py_INCREF(pattern);
self->pattern = pattern;
diff --git a/Modules/sre.h b/Modules/sre.h
index d4af05c045..518c11db30 100644
--- a/Modules/sre.h
+++ b/Modules/sre.h
@@ -30,6 +30,7 @@ typedef struct {
PyObject* pattern; /* pattern source (or None) */
int flags; /* flags used when compiling pattern source */
PyObject *weakreflist; /* List of weak references */
+ int charsize; /* pattern charsize (or -1) */
/* pattern code */
Py_ssize_t codesize;
SRE_CODE code[1];