diff options
author | Armin Rigo <arigo@tunes.org> | 2006-10-04 11:44:06 +0000 |
---|---|---|
committer | Armin Rigo <arigo@tunes.org> | 2006-10-04 11:44:06 +0000 |
commit | 4b63c21d6faebc406260733c16b826e3e01b0d89 (patch) | |
tree | bdeaa6389ccc7075550272904869d32302046572 /Objects | |
parent | c6f2f884b4789c4000ffb30a85646f088da102b1 (diff) | |
download | cpython-git-4b63c21d6faebc406260733c16b826e3e01b0d89.tar.gz |
Forward-port of r52136: a review of overflow-detecting code.
* unified the way intobject, longobject and mystrtoul handle
values around -sys.maxint-1.
* in general, trying to entierely avoid overflows in any computation
involving signed ints or longs is extremely involved. Fixed a few
simple cases where a compiler might be too clever (but that's all
guesswork).
* more overflow checks against bad data in marshal.c.
* 2.5 specific: fixed a number of places that were still confusing int
and Py_ssize_t. Some of them could potentially have caused
"real-world" breakage.
* list.pop(x): fixing overflow issues on x was messy. I just reverted
to PyArg_ParseTuple("n"), which does the right thing. (An obscure
test was trying to give a Decimal to list.pop()... doesn't make
sense any more IMHO)
* trying to write a few tests...
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/abstract.c | 14 | ||||
-rw-r--r-- | Objects/fileobject.c | 8 | ||||
-rw-r--r-- | Objects/intobject.c | 24 | ||||
-rw-r--r-- | Objects/listobject.c | 11 | ||||
-rw-r--r-- | Objects/longobject.c | 71 | ||||
-rw-r--r-- | Objects/stringobject.c | 25 | ||||
-rw-r--r-- | Objects/typeobject.c | 17 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 7 |
8 files changed, 105 insertions, 72 deletions
diff --git a/Objects/abstract.c b/Objects/abstract.c index a18bb787ea..7115c523c2 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -1652,20 +1652,18 @@ _PySequence_IterSearch(PyObject *seq, PyObject *obj, int operation) if (cmp > 0) { switch (operation) { case PY_ITERSEARCH_COUNT: - ++n; - if (n <= 0) { - /* XXX(nnorwitz): int means ssize_t */ + if (n == PY_SSIZE_T_MAX) { PyErr_SetString(PyExc_OverflowError, - "count exceeds C int size"); + "count exceeds C integer size"); goto Fail; } + ++n; break; case PY_ITERSEARCH_INDEX: if (wrapped) { - /* XXX(nnorwitz): int means ssize_t */ PyErr_SetString(PyExc_OverflowError, - "index exceeds C int size"); + "index exceeds C integer size"); goto Fail; } goto Done; @@ -1680,9 +1678,9 @@ _PySequence_IterSearch(PyObject *seq, PyObject *obj, int operation) } if (operation == PY_ITERSEARCH_INDEX) { - ++n; - if (n <= 0) + if (n == PY_SSIZE_T_MAX) wrapped = 1; + ++n; } } diff --git a/Objects/fileobject.c b/Objects/fileobject.c index b43bf85b87..ced07684fd 100644 --- a/Objects/fileobject.c +++ b/Objects/fileobject.c @@ -1001,6 +1001,7 @@ getline_via_fgets(FILE *fp) size_t nfree; /* # of free buffer slots; pvend-pvfree */ size_t total_v_size; /* total # of slots in buffer */ size_t increment; /* amount to increment the buffer */ + size_t prev_v_size; /* Optimize for normal case: avoid _PyString_Resize if at all * possible via first reading into stack buffer "buf". @@ -1115,8 +1116,11 @@ getline_via_fgets(FILE *fp) /* expand buffer and try again */ assert(*(pvend-1) == '\0'); increment = total_v_size >> 2; /* mild exponential growth */ + prev_v_size = total_v_size; total_v_size += increment; - if (total_v_size > PY_SSIZE_T_MAX) { + /* check for overflow */ + if (total_v_size <= prev_v_size || + total_v_size > PY_SSIZE_T_MAX) { PyErr_SetString(PyExc_OverflowError, "line is longer than a Python string can hold"); Py_DECREF(v); @@ -1125,7 +1129,7 @@ getline_via_fgets(FILE *fp) if (_PyString_Resize(&v, (int)total_v_size) < 0) return NULL; /* overwrite the trailing null byte */ - pvfree = BUF(v) + (total_v_size - increment - 1); + pvfree = BUF(v) + (prev_v_size - 1); } if (BUF(v) + total_v_size != p) _PyString_Resize(&v, p - BUF(v)); diff --git a/Objects/intobject.c b/Objects/intobject.c index 28f7606710..a4d50be129 100644 --- a/Objects/intobject.c +++ b/Objects/intobject.c @@ -546,6 +546,17 @@ int_mul(PyObject *v, PyObject *w) } } +/* Integer overflow checking for unary negation: on a 2's-complement + * box, -x overflows iff x is the most negative long. In this case we + * get -x == x. However, -x is undefined (by C) if x /is/ the most + * negative long (it's a signed overflow case), and some compilers care. + * So we cast x to unsigned long first. However, then other compilers + * warn about applying unary minus to an unsigned operand. Hence the + * weird "0-". + */ +#define UNARY_NEG_WOULD_OVERFLOW(x) \ + ((x) < 0 && (unsigned long)(x) == 0-(unsigned long)(x)) + /* Return type of i_divmod */ enum divmod_result { DIVMOD_OK, /* Correct result */ @@ -564,14 +575,8 @@ i_divmod(register long x, register long y, "integer division or modulo by zero"); return DIVMOD_ERROR; } - /* (-sys.maxint-1)/-1 is the only overflow case. x is the most - * negative long iff x < 0 and, on a 2's-complement box, x == -x. - * However, -x is undefined (by C) if x /is/ the most negative long - * (it's a signed overflow case), and some compilers care. So we cast - * x to unsigned long first. However, then other compilers warn about - * applying unary minus to an unsigned operand. Hence the weird "0-". - */ - if (y == -1 && x < 0 && (unsigned long)x == 0-(unsigned long)x) + /* (-sys.maxint-1)/-1 is the only overflow case. */ + if (y == -1 && UNARY_NEG_WOULD_OVERFLOW(x)) return DIVMOD_OVERFLOW; xdivy = x / y; xmody = x - xdivy * y; @@ -762,7 +767,8 @@ int_neg(PyIntObject *v) { register long a; a = v->ob_ival; - if (a < 0 && (unsigned long)a == 0-(unsigned long)a) { + /* check for overflow */ + if (UNARY_NEG_WOULD_OVERFLOW(a)) { PyObject *o = PyLong_FromLong(a); if (o != NULL) { PyObject *result = PyNumber_Negative(o); diff --git a/Objects/listobject.c b/Objects/listobject.c index ad276447d3..7afea121cd 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -863,17 +863,12 @@ static PyObject * listpop(PyListObject *self, PyObject *args) { Py_ssize_t i = -1; - PyObject *v, *arg = NULL; + PyObject *v; int status; - if (!PyArg_UnpackTuple(args, "pop", 0, 1, &arg)) + if (!PyArg_ParseTuple(args, "|n:pop", &i)) return NULL; - if (arg != NULL) { - if (PyInt_Check(arg)) - i = PyInt_AS_LONG((PyIntObject*) arg); - else if (!PyArg_ParseTuple(args, "|n:pop", &i)) - return NULL; - } + if (self->ob_size == 0) { /* Special-case most common failure cause */ PyErr_SetString(PyExc_IndexError, "pop from empty list"); diff --git a/Objects/longobject.c b/Objects/longobject.c index e32c42566e..4d886cd724 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -193,6 +193,18 @@ PyLong_FromDouble(double dval) return (PyObject *)v; } +/* Checking for overflow in PyLong_AsLong is a PITA since C doesn't define + * anything about what happens when a signed integer operation overflows, + * and some compilers think they're doing you a favor by being "clever" + * then. The bit pattern for the largest postive signed long is + * (unsigned long)LONG_MAX, and for the smallest negative signed long + * it is abs(LONG_MIN), which we could write -(unsigned long)LONG_MIN. + * However, some other compilers warn about applying unary minus to an + * unsigned operand. Hence the weird "0-". + */ +#define PY_ABS_LONG_MIN (0-(unsigned long)LONG_MIN) +#define PY_ABS_SSIZE_T_MIN (0-(size_t)PY_SSIZE_T_MIN) + /* Get a C long int from a long int object. Returns -1 and sets an error condition if overflow occurs. */ @@ -225,14 +237,16 @@ PyLong_AsLong(PyObject *vv) if ((x >> SHIFT) != prev) goto overflow; } - /* Haven't lost any bits, but if the sign bit is set we're in - * trouble *unless* this is the min negative number. So, - * trouble iff sign bit set && (positive || some bit set other - * than the sign bit). - */ - if ((long)x < 0 && (sign > 0 || (x << 1) != 0)) - goto overflow; - return (long)x * sign; + /* Haven't lost any bits, but casting to long requires extra care + * (see comment above). + */ + if (x <= (unsigned long)LONG_MAX) { + return (long)x * sign; + } + else if (sign < 0 && x == PY_ABS_LONG_MIN) { + return LONG_MIN; + } + /* else overflow */ overflow: PyErr_SetString(PyExc_OverflowError, @@ -268,14 +282,16 @@ _PyLong_AsSsize_t(PyObject *vv) { if ((x >> SHIFT) != prev) goto overflow; } - /* Haven't lost any bits, but if the sign bit is set we're in - * trouble *unless* this is the min negative number. So, - * trouble iff sign bit set && (positive || some bit set other - * than the sign bit). + /* Haven't lost any bits, but casting to a signed type requires + * extra care (see comment above). */ - if ((Py_ssize_t)x < 0 && (sign > 0 || (x << 1) != 0)) - goto overflow; - return (Py_ssize_t)x * sign; + if (x <= (size_t)PY_SSIZE_T_MAX) { + return (Py_ssize_t)x * sign; + } + else if (sign < 0 && x == PY_ABS_SSIZE_T_MIN) { + return PY_SSIZE_T_MIN; + } + /* else overflow */ overflow: PyErr_SetString(PyExc_OverflowError, @@ -1167,7 +1183,7 @@ long_format(PyObject *aa, int base, int addL) { register PyLongObject *a = (PyLongObject *)aa; PyStringObject *str; - Py_ssize_t i; + Py_ssize_t i, j, sz; Py_ssize_t size_a; char *p; int bits; @@ -1187,11 +1203,18 @@ long_format(PyObject *aa, int base, int addL) ++bits; i >>= 1; } - i = 5 + (addL ? 1 : 0) + (size_a*SHIFT + bits-1) / bits; - str = (PyStringObject *) PyString_FromStringAndSize((char *)0, i); + i = 5 + (addL ? 1 : 0); + j = size_a*SHIFT + bits-1; + sz = i + j / bits; + if (j / SHIFT < size_a || sz < i) { + PyErr_SetString(PyExc_OverflowError, + "long is too large to format"); + return NULL; + } + str = (PyStringObject *) PyString_FromStringAndSize((char *)0, sz); if (str == NULL) return NULL; - p = PyString_AS_STRING(str) + i; + p = PyString_AS_STRING(str) + sz; *p = '\0'; if (addL) *--p = 'L'; @@ -1305,7 +1328,7 @@ long_format(PyObject *aa, int base, int addL) } while ((*q++ = *p++) != '\0'); q--; _PyString_Resize((PyObject **)&str, - (int) (q - PyString_AS_STRING(str))); + (Py_ssize_t) (q - PyString_AS_STRING(str))); } return (PyObject *)str; } @@ -1363,14 +1386,14 @@ long_from_binary_base(char **str, int base) while (_PyLong_DigitValue[Py_CHARMASK(*p)] < base) ++p; *str = p; - n = (p - start) * bits_per_char; - if (n / bits_per_char != p - start) { + /* n <- # of Python digits needed, = ceiling(n/SHIFT). */ + n = (p - start) * bits_per_char + SHIFT - 1; + if (n / bits_per_char < p - start) { PyErr_SetString(PyExc_ValueError, "long string too large to convert"); return NULL; } - /* n <- # of Python digits needed, = ceiling(n/SHIFT). */ - n = (n + SHIFT - 1) / SHIFT; + n = n / SHIFT; z = _PyLong_New(n); if (z == NULL) return NULL; diff --git a/Objects/stringobject.c b/Objects/stringobject.c index 4c2faf4543..aa2fd872dd 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -804,10 +804,22 @@ string_print(PyStringObject *op, FILE *fp, int flags) return ret; } if (flags & Py_PRINT_RAW) { + char *data = op->ob_sval; + Py_ssize_t size = op->ob_size; + while (size > INT_MAX) { + /* Very long strings cannot be written atomically. + * But don't write exactly INT_MAX bytes at a time + * to avoid memory aligment issues. + */ + const int chunk_size = INT_MAX & ~0x3FFF; + fwrite(data, 1, chunk_size, fp); + data += chunk_size; + size -= chunk_size; + } #ifdef __VMS - if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp); + if (size) fwrite(data, (int)size, 1, fp); #else - fwrite(op->ob_sval, 1, (int) op->ob_size, fp); + fwrite(data, 1, (int)size, fp); #endif return 0; } @@ -844,7 +856,7 @@ PyString_Repr(PyObject *obj, int smartquotes) register PyStringObject* op = (PyStringObject*) obj; size_t newsize = 2 + 4 * op->ob_size; PyObject *v; - if (newsize > PY_SSIZE_T_MAX) { + if (newsize > PY_SSIZE_T_MAX || newsize / 4 != op->ob_size) { PyErr_SetString(PyExc_OverflowError, "string is too large to make repr"); } @@ -4237,7 +4249,7 @@ _PyString_FormatLong(PyObject *val, int flags, int prec, int type, return NULL; } llen = PyString_Size(result); - if (llen > PY_SSIZE_T_MAX) { + if (llen > INT_MAX) { PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong"); return NULL; } @@ -4726,9 +4738,10 @@ PyString_Format(PyObject *format, PyObject *args) default: PyErr_Format(PyExc_ValueError, "unsupported format character '%c' (0x%x) " - "at index %i", + "at index %zd", c, c, - (int)(fmt - 1 - PyString_AsString(format))); + (Py_ssize_t)(fmt - 1 - + PyString_AsString(format))); goto error; } if (sign) { diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 4d99f7d67f..d4a46c37f0 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -98,7 +98,7 @@ type_module(PyTypeObject *type, void *context) s = strrchr(type->tp_name, '.'); if (s != NULL) return PyString_FromStringAndSize( - type->tp_name, (int)(s - type->tp_name)); + type->tp_name, (Py_ssize_t)(s - type->tp_name)); return PyString_FromString("__builtin__"); } } @@ -4116,19 +4116,10 @@ slot_sq_length(PyObject *self) return -1; len = PyInt_AsSsize_t(res); Py_DECREF(res); - if (len == -1 && PyErr_Occurred()) - return -1; -#if SIZEOF_SIZE_T < SIZEOF_INT - /* Overflow check -- range of PyInt is more than C ssize_t */ - if (len != (int)len) { - PyErr_SetString(PyExc_OverflowError, - "__len__() should return 0 <= outcome < 2**31"); - return -1; - } -#endif if (len < 0) { - PyErr_SetString(PyExc_ValueError, - "__len__() should return >= 0"); + if (!PyErr_Occurred()) + PyErr_SetString(PyExc_ValueError, + "__len__() should return >= 0"); return -1; } return len; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 2ae3f61d23..00f2018da0 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2380,6 +2380,7 @@ PyObject *_PyUnicode_DecodeUnicodeInternal(const char *s, Py_UNICODE unimax = PyUnicode_GetMax(); #endif + /* XXX overflow detection missing */ v = _PyUnicode_New((size+Py_UNICODE_SIZE-1)/ Py_UNICODE_SIZE); if (v == NULL) goto onError; @@ -3166,6 +3167,7 @@ PyObject *PyUnicode_DecodeCharmap(const char *s, Py_ssize_t needed = (targetsize - extrachars) + \ (targetsize << 2); extrachars += needed; + /* XXX overflow detection missing */ if (_PyUnicode_Resize(&v, PyUnicode_GET_SIZE(v) + needed) < 0) { Py_DECREF(x); @@ -7758,10 +7760,11 @@ PyObject *PyUnicode_Format(PyObject *format, default: PyErr_Format(PyExc_ValueError, "unsupported format character '%c' (0x%x) " - "at index %i", + "at index %zd", (31<=c && c<=126) ? (char)c : '?', (int)c, - (int)(fmt -1 - PyUnicode_AS_UNICODE(uformat))); + (Py_ssize_t)(fmt - 1 - + PyUnicode_AS_UNICODE(uformat))); goto onError; } if (sign) { |