From ed2cd346034d3af49a364bce5acaee719d88379f Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Tue, 25 Aug 2020 14:46:02 +0100 Subject: MAINT: Remove references to PyStringObject --- numpy/core/include/numpy/arrayscalars.h | 3 +-- numpy/core/src/multiarray/scalarapi.c | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'numpy') diff --git a/numpy/core/include/numpy/arrayscalars.h b/numpy/core/include/numpy/arrayscalars.h index 6dce88df3..b282a2cd4 100644 --- a/numpy/core/include/numpy/arrayscalars.h +++ b/numpy/core/include/numpy/arrayscalars.h @@ -134,8 +134,7 @@ typedef struct { char obval; } PyScalarObject; -#define PyStringScalarObject PyStringObject -#define PyStringScalarObject PyStringObject +#define PyStringScalarObject PyBytesObject typedef struct { /* note that the PyObject_HEAD macro lives right here */ PyUnicodeObject base; diff --git a/numpy/core/src/multiarray/scalarapi.c b/numpy/core/src/multiarray/scalarapi.c index 4dee259f8..bcd777183 100644 --- a/numpy/core/src/multiarray/scalarapi.c +++ b/numpy/core/src/multiarray/scalarapi.c @@ -755,8 +755,8 @@ PyArray_Scalar(void *data, PyArray_Descr *descr, PyObject *base) } if (PyTypeNum_ISFLEXIBLE(type_num)) { if (type_num == NPY_STRING) { - destptr = PyString_AS_STRING(obj); - ((PyStringObject *)obj)->ob_shash = -1; + destptr = PyBytes_AS_STRING(obj); + ((PyBytesObject *)obj)->ob_shash = -1; memcpy(destptr, data, itemsize); return obj; } -- cgit v1.2.1 From a68a3ada494d9f66625c7ae94d12635021992e3a Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Tue, 25 Aug 2020 14:57:20 +0100 Subject: BUG: Do not throw UnicodeError for non-ascii typestrs in __array_interface__ Instead, just throw TypeError like `np.dtype` does This also fixes unsafe error handling of a call to PyArray_DescrConverter2 --- numpy/core/src/multiarray/ctors.c | 67 +++++++++++++------------------------ numpy/core/tests/test_multiarray.py | 12 +++++++ 2 files changed, 35 insertions(+), 44 deletions(-) (limited to 'numpy') diff 
--git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index 6add032bf..c9b1614f4 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -1809,37 +1809,19 @@ PyArray_FromStructInterface(PyObject *input) */ NPY_NO_EXPORT int _is_default_descr(PyObject *descr, PyObject *typestr) { - PyObject *tuple, *name, *typestr2; - PyObject *tmp = NULL; - int ret = 0; - if (!PyList_Check(descr) || PyList_GET_SIZE(descr) != 1) { return 0; } - tuple = PyList_GET_ITEM(descr, 0); + PyObject *tuple = PyList_GET_ITEM(descr, 0); if (!(PyTuple_Check(tuple) && PyTuple_GET_SIZE(tuple) == 2)) { return 0; } - name = PyTuple_GET_ITEM(tuple, 0); + PyObject *name = PyTuple_GET_ITEM(tuple, 0); if (!(PyUnicode_Check(name) && PyUnicode_GetLength(name) == 0)) { return 0; } - typestr2 = PyTuple_GET_ITEM(tuple, 1); - /* Allow unicode type strings */ - if (PyUnicode_Check(typestr2)) { - tmp = PyUnicode_AsASCIIString(typestr2); - if (tmp == NULL) { - return 0; - } - typestr2 = tmp; - } - if (PyBytes_Check(typestr2) && - PyObject_RichCompareBool(typestr, typestr2, Py_EQ)) { - ret = 1; - } - Py_XDECREF(tmp); - - return ret; + PyObject *typestr2 = PyTuple_GET_ITEM(tuple, 1); + return PyObject_RichCompareBool(typestr, typestr2, Py_EQ); } /*NUMPY_API*/ @@ -1893,26 +1875,15 @@ PyArray_FromInterface(PyObject *origin) return NULL; } - /* Allow unicode type strings */ - if (PyUnicode_Check(attr)) { - PyObject *tmp = PyUnicode_AsASCIIString(attr); - if (tmp == NULL) { - goto fail; - } - attr = tmp; - } - else { - Py_INCREF(attr); - } - - if (!PyBytes_Check(attr)) { + /* allow bytes for backwards compatibility */ + if (!PyBytes_Check(attr) && !PyUnicode_Check(attr)) { PyErr_SetString(PyExc_TypeError, "__array_interface__ typestr must be a string"); goto fail; } + /* Get dtype from type string */ - dtype = _array_typedescr_fromstr(PyString_AS_STRING(attr)); - if (dtype == NULL) { + if (PyArray_DescrConverter(attr, &dtype) != NPY_SUCCEED) { goto fail; } @@ 
-1926,16 +1897,24 @@ PyArray_FromInterface(PyObject *origin) goto fail; } PyArray_Descr *new_dtype = NULL; + if (descr != NULL) { + int is_default = _is_default_descr(descr, attr); + if (is_default < 0) { + goto fail; + } + if (!is_default) { + if (PyArray_DescrConverter2(descr, &new_dtype) != NPY_SUCCEED) { + goto fail; + } + if (new_dtype != NULL) { + Py_DECREF(dtype); + dtype = new_dtype; + } + } - if (descr != NULL && !_is_default_descr(descr, attr) && - PyArray_DescrConverter2(descr, &new_dtype) == NPY_SUCCEED && - new_dtype != NULL) { - Py_DECREF(dtype); - dtype = new_dtype; } - } - Py_DECREF(attr); /* Pairs with the unicode handling above */ + } /* Get shape tuple from interface specification */ attr = _PyDict_GetItemStringWithError(iface, "shape"); diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index 03f10bf2d..a701de7c1 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -7436,6 +7436,18 @@ def test_array_interface_offset(): arr1 = np.asarray(DummyArray()) assert_equal(arr1, arr[1:]) +def test_array_interface_unicode_typestr(): + arr = np.array([1, 2, 3], dtype='int32') + interface = dict(arr.__array_interface__) + interface['typestr'] = '\N{check mark}' + + class DummyArray: + __array_interface__ = interface + + # should not be UnicodeEncodeError + with pytest.raises(TypeError): + np.asarray(DummyArray()) + def test_flat_element_deletion(): it = np.ones(3).flat try: -- cgit v1.2.1 From aa3570b296fb4d7d467c5185b7bdb96621b3d66b Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Tue, 25 Aug 2020 15:12:58 +0100 Subject: MAINT: remove _array_typedescr_fromstr and adjust its remaining caller --- numpy/core/src/multiarray/common.c | 20 -------------------- numpy/core/src/multiarray/ctors.c | 18 +++++++++++------- 2 files changed, 11 insertions(+), 27 deletions(-) (limited to 'numpy') diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c index 
5f8250fb7..2256906eb 100644 --- a/numpy/core/src/multiarray/common.c +++ b/numpy/core/src/multiarray/common.c @@ -127,26 +127,6 @@ PyArray_DTypeFromObject(PyObject *obj, int maxdims, PyArray_Descr **out_dtype) return 0; } - -/* new reference */ -NPY_NO_EXPORT PyArray_Descr * -_array_typedescr_fromstr(char const *c_str) -{ - PyArray_Descr *descr = NULL; - PyObject *stringobj = PyBytes_FromString(c_str); - - if (stringobj == NULL) { - return NULL; - } - if (PyArray_DescrConverter(stringobj, &descr) != NPY_SUCCEED) { - Py_DECREF(stringobj); - return NULL; - } - Py_DECREF(stringobj); - return descr; -} - - NPY_NO_EXPORT char * index2ptr(PyArrayObject *mp, npy_intp i) { diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index c9b1614f4..7534c0717 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -1736,10 +1736,8 @@ NPY_NO_EXPORT PyObject * PyArray_FromStructInterface(PyObject *input) { PyArray_Descr *thetype = NULL; - char buf[40]; PyArrayInterface *inter; PyObject *attr; - PyArrayObject *ret; char endian = NPY_NATBYTE; attr = PyArray_LookupSpecial_OnInstance(input, "__array_struct__"); @@ -1782,20 +1780,26 @@ PyArray_FromStructInterface(PyObject *input) } if (thetype == NULL) { - PyOS_snprintf(buf, sizeof(buf), - "%c%c%d", endian, inter->typekind, inter->itemsize); - if (!(thetype=_array_typedescr_fromstr(buf))) { + PyObject *type_str = PyUnicode_FromFormat( + "%c%c%d", endian, inter->typekind, inter->itemsize); + if (type_str == NULL) { + Py_DECREF(attr); + return NULL; + } + int ok = PyArray_DescrConverter(type_str, &thetype); + Py_DECREF(type_str); + if (ok != NPY_SUCCEED) { Py_DECREF(attr); return NULL; } } - ret = (PyArrayObject *)PyArray_NewFromDescrAndBase( + PyObject *ret = PyArray_NewFromDescrAndBase( &PyArray_Type, thetype, inter->nd, inter->shape, inter->strides, inter->data, inter->flags, NULL, input); Py_DECREF(attr); - return (PyObject *)ret; + return ret; fail: 
PyErr_SetString(PyExc_ValueError, "invalid __array_struct__"); -- cgit v1.2.1 From be40e3f885b6a93f23edad20c5debe9379134b58 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Tue, 25 Aug 2020 15:18:48 +0100 Subject: MAINT: replace all remaining PyString_AS_STRING with PyBytes_AS_STRING These all look like valid enough uses of bytes strings --- numpy/core/src/multiarray/multiarraymodule.c | 6 +++--- numpy/core/src/multiarray/scalarapi.c | 2 +- numpy/core/src/umath/_rational_tests.c.src | 5 +++-- numpy/core/src/umath/_umath_tests.c.src | 4 ++-- 4 files changed, 9 insertions(+), 8 deletions(-) (limited to 'numpy') diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index db419636d..c79d9a845 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -1956,9 +1956,9 @@ array_scalar(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kwds) return NULL; } } - if (!PyString_Check(obj)) { + if (!PyBytes_Check(obj)) { PyErr_SetString(PyExc_TypeError, - "initializing object must be a string"); + "initializing object must be a bytes object"); Py_XDECREF(tmpobj); return NULL; } @@ -1968,7 +1968,7 @@ array_scalar(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kwds) Py_XDECREF(tmpobj); return NULL; } - dptr = PyString_AS_STRING(obj); + dptr = PyBytes_AS_STRING(obj); } } ret = PyArray_Scalar(dptr, typecode, NULL); diff --git a/numpy/core/src/multiarray/scalarapi.c b/numpy/core/src/multiarray/scalarapi.c index bcd777183..b2f52f554 100644 --- a/numpy/core/src/multiarray/scalarapi.c +++ b/numpy/core/src/multiarray/scalarapi.c @@ -138,7 +138,7 @@ scalar_value(PyObject *scalar, PyArray_Descr *descr) } else if (_CHK(Flexible)) { if (_CHK(String)) { - return (void *)PyString_AS_STRING(scalar); + return (void *)PyBytes_AS_STRING(scalar); } if (_CHK(Unicode)) { /* Treat this the same as the NPY_UNICODE base class */ diff --git a/numpy/core/src/umath/_rational_tests.c.src 
b/numpy/core/src/umath/_rational_tests.c.src index cbb6d9d17..e611a0847 100644 --- a/numpy/core/src/umath/_rational_tests.c.src +++ b/numpy/core/src/umath/_rational_tests.c.src @@ -406,8 +406,9 @@ pyrational_new(PyTypeObject* type, PyObject* args, PyObject* kwds) { Py_INCREF(x[0]); return x[0]; } - else if (PyString_Check(x[0])) { - const char* s = PyString_AS_STRING(x[0]); + // TODO: allow construction from unicode strings + else if (PyBytes_Check(x[0])) { + const char* s = PyBytes_AS_STRING(x[0]); rational x; if (scan_rational(&s,&x)) { const char* p; diff --git a/numpy/core/src/umath/_umath_tests.c.src b/numpy/core/src/umath/_umath_tests.c.src index 932c3b5ab..3ab89d6a5 100644 --- a/numpy/core/src/umath/_umath_tests.c.src +++ b/numpy/core/src/umath/_umath_tests.c.src @@ -480,7 +480,7 @@ UMath_Tests_test_signature(PyObject *NPY_UNUSED(dummy), PyObject *args) return NULL; } - if (PyString_Check(signature)) { + if (PyBytes_Check(signature)) { sig_str = signature; } else if (PyUnicode_Check(signature)) { sig_str = PyUnicode_AsUTF8String(signature); @@ -493,7 +493,7 @@ UMath_Tests_test_signature(PyObject *NPY_UNUSED(dummy), PyObject *args) NULL, NULL, NULL, 0, nin, nout, PyUFunc_None, "no name", "doc:none", - 1, PyString_AS_STRING(sig_str)); + 1, PyBytes_AS_STRING(sig_str)); if (sig_str != signature) { Py_DECREF(sig_str); } -- cgit v1.2.1