diff options
author | Matti Picus <matti.picus@gmail.com> | 2020-10-07 21:50:34 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-10-07 21:50:34 +0300 |
commit | 18af0e10878fe49a893fd576317dabd424c7ca16 (patch) | |
tree | 7421567bcb95f8dab786f40b6dff14721bee539b | |
parent | 4ebbaaeff09aca51e0006e62026a35e020c3b49f (diff) | |
parent | a2e76ff3dc1e19203b2529d939f489ca04ab98a0 (diff) | |
download | numpy-18af0e10878fe49a893fd576317dabd424c7ca16.tar.gz |
Merge pull request #17410 from seberg/scalars-to-int-array-special
API: Special case how numpy scalars are coerced to signed integer
-rw-r--r-- | doc/release/upcoming_changes/16200.compatibility.rst | 22 | ||||
-rw-r--r-- | numpy/core/src/multiarray/ctors.c | 28 | ||||
-rw-r--r-- | numpy/core/src/multiarray/dtypemeta.c | 17 | ||||
-rw-r--r-- | numpy/core/tests/test_array_coercion.py | 47 |
4 files changed, 104 insertions, 10 deletions
diff --git a/doc/release/upcoming_changes/16200.compatibility.rst b/doc/release/upcoming_changes/16200.compatibility.rst index d0fd51265..2bbdd883e 100644 --- a/doc/release/upcoming_changes/16200.compatibility.rst +++ b/doc/release/upcoming_changes/16200.compatibility.rst @@ -8,14 +8,26 @@ error:: np.array([np.float64(np.nan)], dtype=np.int64) -will succeed at this time (this may change) and return an undefined result -(usually the smallest possible integer). This also affects assignments:: +will succeed and return an undefined result (usually the smallest possible +integer). This also affects assignments:: arr[0] = np.float64(np.nan) -Note, this already happened for ``np.array(np.float64(np.nan), dtype=np.int64)`` -and that the behaviour is unchanged for ``np.nan`` itself which is a Python -float. +At this time, NumPy retains the behaviour for:: + + np.array(np.float64(np.nan), dtype=np.int64) + +The above changes do not affect Python scalars: + + np.array([float("NaN")], dtype=np.int64) + +remains unaffected (``np.nan`` is a Python ``float``, not a NumPy one). +Unlike signed integers, unsigned integers do not retain this special case, +since they always behaved more like casting. +The following code stops raising an error:: + + np.array([np.float64(np.nan)], dtype=np.uint64) + To avoid backward compatibility issues, at this time assignment from ``datetime64`` scalar to strings of too short length remains supported. 
This means that ``np.asarray(np.datetime64("2020-10-10"), dtype="S5")`` diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index 55c0a31f0..b09ec9f8e 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -1461,6 +1461,31 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth, ((PyVoidScalarObject *)op)->flags, NULL, op); } + else if (cache == 0 && newtype != NULL && + PyDataType_ISSIGNED(newtype) && PyArray_IsScalar(op, Generic)) { + assert(ndim == 0); + /* + * This is an (possible) inconsistency where: + * + * np.array(np.float64(np.nan), dtype=np.int64) + * + * behaves differently from: + * + * np.array([np.float64(np.nan)], dtype=np.int64) + * arr1d_int64[0] = np.float64(np.nan) + * np.array(np.array(np.nan), dtype=np.int64) + * + * by not raising an error instead of using typical casting. + * The error is desirable, but to always error seems like a + * larger change to be considered at some other time and it is + * undesirable that 0-D arrays behave differently from scalars. + * This retains the behaviour, largely due to issues in pandas + * which relied on a try/except (although hopefully that will + * have a better solution at some point): + * https://github.com/pandas-dev/pandas/issues/35481 + */ + return PyArray_FromScalar(op, dtype); + } /* There was no array (or array-like) passed in directly. */ if ((flags & NPY_ARRAY_WRITEBACKIFCOPY) || @@ -1483,7 +1508,8 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth, if (cache == NULL) { /* This is a single item. Set it directly. 
*/ assert(ndim == 0); - if (PyArray_Pack(PyArray_DESCR(ret), PyArray_DATA(ret), op) < 0) { + + if (PyArray_Pack(PyArray_DESCR(ret), PyArray_BYTES(ret), op) < 0) { Py_DECREF(ret); return NULL; } diff --git a/numpy/core/src/multiarray/dtypemeta.c b/numpy/core/src/multiarray/dtypemeta.c index 84d9dc381..af14bb7e5 100644 --- a/numpy/core/src/multiarray/dtypemeta.c +++ b/numpy/core/src/multiarray/dtypemeta.c @@ -305,6 +305,18 @@ python_builtins_are_known_scalar_types( static int +signed_integers_is_known_scalar_types( + PyArray_DTypeMeta *cls, PyTypeObject *pytype) +{ + if (python_builtins_are_known_scalar_types(cls, pytype)) { + return 1; + } + /* Convert our scalars (raise on too large unsigned and NaN, etc.) */ + return PyType_IsSubtype(pytype, &PyGenericArrType_Type); +} + + +static int datetime_known_scalar_types( PyArray_DTypeMeta *cls, PyTypeObject *pytype) { @@ -567,6 +579,11 @@ dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr) dtype_class->common_dtype = default_builtin_common_dtype; dtype_class->common_instance = NULL; + if (PyTypeNum_ISSIGNED(dtype_class->type_num)) { + /* Convert our scalars (raise on too large unsigned and NaN, etc.) 
*/ + dtype_class->is_known_scalar_type = signed_integers_is_known_scalar_types; + } + if (PyTypeNum_ISUSERDEF(descr->type_num)) { dtype_class->common_dtype = legacy_userdtype_common_dtype_function; } diff --git a/numpy/core/tests/test_array_coercion.py b/numpy/core/tests/test_array_coercion.py index 79954b998..78def9360 100644 --- a/numpy/core/tests/test_array_coercion.py +++ b/numpy/core/tests/test_array_coercion.py @@ -309,6 +309,13 @@ class TestScalarDiscovery: # coercion should also raise (error type may change) with pytest.raises(Exception): np.array(scalar, dtype=dtype) + + if (isinstance(scalar, rational) and + np.issubdtype(dtype, np.signedinteger)): + return + + with pytest.raises(Exception): + np.array([scalar], dtype=dtype) # assignment should also raise res = np.zeros((), dtype=dtype) with pytest.raises(Exception): @@ -340,6 +347,30 @@ class TestScalarDiscovery: assert discovered_dtype == dtype assert discovered_dtype.itemsize == dtype.itemsize + @pytest.mark.parametrize("dtype", np.typecodes["Integer"]) + def test_scalar_to_int_coerce_does_not_cast(self, dtype): + """ + Signed integers are currently different in that they do not cast other + NumPy scalar, but instead use scalar.__int__(). The hardcoded + exception to this rule is `np.array(scalar, dtype=integer)`. 
+ """ + dtype = np.dtype(dtype) + invalid_int = np.ulonglong(-1) + + float_nan = np.float64(np.nan) + + for scalar in [float_nan, invalid_int]: + # This is a special case using casting logic and thus not failing: + coerced = np.array(scalar, dtype=dtype) + cast = np.array(scalar).astype(dtype) + assert_array_equal(coerced, cast) + + # However these fail: + with pytest.raises((ValueError, OverflowError)): + np.array([scalar], dtype=dtype) + with pytest.raises((ValueError, OverflowError)): + cast[()] = scalar + class TestTimeScalars: @pytest.mark.parametrize("dtype", [np.int64, np.float32]) @@ -349,13 +380,21 @@ class TestTimeScalars: param(np.datetime64("NaT", "generic"), id="datetime64[generic](NaT)"), param(np.datetime64(1, "D"), id="datetime64[D]")],) def test_coercion_basic(self, dtype, scalar): + # Note the `[scalar]` is there because np.array(scalar) uses stricter + # `scalar.__int__()` rules for backward compatibility right now. arr = np.array(scalar, dtype=dtype) cast = np.array(scalar).astype(dtype) - ass = np.ones((), dtype=dtype) - ass[()] = scalar # raises, as would np.array([scalar], dtype=dtype) - assert_array_equal(arr, cast) - assert_array_equal(cast, cast) + + ass = np.ones((), dtype=dtype) + if issubclass(dtype, np.integer): + with pytest.raises(TypeError): + # raises, as would np.array([scalar], dtype=dtype), this is + # conversion from times, but behaviour of integers. + ass[()] = scalar + else: + ass[()] = scalar + assert_array_equal(ass, cast) @pytest.mark.parametrize("dtype", [np.int64, np.float32]) @pytest.mark.parametrize("scalar", |