| author    | Sebastian Berg <sebastian@sipsolutions.net> | 2020-09-30 13:55:54 -0500 |
|-----------|---------------------------------------------|---------------------------|
| committer | Sebastian Berg <sebastian@sipsolutions.net> | 2020-10-01 10:57:06 -0500 |
| commit    | a2e76ff3dc1e19203b2529d939f489ca04ab98a0 (patch) | |
| tree      | eb1439daeb26ce151a20484fac6b83c291d3a840 | |
| parent    | 60945085cfd1abf4e04db461040bc675f1d325c7 (diff) | |
| download  | numpy-a2e76ff3dc1e19203b2529d939f489ca04ab98a0.tar.gz | |
API: Special case how NumPy scalars are coerced to signed integers
This removes one of the larger changes to array-coercion, under which
NumPy scalars were always coerced the way a 0-D array would be
(i.e. using normal casting). When the target dtype is explicitly a
signed integer, `scalar.__int__()` is now used instead
(as was the case previously).
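
For illustration, the two conversion paths differ for values such as NaN;
a minimal sketch (not part of this commit; the exact message and warning
behaviour may vary by NumPy version):

    import numpy as np

    # `scalar.__int__()` path: Python refuses to turn NaN into an int.
    try:
        int(np.float64(np.nan))
    except ValueError as exc:
        print(exc)  # e.g. "cannot convert float NaN to integer"

    # Casting path: yields an undefined value instead of raising
    # (newer NumPy versions may emit a RuntimeWarning here).
    print(np.array(np.nan).astype(np.int64))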
Since this case was previously handled differently, a *single* scalar
is still converted using casting, so:

    np.array(np.float64(np.nan), dtype=np.int64)

succeeds, while the equivalent operations fail, such as:

    np.array([np.float64(np.nan)], dtype=np.int64)
    arr1d_int64[()] = np.float64(np.nan)
    np.array(np.array(np.nan), dtype=np.int64)
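
A minimal sketch of the resulting asymmetry (illustrative only; the error
types are hedged since they may change between versions):

    import numpy as np

    scalar = np.float64(np.nan)

    # Special-cased: a bare NumPy scalar with a signed integer dtype
    # is cast, so this succeeds with an undefined value.
    print(np.array(scalar, dtype=np.int64))

    # The list version goes through `scalar.__int__()` and raises.
    try:
        np.array([scalar], dtype=np.int64)
    except (ValueError, OverflowError) as exc:
        print(type(exc).__name__, exc)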
This does not affect Python scalars, which always raise, because
they are always converted using `scalar.__int__()`.
Unsigned integers have always supported casting from their signed
equivalents, so the difference is much less visible for them; this
change opts to always use the casting behaviour for unsigned integers.
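
For example, after this change the following no longer raises; the stored
value is undefined (a sketch based on the release note in the diff below):

    import numpy as np

    # Unsigned integers always use the casting behaviour.
    print(np.array([np.float64(np.nan)], dtype=np.uint64))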
The main reason for this change is to help pandas:
https://github.com/pandas-dev/pandas/issues/35481
-rw-r--r--   doc/release/upcoming_changes/16200.compatibility.rst   22
-rw-r--r--   numpy/core/src/multiarray/ctors.c                      28
-rw-r--r--   numpy/core/src/multiarray/dtypemeta.c                  17
-rw-r--r--   numpy/core/tests/test_array_coercion.py                47
4 files changed, 104 insertions, 10 deletions
diff --git a/doc/release/upcoming_changes/16200.compatibility.rst b/doc/release/upcoming_changes/16200.compatibility.rst
index d0fd51265..2bbdd883e 100644
--- a/doc/release/upcoming_changes/16200.compatibility.rst
+++ b/doc/release/upcoming_changes/16200.compatibility.rst
@@ -8,14 +8,26 @@ error::
 
     np.array([np.float64(np.nan)], dtype=np.int64)
 
-will succeed at this time (this may change) and return an undefined result
-(usually the smallest possible integer). This also affects assignments::
+will succeed and return an undefined result (usually the smallest possible
+integer). This also affects assignments::
 
     arr[0] = np.float64(np.nan)
 
-Note, this already happened for ``np.array(np.float64(np.nan), dtype=np.int64)``
-and that the behaviour is unchanged for ``np.nan`` itself which is a Python
-float.
+At this time, NumPy retains the behaviour for::
+
+    np.array(np.float64(np.nan), dtype=np.int64)
+
+The above changes do not affect Python scalars::
+
+    np.array([float("NaN")], dtype=np.int64)
+
+remains unaffected (``np.nan`` is a Python ``float``, not a NumPy one).
+Unlike signed integers, unsigned integers do not retain this special case,
+since they always behaved more like casting.
+The following code stops raising an error::
+
+    np.array([np.float64(np.nan)], dtype=np.uint64)
+
 To avoid backward compatibility issues, at this time assignment from
 ``datetime64`` scalar to strings of too short length remains supported.
 This means that ``np.asarray(np.datetime64("2020-10-10"), dtype="S5")``
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index 956dfd3bb..f543d02d0 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -1460,6 +1460,31 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth,
                 ((PyVoidScalarObject *)op)->flags,
                 NULL, op);
     }
+    else if (cache == 0 && newtype != NULL &&
+            PyDataType_ISSIGNED(newtype) && PyArray_IsScalar(op, Generic)) {
+        assert(ndim == 0);
+        /*
+         * This is a (possible) inconsistency where:
+         *
+         *     np.array(np.float64(np.nan), dtype=np.int64)
+         *
+         * behaves differently from:
+         *
+         *     np.array([np.float64(np.nan)], dtype=np.int64)
+         *     arr1d_int64[0] = np.float64(np.nan)
+         *     np.array(np.array(np.nan), dtype=np.int64)
+         *
+         * by not raising an error and instead using typical casting.
+         * The error is desirable, but to always error seems like a
+         * larger change to be considered at some other time and it is
+         * undesirable that 0-D arrays behave differently from scalars.
+         * This retains the behaviour, largely due to issues in pandas
+         * which relied on a try/except (although hopefully that will
+         * have a better solution at some point):
+         * https://github.com/pandas-dev/pandas/issues/35481
+         */
+        return PyArray_FromScalar(op, dtype);
+    }
 
     /* There was no array (or array-like) passed in directly. */
     if ((flags & NPY_ARRAY_WRITEBACKIFCOPY) ||
@@ -1480,7 +1505,8 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth,
     if (cache == NULL) {
         /* This is a single item. Set it directly. */
         assert(ndim == 0);
-        if (PyArray_Pack(PyArray_DESCR(ret), PyArray_DATA(ret), op) < 0) {
+
+        if (PyArray_Pack(PyArray_DESCR(ret), PyArray_BYTES(ret), op) < 0) {
             Py_DECREF(ret);
             return NULL;
         }
diff --git a/numpy/core/src/multiarray/dtypemeta.c b/numpy/core/src/multiarray/dtypemeta.c
index dbe5ba476..109f4a225 100644
--- a/numpy/core/src/multiarray/dtypemeta.c
+++ b/numpy/core/src/multiarray/dtypemeta.c
@@ -305,6 +305,18 @@ python_builtins_are_known_scalar_types(
 
 
 static int
+signed_integers_is_known_scalar_types(
+        PyArray_DTypeMeta *cls, PyTypeObject *pytype)
+{
+    if (python_builtins_are_known_scalar_types(cls, pytype)) {
+        return 1;
+    }
+    /* Convert our scalars (raise on too large unsigned and NaN, etc.) */
+    return PyType_IsSubtype(pytype, &PyGenericArrType_Type);
+}
+
+
+static int
 datetime_known_scalar_types(
         PyArray_DTypeMeta *cls, PyTypeObject *pytype)
 {
@@ -549,6 +561,11 @@ dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr)
     dtype_class->common_dtype = default_builtin_common_dtype;
     dtype_class->common_instance = NULL;
 
+    if (PyTypeNum_ISSIGNED(dtype_class->type_num)) {
+        /* Convert our scalars (raise on too large unsigned and NaN, etc.) */
+        dtype_class->is_known_scalar_type = signed_integers_is_known_scalar_types;
+    }
+
     if (PyTypeNum_ISUSERDEF(descr->type_num)) {
         dtype_class->common_dtype = legacy_userdtype_common_dtype_function;
     }
diff --git a/numpy/core/tests/test_array_coercion.py b/numpy/core/tests/test_array_coercion.py
index e0480c7bf..ce66589ca 100644
--- a/numpy/core/tests/test_array_coercion.py
+++ b/numpy/core/tests/test_array_coercion.py
@@ -309,6 +309,13 @@ class TestScalarDiscovery:
             # coercion should also raise (error type may change)
             with pytest.raises(Exception):
                 np.array(scalar, dtype=dtype)
+
+            if (isinstance(scalar, rational) and
+                    np.issubdtype(dtype, np.signedinteger)):
+                return
+
+            with pytest.raises(Exception):
+                np.array([scalar], dtype=dtype)
             # assignment should also raise
             res = np.zeros((), dtype=dtype)
             with pytest.raises(Exception):
@@ -340,6 +347,30 @@ class TestScalarDiscovery:
             assert discovered_dtype == dtype
             assert discovered_dtype.itemsize == dtype.itemsize
 
+    @pytest.mark.parametrize("dtype", np.typecodes["Integer"])
+    def test_scalar_to_int_coerce_does_not_cast(self, dtype):
+        """
+        Signed integers are currently different in that they do not cast
+        other NumPy scalars, but instead use scalar.__int__(). The hardcoded
+        exception to this rule is `np.array(scalar, dtype=integer)`.
+        """
+        dtype = np.dtype(dtype)
+        invalid_int = np.ulonglong(-1)
+
+        float_nan = np.float64(np.nan)
+
+        for scalar in [float_nan, invalid_int]:
+            # This is a special case using casting logic and thus not failing:
+            coerced = np.array(scalar, dtype=dtype)
+            cast = np.array(scalar).astype(dtype)
+            assert_array_equal(coerced, cast)
+
+            # However, these fail:
+            with pytest.raises((ValueError, OverflowError)):
+                np.array([scalar], dtype=dtype)
+            with pytest.raises((ValueError, OverflowError)):
+                cast[()] = scalar
+
 
 class TestTimeScalars:
     @pytest.mark.parametrize("dtype", [np.int64, np.float32])
@@ -349,13 +380,21 @@ class TestTimeScalars:
              param(np.datetime64("NaT", "generic"), id="datetime64[generic](NaT)"),
              param(np.datetime64(1, "D"), id="datetime64[D]")],)
     def test_coercion_basic(self, dtype, scalar):
+        # Note the `[scalar]` is there because np.array(scalar) uses stricter
+        # `scalar.__int__()` rules for backward compatibility right now.
         arr = np.array(scalar, dtype=dtype)
         cast = np.array(scalar).astype(dtype)
-        ass = np.ones((), dtype=dtype)
-        ass[()] = scalar  # raises, as would np.array([scalar], dtype=dtype)
-
         assert_array_equal(arr, cast)
-        assert_array_equal(cast, cast)
+
+        ass = np.ones((), dtype=dtype)
+        if issubclass(dtype, np.integer):
+            with pytest.raises(TypeError):
+                # raises, as would np.array([scalar], dtype=dtype); this is
+                # a conversion from times, but uses the integer behaviour.
+                ass[()] = scalar
+        else:
+            ass[()] = scalar
+            assert_array_equal(ass, cast)
 
     @pytest.mark.parametrize("dtype", [np.int64, np.float32])
     @pytest.mark.parametrize("scalar",
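
Taken together, the behaviour exercised by the new tests can be reproduced
standalone; a rough sketch, hedging the error types the same way the test
suite does:

    import numpy as np
    from numpy.testing import assert_array_equal

    dtype = np.dtype(np.int64)
    scalar = np.float64(np.nan)

    # The hardcoded exception: a bare NumPy scalar coerced to a signed
    # integer dtype uses casting and succeeds with an undefined value.
    coerced = np.array(scalar, dtype=dtype)
    cast = np.array(scalar).astype(dtype)
    assert_array_equal(coerced, cast)

    # Every other path uses `scalar.__int__()` and raises for NaN.
    def assign(value):
        arr = np.zeros((), dtype=dtype)
        arr[()] = value

    for operation in (lambda: np.array([scalar], dtype=dtype),
                      lambda: assign(scalar)):
        try:
            operation()
        except (ValueError, OverflowError):
            pass  # expected: NaN cannot be converted via __int__()
        else:
            raise AssertionError("expected a conversion error")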