| author    | Sebastian Berg <sebastian@sipsolutions.net> | 2020-09-30 13:55:54 -0500 |
|-----------|---------------------------------------------|---------------------------|
| committer | Sebastian Berg <sebastian@sipsolutions.net> | 2020-10-01 10:57:06 -0500 |
| commit    | a2e76ff3dc1e19203b2529d939f489ca04ab98a0 (patch) | |
| tree      | eb1439daeb26ce151a20484fac6b83c291d3a840 | |
| parent    | 60945085cfd1abf4e04db461040bc675f1d325c7 (diff) | |
| download  | numpy-a2e76ff3dc1e19203b2529d939f489ca04ab98a0.tar.gz | |
API: Special case how NumPy scalars are coerced to signed integers
This removes one of the larger changes to array-coercion, under which
NumPy scalars were always coerced the way a 0-D array would be
(i.e. using normal casting). When the target dtype is explicitly a
signed integer, `scalar.__int__()` is now used instead
(as was the case previously).
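
For illustration, the two conversion paths differ for values such as NaN;
a minimal sketch (not part of this commit; the exact message and warning
behaviour may vary by NumPy version):

    import numpy as np

    # `scalar.__int__()` path: Python refuses to turn NaN into an int.
    try:
        int(np.float64(np.nan))
    except ValueError as exc:
        print(exc)  # e.g. "cannot convert float NaN to integer"

    # Casting path: yields an undefined value instead of raising
    # (newer NumPy versions may emit a RuntimeWarning here).
    print(np.array(np.nan).astype(np.int64))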
Since this case was previously handled differently, a *single* scalar
is still converted using casting, so:

    np.array(np.float64(np.nan), dtype=np.int64)

succeeds, while the equivalent operations fail, such as:

    np.array([np.float64(np.nan)], dtype=np.int64)
    arr1d_int64[()] = np.float64(np.nan)
    np.array(np.array(np.nan), dtype=np.int64)
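
A minimal sketch of the resulting asymmetry (illustrative only; the error
types are hedged since they may change between versions):

    import numpy as np

    scalar = np.float64(np.nan)

    # Special-cased: a bare NumPy scalar with a signed integer dtype
    # is cast, so this succeeds with an undefined value.
    print(np.array(scalar, dtype=np.int64))

    # The list version goes through `scalar.__int__()` and raises.
    try:
        np.array([scalar], dtype=np.int64)
    except (ValueError, OverflowError) as exc:
        print(type(exc).__name__, exc)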
This does not affect Python scalars, which always raise, because
they are always converted using `scalar.__int__()`.
Unsigned integers have always supported casting from their signed
equivalents, so the difference is much less visible for them; this
change opts to always use the casting behaviour for unsigned integers.
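
For example, after this change the following no longer raises; the stored
value is undefined (a sketch based on the release note in the diff below):

    import numpy as np

    # Unsigned integers always use the casting behaviour.
    print(np.array([np.float64(np.nan)], dtype=np.uint64))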
The main reason for this change is to help pandas:
https://github.com/pandas-dev/pandas/issues/35481
-rw-r--r--   doc/release/upcoming_changes/16200.compatibility.rst   22
-rw-r--r--   numpy/core/src/multiarray/ctors.c                      28
-rw-r--r--   numpy/core/src/multiarray/dtypemeta.c                  17
-rw-r--r--   numpy/core/tests/test_array_coercion.py                47
4 files changed, 104 insertions, 10 deletions
diff --git a/doc/release/upcoming_changes/16200.compatibility.rst b/doc/release/upcoming_changes/16200.compatibility.rst
index d0fd51265..2bbdd883e 100644
--- a/doc/release/upcoming_changes/16200.compatibility.rst
+++ b/doc/release/upcoming_changes/16200.compatibility.rst
@@ -8,14 +8,26 @@ error::
 
     np.array([np.float64(np.nan)], dtype=np.int64)
 
-will succeed at this time (this may change) and return an undefined result
-(usually the smallest possible integer). This also affects assignments::
+will succeed and return an undefined result (usually the smallest possible
+integer). This also affects assignments::
 
     arr[0] = np.float64(np.nan)
 
-Note, this already happened for ``np.array(np.float64(np.nan), dtype=np.int64)``
-and that the behaviour is unchanged for ``np.nan`` itself which is a Python
-float.
+At this time, NumPy retains the behaviour for::
+
+    np.array(np.float64(np.nan), dtype=np.int64)
+
+The above changes do not affect Python scalars::
+
+    np.array([float("NaN")], dtype=np.int64)
+
+remains unaffected (``np.nan`` is a Python ``float``, not a NumPy one).
+Unlike signed integers, unsigned integers do not retain this special case,
+since they always behaved more like casting.
+The following code stops raising an error::
+
+    np.array([np.float64(np.nan)], dtype=np.uint64)
+
 To avoid backward compatibility issues, at this time assignment from
 ``datetime64`` scalar to strings of too short length remains supported.
 This means that ``np.asarray(np.datetime64("2020-10-10"), dtype="S5")``
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index 956dfd3bb..f543d02d0 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -1460,6 +1460,31 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth,
                 ((PyVoidScalarObject *)op)->flags,
                 NULL, op);
     }
+    else if (cache == 0 && newtype != NULL &&
+            PyDataType_ISSIGNED(newtype) && PyArray_IsScalar(op, Generic)) {
+        assert(ndim == 0);
+        /*
+         * This is a (possible) inconsistency where:
+         *
+         *     np.array(np.float64(np.nan), dtype=np.int64)
+         *
+         * behaves differently from:
+         *
+         *     np.array([np.float64(np.nan)], dtype=np.int64)
+         *     arr1d_int64[0] = np.float64(np.nan)
+         *     np.array(np.array(np.nan), dtype=np.int64)
+         *
+         * by not raising an error and instead using typical casting.
+         * The error is desirable, but to always error seems like a
+         * larger change to be considered at some other time and it is
+         * undesirable that 0-D arrays behave differently from scalars.
+         * This retains the behaviour, largely due to issues in pandas
+         * which relied on a try/except (although hopefully that will
+         * have a better solution at some point):
+         * https://github.com/pandas-dev/pandas/issues/35481
+         */
+        return PyArray_FromScalar(op, dtype);
+    }
 
     /* There was no array (or array-like) passed in directly. */
     if ((flags & NPY_ARRAY_WRITEBACKIFCOPY) ||
@@ -1480,7 +1505,8 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth,
     if (cache == NULL) {
         /* This is a single item. Set it directly. */
         assert(ndim == 0);
-        if (PyArray_Pack(PyArray_DESCR(ret), PyArray_DATA(ret), op) < 0) {
+
+        if (PyArray_Pack(PyArray_DESCR(ret), PyArray_BYTES(ret), op) < 0) {
             Py_DECREF(ret);
             return NULL;
         }
diff --git a/numpy/core/src/multiarray/dtypemeta.c b/numpy/core/src/multiarray/dtypemeta.c
index dbe5ba476..109f4a225 100644
--- a/numpy/core/src/multiarray/dtypemeta.c
+++ b/numpy/core/src/multiarray/dtypemeta.c
@@ -305,6 +305,18 @@ python_builtins_are_known_scalar_types(
 
 
 static int
+signed_integers_is_known_scalar_types(
+        PyArray_DTypeMeta *cls, PyTypeObject *pytype)
+{
+    if (python_builtins_are_known_scalar_types(cls, pytype)) {
+        return 1;
+    }
+    /* Convert our scalars (raise on too large unsigned and NaN, etc.) */
+    return PyType_IsSubtype(pytype, &PyGenericArrType_Type);
+}
+
+
+static int
 datetime_known_scalar_types(
         PyArray_DTypeMeta *cls, PyTypeObject *pytype)
 {
@@ -549,6 +561,11 @@ dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr)
     dtype_class->common_dtype = default_builtin_common_dtype;
     dtype_class->common_instance = NULL;
 
+    if (PyTypeNum_ISSIGNED(dtype_class->type_num)) {
+        /* Convert our scalars (raise on too large unsigned and NaN, etc.) */
+        dtype_class->is_known_scalar_type = signed_integers_is_known_scalar_types;
+    }
+
     if (PyTypeNum_ISUSERDEF(descr->type_num)) {
         dtype_class->common_dtype = legacy_userdtype_common_dtype_function;
     }
diff --git a/numpy/core/tests/test_array_coercion.py b/numpy/core/tests/test_array_coercion.py
index e0480c7bf..ce66589ca 100644
--- a/numpy/core/tests/test_array_coercion.py
+++ b/numpy/core/tests/test_array_coercion.py
@@ -309,6 +309,13 @@ class TestScalarDiscovery:
             # coercion should also raise (error type may change)
             with pytest.raises(Exception):
                 np.array(scalar, dtype=dtype)
+
+            if (isinstance(scalar, rational) and
+                    np.issubdtype(dtype, np.signedinteger)):
+                return
+
+            with pytest.raises(Exception):
+                np.array([scalar], dtype=dtype)
             # assignment should also raise
             res = np.zeros((), dtype=dtype)
             with pytest.raises(Exception):
@@ -340,6 +347,30 @@ class TestScalarDiscovery:
             assert discovered_dtype == dtype
             assert discovered_dtype.itemsize == dtype.itemsize
 
+    @pytest.mark.parametrize("dtype", np.typecodes["Integer"])
+    def test_scalar_to_int_coerce_does_not_cast(self, dtype):
+        """
+        Signed integers are currently different in that they do not cast
+        other NumPy scalars, but instead use scalar.__int__(). The hardcoded
+        exception to this rule is `np.array(scalar, dtype=integer)`.
+        """
+        dtype = np.dtype(dtype)
+        invalid_int = np.ulonglong(-1)
+
+        float_nan = np.float64(np.nan)
+
+        for scalar in [float_nan, invalid_int]:
+            # This is a special case using casting logic and thus not failing:
+            coerced = np.array(scalar, dtype=dtype)
+            cast = np.array(scalar).astype(dtype)
+            assert_array_equal(coerced, cast)
+
+            # However, these fail:
+            with pytest.raises((ValueError, OverflowError)):
+                np.array([scalar], dtype=dtype)
+            with pytest.raises((ValueError, OverflowError)):
+                cast[()] = scalar
+
 
 class TestTimeScalars:
     @pytest.mark.parametrize("dtype", [np.int64, np.float32])
@@ -349,13 +380,21 @@ class TestTimeScalars:
              param(np.datetime64("NaT", "generic"), id="datetime64[generic](NaT)"),
              param(np.datetime64(1, "D"), id="datetime64[D]")],)
     def test_coercion_basic(self, dtype, scalar):
+        # Note the `[scalar]` is there because np.array(scalar) uses stricter
+        # `scalar.__int__()` rules for backward compatibility right now.
         arr = np.array(scalar, dtype=dtype)
         cast = np.array(scalar).astype(dtype)
-        ass = np.ones((), dtype=dtype)
-        ass[()] = scalar  # raises, as would np.array([scalar], dtype=dtype)
-
         assert_array_equal(arr, cast)
-        assert_array_equal(cast, cast)
+
+        ass = np.ones((), dtype=dtype)
+        if issubclass(dtype, np.integer):
+            with pytest.raises(TypeError):
+                # raises, as would np.array([scalar], dtype=dtype); this is
+                # a conversion from times, but uses the integer behaviour.
+                ass[()] = scalar
+        else:
+            ass[()] = scalar
+            assert_array_equal(ass, cast)
 
     @pytest.mark.parametrize("dtype", [np.int64, np.float32])
     @pytest.mark.parametrize("scalar",
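
Taken together, the behaviour exercised by the new tests can be reproduced
standalone; a rough sketch, hedging the error types the same way the test
suite does:

    import numpy as np
    from numpy.testing import assert_array_equal

    dtype = np.dtype(np.int64)
    scalar = np.float64(np.nan)

    # The hardcoded exception: a bare NumPy scalar coerced to a signed
    # integer dtype uses casting and succeeds with an undefined value.
    coerced = np.array(scalar, dtype=dtype)
    cast = np.array(scalar).astype(dtype)
    assert_array_equal(coerced, cast)

    # Every other path uses `scalar.__int__()` and raises for NaN.
    def assign(value):
        arr = np.zeros((), dtype=dtype)
        arr[()] = value

    for operation in (lambda: np.array([scalar], dtype=dtype),
                      lambda: assign(scalar)):
        try:
            operation()
        except (ValueError, OverflowError):
            pass  # expected: NaN cannot be converted via __int__()
        else:
            raise AssertionError("expected a conversion error")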