Merge pull request #17410 from seberg/scalars-to-int-array-special

API: Special case how numpy scalars are coerced to signed integer
author: Matti Picus <matti.picus@gmail.com> 2020-10-07 21:50:34 +0300
committer: GitHub <noreply@github.com> 2020-10-07 21:50:34 +0300
commit: 18af0e10878fe49a893fd576317dabd424c7ca16 (patch)
tree: 7421567bcb95f8dab786f40b6dff14721bee539b
parent: 4ebbaaeff09aca51e0006e62026a35e020c3b49f (diff)
parent: a2e76ff3dc1e19203b2529d939f489ca04ab98a0 (diff)
download: numpy-18af0e10878fe49a893fd576317dabd424c7ca16.tar.gz
4 files changed, 104 insertions, 10 deletions
diff --git a/doc/release/upcoming_changes/16200.compatibility.rst b/doc/release/upcoming_changes/16200.compatibility.rst
index d0fd51265..2bbdd883e 100644
--- a/doc/release/upcoming_changes/16200.compatibility.rst
+++ b/doc/release/upcoming_changes/16200.compatibility.rst
@@ -8,14 +8,26 @@ error::
 
     np.array([np.float64(np.nan)], dtype=np.int64)
 
-will succeed at this time (this may change) and return an undefined result
-(usually the smallest possible integer).  This also affects assignments::
+will succeed and return an undefined result (usually the smallest possible
+integer).  This also affects assignments::
 
     arr[0] = np.float64(np.nan)
 
-Note, this already happened for ``np.array(np.float64(np.nan), dtype=np.int64)``
-and that the behaviour is unchanged for ``np.nan`` itself which is a Python
-float.
+At this time, NumPy retains the behaviour for::
+
+    np.array(np.float64(np.nan), dtype=np.int64)
+
+The above changes do not affect Python scalars::
+
+    np.array([float("NaN")], dtype=np.int64)
+
+remains unaffected (``np.nan`` is a Python ``float``, not a NumPy one).
+Unlike signed integers, unsigned integers do not retain this special case,
+since they always behaved more like casting.
+The following code stops raising an error::
+
+    np.array([np.float64(np.nan)], dtype=np.uint64)
+
 To avoid backward compatibility issues, at this time assignment from
 ``datetime64`` scalar to strings of too short length remains supported.
 This means that ``np.asarray(np.datetime64("2020-10-10"), dtype="S5")``
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index 55c0a31f0..b09ec9f8e 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -1461,6 +1461,31 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth,
                 ((PyVoidScalarObject *)op)->flags,
                 NULL, op);
     }
+    else if (cache == 0 && newtype != NULL &&
+            PyDataType_ISSIGNED(newtype) && PyArray_IsScalar(op, Generic)) {
+        assert(ndim == 0);
+        /*
+         * This is a (possible) inconsistency where:
+         *
+         *     np.array(np.float64(np.nan), dtype=np.int64)
+         *
+         * behaves differently from:
+         *
+         *     np.array([np.float64(np.nan)], dtype=np.int64)
+         *     arr1d_int64[0] = np.float64(np.nan)
+         *     np.array(np.array(np.nan), dtype=np.int64)
+         *
+         * by not raising an error instead of using typical casting.
+         * The error is desirable, but to always error seems like a
+         * larger change to be considered at some other time and it is
+         * undesirable that 0-D arrays behave differently from scalars.
+         * This retains the behaviour, largely due to issues in pandas
+         * which relied on a try/except (although hopefully that will
+         * have a better solution at some point):
+         * https://github.com/pandas-dev/pandas/issues/35481
+         */
+        return PyArray_FromScalar(op, dtype);
+    }
 
     /* There was no array (or array-like) passed in directly. */
     if ((flags & NPY_ARRAY_WRITEBACKIFCOPY) ||
@@ -1483,7 +1508,8 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth,
     if (cache == NULL) {
         /* This is a single item. Set it directly. */
         assert(ndim == 0);
-        if (PyArray_Pack(PyArray_DESCR(ret), PyArray_DATA(ret), op) < 0) {
+
+        if (PyArray_Pack(PyArray_DESCR(ret), PyArray_BYTES(ret), op) < 0) {
             Py_DECREF(ret);
             return NULL;
         }
diff --git a/numpy/core/src/multiarray/dtypemeta.c b/numpy/core/src/multiarray/dtypemeta.c
index 84d9dc381..af14bb7e5 100644
--- a/numpy/core/src/multiarray/dtypemeta.c
+++ b/numpy/core/src/multiarray/dtypemeta.c
@@ -305,6 +305,18 @@ python_builtins_are_known_scalar_types(
 
 
 static int
+signed_integers_is_known_scalar_types(
+        PyArray_DTypeMeta *cls, PyTypeObject *pytype)
+{
+    if (python_builtins_are_known_scalar_types(cls, pytype)) {
+        return 1;
+    }
+    /* Convert our scalars (raise on too large unsigned and NaN, etc.) */
+    return PyType_IsSubtype(pytype, &PyGenericArrType_Type);
+}
+
+
+static int
 datetime_known_scalar_types(
         PyArray_DTypeMeta *cls, PyTypeObject *pytype)
 {
@@ -567,6 +579,11 @@ dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr)
     dtype_class->common_dtype = default_builtin_common_dtype;
     dtype_class->common_instance = NULL;
 
+    if (PyTypeNum_ISSIGNED(dtype_class->type_num)) {
+        /* Convert our scalars (raise on too large unsigned and NaN, etc.) */
+        dtype_class->is_known_scalar_type = signed_integers_is_known_scalar_types;
+    }
+
     if (PyTypeNum_ISUSERDEF(descr->type_num)) {
         dtype_class->common_dtype = legacy_userdtype_common_dtype_function;
     }
diff --git a/numpy/core/tests/test_array_coercion.py b/numpy/core/tests/test_array_coercion.py
index 79954b998..78def9360 100644
--- a/numpy/core/tests/test_array_coercion.py
+++ b/numpy/core/tests/test_array_coercion.py
@@ -309,6 +309,13 @@ class TestScalarDiscovery:
                 # coercion should also raise (error type may change)
                 with pytest.raises(Exception):
                     np.array(scalar, dtype=dtype)
+
+                if (isinstance(scalar, rational) and
+                        np.issubdtype(dtype, np.signedinteger)):
+                    return
+
+                with pytest.raises(Exception):
+                    np.array([scalar], dtype=dtype)
                 # assignment should also raise
                 res = np.zeros((), dtype=dtype)
                 with pytest.raises(Exception):
@@ -340,6 +347,30 @@ class TestScalarDiscovery:
         assert discovered_dtype == dtype
         assert discovered_dtype.itemsize == dtype.itemsize
 
+    @pytest.mark.parametrize("dtype", np.typecodes["Integer"])
+    def test_scalar_to_int_coerce_does_not_cast(self, dtype):
+        """
+        Signed integers are currently different in that they do not cast other
+        NumPy scalars, but instead use scalar.__int__(). The hardcoded
+        exception to this rule is `np.array(scalar, dtype=integer)`.
+        """
+        dtype = np.dtype(dtype)
+        invalid_int = np.ulonglong(-1)
+
+        float_nan = np.float64(np.nan)
+
+        for scalar in [float_nan, invalid_int]:
+            # This is a special case using casting logic and thus not failing:
+            coerced = np.array(scalar, dtype=dtype)
+            cast = np.array(scalar).astype(dtype)
+            assert_array_equal(coerced, cast)
+
+            # However these fail:
+            with pytest.raises((ValueError, OverflowError)):
+                np.array([scalar], dtype=dtype)
+            with pytest.raises((ValueError, OverflowError)):
+                cast[()] = scalar
+
 
 class TestTimeScalars:
     @pytest.mark.parametrize("dtype", [np.int64, np.float32])
@@ -349,13 +380,21 @@ class TestTimeScalars:
              param(np.datetime64("NaT", "generic"), id="datetime64[generic](NaT)"),
              param(np.datetime64(1, "D"), id="datetime64[D]")],)
     def test_coercion_basic(self, dtype, scalar):
+        # Note: for backward compatibility np.array(scalar) casts right now;
+        # `[scalar]` and assignment use stricter `scalar.__int__()` rules.
         arr = np.array(scalar, dtype=dtype)
         cast = np.array(scalar).astype(dtype)
-        ass = np.ones((), dtype=dtype)
-        ass[()] = scalar  # raises, as would np.array([scalar], dtype=dtype)
-
         assert_array_equal(arr, cast)
-        assert_array_equal(cast, cast)
+
+        ass = np.ones((), dtype=dtype)
+        if issubclass(dtype, np.integer):
+            with pytest.raises(TypeError):
+                # raises, as would np.array([scalar], dtype=dtype), this is
+                # conversion from times, but behaviour of integers.
+                ass[()] = scalar
+        else:
+            ass[()] = scalar
+            assert_array_equal(ass, cast)
 
     @pytest.mark.parametrize("dtype", [np.int64, np.float32])
     @pytest.mark.parametrize("scalar",
author	Matti Picus <matti.picus@gmail.com>	2020-10-07 21:50:34 +0300
committer	GitHub <noreply@github.com>	2020-10-07 21:50:34 +0300
commit	18af0e10878fe49a893fd576317dabd424c7ca16 (patch)
tree	7421567bcb95f8dab786f40b6dff14721bee539b
parent	4ebbaaeff09aca51e0006e62026a35e020c3b49f (diff)
parent	a2e76ff3dc1e19203b2529d939f489ca04ab98a0 (diff)
download	numpy-18af0e10878fe49a893fd576317dabd424c7ca16.tar.gz