summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatti Picus <matti.picus@gmail.com>2020-10-07 21:50:34 +0300
committerGitHub <noreply@github.com>2020-10-07 21:50:34 +0300
commit18af0e10878fe49a893fd576317dabd424c7ca16 (patch)
tree7421567bcb95f8dab786f40b6dff14721bee539b
parent4ebbaaeff09aca51e0006e62026a35e020c3b49f (diff)
parenta2e76ff3dc1e19203b2529d939f489ca04ab98a0 (diff)
downloadnumpy-18af0e10878fe49a893fd576317dabd424c7ca16.tar.gz
Merge pull request #17410 from seberg/scalars-to-int-array-special
API: Special case how numpy scalars are coerced to signed integer
-rw-r--r--doc/release/upcoming_changes/16200.compatibility.rst22
-rw-r--r--numpy/core/src/multiarray/ctors.c28
-rw-r--r--numpy/core/src/multiarray/dtypemeta.c17
-rw-r--r--numpy/core/tests/test_array_coercion.py47
4 files changed, 104 insertions, 10 deletions
diff --git a/doc/release/upcoming_changes/16200.compatibility.rst b/doc/release/upcoming_changes/16200.compatibility.rst
index d0fd51265..2bbdd883e 100644
--- a/doc/release/upcoming_changes/16200.compatibility.rst
+++ b/doc/release/upcoming_changes/16200.compatibility.rst
@@ -8,14 +8,26 @@ error::
np.array([np.float64(np.nan)], dtype=np.int64)
-will succeed at this time (this may change) and return an undefined result
-(usually the smallest possible integer). This also affects assignments::
+will succeed and return an undefined result (usually the smallest possible
+integer). This also affects assignments::
arr[0] = np.float64(np.nan)
-Note, this already happened for ``np.array(np.float64(np.nan), dtype=np.int64)``
-and that the behaviour is unchanged for ``np.nan`` itself which is a Python
-float.
+At this time, NumPy retains the behaviour for::
+
+ np.array(np.float64(np.nan), dtype=np.int64)
+
+The above changes do not affect Python scalars::
+
+ np.array([float("NaN")], dtype=np.int64)
+
+remains unaffected (``np.nan`` is a Python ``float``, not a NumPy one).
+Unlike signed integers, unsigned integers do not retain this special case,
+since they always behaved more like casting.
+The following code stops raising an error::
+
+ np.array([np.float64(np.nan)], dtype=np.uint64)
+
To avoid backward compatibility issues, at this time assignment from
``datetime64`` scalar to strings of too short length remains supported.
This means that ``np.asarray(np.datetime64("2020-10-10"), dtype="S5")``
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index 55c0a31f0..b09ec9f8e 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -1461,6 +1461,31 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth,
((PyVoidScalarObject *)op)->flags,
NULL, op);
}
+ else if (cache == 0 && newtype != NULL &&
+ PyDataType_ISSIGNED(newtype) && PyArray_IsScalar(op, Generic)) {
+ assert(ndim == 0);
+ /*
+     * This is a (possible) inconsistency where:
+ *
+ * np.array(np.float64(np.nan), dtype=np.int64)
+ *
+ * behaves differently from:
+ *
+ * np.array([np.float64(np.nan)], dtype=np.int64)
+ * arr1d_int64[0] = np.float64(np.nan)
+ * np.array(np.array(np.nan), dtype=np.int64)
+ *
+     * by using typical casting instead of raising an error.
+ * The error is desirable, but to always error seems like a
+ * larger change to be considered at some other time and it is
+ * undesirable that 0-D arrays behave differently from scalars.
+ * This retains the behaviour, largely due to issues in pandas
+ * which relied on a try/except (although hopefully that will
+ * have a better solution at some point):
+ * https://github.com/pandas-dev/pandas/issues/35481
+ */
+ return PyArray_FromScalar(op, dtype);
+ }
/* There was no array (or array-like) passed in directly. */
if ((flags & NPY_ARRAY_WRITEBACKIFCOPY) ||
@@ -1483,7 +1508,8 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth,
if (cache == NULL) {
/* This is a single item. Set it directly. */
assert(ndim == 0);
- if (PyArray_Pack(PyArray_DESCR(ret), PyArray_DATA(ret), op) < 0) {
+
+ if (PyArray_Pack(PyArray_DESCR(ret), PyArray_BYTES(ret), op) < 0) {
Py_DECREF(ret);
return NULL;
}
diff --git a/numpy/core/src/multiarray/dtypemeta.c b/numpy/core/src/multiarray/dtypemeta.c
index 84d9dc381..af14bb7e5 100644
--- a/numpy/core/src/multiarray/dtypemeta.c
+++ b/numpy/core/src/multiarray/dtypemeta.c
@@ -305,6 +305,18 @@ python_builtins_are_known_scalar_types(
static int
+signed_integers_is_known_scalar_types(
+ PyArray_DTypeMeta *cls, PyTypeObject *pytype)
+{
+ if (python_builtins_are_known_scalar_types(cls, pytype)) {
+ return 1;
+ }
+ /* Convert our scalars (raise on too large unsigned and NaN, etc.) */
+ return PyType_IsSubtype(pytype, &PyGenericArrType_Type);
+}
+
+
+static int
datetime_known_scalar_types(
PyArray_DTypeMeta *cls, PyTypeObject *pytype)
{
@@ -567,6 +579,11 @@ dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr)
dtype_class->common_dtype = default_builtin_common_dtype;
dtype_class->common_instance = NULL;
+ if (PyTypeNum_ISSIGNED(dtype_class->type_num)) {
+ /* Convert our scalars (raise on too large unsigned and NaN, etc.) */
+ dtype_class->is_known_scalar_type = signed_integers_is_known_scalar_types;
+ }
+
if (PyTypeNum_ISUSERDEF(descr->type_num)) {
dtype_class->common_dtype = legacy_userdtype_common_dtype_function;
}
diff --git a/numpy/core/tests/test_array_coercion.py b/numpy/core/tests/test_array_coercion.py
index 79954b998..78def9360 100644
--- a/numpy/core/tests/test_array_coercion.py
+++ b/numpy/core/tests/test_array_coercion.py
@@ -309,6 +309,13 @@ class TestScalarDiscovery:
# coercion should also raise (error type may change)
with pytest.raises(Exception):
np.array(scalar, dtype=dtype)
+
+ if (isinstance(scalar, rational) and
+ np.issubdtype(dtype, np.signedinteger)):
+ return
+
+ with pytest.raises(Exception):
+ np.array([scalar], dtype=dtype)
# assignment should also raise
res = np.zeros((), dtype=dtype)
with pytest.raises(Exception):
@@ -340,6 +347,30 @@ class TestScalarDiscovery:
assert discovered_dtype == dtype
assert discovered_dtype.itemsize == dtype.itemsize
+ @pytest.mark.parametrize("dtype", np.typecodes["Integer"])
+ def test_scalar_to_int_coerce_does_not_cast(self, dtype):
+ """
+        Signed integers are currently different in that they do not cast other
+        NumPy scalars, but instead use scalar.__int__(). The hardcoded
+        exception to this rule is `np.array(scalar, dtype=integer)`.
+ """
+ dtype = np.dtype(dtype)
+ invalid_int = np.ulonglong(-1)
+
+ float_nan = np.float64(np.nan)
+
+ for scalar in [float_nan, invalid_int]:
+ # This is a special case using casting logic and thus not failing:
+ coerced = np.array(scalar, dtype=dtype)
+ cast = np.array(scalar).astype(dtype)
+ assert_array_equal(coerced, cast)
+
+ # However these fail:
+ with pytest.raises((ValueError, OverflowError)):
+ np.array([scalar], dtype=dtype)
+ with pytest.raises((ValueError, OverflowError)):
+ cast[()] = scalar
+
class TestTimeScalars:
@pytest.mark.parametrize("dtype", [np.int64, np.float32])
@@ -349,13 +380,21 @@ class TestTimeScalars:
param(np.datetime64("NaT", "generic"), id="datetime64[generic](NaT)"),
param(np.datetime64(1, "D"), id="datetime64[D]")],)
def test_coercion_basic(self, dtype, scalar):
+ # Note the `[scalar]` is there because np.array(scalar) uses stricter
+ # `scalar.__int__()` rules for backward compatibility right now.
arr = np.array(scalar, dtype=dtype)
cast = np.array(scalar).astype(dtype)
- ass = np.ones((), dtype=dtype)
- ass[()] = scalar # raises, as would np.array([scalar], dtype=dtype)
-
assert_array_equal(arr, cast)
- assert_array_equal(cast, cast)
+
+ ass = np.ones((), dtype=dtype)
+ if issubclass(dtype, np.integer):
+ with pytest.raises(TypeError):
+                # raises, as would np.array([scalar], dtype=dtype); this is
+                # a conversion from times, but follows the behaviour of integers.
+ ass[()] = scalar
+ else:
+ ass[()] = scalar
+ assert_array_equal(ass, cast)
@pytest.mark.parametrize("dtype", [np.int64, np.float32])
@pytest.mark.parametrize("scalar",