diff options
-rw-r--r-- | doc/release/1.17.0-notes.rst | 6 | ||||
-rw-r--r-- | numpy/core/_add_newdocs.py | 13 | ||||
-rw-r--r-- | numpy/core/multiarray.py | 14 | ||||
-rw-r--r-- | numpy/core/src/multiarray/convert_datatype.c | 81 | ||||
-rw-r--r-- | numpy/core/tests/test_nditer.py | 2 | ||||
-rw-r--r-- | numpy/core/tests/test_numeric.py | 37 | ||||
-rw-r--r-- | numpy/doc/structured_arrays.py | 7 |
7 files changed, 137 insertions, 23 deletions
diff --git a/doc/release/1.17.0-notes.rst b/doc/release/1.17.0-notes.rst index 58d38cba9..297dbbf2d 100644 --- a/doc/release/1.17.0-notes.rst +++ b/doc/release/1.17.0-notes.rst @@ -134,6 +134,12 @@ input. Note that we generally recommend the scipy implementation over the numpy one: it is a proper ufunc written in C, and more than an order of magnitude faster. +``np.can_cast`` no longer assumes all unsafe casting is allowed +--------------------------------------------------------------- +Previously, ``can_cast`` returned `True` for almost all inputs for +``casting='unsafe'``, even for cases where casting was not possible, such as +from a structured dtype to a regular one. This has been fixed, making it +more consistent with actual casting using, e.g., the ``.astype`` method. C API changes ============= diff --git a/numpy/core/_add_newdocs.py b/numpy/core/_add_newdocs.py index 9f2b67a6b..0b4bf3342 100644 --- a/numpy/core/_add_newdocs.py +++ b/numpy/core/_add_newdocs.py @@ -2699,10 +2699,15 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('astype', Notes ----- - Starting in NumPy 1.9, astype method now returns an error if the string - dtype to cast to is not long enough in 'safe' casting mode to hold the max - value of integer/float array that is being casted. Previously the casting - was allowed even if the result was truncated. + .. versionchanged:: 1.17.0 + Casting between a simple data type and a structured one is possible only + for "unsafe" casting. Casting to multiple fields is allowed, but + casting from multiple fields is not. + + .. versionchanged:: 1.9.0 + Casting from numeric to string types in 'safe' casting mode requires + that the string dtype length is long enough to store the max + integer/float value converted. Raises ------ diff --git a/numpy/core/multiarray.py b/numpy/core/multiarray.py index 8006dd9b5..118562c89 100644 --- a/numpy/core/multiarray.py +++ b/numpy/core/multiarray.py @@ -496,11 +496,15 @@ def can_cast(from_, to, casting=None): Notes ----- - Starting in NumPy 1.9, can_cast function now returns False in 'safe' - casting mode for integer/float dtype and string dtype if the string dtype - length is not long enough to store the max integer/float value converted - to a string. Previously can_cast in 'safe' mode returned True for - integer/float dtype and a string dtype of any length. + .. versionchanged:: 1.17.0 + Casting between a simple data type and a structured one is possible only + for "unsafe" casting. Casting to multiple fields is allowed, but + casting from multiple fields is not. + + .. versionchanged:: 1.9.0 + Casting from numeric to string types in 'safe' casting mode requires + that the string dtype length is long enough to store the maximum + integer/float value converted. See also -------- diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c index c59979e75..8e24d6361 100644 --- a/numpy/core/src/multiarray/convert_datatype.c +++ b/numpy/core/src/multiarray/convert_datatype.c @@ -679,15 +679,82 @@ NPY_NO_EXPORT npy_bool PyArray_CanCastTypeTo(PyArray_Descr *from, PyArray_Descr *to, NPY_CASTING casting) { - /* Fast path for unsafe casts or basic types */ - if (casting == NPY_UNSAFE_CASTING || - (NPY_LIKELY(from->type_num < NPY_OBJECT) && - NPY_LIKELY(from->type_num == to->type_num) && - NPY_LIKELY(from->byteorder == to->byteorder))) { + /* + * Fast paths for equality and for basic types. + */ + if (from == to || + ((NPY_LIKELY(PyDataType_ISNUMBER(from)) || + PyDataType_ISOBJECT(from)) && + NPY_LIKELY(from->type_num == to->type_num) && + NPY_LIKELY(from->byteorder == to->byteorder))) { + return 1; + } + /* + * Cases with subarrays and fields need special treatment. + */ + if (PyDataType_HASFIELDS(from)) { + /* + * If from is a structured data type, then it can be cast to a simple + * non-object one only for unsafe casting *and* if it has a single + * field; recurse just in case the single field is itself structured. + */ + if (!PyDataType_HASFIELDS(to) && !PyDataType_ISOBJECT(to)) { + if (casting == NPY_UNSAFE_CASTING && + PyDict_Size(from->fields) == 1) { + Py_ssize_t ppos = 0; + PyObject *tuple; + PyArray_Descr *field; + PyDict_Next(from->fields, &ppos, NULL, &tuple); + field = (PyArray_Descr *)PyTuple_GET_ITEM(tuple, 0); + /* + * For a subarray, we need to get the underlying type; + * since we already are casting unsafely, we can ignore + * the shape. + */ + if (PyDataType_HASSUBARRAY(field)) { + field = field->subarray->base; + } + return PyArray_CanCastTypeTo(field, to, casting); + } + else { + return 0; + } + } + /* + * Casting from one structured data type to another depends on the fields; + * we pass that case on to the EquivTypenums case below. + * + * TODO: move that part up here? Need to check whether equivalent type + * numbers is an addition constraint that is needed. + * + * TODO/FIXME: For now, always allow structured to structured for unsafe + * casting; this is not correct, but needed since the treatment in can_cast + * below got out of sync with astype; see gh-13667. + */ + if (casting == NPY_UNSAFE_CASTING) { + return 1; + } + } + else if (PyDataType_HASFIELDS(to)) { + /* + * If "from" is a simple data type and "to" has fields, then only + * unsafe casting works (and that works always, even to multiple fields). + */ + return casting == NPY_UNSAFE_CASTING; + } + /* + * Everything else we consider castable for unsafe for now. + * FIXME: ensure what we do here is consistent with "astype", + * i.e., deal more correctly with subarrays and user-defined dtype. + */ + else if (casting == NPY_UNSAFE_CASTING) { return 1; } - /* Equivalent types can be cast with any value of 'casting' */ - else if (PyArray_EquivTypenums(from->type_num, to->type_num)) { + /* + * Equivalent simple types can be cast with any value of 'casting', but + * we need to be careful about structured to structured. + */ + if (PyArray_EquivTypenums(from->type_num, to->type_num)) { /* For complicated case, use EquivTypes (for now) */ if (PyTypeNum_ISUSERDEF(from->type_num) || from->subarray != NULL) { diff --git a/numpy/core/tests/test_nditer.py b/numpy/core/tests/test_nditer.py index 9499bedec..cf66751f8 100644 --- a/numpy/core/tests/test_nditer.py +++ b/numpy/core/tests/test_nditer.py @@ -1864,7 +1864,7 @@ def test_iter_buffered_cast_structured_type(): # make sure multi-field struct type -> simple doesn't work sdt = [('a', 'f4'), ('b', 'i8'), ('d', 'O')] a = np.array([(5.5, 7, 'test'), (8, 10, 11)], dtype=sdt) - assert_raises(ValueError, lambda: ( + assert_raises(TypeError, lambda: ( nditer(a, ['buffered', 'refs_ok'], ['readonly'], casting='unsafe', op_dtypes='i4'))) diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py index 7b6357fe8..1db89e81f 100644 --- a/numpy/core/tests/test_numeric.py +++ b/numpy/core/tests/test_numeric.py @@ -899,6 +899,41 @@ class TestTypes(object): # Also test keyword arguments assert_(np.can_cast(from_=np.int32, to=np.int64)) + def test_can_cast_simple_to_structured(self): + # Non-structured can only be cast to structured in 'unsafe' mode. + assert_(not np.can_cast('i4', 'i4,i4')) + assert_(not np.can_cast('i4', 'i4,i2')) + assert_(np.can_cast('i4', 'i4,i4', casting='unsafe')) + assert_(np.can_cast('i4', 'i4,i2', casting='unsafe')) + # Even if there is just a single field which is OK. + assert_(not np.can_cast('i2', [('f1', 'i4')])) + assert_(not np.can_cast('i2', [('f1', 'i4')], casting='same_kind')) + assert_(np.can_cast('i2', [('f1', 'i4')], casting='unsafe')) + # It should be the same for recursive structured or subarrays. + assert_(not np.can_cast('i2', [('f1', 'i4,i4')])) + assert_(np.can_cast('i2', [('f1', 'i4,i4')], casting='unsafe')) + assert_(not np.can_cast('i2', [('f1', '(2,3)i4')])) + assert_(np.can_cast('i2', [('f1', '(2,3)i4')], casting='unsafe')) + + def test_can_cast_structured_to_simple(self): + # Need unsafe casting for structured to simple. + assert_(not np.can_cast([('f1', 'i4')], 'i4')) + assert_(np.can_cast([('f1', 'i4')], 'i4', casting='unsafe')) + assert_(np.can_cast([('f1', 'i4')], 'i2', casting='unsafe')) + # Since it is unclear what is being cast, multiple fields to + # single should not work even for unsafe casting. + assert_(not np.can_cast('i4,i4', 'i4', casting='unsafe')) + # But a single field inside a single field is OK. + assert_(not np.can_cast([('f1', [('x', 'i4')])], 'i4')) + assert_(np.can_cast([('f1', [('x', 'i4')])], 'i4', casting='unsafe')) + # And a subarray is fine too - it will just take the first element + # (arguably not very consistently; might also take the first field). + assert_(not np.can_cast([('f0', '(3,)i4')], 'i4')) + assert_(np.can_cast([('f0', '(3,)i4')], 'i4', casting='unsafe')) + # But a structured subarray with multiple fields should fail. + assert_(not np.can_cast([('f0', ('i4,i4'), (2,))], 'i4', + casting='unsafe')) + def test_can_cast_values(self): # gh-5917 for dt in np.sctypes['int'] + np.sctypes['uint']: @@ -1207,7 +1242,7 @@ class TestNonzero(object): a = np.array([[False], [TrueThenFalse()]]) assert_raises(RuntimeError, np.nonzero, a) - + class TestIndex(object): def test_boolean(self): a = rand(3, 5, 8) diff --git a/numpy/doc/structured_arrays.py b/numpy/doc/structured_arrays.py index c0437dc07..1343d2adc 100644 --- a/numpy/doc/structured_arrays.py +++ b/numpy/doc/structured_arrays.py @@ -340,11 +340,8 @@ structured datatype has just a single field:: >>> nostruct = np.zeros(2, dtype='i4') >>> nostruct[:] = twofield Traceback (most recent call last): - File "<stdin>", line 1, in <module> - ValueError: Can't cast from structure to non-structure, except if the structure only has a single field. - >>> nostruct[:] = onefield - >>> nostruct - array([0, 0], dtype=int32) + ... + TypeError: Cannot cast scalar from dtype([('A', '<i4'), ('B', '<i4')]) to dtype('int32') according to the rule 'unsafe' Assignment from other Structured Arrays ``````````````````````````````````````` |