diff options
author | Sebastian Berg <sebastian@sipsolutions.net> | 2020-02-03 16:17:26 -0800 |
---|---|---|
committer | Sebastian Berg <sebastian@sipsolutions.net> | 2020-02-06 20:10:40 -0800 |
commit | 1a1611a33cfb5ea50d16d20affa5c6fa03e148d7 (patch) | |
tree | fb55b590501702b096a1fb3833c90589de1d86bb | |
parent | dae4f67c797176c66281101be8f3b4d6c424735c (diff) | |
download | numpy-1a1611a33cfb5ea50d16d20affa5c6fa03e148d7.tar.gz |
DEP: Do not allow "abstract" dtype conversion/creation
These dtypes do not really make sense as instances. We can (somewhat)
reasonably define np.dtype(np.int64) as the default (machine endianness)
int64. (Arguably, it is unclear that `np.array(arr_of_>f8, dtype="f")`
should return arr_of_<f8, but that would be very noisy!)
However, treating `np.integer` as equivalent to long is not well defined.
Similarly, `dtype=Decimal` may be neat to spell `dtype=object` when you
intend to put Decimal objects into the array. But it is misleading,
since there is no special meaning to it at this time.
The biggest issue with it is that `arr.astype(np.floating)` looks
like it will let float32 or float128 pass, but it will force a
float64 output! Arguably downcasting is a bug in this case.
A related issue is `np.dtype("S")` and especially "S0". The dtype "S"
does make sense for most or all places where `dtype=...` can be
passed. However, it is conceptually different from other dtypes, since
it will not end up being attached to the array (unlike "S2" which
would be). The dtype "S" really means the type number/DType class
of String, and not a specific dtype instance.
-rw-r--r-- | doc/release/upcoming_changes/15534.deprecation.rst | 11 | ||||
-rw-r--r-- | numpy/core/defchararray.py | 18 | ||||
-rw-r--r-- | numpy/core/src/multiarray/descriptor.c | 17 | ||||
-rw-r--r-- | numpy/core/src/multiarray/scalarapi.c | 39 | ||||
-rw-r--r-- | numpy/core/tests/test_defchararray.py | 4 | ||||
-rw-r--r-- | numpy/core/tests/test_deprecations.py | 29 | ||||
-rw-r--r-- | numpy/core/tests/test_multiarray.py | 2 | ||||
-rw-r--r-- | numpy/core/tests/test_numeric.py | 2 | ||||
-rw-r--r-- | numpy/core/tests/test_numerictypes.py | 10 | ||||
-rw-r--r-- | numpy/core/tests/test_regression.py | 2 | ||||
-rw-r--r-- | numpy/lib/_iotools.py | 17 | ||||
-rw-r--r-- | numpy/lib/tests/test_function_base.py | 8 | ||||
-rw-r--r-- | numpy/lib/tests/test_io.py | 2 |
13 files changed, 135 insertions, 26 deletions
diff --git a/doc/release/upcoming_changes/15534.deprecation.rst b/doc/release/upcoming_changes/15534.deprecation.rst new file mode 100644 index 000000000..243e224ba --- /dev/null +++ b/doc/release/upcoming_changes/15534.deprecation.rst @@ -0,0 +1,11 @@ +Converting certain types to dtypes is Deprecated +------------------------------------------------ +The super classes of scalar types, such as ``np.integer``, ``np.generic``, +or ``np.inexact`` will now give a deprecation warning when converted +to a dtype (or used in a dtype keyword argument). +The reason for this is that `np.integer` is converted to ``np.int_``, +while it would be expected to represent *any* integer (e.g. also +``int8``, ``int16``, etc. +For example, ``dtype=np.floating`` is currently identical to +``dtype=np.float64``, even though also ``np.float32`` is a subclass of +``np.floating``. diff --git a/numpy/core/defchararray.py b/numpy/core/defchararray.py index 942a698a9..26a9013e6 100644 --- a/numpy/core/defchararray.py +++ b/numpy/core/defchararray.py @@ -17,7 +17,8 @@ The preferred alias for `defchararray` is `numpy.char`. """ import functools import sys -from .numerictypes import string_, unicode_, integer, object_, bool_, character +from .numerictypes import ( + string_, unicode_, integer, int_, object_, bool_, character) from .numeric import ndarray, compare_chararrays from .numeric import array as narray from numpy.core.multiarray import _vec_string @@ -276,7 +277,10 @@ def str_len(a): -------- builtins.len """ - return _vec_string(a, integer, '__len__') + # Note: __len__, etc. currently return ints, which are not C-integers. + # Generally intp would be expected for lengths, although int is sufficient + # due to the dtype itemsize limitation. 
+ return _vec_string(a, int_, '__len__') @array_function_dispatch(_binary_op_dispatcher) @@ -500,7 +504,7 @@ def count(a, sub, start=0, end=None): array([1, 0, 0]) """ - return _vec_string(a, integer, 'count', [sub, start] + _clean_args(end)) + return _vec_string(a, int_, 'count', [sub, start] + _clean_args(end)) def _code_dispatcher(a, encoding=None, errors=None): @@ -710,7 +714,7 @@ def find(a, sub, start=0, end=None): """ return _vec_string( - a, integer, 'find', [sub, start] + _clean_args(end)) + a, int_, 'find', [sub, start] + _clean_args(end)) @array_function_dispatch(_count_dispatcher) @@ -739,7 +743,7 @@ def index(a, sub, start=0, end=None): """ return _vec_string( - a, integer, 'index', [sub, start] + _clean_args(end)) + a, int_, 'index', [sub, start] + _clean_args(end)) @array_function_dispatch(_unary_op_dispatcher) @@ -1199,7 +1203,7 @@ def rfind(a, sub, start=0, end=None): """ return _vec_string( - a, integer, 'rfind', [sub, start] + _clean_args(end)) + a, int_, 'rfind', [sub, start] + _clean_args(end)) @array_function_dispatch(_count_dispatcher) @@ -1229,7 +1233,7 @@ def rindex(a, sub, start=0, end=None): """ return _vec_string( - a, integer, 'rindex', [sub, start] + _clean_args(end)) + a, int_, 'rindex', [sub, start] + _clean_args(end)) @array_function_dispatch(_just_dispatcher) diff --git a/numpy/core/src/multiarray/descriptor.c b/numpy/core/src/multiarray/descriptor.c index 215c8b0ab..0079aa86e 100644 --- a/numpy/core/src/multiarray/descriptor.c +++ b/numpy/core/src/multiarray/descriptor.c @@ -1399,14 +1399,25 @@ _convert_from_type(PyObject *obj) { return PyArray_DescrFromType(NPY_BOOL); } else if (typ == &PyBytes_Type) { + /* + * TODO: This should be deprecated, and have special handling for + * dtype=bytes/"S" in coercion: It should not rely on "S0". 
+ */ return PyArray_DescrFromType(NPY_STRING); } else if (typ == &PyUnicode_Type) { + /* + * TODO: This should be deprecated, and have special handling for + * dtype=str/"U" in coercion: It should not rely on "U0". + */ return PyArray_DescrFromType(NPY_UNICODE); } else if (typ == &PyMemoryView_Type) { return PyArray_DescrFromType(NPY_VOID); } + else if (typ == &PyBaseObject_Type) { + return PyArray_DescrFromType(NPY_OBJECT); + } else { PyArray_Descr *ret = _try_convert_from_dtype_attr(obj); if ((PyObject *)ret != Py_NotImplemented) { @@ -1425,6 +1436,12 @@ _convert_from_type(PyObject *obj) { } Py_DECREF(ret); + if (DEPRECATE("Converting a type/class not known to NumPy to a dtype " + "currently always returns `np.dtype(object)`. This loses " + "the type information and is deprecated.") < 0) { + return NULL; + } + /* All other classes are treated as object */ return PyArray_DescrFromType(NPY_OBJECT); } diff --git a/numpy/core/src/multiarray/scalarapi.c b/numpy/core/src/multiarray/scalarapi.c index 5c4332364..4cabc6bb3 100644 --- a/numpy/core/src/multiarray/scalarapi.c +++ b/numpy/core/src/multiarray/scalarapi.c @@ -435,23 +435,59 @@ PyArray_DescrFromTypeObject(PyObject *type) if ((type == (PyObject *) &PyNumberArrType_Type) || (type == (PyObject *) &PyInexactArrType_Type) || (type == (PyObject *) &PyFloatingArrType_Type)) { + if (DEPRECATE("Converting `np.inexact` or `np.floating` to " + "a dtype is deprecated. The current result is `float64` " + "which is not strictly correct.") < 0) { + return NULL; + } typenum = NPY_DOUBLE; } else if (type == (PyObject *)&PyComplexFloatingArrType_Type) { + if (DEPRECATE("Converting `np.complex` to a dtype is deprecated. 
" + "The current result is `complex128` which is not " + "strictly correct.") < 0) { + return NULL; + } typenum = NPY_CDOUBLE; } else if ((type == (PyObject *)&PyIntegerArrType_Type) || (type == (PyObject *)&PySignedIntegerArrType_Type)) { + if (DEPRECATE("Converting `np.integer` or `np.signedinteger` to " + "a dtype is deprecated. The current result is " + "`np.dtype(np.int_)` which is not strictly correct. " + "Note that the result depends on the system. To ensure " + "stable results use may want to use `np.int64` or " + "`np.int32`.") < 0) { + return NULL; + } typenum = NPY_LONG; } else if (type == (PyObject *) &PyUnsignedIntegerArrType_Type) { + if (DEPRECATE("Converting `np.unsignedinteger` to a dtype is " + "deprecated. The current result is `np.dtype(np.uint)` " + "which is not strictly correct. Note that the result " + "depends on the system. To ensure stable results you may " + "want to use `np.uint64` or `np.uint32`.") < 0) { + return NULL; + } typenum = NPY_ULONG; } else if (type == (PyObject *) &PyCharacterArrType_Type) { + if (DEPRECATE("Converting `np.character` to a dtype is deprecated. " + "The current result is `np.dtype(np.str_)` " + "which is not strictly correct. Note that `np.character` " + "is generally deprecated and 'S1' should be used.") < 0) { + return NULL; + } typenum = NPY_STRING; } else if ((type == (PyObject *) &PyGenericArrType_Type) || (type == (PyObject *) &PyFlexibleArrType_Type)) { + if (DEPRECATE("Converting `np.generic` to a dtype is " + "deprecated. 
The current result is `np.dtype(np.void)` " + "which is not strictly correct.") < 0) { + return NULL; + } typenum = NPY_VOID; } @@ -561,6 +597,9 @@ PyArray_DescrFromScalar(PyObject *sc) } descr = PyArray_DescrFromTypeObject((PyObject *)Py_TYPE(sc)); + if (descr == NULL) { + return NULL; + } if (PyDataType_ISUNSIZED(descr)) { PyArray_DESCR_REPLACE(descr); type_num = descr->type_num; diff --git a/numpy/core/tests/test_defchararray.py b/numpy/core/tests/test_defchararray.py index 39600553d..bbb94f7d3 100644 --- a/numpy/core/tests/test_defchararray.py +++ b/numpy/core/tests/test_defchararray.py @@ -119,14 +119,14 @@ class TestVecString: def test_invalid_result_type(self): def fail(): - _vec_string(['a'], np.integer, 'strip') + _vec_string(['a'], np.int_, 'strip') assert_raises(TypeError, fail) def test_broadcast_error(self): def fail(): - _vec_string([['abc', 'def']], np.integer, 'find', (['a', 'd', 'j'],)) + _vec_string([['abc', 'def']], np.int_, 'find', (['a', 'd', 'j'],)) assert_raises(ValueError, fail) diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py index 01b35ec90..a89fc70d5 100644 --- a/numpy/core/tests/test_deprecations.py +++ b/numpy/core/tests/test_deprecations.py @@ -547,3 +547,32 @@ def test_deprecate_ragged_arrays(): with assert_warns(np.VisibleDeprecationWarning): np.array(arg) + +class TestAbstractDTypeCoercion(_DeprecationTestCase): + # 2020-02-06 1.19.0 + message = "Converting .* to a dtype .*is deprecated" + deprecated_types = [ + # The builtin scalar super types: + np.generic, np.flexible, np.number, + np.inexact, np.floating, np.complexfloating, + np.integer, np.unsignedinteger, np.signedinteger, + # character is a deprecated S1 special case: + np.character, + # Test python types that do not map to a NumPy type cleanly + # (currenlty map to object) + type, list, tuple, dict, + ] + + def test_dtype_coercion(self): + for scalar_type in self.deprecated_types: + self.assert_deprecated(np.dtype, 
args=(scalar_type,)) + + def test_array_construction(self): + for scalar_type in self.deprecated_types: + self.assert_deprecated(np.array, args=([], scalar_type,)) + + def test_not_deprecated(self): + # All specific types are not deprecated: + for group in np.sctypes.values(): + for scalar_type in group: + self.assert_not_deprecated(np.dtype, args=(scalar_type,)) diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index ad38911cb..f6181c900 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -6456,7 +6456,7 @@ class TestRepeat: NEIGH_MODE = {'zero': 0, 'one': 1, 'constant': 2, 'circular': 3, 'mirror': 4} -@pytest.mark.parametrize('dt', [float, Decimal], ids=['float', 'object']) +@pytest.mark.parametrize('dt', [float, object], ids=['float', 'object']) class TestNeighborhoodIter: # Simple, 2d tests def test_simple2d(self, dt): diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py index 3bc4cd187..0f7ac036f 100644 --- a/numpy/core/tests/test_numeric.py +++ b/numpy/core/tests/test_numeric.py @@ -2543,7 +2543,7 @@ class TestCorrelate: assert_array_almost_equal(z, self.zs) def test_object(self): - self._setup(Decimal) + self._setup(object) z = np.correlate(self.x, self.y, 'full') assert_array_almost_equal(z, self.z1) z = np.correlate(self.y, self.x, 'full') diff --git a/numpy/core/tests/test_numerictypes.py b/numpy/core/tests/test_numerictypes.py index c72d13947..22fc2ed58 100644 --- a/numpy/core/tests/test_numerictypes.py +++ b/numpy/core/tests/test_numerictypes.py @@ -3,7 +3,8 @@ import itertools import pytest import numpy as np -from numpy.testing import assert_, assert_equal, assert_raises, IS_PYPY +from numpy.testing import ( + assert_, assert_equal, assert_raises, assert_warns, IS_PYPY) # This is the structure of the table used for plain objects: # @@ -451,8 +452,11 @@ class Test_sctype2char: def test_other_type(self): assert_equal(np.sctype2char(float), 'd') - 
assert_equal(np.sctype2char(list), 'O') - assert_equal(np.sctype2char(np.ndarray), 'O') + assert_equal(np.sctype2char(object), 'O') + with assert_warns(DeprecationWarning): + assert_equal(np.sctype2char(list), 'O') + with assert_warns(DeprecationWarning): + assert_equal(np.sctype2char(np.ndarray), 'O') def test_third_party_scalar_type(self): from numpy.core._rational_tests import rational diff --git a/numpy/core/tests/test_regression.py b/numpy/core/tests/test_regression.py index 321723b9b..50a543625 100644 --- a/numpy/core/tests/test_regression.py +++ b/numpy/core/tests/test_regression.py @@ -1152,7 +1152,7 @@ class TestRegression: assert_(dat.argmax(1).info == 'jubba') assert_(dat.argmin(1).info == 'jubba') assert_(dat.argsort(1).info == 'jubba') - assert_(dat.astype(TestArray).info == 'jubba') + assert_(dat.astype(object).info == 'jubba') assert_(dat.byteswap().info == 'jubba') assert_(dat.clip(2, 7).info == 'jubba') assert_(dat.compress([0, 1, 1]).info == 'jubba') diff --git a/numpy/lib/_iotools.py b/numpy/lib/_iotools.py index 251d2d2a7..48d130bac 100644 --- a/numpy/lib/_iotools.py +++ b/numpy/lib/_iotools.py @@ -504,18 +504,23 @@ class StringConverter: """ # _mapper = [(nx.bool_, str2bool, False), - (nx.integer, int, -1)] + (nx.int_, int, -1),] # On 32-bit systems, we need to make sure that we explicitly include - # nx.int64 since ns.integer is nx.int32. - if nx.dtype(nx.integer).itemsize < nx.dtype(nx.int64).itemsize: + # nx.int64 since ns.int_ is nx.int32. 
+ if nx.dtype(nx.int_).itemsize < nx.dtype(nx.int64).itemsize: _mapper.append((nx.int64, int, -1)) - _mapper.extend([(nx.floating, float, nx.nan), - (nx.complexfloating, complex, nx.nan + 0j), + _mapper.extend([(nx.float64, float, nx.nan), + (nx.complex128, complex, nx.nan + 0j), (nx.longdouble, nx.longdouble, nx.nan), (nx.unicode_, asunicode, '???'), - (nx.string_, asbytes, '???')]) + (nx.string_, asbytes, '???'), + # If a non-default dtype is passed, fall back to generic + # ones (should only be used for the converter) + (nx.integer, int, -1), + (nx.floating, float, nx.nan), + (nx.complexfloating, complex, nx.nan + 0j),]) (_defaulttype, _defaultfunc, _defaultfill) = zip(*_mapper) diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py index 7953de15d..3fb27bbbe 100644 --- a/numpy/lib/tests/test_function_base.py +++ b/numpy/lib/tests/test_function_base.py @@ -1899,7 +1899,7 @@ class TestCov: frequencies = np.array([1, 4, 1]) x2_repeats = np.array([[0.0], [1.0], [1.0], [1.0], [1.0], [2.0]]).T res2 = np.array([[0.4, -0.4], [-0.4, 0.4]]) - unit_frequencies = np.ones(3, dtype=np.integer) + unit_frequencies = np.ones(3, dtype=np.int_) weights = np.array([1.0, 4.0, 1.0]) res3 = np.array([[2. / 3., -2. / 3.], [-2. / 3., 2. 
/ 3.]]) unit_weights = np.ones(3) @@ -1952,11 +1952,11 @@ class TestCov: self.res1) nonint = self.frequencies + 0.5 assert_raises(TypeError, cov, self.x1, fweights=nonint) - f = np.ones((2, 3), dtype=np.integer) + f = np.ones((2, 3), dtype=np.int_) assert_raises(RuntimeError, cov, self.x1, fweights=f) - f = np.ones(2, dtype=np.integer) + f = np.ones(2, dtype=np.int_) assert_raises(RuntimeError, cov, self.x1, fweights=f) - f = -1 * np.ones(3, dtype=np.integer) + f = -1 * np.ones(3, dtype=np.int_) assert_raises(ValueError, cov, self.x1, fweights=f) def test_aweights(self): diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py index 436cd1d24..db9f35f2a 100644 --- a/numpy/lib/tests/test_io.py +++ b/numpy/lib/tests/test_io.py @@ -2332,7 +2332,7 @@ M 33 21.99 assert_(test.dtype['f0'] == float) assert_(test.dtype['f1'] == np.int64) - assert_(test.dtype['f2'] == np.integer) + assert_(test.dtype['f2'] == np.int_) assert_allclose(test['f0'], 73786976294838206464.) assert_equal(test['f1'], 17179869184) |