DEP: Do not allow "abstract" dtype conversion/creation

These dtypes do not really make sense as instances. We can (somewhat) reasonably define np.dtype(np.int64) as the default (machine endianness) int64. (Arguably, it is unclear that `np.array(arr_of_>f8, dtype="f")` should return arr_of_<f8, but that would be very noisy!) However, `np.integer` as an equivalent to long is not well defined. Similarly, `dtype=Decimal` may be neat to spell `dtype=object` when you intend to put Decimal objects into the array. But it is misleading, since there is no special meaning to it at this time. The biggest issue with it is that `arr.astype(np.floating)` looks like it will let float32 or float128 pass, but it will force a float64 output! Arguably downcasting is a bug in this case. A related issue is `np.dtype("S")` and especially "S0". The dtype "S" does make sense for most or all places where `dtype=...` can be passed. However, it is conceptually different from other dtypes, since it will not end up being attached to the array (unlike "S2" which would be). The dtype "S" really means the type number/DType class of String, and not a specific dtype instance.
author: Sebastian Berg <sebastian@sipsolutions.net> 2020-02-03 16:17:26 -0800
committer: Sebastian Berg <sebastian@sipsolutions.net> 2020-02-06 20:10:40 -0800
commit: 1a1611a33cfb5ea50d16d20affa5c6fa03e148d7 (patch)
tree: fb55b590501702b096a1fb3833c90589de1d86bb
parent: dae4f67c797176c66281101be8f3b4d6c424735c (diff)
download: numpy-1a1611a33cfb5ea50d16d20affa5c6fa03e148d7.tar.gz
13 files changed, 135 insertions, 26 deletions
diff --git a/doc/release/upcoming_changes/15534.deprecation.rst b/doc/release/upcoming_changes/15534.deprecation.rst
new file mode 100644
index 000000000..243e224ba
--- /dev/null
+++ b/doc/release/upcoming_changes/15534.deprecation.rst
@@ -0,0 +1,11 @@
+Converting certain types to dtypes is Deprecated
+------------------------------------------------
+The super classes of scalar types, such as ``np.integer``, ``np.generic``,
+or ``np.inexact`` will now give a deprecation warning when converted
+to a dtype (or used in a dtype keyword argument).
+The reason for this is that ``np.integer`` is converted to ``np.int_``,
+while it would be expected to represent *any* integer (e.g. also
+``int8``, ``int16``, etc.).
+For example, ``dtype=np.floating`` is currently identical to
+``dtype=np.float64``, even though also ``np.float32`` is a subclass of
+``np.floating``.
diff --git a/numpy/core/defchararray.py b/numpy/core/defchararray.py
index 942a698a9..26a9013e6 100644
--- a/numpy/core/defchararray.py
+++ b/numpy/core/defchararray.py
@@ -17,7 +17,8 @@ The preferred alias for `defchararray` is `numpy.char`.
 """
 import functools
 import sys
-from .numerictypes import string_, unicode_, integer, object_, bool_, character
+from .numerictypes import (
+    string_, unicode_, integer, int_, object_, bool_, character)
 from .numeric import ndarray, compare_chararrays
 from .numeric import array as narray
 from numpy.core.multiarray import _vec_string
@@ -276,7 +277,10 @@ def str_len(a):
     --------
     builtins.len
     """
-    return _vec_string(a, integer, '__len__')
+    # Note: __len__, etc. currently return ints, which are not C-integers.
+    # Generally intp would be expected for lengths, although int is sufficient
+    # due to the dtype itemsize limitation.
+    return _vec_string(a, int_, '__len__')
 
 
 @array_function_dispatch(_binary_op_dispatcher)
@@ -500,7 +504,7 @@ def count(a, sub, start=0, end=None):
     array([1, 0, 0])
 
     """
-    return _vec_string(a, integer, 'count', [sub, start] + _clean_args(end))
+    return _vec_string(a, int_, 'count', [sub, start] + _clean_args(end))
 
 
 def _code_dispatcher(a, encoding=None, errors=None):
@@ -710,7 +714,7 @@ def find(a, sub, start=0, end=None):
 
     """
     return _vec_string(
-        a, integer, 'find', [sub, start] + _clean_args(end))
+        a, int_, 'find', [sub, start] + _clean_args(end))
 
 
 @array_function_dispatch(_count_dispatcher)
@@ -739,7 +743,7 @@ def index(a, sub, start=0, end=None):
 
     """
     return _vec_string(
-        a, integer, 'index', [sub, start] + _clean_args(end))
+        a, int_, 'index', [sub, start] + _clean_args(end))
 
 
 @array_function_dispatch(_unary_op_dispatcher)
@@ -1199,7 +1203,7 @@ def rfind(a, sub, start=0, end=None):
 
     """
     return _vec_string(
-        a, integer, 'rfind', [sub, start] + _clean_args(end))
+        a, int_, 'rfind', [sub, start] + _clean_args(end))
 
 
 @array_function_dispatch(_count_dispatcher)
@@ -1229,7 +1233,7 @@ def rindex(a, sub, start=0, end=None):
 
     """
     return _vec_string(
-        a, integer, 'rindex', [sub, start] + _clean_args(end))
+        a, int_, 'rindex', [sub, start] + _clean_args(end))
 
 
 @array_function_dispatch(_just_dispatcher)
diff --git a/numpy/core/src/multiarray/descriptor.c b/numpy/core/src/multiarray/descriptor.c
index 215c8b0ab..0079aa86e 100644
--- a/numpy/core/src/multiarray/descriptor.c
+++ b/numpy/core/src/multiarray/descriptor.c
@@ -1399,14 +1399,25 @@ _convert_from_type(PyObject *obj) {
         return PyArray_DescrFromType(NPY_BOOL);
     }
     else if (typ == &PyBytes_Type) {
+        /*
+         * TODO: This should be deprecated, and have special handling for
+         *       dtype=bytes/"S" in coercion: It should not rely on "S0".
+         */
         return PyArray_DescrFromType(NPY_STRING);
     }
     else if (typ == &PyUnicode_Type) {
+        /*
+         * TODO: This should be deprecated, and have special handling for
+         *       dtype=str/"U" in coercion: It should not rely on "U0".
+         */
         return PyArray_DescrFromType(NPY_UNICODE);
     }
     else if (typ == &PyMemoryView_Type) {
         return PyArray_DescrFromType(NPY_VOID);
     }
+    else if (typ == &PyBaseObject_Type) {
+        return PyArray_DescrFromType(NPY_OBJECT);
+    }
     else {
         PyArray_Descr *ret = _try_convert_from_dtype_attr(obj);
         if ((PyObject *)ret != Py_NotImplemented) {
@@ -1425,6 +1436,12 @@ _convert_from_type(PyObject *obj) {
         }
         Py_DECREF(ret);
 
+        if (DEPRECATE("Converting a type/class not known to NumPy to a dtype "
+                      "currently always returns `np.dtype(object)`. This loses "
+                      "the type information and is deprecated.") < 0) {
+            return NULL;
+        }
+
         /* All other classes are treated as object */
         return PyArray_DescrFromType(NPY_OBJECT);
     }
diff --git a/numpy/core/src/multiarray/scalarapi.c b/numpy/core/src/multiarray/scalarapi.c
index 5c4332364..4cabc6bb3 100644
--- a/numpy/core/src/multiarray/scalarapi.c
+++ b/numpy/core/src/multiarray/scalarapi.c
@@ -435,23 +435,59 @@ PyArray_DescrFromTypeObject(PyObject *type)
     if ((type == (PyObject *) &PyNumberArrType_Type) ||
             (type == (PyObject *) &PyInexactArrType_Type) ||
             (type == (PyObject *) &PyFloatingArrType_Type)) {
+        if (DEPRECATE("Converting `np.inexact` or `np.floating` to "
+                      "a dtype is deprecated. The current result is `float64` "
+                      "which is not strictly correct.") < 0) {
+            return NULL;
+        }
         typenum = NPY_DOUBLE;
     }
     else if (type == (PyObject *)&PyComplexFloatingArrType_Type) {
+        if (DEPRECATE("Converting `np.complex` to a dtype is deprecated. "
+                      "The current result is `complex128` which is not "
+                      "strictly correct.") < 0) {
+            return NULL;
+        }
         typenum = NPY_CDOUBLE;
     }
     else if ((type == (PyObject *)&PyIntegerArrType_Type) ||
             (type == (PyObject *)&PySignedIntegerArrType_Type)) {
+        if (DEPRECATE("Converting `np.integer` or `np.signedinteger` to "
+                      "a dtype is deprecated. The current result is "
+                      "`np.dtype(np.int_)` which is not strictly correct. "
+                      "Note that the result depends on the system. To ensure "
+                      "stable results use may want to use `np.int64` or "
+                      "`np.int32`.") < 0) {
+            return NULL;
+        }
         typenum = NPY_LONG;
     }
     else if (type == (PyObject *) &PyUnsignedIntegerArrType_Type) {
+        if (DEPRECATE("Converting `np.unsignedinteger` to a dtype is "
+                      "deprecated. The current result is `np.dtype(np.uint)` "
+                      "which is not strictly correct. Note that the result "
+                      "depends on the system. To ensure stable results you may "
+                      "want to use `np.uint64` or `np.uint32`.") < 0) {
+            return NULL;
+        }
         typenum = NPY_ULONG;
     }
     else if (type == (PyObject *) &PyCharacterArrType_Type) {
+        if (DEPRECATE("Converting `np.character` to a dtype is deprecated. "
+                      "The current result is `np.dtype(np.str_)` "
+                      "which is not strictly correct. Note that `np.character` "
+                      "is generally deprecated and 'S1' should be used.") < 0) {
+            return NULL;
+        }
         typenum = NPY_STRING;
     }
     else if ((type == (PyObject *) &PyGenericArrType_Type) ||
             (type == (PyObject *) &PyFlexibleArrType_Type)) {
+        if (DEPRECATE("Converting `np.generic` to a dtype is "
+                      "deprecated. The current result is `np.dtype(np.void)` "
+                      "which is not strictly correct.") < 0) {
+            return NULL;
+        }
         typenum = NPY_VOID;
     }
 
@@ -561,6 +597,9 @@ PyArray_DescrFromScalar(PyObject *sc)
     }
 
     descr = PyArray_DescrFromTypeObject((PyObject *)Py_TYPE(sc));
+    if (descr == NULL) {
+        return NULL;
+    }
     if (PyDataType_ISUNSIZED(descr)) {
         PyArray_DESCR_REPLACE(descr);
         type_num = descr->type_num;
diff --git a/numpy/core/tests/test_defchararray.py b/numpy/core/tests/test_defchararray.py
index 39600553d..bbb94f7d3 100644
--- a/numpy/core/tests/test_defchararray.py
+++ b/numpy/core/tests/test_defchararray.py
@@ -119,14 +119,14 @@ class TestVecString:
     def test_invalid_result_type(self):
 
         def fail():
-            _vec_string(['a'], np.integer, 'strip')
+            _vec_string(['a'], np.int_, 'strip')
 
         assert_raises(TypeError, fail)
 
     def test_broadcast_error(self):
 
         def fail():
-            _vec_string([['abc', 'def']], np.integer, 'find', (['a', 'd', 'j'],))
+            _vec_string([['abc', 'def']], np.int_, 'find', (['a', 'd', 'j'],))
 
         assert_raises(ValueError, fail)
 
diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py
index 01b35ec90..a89fc70d5 100644
--- a/numpy/core/tests/test_deprecations.py
+++ b/numpy/core/tests/test_deprecations.py
@@ -547,3 +547,32 @@ def test_deprecate_ragged_arrays():
     with assert_warns(np.VisibleDeprecationWarning):
         np.array(arg)
 
+
+class TestAbstractDTypeCoercion(_DeprecationTestCase):
+    # 2020-02-06 1.19.0
+    message = "Converting .* to a dtype .*is deprecated"
+    deprecated_types = [
+        # The builtin scalar super types:
+        np.generic, np.flexible, np.number,
+        np.inexact, np.floating, np.complexfloating,
+        np.integer, np.unsignedinteger, np.signedinteger,
+        # character is a deprecated S1 special case:
+        np.character,
+        # Test python types that do not map to a NumPy type cleanly
+        # (currently map to object)
+        type, list, tuple, dict,
+    ]
+
+    def test_dtype_coercion(self):
+        for scalar_type in self.deprecated_types:
+            self.assert_deprecated(np.dtype, args=(scalar_type,))
+
+    def test_array_construction(self):
+        for scalar_type in self.deprecated_types:
+            self.assert_deprecated(np.array, args=([], scalar_type,))
+
+    def test_not_deprecated(self):
+        # All specific types are not deprecated:
+        for group in np.sctypes.values():
+            for scalar_type in group:
+                self.assert_not_deprecated(np.dtype, args=(scalar_type,))
diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py
index ad38911cb..f6181c900 100644
--- a/numpy/core/tests/test_multiarray.py
+++ b/numpy/core/tests/test_multiarray.py
@@ -6456,7 +6456,7 @@ class TestRepeat:
 NEIGH_MODE = {'zero': 0, 'one': 1, 'constant': 2, 'circular': 3, 'mirror': 4}
 
 
-@pytest.mark.parametrize('dt', [float, Decimal], ids=['float', 'object'])
+@pytest.mark.parametrize('dt', [float, object], ids=['float', 'object'])
 class TestNeighborhoodIter:
     # Simple, 2d tests
     def test_simple2d(self, dt):
diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py
index 3bc4cd187..0f7ac036f 100644
--- a/numpy/core/tests/test_numeric.py
+++ b/numpy/core/tests/test_numeric.py
@@ -2543,7 +2543,7 @@ class TestCorrelate:
         assert_array_almost_equal(z, self.zs)
 
     def test_object(self):
-        self._setup(Decimal)
+        self._setup(object)
         z = np.correlate(self.x, self.y, 'full')
         assert_array_almost_equal(z, self.z1)
         z = np.correlate(self.y, self.x, 'full')
diff --git a/numpy/core/tests/test_numerictypes.py b/numpy/core/tests/test_numerictypes.py
index c72d13947..22fc2ed58 100644
--- a/numpy/core/tests/test_numerictypes.py
+++ b/numpy/core/tests/test_numerictypes.py
@@ -3,7 +3,8 @@ import itertools
 
 import pytest
 import numpy as np
-from numpy.testing import assert_, assert_equal, assert_raises, IS_PYPY
+from numpy.testing import (
+    assert_, assert_equal, assert_raises, assert_warns, IS_PYPY)
 
 # This is the structure of the table used for plain objects:
 #
@@ -451,8 +452,11 @@ class Test_sctype2char:
 
     def test_other_type(self):
         assert_equal(np.sctype2char(float), 'd')
-        assert_equal(np.sctype2char(list), 'O')
-        assert_equal(np.sctype2char(np.ndarray), 'O')
+        assert_equal(np.sctype2char(object), 'O')
+        with assert_warns(DeprecationWarning):
+            assert_equal(np.sctype2char(list), 'O')
+        with assert_warns(DeprecationWarning):
+            assert_equal(np.sctype2char(np.ndarray), 'O')
 
     def test_third_party_scalar_type(self):
         from numpy.core._rational_tests import rational
diff --git a/numpy/core/tests/test_regression.py b/numpy/core/tests/test_regression.py
index 321723b9b..50a543625 100644
--- a/numpy/core/tests/test_regression.py
+++ b/numpy/core/tests/test_regression.py
@@ -1152,7 +1152,7 @@ class TestRegression:
         assert_(dat.argmax(1).info == 'jubba')
         assert_(dat.argmin(1).info == 'jubba')
         assert_(dat.argsort(1).info == 'jubba')
-        assert_(dat.astype(TestArray).info == 'jubba')
+        assert_(dat.astype(object).info == 'jubba')
         assert_(dat.byteswap().info == 'jubba')
         assert_(dat.clip(2, 7).info == 'jubba')
         assert_(dat.compress([0, 1, 1]).info == 'jubba')
diff --git a/numpy/lib/_iotools.py b/numpy/lib/_iotools.py
index 251d2d2a7..48d130bac 100644
--- a/numpy/lib/_iotools.py
+++ b/numpy/lib/_iotools.py
@@ -504,18 +504,23 @@ class StringConverter:
     """
     #
     _mapper = [(nx.bool_, str2bool, False),
-               (nx.integer, int, -1)]
+               (nx.int_, int, -1),]
 
     # On 32-bit systems, we need to make sure that we explicitly include
-    # nx.int64 since ns.integer is nx.int32.
-    if nx.dtype(nx.integer).itemsize < nx.dtype(nx.int64).itemsize:
+    # nx.int64 since nx.int_ is nx.int32.
+    if nx.dtype(nx.int_).itemsize < nx.dtype(nx.int64).itemsize:
         _mapper.append((nx.int64, int, -1))
 
-    _mapper.extend([(nx.floating, float, nx.nan),
-                    (nx.complexfloating, complex, nx.nan + 0j),
+    _mapper.extend([(nx.float64, float, nx.nan),
+                    (nx.complex128, complex, nx.nan + 0j),
                     (nx.longdouble, nx.longdouble, nx.nan),
                     (nx.unicode_, asunicode, '???'),
-                    (nx.string_, asbytes, '???')])
+                    (nx.string_, asbytes, '???'),
+                    # If a non-default dtype is passed, fall back to generic
+                    # ones (should only be used for the converter)
+                    (nx.integer, int, -1),
+                    (nx.floating, float, nx.nan),
+                    (nx.complexfloating, complex, nx.nan + 0j),])
 
     (_defaulttype, _defaultfunc, _defaultfill) = zip(*_mapper)
 
diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py
index 7953de15d..3fb27bbbe 100644
--- a/numpy/lib/tests/test_function_base.py
+++ b/numpy/lib/tests/test_function_base.py
@@ -1899,7 +1899,7 @@ class TestCov:
     frequencies = np.array([1, 4, 1])
     x2_repeats = np.array([[0.0], [1.0], [1.0], [1.0], [1.0], [2.0]]).T
     res2 = np.array([[0.4, -0.4], [-0.4, 0.4]])
-    unit_frequencies = np.ones(3, dtype=np.integer)
+    unit_frequencies = np.ones(3, dtype=np.int_)
     weights = np.array([1.0, 4.0, 1.0])
     res3 = np.array([[2. / 3., -2. / 3.], [-2. / 3., 2. / 3.]])
     unit_weights = np.ones(3)
@@ -1952,11 +1952,11 @@ class TestCov:
                         self.res1)
         nonint = self.frequencies + 0.5
         assert_raises(TypeError, cov, self.x1, fweights=nonint)
-        f = np.ones((2, 3), dtype=np.integer)
+        f = np.ones((2, 3), dtype=np.int_)
         assert_raises(RuntimeError, cov, self.x1, fweights=f)
-        f = np.ones(2, dtype=np.integer)
+        f = np.ones(2, dtype=np.int_)
         assert_raises(RuntimeError, cov, self.x1, fweights=f)
-        f = -1 * np.ones(3, dtype=np.integer)
+        f = -1 * np.ones(3, dtype=np.int_)
         assert_raises(ValueError, cov, self.x1, fweights=f)
 
     def test_aweights(self):
diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py
index 436cd1d24..db9f35f2a 100644
--- a/numpy/lib/tests/test_io.py
+++ b/numpy/lib/tests/test_io.py
@@ -2332,7 +2332,7 @@ M   33  21.99
 
         assert_(test.dtype['f0'] == float)
         assert_(test.dtype['f1'] == np.int64)
-        assert_(test.dtype['f2'] == np.integer)
+        assert_(test.dtype['f2'] == np.int_)
 
         assert_allclose(test['f0'], 73786976294838206464.)
         assert_equal(test['f1'], 17179869184)
author	Sebastian Berg <sebastian@sipsolutions.net>	2020-02-03 16:17:26 -0800
committer	Sebastian Berg <sebastian@sipsolutions.net>	2020-02-06 20:10:40 -0800
commit	1a1611a33cfb5ea50d16d20affa5c6fa03e148d7 (patch)
tree	fb55b590501702b096a1fb3833c90589de1d86bb
parent	dae4f67c797176c66281101be8f3b4d6c424735c (diff)
download	numpy-1a1611a33cfb5ea50d16d20affa5c6fa03e148d7.tar.gz