Merge pull request #15534 from seberg/deprecate-abstract-scalar-types

DEP: Do not allow "abstract" dtype conversion/creation
author: Matti Picus <matti.picus@gmail.com> 2020-03-06 00:23:43 +0200
committer: GitHub <noreply@github.com> 2020-03-06 00:23:43 +0200
commit: ff4cfe7ecd46ee15fd88297964f6a5cb5423c291 (patch)
tree: ab318ae312db68d4329e23ade0457a9f026dbb4c
parent: 901211eddd5e17c8c7c85bc5f791679559d4efb5 (diff)
parent: a5a653d25e7329b9366be6f44b052d41bf297b0f (diff)
download: numpy-ff4cfe7ecd46ee15fd88297964f6a5cb5423c291.tar.gz
10 files changed, 141 insertions, 23 deletions
diff --git a/doc/release/upcoming_changes/15534.deprecation.rst b/doc/release/upcoming_changes/15534.deprecation.rst
new file mode 100644
index 000000000..243e224ba
--- /dev/null
+++ b/doc/release/upcoming_changes/15534.deprecation.rst
@@ -0,0 +1,11 @@
+Converting certain types to dtypes is Deprecated
+------------------------------------------------
+The super classes of scalar types, such as ``np.integer``, ``np.generic``,
+or ``np.inexact`` will now give a deprecation warning when converted
+to a dtype (or used in a dtype keyword argument).
+The reason for this is that ``np.integer`` is converted to ``np.int_``,
+while it would be expected to represent *any* integer (e.g. also
+``int8``, ``int16``, etc.).
+For example, ``dtype=np.floating`` is currently identical to
+``dtype=np.float64``, even though also ``np.float32`` is a subclass of
+``np.floating``.
diff --git a/doc/source/reference/arrays.dtypes.rst b/doc/source/reference/arrays.dtypes.rst
index e97ee3c3a..f8d40b13c 100644
--- a/doc/source/reference/arrays.dtypes.rst
+++ b/doc/source/reference/arrays.dtypes.rst
@@ -156,6 +156,14 @@ Array-scalar types
 
 Generic types
 
+    .. deprecated:: 1.19
+
+        The use of generic types is deprecated. This is because it can be
+        unexpected in a context such as
+        ``arr.astype(dtype=np.floating)``, which casts an array of
+        ``float32`` to an array of ``float64``, even though ``float32`` is
+        a subdtype of ``np.floating``.
+
     The generic hierarchical type objects convert to corresponding
     type objects according to the associations:
 
@@ -179,8 +187,7 @@ Built-in Python types
     :class:`float`    :class:`float\_`
     :class:`complex`  :class:`cfloat`
     :class:`bytes`    :class:`bytes\_`
-    :class:`str`      :class:`bytes\_` (Python2) or :class:`unicode\_` (Python3)
-    :class:`unicode`  :class:`unicode\_`
+    :class:`str`      :class:`str\_`
     :class:`buffer`   :class:`void`
     (all others)      :class:`object_`
     ================  ===============
@@ -196,6 +203,11 @@ Built-in Python types
        >>> dt = np.dtype(int)     # Python-compatible integer
        >>> dt = np.dtype(object)  # Python object
 
+    .. note::
+
+        All other types map to ``object_`` for convenience. Code should expect
+        that such types may map to a specific (new) dtype in the future.
+
 Types with ``.dtype``
 
     Any type object with a ``dtype`` attribute: The attribute will be
diff --git a/numpy/core/defchararray.py b/numpy/core/defchararray.py
index b22d6b85e..1292b738c 100644
--- a/numpy/core/defchararray.py
+++ b/numpy/core/defchararray.py
@@ -17,7 +17,8 @@ The preferred alias for `defchararray` is `numpy.char`.
 """
 import functools
 import sys
-from .numerictypes import string_, unicode_, integer, object_, bool_, character
+from .numerictypes import (
+    string_, unicode_, integer, int_, object_, bool_, character)
 from .numeric import ndarray, compare_chararrays
 from .numeric import array as narray
 from numpy.core.multiarray import _vec_string
@@ -276,7 +277,10 @@ def str_len(a):
     --------
     builtins.len
     """
-    return _vec_string(a, integer, '__len__')
+    # Note: __len__, etc. currently return ints, which are not C-integers.
+    # Generally intp would be expected for lengths, although int is sufficient
+    # due to the dtype itemsize limitation.
+    return _vec_string(a, int_, '__len__')
 
 
 @array_function_dispatch(_binary_op_dispatcher)
@@ -500,7 +504,7 @@ def count(a, sub, start=0, end=None):
     array([1, 0, 0])
 
     """
-    return _vec_string(a, integer, 'count', [sub, start] + _clean_args(end))
+    return _vec_string(a, int_, 'count', [sub, start] + _clean_args(end))
 
 
 def _code_dispatcher(a, encoding=None, errors=None):
@@ -710,7 +714,7 @@ def find(a, sub, start=0, end=None):
 
     """
     return _vec_string(
-        a, integer, 'find', [sub, start] + _clean_args(end))
+        a, int_, 'find', [sub, start] + _clean_args(end))
 
 
 @array_function_dispatch(_count_dispatcher)
@@ -739,7 +743,7 @@ def index(a, sub, start=0, end=None):
 
     """
     return _vec_string(
-        a, integer, 'index', [sub, start] + _clean_args(end))
+        a, int_, 'index', [sub, start] + _clean_args(end))
 
 
 @array_function_dispatch(_unary_op_dispatcher)
@@ -1199,7 +1203,7 @@ def rfind(a, sub, start=0, end=None):
 
     """
     return _vec_string(
-        a, integer, 'rfind', [sub, start] + _clean_args(end))
+        a, int_, 'rfind', [sub, start] + _clean_args(end))
 
 
 @array_function_dispatch(_count_dispatcher)
@@ -1229,7 +1233,7 @@ def rindex(a, sub, start=0, end=None):
 
     """
     return _vec_string(
-        a, integer, 'rindex', [sub, start] + _clean_args(end))
+        a, int_, 'rindex', [sub, start] + _clean_args(end))
 
 
 @array_function_dispatch(_just_dispatcher)
diff --git a/numpy/core/src/multiarray/descriptor.c b/numpy/core/src/multiarray/descriptor.c
index 215c8b0ab..b26a26abf 100644
--- a/numpy/core/src/multiarray/descriptor.c
+++ b/numpy/core/src/multiarray/descriptor.c
@@ -1399,14 +1399,25 @@ _convert_from_type(PyObject *obj) {
         return PyArray_DescrFromType(NPY_BOOL);
     }
     else if (typ == &PyBytes_Type) {
+        /*
+         * TODO: This should be deprecated, and have special handling for
+         *       dtype=bytes/"S" in coercion: It should not rely on "S0".
+         */
         return PyArray_DescrFromType(NPY_STRING);
     }
     else if (typ == &PyUnicode_Type) {
+        /*
+         * TODO: This should be deprecated, and have special handling for
+         *       dtype=str/"U" in coercion: It should not rely on "U0".
+         */
         return PyArray_DescrFromType(NPY_UNICODE);
     }
     else if (typ == &PyMemoryView_Type) {
         return PyArray_DescrFromType(NPY_VOID);
     }
+    else if (typ == &PyBaseObject_Type) {
+        return PyArray_DescrFromType(NPY_OBJECT);
+    }
     else {
         PyArray_Descr *ret = _try_convert_from_dtype_attr(obj);
         if ((PyObject *)ret != Py_NotImplemented) {
@@ -1425,7 +1436,13 @@ _convert_from_type(PyObject *obj) {
         }
         Py_DECREF(ret);
 
-        /* All other classes are treated as object */
+        /*
+         * All other classes are treated as object. This can be convenient
+         * to convey an intention of using it for a specific python type
+         * and possibly allow converting to a new type-specific dtype in
+         * the future. It may make sense to only allow this within the
+         * `dtype=...` keyword argument context in the future.
+         */
         return PyArray_DescrFromType(NPY_OBJECT);
     }
 }
diff --git a/numpy/core/src/multiarray/scalarapi.c b/numpy/core/src/multiarray/scalarapi.c
index 6d3276e18..8a7139fb2 100644
--- a/numpy/core/src/multiarray/scalarapi.c
+++ b/numpy/core/src/multiarray/scalarapi.c
@@ -433,23 +433,59 @@ PyArray_DescrFromTypeObject(PyObject *type)
     if ((type == (PyObject *) &PyNumberArrType_Type) ||
             (type == (PyObject *) &PyInexactArrType_Type) ||
             (type == (PyObject *) &PyFloatingArrType_Type)) {
+        if (DEPRECATE("Converting `np.inexact` or `np.floating` to "
+                      "a dtype is deprecated. The current result is `float64` "
+                      "which is not strictly correct.") < 0) {
+            return NULL;
+        }
         typenum = NPY_DOUBLE;
     }
     else if (type == (PyObject *)&PyComplexFloatingArrType_Type) {
+        if (DEPRECATE("Converting `np.complex` to a dtype is deprecated. "
+                      "The current result is `complex128` which is not "
+                      "strictly correct.") < 0) {
+            return NULL;
+        }
         typenum = NPY_CDOUBLE;
     }
     else if ((type == (PyObject *)&PyIntegerArrType_Type) ||
             (type == (PyObject *)&PySignedIntegerArrType_Type)) {
+        if (DEPRECATE("Converting `np.integer` or `np.signedinteger` to "
+                      "a dtype is deprecated. The current result is "
+                      "`np.dtype(np.int_)` which is not strictly correct. "
+                      "Note that the result depends on the system. To ensure "
+                      "stable results use may want to use `np.int64` or "
+                      "`np.int32`.") < 0) {
+            return NULL;
+        }
         typenum = NPY_LONG;
     }
     else if (type == (PyObject *) &PyUnsignedIntegerArrType_Type) {
+        if (DEPRECATE("Converting `np.unsignedinteger` to a dtype is "
+                      "deprecated. The current result is `np.dtype(np.uint)` "
+                      "which is not strictly correct. Note that the result "
+                      "depends on the system. To ensure stable results you may "
+                      "want to use `np.uint64` or `np.uint32`.") < 0) {
+            return NULL;
+        }
         typenum = NPY_ULONG;
     }
     else if (type == (PyObject *) &PyCharacterArrType_Type) {
+        if (DEPRECATE("Converting `np.character` to a dtype is deprecated. "
+                      "The current result is `np.dtype(np.str_)` "
+                      "which is not strictly correct. Note that `np.character` "
+                      "is generally deprecated and 'S1' should be used.") < 0) {
+            return NULL;
+        }
         typenum = NPY_STRING;
     }
     else if ((type == (PyObject *) &PyGenericArrType_Type) ||
             (type == (PyObject *) &PyFlexibleArrType_Type)) {
+        if (DEPRECATE("Converting `np.generic` to a dtype is "
+                      "deprecated. The current result is `np.dtype(np.void)` "
+                      "which is not strictly correct.") < 0) {
+            return NULL;
+        }
         typenum = NPY_VOID;
     }
 
@@ -559,6 +595,9 @@ PyArray_DescrFromScalar(PyObject *sc)
     }
 
     descr = PyArray_DescrFromTypeObject((PyObject *)Py_TYPE(sc));
+    if (descr == NULL) {
+        return NULL;
+    }
     if (PyDataType_ISUNSIZED(descr)) {
         PyArray_DESCR_REPLACE(descr);
         type_num = descr->type_num;
diff --git a/numpy/core/tests/test_defchararray.py b/numpy/core/tests/test_defchararray.py
index 39600553d..bbb94f7d3 100644
--- a/numpy/core/tests/test_defchararray.py
+++ b/numpy/core/tests/test_defchararray.py
@@ -119,14 +119,14 @@ class TestVecString:
     def test_invalid_result_type(self):
 
         def fail():
-            _vec_string(['a'], np.integer, 'strip')
+            _vec_string(['a'], np.int_, 'strip')
 
         assert_raises(TypeError, fail)
 
     def test_broadcast_error(self):
 
         def fail():
-            _vec_string([['abc', 'def']], np.integer, 'find', (['a', 'd', 'j'],))
+            _vec_string([['abc', 'def']], np.int_, 'find', (['a', 'd', 'j'],))
 
         assert_raises(ValueError, fail)
 
diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py
index 01b35ec90..d2cf315a9 100644
--- a/numpy/core/tests/test_deprecations.py
+++ b/numpy/core/tests/test_deprecations.py
@@ -547,3 +547,33 @@ def test_deprecate_ragged_arrays():
     with assert_warns(np.VisibleDeprecationWarning):
         np.array(arg)
 
+
+class TestDTypeCoercion(_DeprecationTestCase):
+    # 2020-02-06 1.19.0
+    message = "Converting .* to a dtype .*is deprecated"
+    deprecated_types = [
+        # The builtin scalar super types:
+        np.generic, np.flexible, np.number,
+        np.inexact, np.floating, np.complexfloating,
+        np.integer, np.unsignedinteger, np.signedinteger,
+        # character is a deprecated S1 special case:
+        np.character,
+    ]
+
+    def test_dtype_coercion(self):
+        for scalar_type in self.deprecated_types:
+            self.assert_deprecated(np.dtype, args=(scalar_type,))
+
+    def test_array_construction(self):
+        for scalar_type in self.deprecated_types:
+            self.assert_deprecated(np.array, args=([], scalar_type,))
+
+    def test_not_deprecated(self):
+        # All specific types are not deprecated:
+        for group in np.sctypes.values():
+            for scalar_type in group:
+                self.assert_not_deprecated(np.dtype, args=(scalar_type,))
+
+        for scalar_type in [type, dict, list, tuple]:
+            # Typical python types are coerced to object currently:
+            self.assert_not_deprecated(np.dtype, args=(scalar_type,))
diff --git a/numpy/lib/_iotools.py b/numpy/lib/_iotools.py
index 251d2d2a7..48d130bac 100644
--- a/numpy/lib/_iotools.py
+++ b/numpy/lib/_iotools.py
@@ -504,18 +504,23 @@ class StringConverter:
     """
     #
     _mapper = [(nx.bool_, str2bool, False),
-               (nx.integer, int, -1)]
+               (nx.int_, int, -1),]
 
     # On 32-bit systems, we need to make sure that we explicitly include
-    # nx.int64 since ns.integer is nx.int32.
-    if nx.dtype(nx.integer).itemsize < nx.dtype(nx.int64).itemsize:
+    # nx.int64 since nx.int_ is nx.int32.
+    if nx.dtype(nx.int_).itemsize < nx.dtype(nx.int64).itemsize:
         _mapper.append((nx.int64, int, -1))
 
-    _mapper.extend([(nx.floating, float, nx.nan),
-                    (nx.complexfloating, complex, nx.nan + 0j),
+    _mapper.extend([(nx.float64, float, nx.nan),
+                    (nx.complex128, complex, nx.nan + 0j),
                     (nx.longdouble, nx.longdouble, nx.nan),
                     (nx.unicode_, asunicode, '???'),
-                    (nx.string_, asbytes, '???')])
+                    (nx.string_, asbytes, '???'),
+                    # If a non-default dtype is passed, fall back to generic
+                    # ones (should only be used for the converter)
+                    (nx.integer, int, -1),
+                    (nx.floating, float, nx.nan),
+                    (nx.complexfloating, complex, nx.nan + 0j),])
 
     (_defaulttype, _defaultfunc, _defaultfill) = zip(*_mapper)
 
diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py
index 83fbd8bcc..751a7a212 100644
--- a/numpy/lib/tests/test_function_base.py
+++ b/numpy/lib/tests/test_function_base.py
@@ -1899,7 +1899,7 @@ class TestCov:
     frequencies = np.array([1, 4, 1])
     x2_repeats = np.array([[0.0], [1.0], [1.0], [1.0], [1.0], [2.0]]).T
     res2 = np.array([[0.4, -0.4], [-0.4, 0.4]])
-    unit_frequencies = np.ones(3, dtype=np.integer)
+    unit_frequencies = np.ones(3, dtype=np.int_)
     weights = np.array([1.0, 4.0, 1.0])
     res3 = np.array([[2. / 3., -2. / 3.], [-2. / 3., 2. / 3.]])
     unit_weights = np.ones(3)
@@ -1952,11 +1952,11 @@ class TestCov:
                         self.res1)
         nonint = self.frequencies + 0.5
         assert_raises(TypeError, cov, self.x1, fweights=nonint)
-        f = np.ones((2, 3), dtype=np.integer)
+        f = np.ones((2, 3), dtype=np.int_)
         assert_raises(RuntimeError, cov, self.x1, fweights=f)
-        f = np.ones(2, dtype=np.integer)
+        f = np.ones(2, dtype=np.int_)
         assert_raises(RuntimeError, cov, self.x1, fweights=f)
-        f = -1 * np.ones(3, dtype=np.integer)
+        f = -1 * np.ones(3, dtype=np.int_)
         assert_raises(ValueError, cov, self.x1, fweights=f)
 
     def test_aweights(self):
diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py
index 436cd1d24..db9f35f2a 100644
--- a/numpy/lib/tests/test_io.py
+++ b/numpy/lib/tests/test_io.py
@@ -2332,7 +2332,7 @@ M   33  21.99
 
         assert_(test.dtype['f0'] == float)
         assert_(test.dtype['f1'] == np.int64)
-        assert_(test.dtype['f2'] == np.integer)
+        assert_(test.dtype['f2'] == np.int_)
 
         assert_allclose(test['f0'], 73786976294838206464.)
         assert_equal(test['f1'], 17179869184)
author	Matti Picus <matti.picus@gmail.com>	2020-03-06 00:23:43 +0200
committer	GitHub <noreply@github.com>	2020-03-06 00:23:43 +0200
commit	ff4cfe7ecd46ee15fd88297964f6a5cb5423c291 (patch)
tree	ab318ae312db68d4329e23ade0457a9f026dbb4c
parent	901211eddd5e17c8c7c85bc5f791679559d4efb5 (diff)
parent	a5a653d25e7329b9366be6f44b052d41bf297b0f (diff)
download	numpy-ff4cfe7ecd46ee15fd88297964f6a5cb5423c291.tar.gz