author    Sebastian Berg <sebastian@sipsolutions.net>  2022-02-21 16:18:54 -0600
committer Sebastian Berg <sebastian@sipsolutions.net>  2022-05-09 17:13:13 +0200
commit    9de1ca82f835ce41c027af0f68ecdf4015809cb6 (patch)
tree      fec59614af178fd71e7773699b7663d176eb90f4 /numpy
parent    f376d23acc05dcebc4eeb501073b5e2a50f0af4f (diff)
download  numpy-9de1ca82f835ce41c027af0f68ecdf4015809cb6.tar.gz
BUG,MAINT: Never use negative offset and tighten promote-types
Negative offsets in structured dtypes can, in principle, give indirect access outside of the original data, so we should always reject them (as of now this is practically unused in NumPy).

Further, `np.result_type` and `np.promote_types` should behave the same as canonicalization when given the same input twice (at least mostly; metadata is a rare exception). This was not the case, because promote-types had a fast path that returned the wrong result for structured dtypes. Thus, reject structured dtypes from the fast path and limit it to builtin types for now.
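As an aside (not part of the commit), a minimal sketch of the invariant the new tests below assert, assuming the current behaviour that single-argument promotion canonicalizes structured dtypes; the padded layout is only illustrative:

    import numpy as np

    # Structured dtype with 2 bytes of leading padding before field "a".
    padded = np.dtype({"names": ["a"], "formats": ["i4"], "offsets": [2]})

    # Single-argument promotion canonicalizes, dropping the padding.
    canonical = np.result_type(padded)

    # With this change, promoting a dtype with itself must agree with the
    # canonicalized result; the old fast path returned `padded` unchanged.
    promoted = np.promote_types(padded, padded)
    assert np.can_cast(promoted, canonical, casting="no")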
Diffstat (limited to 'numpy')
-rw-r--r--  numpy/core/src/multiarray/array_method.c      |  2
-rw-r--r--  numpy/core/src/multiarray/arrayobject.c       |  4
-rw-r--r--  numpy/core/src/multiarray/convert_datatype.c  | 33
-rw-r--r--  numpy/core/tests/test_casting_unittests.py    | 24
-rw-r--r--  numpy/core/tests/test_dtype.py                | 11
5 files changed, 42 insertions(+), 32 deletions(-)
diff --git a/numpy/core/src/multiarray/array_method.c b/numpy/core/src/multiarray/array_method.c
index a56781527..b8a190f83 100644
--- a/numpy/core/src/multiarray/array_method.c
+++ b/numpy/core/src/multiarray/array_method.c
@@ -588,7 +588,7 @@ boundarraymethod__resolve_descripors(
return NULL;
}
else if (casting < 0) {
- return Py_BuildValue("iO", casting, Py_None, Py_None);
+ return Py_BuildValue("iOO", casting, Py_None, Py_None);
}
PyObject *result_tuple = PyTuple_New(nin + nout);
diff --git a/numpy/core/src/multiarray/arrayobject.c b/numpy/core/src/multiarray/arrayobject.c
index 0cbbc44e7..b01d87216 100644
--- a/numpy/core/src/multiarray/arrayobject.c
+++ b/numpy/core/src/multiarray/arrayobject.c
@@ -1396,8 +1396,6 @@ array_richcompare(PyArrayObject *self, PyObject *other, int cmp_op)
*/
if (PyArray_TYPE(self) == NPY_VOID) {
- int _res;
-
array_other = (PyArrayObject *)PyArray_FROM_O(other);
/*
* If not successful, indicate that the items cannot be compared
@@ -1430,8 +1428,6 @@ array_richcompare(PyArrayObject *self, PyObject *other, int cmp_op)
*/
if (PyArray_TYPE(self) == NPY_VOID) {
- int _res;
-
array_other = (PyArrayObject *)PyArray_FROM_O(other);
/*
* If not successful, indicate that the items cannot be compared
diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c
index cf8a20db2..557bb913b 100644
--- a/numpy/core/src/multiarray/convert_datatype.c
+++ b/numpy/core/src/multiarray/convert_datatype.c
@@ -1074,7 +1074,10 @@ PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2)
PyArray_Descr *res;
/* Fast path for identical inputs (NOTE: This path preserves metadata!) */
- if (type1 == type2 && PyArray_ISNBO(type1->byteorder)) {
+ if (type1 == type2
+ /* Use for builtin except void, void has no reliable byteorder */
+ && type1->type_num > 0 && type1->type_num < NPY_NTYPES
+ && PyArray_ISNBO(type1->byteorder) && type1->type_num != NPY_VOID) {
Py_INCREF(type1);
return type1;
}
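A side note on the fast path kept above: for identical builtin inputs it still returns the first operand unchanged, which is what preserves dtype metadata (the "rare exception" mentioned in the commit message). A minimal, illustrative sketch:

    import numpy as np

    dt = np.dtype("f8", metadata={"unit": "m"})
    # Identical builtin, native-byteorder inputs take the fast path, so
    # metadata attached to the input survives promotion.
    assert np.promote_types(dt, dt).metadata == {"unit": "m"}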
@@ -2973,7 +2976,7 @@ nonstructured_to_structured_resolve_descriptors(
*view_offset = field_view_off - to_off;
}
}
- if (PyTuple_Size(given_descrs[1]->names) != 1) {
+ if (PyTuple_Size(given_descrs[1]->names) != 1 || *view_offset < 0) {
/*
* Assume that a view is impossible when there is more than one
* field. (Fields could overlap, but that seems weird...)
@@ -3359,23 +3362,19 @@ can_cast_fields_safety(
}
}
- /*
- * If the itemsize (includes padding at the end), does not match,
- * this is not a "no" cast (identical dtypes) and may not be viewable.
- */
- if (from->elsize != to->elsize) {
- /*
- * The itemsize may mismatch even if all fields and formats match
- * (due to additional padding).
- */
+ if (*view_offset != 0 || from->elsize != to->elsize) {
+ /* Can never be considered "no" casting. */
casting = PyArray_MinCastSafety(casting, NPY_EQUIV_CASTING);
- if (from->elsize < to->elsize) {
- *view_offset = NPY_MIN_INTP;
- }
}
- else if (*view_offset != 0) {
- /* If the calculated `view_offset` is not 0, it can only be "equiv" */
- casting = PyArray_MinCastSafety(casting, NPY_EQUIV_CASTING);
+
+ /* The new dtype may have access outside the old one due to padding: */
+ if (*view_offset < 0) {
+ /* negative offsets would give indirect access before original dtype */
+ *view_offset = NPY_MIN_INTP;
+ }
+ if (from->elsize < to->elsize + *view_offset) {
+ /* new dtype has indirect access outside of the original dtype */
+ *view_offset = NPY_MIN_INTP;
}
return casting;
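The two checks added at the end of `can_cast_fields_safety` can be summarized in Python as follows (a sketch only; the helper name and the `None` sentinel standing in for `NPY_MIN_INTP` are not NumPy API):

    NOT_VIEWABLE = None  # stands in for NPY_MIN_INTP in the C code


    def checked_view_offset(from_itemsize, to_itemsize, view_offset):
        """Keep a computed view offset only if the view stays inside the
        original item; otherwise mark the cast as not viewable."""
        if view_offset < 0:
            # Would read bytes *before* the original item.
            return NOT_VIEWABLE
        if from_itemsize < to_itemsize + view_offset:
            # The shifted view would read bytes *past* the original item.
            return NOT_VIEWABLE
        return view_offset


    # {"a": i4 at offset 2} (itemsize 6) viewed as plain "i4": offset 2 fits.
    assert checked_view_offset(6, 4, 2) == 2
    # Plain "i4" viewed as that struct would need offset -2: rejected.
    assert checked_view_offset(4, 6, -2) is NOT_VIEWABLE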
diff --git a/numpy/core/tests/test_casting_unittests.py b/numpy/core/tests/test_casting_unittests.py
index a57e46fd0..5c5ff55b4 100644
--- a/numpy/core/tests/test_casting_unittests.py
+++ b/numpy/core/tests/test_casting_unittests.py
@@ -715,23 +715,25 @@ class TestCasting:
@pytest.mark.parametrize(["to_dt", "expected_off"],
[ # Same as `from_dt` but with both fields shifted:
(np.dtype({"names": ["a", "b"], "formats": ["i4", "f4"],
- "offsets": [2, 6]}), -2),
+ "offsets": [0, 4]}), 2),
# Additional change of the names
- # TODO: Tests will need changing for order vs. name based casting:
- (np.dtype({"names": ["b", "a"], "formats": ["f4", "i4"],
- "offsets": [6, 2]}), -2),
- # Incompatible field offset change (offsets -2 and 0)
- (np.dtype({"names": ["b", "a"], "formats": ["f4", "i4"],
- "offsets": [6, 0]}), None)])
+ (np.dtype({"names": ["b", "a"], "formats": ["i4", "f4"],
+ "offsets": [0, 4]}), 2),
+ # Incompatible field offset change
+ (np.dtype({"names": ["b", "a"], "formats": ["i4", "f4"],
+ "offsets": [0, 6]}), None)])
def test_structured_field_offsets(self, to_dt, expected_off):
# This checks the cast-safety and view offset for swapped and "shifted"
# fields which are viewable
from_dt = np.dtype({"names": ["a", "b"],
"formats": ["i4", "f4"],
- "offsets": [0, 4]})
+ "offsets": [2, 6]})
cast = get_castingimpl(type(from_dt), type(to_dt))
safety, _, view_off = cast._resolve_descriptors((from_dt, to_dt))
- assert safety == Casting.equiv
+ if from_dt.names == to_dt.names:
+ assert safety == Casting.equiv
+ else:
+ assert safety == Casting.safe
# Shifting the original data pointer by -2 will align both by
# effectively adding 2 bytes of spacing before `from_dt`.
assert view_off == expected_off
@@ -742,7 +744,9 @@ class TestCasting:
("(1,1)i", "i", 0),
("(2,1)i", "(2,1)i", 0),
# field cases (field to field is tested explicitly also):
- ("i", dict(names=["a"], formats=["i"], offsets=[2]), -2),
+ # Not considered viewable, because a negative offset would allow
+ # the structured dtype to indirectly access invalid memory.
+ ("i", dict(names=["a"], formats=["i"], offsets=[2]), None),
(dict(names=["a"], formats=["i"], offsets=[2]), "i", 2),
# Currently considered not viewable, due to multiple fields
# even though they overlap (maybe we should not allow that?)
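To make the new expectations concrete: with the updated `from_dt` (fields at offsets 2 and 6) and the first `to_dt` (fields at 0 and 4), the cast is a view of the same bytes starting two bytes into each source item, while the opposite direction would have to start before the buffer and is now rejected. A small sketch using only public API (field values are arbitrary):

    import numpy as np

    from_dt = np.dtype({"names": ["a", "b"], "formats": ["i4", "f4"],
                        "offsets": [2, 6]})   # itemsize 10, 2 bytes of padding first
    to_dt = np.dtype({"names": ["a", "b"], "formats": ["i4", "f4"],
                      "offsets": [0, 4]})     # packed, itemsize 8

    buf = np.zeros(1, from_dt)
    buf["a"] = 123
    buf["b"] = 0.5

    # Re-reading the same bytes 2 bytes further in with the packed layout
    # recovers both fields; this is what a view offset of 2 expresses.
    shifted = np.frombuffer(buf.tobytes(), dtype=to_dt, offset=2, count=1)
    assert shifted["a"][0] == 123 and shifted["b"][0] == 0.5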
diff --git a/numpy/core/tests/test_dtype.py b/numpy/core/tests/test_dtype.py
index c98297a04..e91aeeb3f 100644
--- a/numpy/core/tests/test_dtype.py
+++ b/numpy/core/tests/test_dtype.py
@@ -1157,6 +1157,9 @@ class TestDTypeMakeCanonical:
def test_make_canonical_hypothesis(self, dtype):
canonical = np.result_type(dtype)
self.check_canonical(dtype, canonical)
+ # np.result_type with two arguments should always give identical results:
+ two_arg_result = np.result_type(dtype, dtype)
+ assert np.can_cast(two_arg_result, canonical, casting="no")
@pytest.mark.slow
@hypothesis.given(
@@ -1171,6 +1174,10 @@ class TestDTypeMakeCanonical:
assert dtype_with_empty_space.itemsize == dtype.itemsize
canonicalized = np.result_type(dtype_with_empty_space)
self.check_canonical(dtype_with_empty_space, canonicalized)
+ # np.promote_types with two arguments should always give identical results:
+ two_arg_result = np.promote_types(
+ dtype_with_empty_space, dtype_with_empty_space)
+ assert np.can_cast(two_arg_result, canonicalized, casting="no")
# Ensure that we also check aligned struct (check the opposite, in
# case hypothesis grows support for `align`. Then repeat the test:
@@ -1179,6 +1186,10 @@ class TestDTypeMakeCanonical:
assert dtype_with_empty_space.itemsize == dtype_aligned.itemsize
canonicalized = np.result_type(dtype_with_empty_space)
self.check_canonical(dtype_with_empty_space, canonicalized)
+ # np.promote_types with two arguments should always give identical results:
+ two_arg_result = np.promote_types(
+ dtype_with_empty_space, dtype_with_empty_space)
+ assert np.can_cast(two_arg_result, canonicalized, casting="no")
class TestPickling: