Merge pull request #6562 from ahaldane/disable_view_safety_checks

Disable view safety checks
author: Charles Harris <charlesr.harris@gmail.com> 2015-10-26 21:26:57 -0600
committer: Charles Harris <charlesr.harris@gmail.com> 2015-10-26 21:26:57 -0600
commit: 522a0f749d1ac8d0acece68f9c65d46e96529407 (patch)
tree: 8218e17e6ecfc29ff64343c2ad7a91e37fa1075c /numpy/core
parent: 2962abccf92d8c4cf5552de2add81a460fd31c8a (diff)
parent: 796a5f87c11fdc3a345fca5448b27a02856c2e4d (diff)
download: numpy-522a0f749d1ac8d0acece68f9c65d46e96529407.tar.gz
2 files changed, 13 insertions, 207 deletions
diff --git a/numpy/core/_internal.py b/numpy/core/_internal.py
index 3ddc2c64d..47c933411 100644
--- a/numpy/core/_internal.py
+++ b/numpy/core/_internal.py
@@ -306,94 +306,6 @@ def _copy_fields(ary):
                   'formats': [dt.fields[name][0] for name in dt.names]}
     return array(ary, dtype=copy_dtype, copy=True)
 
-def _get_all_field_offsets(dtype, base_offset=0):
-    """ Returns the types and offsets of all fields in a (possibly structured)
-    data type, including nested fields and subarrays.
-
-    Parameters
-    ----------
-    dtype : data-type
-        Data type to extract fields from.
-    base_offset : int, optional
-        Additional offset to add to all field offsets.
-
-    Returns
-    -------
-    fields : list of (data-type, int) pairs
-        A flat list of (dtype, byte offset) pairs.
-
-    """
-    fields = []
-    if dtype.fields is not None:
-        for name in dtype.names:
-            sub_dtype = dtype.fields[name][0]
-            sub_offset = dtype.fields[name][1] + base_offset
-            fields.extend(_get_all_field_offsets(sub_dtype, sub_offset))
-    else:
-        if dtype.shape:
-            sub_offsets = _get_all_field_offsets(dtype.base, base_offset)
-            count = 1
-            for dim in dtype.shape:
-                count *= dim
-            fields.extend((typ, off + dtype.base.itemsize*j)
-                           for j in range(count) for (typ, off) in sub_offsets)
-        else:
-            fields.append((dtype, base_offset))
-    return fields
-
-def _check_field_overlap(new_fields, old_fields):
-    """ Perform object memory overlap tests for two data-types (see
-    _view_is_safe).
-
-    This function checks that new fields only access memory contained in old
-    fields, and that non-object fields are not interpreted as objects and vice
-    versa.
-
-    Parameters
-    ----------
-    new_fields : list of (data-type, int) pairs
-        Flat list of (dtype, byte offset) pairs for the new data type, as
-        returned by _get_all_field_offsets.
-    old_fields: list of (data-type, int) pairs
-        Flat list of (dtype, byte offset) pairs for the old data type, as
-        returned by _get_all_field_offsets.
-
-    Raises
-    ------
-    TypeError
-        If the new fields are incompatible with the old fields
-
-    """
-
-    #first go byte by byte and check we do not access bytes not in old_fields
-    new_bytes = set()
-    for tp, off in new_fields:
-        new_bytes.update(set(range(off, off+tp.itemsize)))
-    old_bytes = set()
-    for tp, off in old_fields:
-        old_bytes.update(set(range(off, off+tp.itemsize)))
-    if new_bytes.difference(old_bytes):
-        raise TypeError("view would access data parent array doesn't own")
-
-    #next check that we do not interpret non-Objects as Objects, and vv
-    obj_offsets = [off for (tp, off) in old_fields if tp.type is object_]
-    obj_size = dtype(object_).itemsize
-
-    for fld_dtype, fld_offset in new_fields:
-        if fld_dtype.type is object_:
-            # check we do not create object views where
-            # there are no objects.
-            if fld_offset not in obj_offsets:
-                raise TypeError("cannot view non-Object data as Object type")
-        else:
-            # next check we do not create non-object views
-            # where there are already objects.
-            # see validate_object_field_overlap for a similar computation.
-            for obj_offset in obj_offsets:
-                if (fld_offset < obj_offset + obj_size and
-                        obj_offset < fld_offset + fld_dtype.itemsize):
-                    raise TypeError("cannot view Object as non-Object type")
-
 def _getfield_is_safe(oldtype, newtype, offset):
     """ Checks safety of getfield for object arrays.
 
@@ -415,10 +327,16 @@ def _getfield_is_safe(oldtype, newtype, offset):
         If the field access is invalid
 
     """
-    new_fields = _get_all_field_offsets(newtype, offset)
-    old_fields = _get_all_field_offsets(oldtype)
-    # raises if there is a problem
-    _check_field_overlap(new_fields, old_fields)
+    if newtype.hasobject or oldtype.hasobject:
+        if offset == 0 and newtype == oldtype:
+            return
+        if oldtype.names:
+            for name in oldtype.names:
+                if (oldtype.fields[name][1] == offset and
+                        oldtype.fields[name][0] == newtype):
+                    return
+        raise TypeError("Cannot get/set field of an object array")
+    return
 
 def _view_is_safe(oldtype, newtype):
     """ Checks safety of a view involving object arrays, for example when
@@ -426,13 +344,6 @@ def _view_is_safe(oldtype, newtype):
 
         np.zeros(10, dtype=oldtype).view(newtype)
 
-    We need to check that
-    1) No memory that is not an object will be interpreted as a object,
-    2) No memory containing an object will be interpreted as an arbitrary type.
-    Both cases can cause segfaults, eg in the case the view is written to.
-    Strategy here is to also disallow views where newtype has any field in a
-    place oldtype doesn't.
-
     Parameters
     ----------
     oldtype : data-type
@@ -452,31 +363,9 @@ def _view_is_safe(oldtype, newtype):
     if oldtype == newtype:
         return
 
-    new_fields = _get_all_field_offsets(newtype)
-    new_size = newtype.itemsize
-
-    old_fields = _get_all_field_offsets(oldtype)
-    old_size = oldtype.itemsize
-
-    # if the itemsizes are not equal, we need to check that all the
-    # 'tiled positions' of the object match up. Here, we allow
-    # for arbirary itemsizes (even those possibly disallowed
-    # due to stride/data length issues).
-    if old_size == new_size:
-        new_num = old_num = 1
-    else:
-        gcd_new_old = _gcd(new_size, old_size)
-        new_num = old_size // gcd_new_old
-        old_num = new_size // gcd_new_old
-
-    # get position of fields within the tiling
-    new_fieldtile = [(tp, off + new_size*j)
-                     for j in range(new_num) for (tp, off) in new_fields]
-    old_fieldtile = [(tp, off + old_size*j)
-                     for j in range(old_num) for (tp, off) in old_fields]
-
-    # raises if there is a problem
-    _check_field_overlap(new_fieldtile, old_fieldtile)
+    if newtype.hasobject or oldtype.hasobject:
+        raise TypeError("Cannot change data-type for object array.")
+    return
 
 # Given a string containing a PEP 3118 format specifier,
 # construct a Numpy dtype
diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py
index 41b54a18b..15dd9302c 100644
--- a/numpy/core/tests/test_multiarray.py
+++ b/numpy/core/tests/test_multiarray.py
@@ -5845,89 +5845,6 @@ class TestHashing(TestCase):
         x = np.array([])
         self.assertFalse(isinstance(x, collections.Hashable))
 
-from numpy.core._internal import _view_is_safe
-
-class TestObjViewSafetyFuncs(TestCase):
-    def test_view_safety(self):
-        psize = np.dtype('p').itemsize
-
-        # creates dtype but with extra character code - for missing 'p' fields
-        def mtype(s):
-            n, offset, fields = 0, 0, []
-            for c in s.split(','):  # subarrays won't work
-                if c != '-':
-                    fields.append(('f{0}'.format(n), c, offset))
-                    n += 1
-                offset += np.dtype(c).itemsize if c != '-' else psize
-
-            names, formats, offsets = zip(*fields)
-            return np.dtype({'names': names, 'formats': formats,
-                          'offsets': offsets, 'itemsize': offset})
-
-        # test nonequal itemsizes with objects:
-        # these should succeed:
-        _view_is_safe(np.dtype('O,p,O,p'), np.dtype('O,p,O,p,O,p'))
-        _view_is_safe(np.dtype('O,O'), np.dtype('O,O,O'))
-        # these should fail:
-        assert_raises(TypeError, _view_is_safe, np.dtype('O,O,p'), np.dtype('O,O'))
-        assert_raises(TypeError, _view_is_safe, np.dtype('O,O,p'), np.dtype('O,p'))
-        assert_raises(TypeError, _view_is_safe, np.dtype('O,O,p'), np.dtype('p,O'))
-
-        # test nonequal itemsizes with missing fields:
-        # these should succeed:
-        _view_is_safe(mtype('-,p,-,p'), mtype('-,p,-,p,-,p'))
-        _view_is_safe(np.dtype('p,p'), np.dtype('p,p,p'))
-        # these should fail:
-        assert_raises(TypeError, _view_is_safe, mtype('p,p,-'), mtype('p,p'))
-        assert_raises(TypeError, _view_is_safe, mtype('p,p,-'), mtype('p,-'))
-        assert_raises(TypeError, _view_is_safe, mtype('p,p,-'), mtype('-,p'))
-
-        # scans through positions at which we can view a type
-        def scanView(d1, otype):
-            goodpos = []
-            for shift in range(d1.itemsize - np.dtype(otype).itemsize+1):
-                d2 = np.dtype({'names': ['f0'], 'formats': [otype],
-                            'offsets': [shift], 'itemsize': d1.itemsize})
-                try:
-                    _view_is_safe(d1, d2)
-                except TypeError:
-                    pass
-                else:
-                    goodpos.append(shift)
-            return goodpos
-
-        # test partial overlap with object field
-        assert_equal(scanView(np.dtype('p,O,p,p,O,O'), 'p'),
-                     [0] + list(range(2*psize, 3*psize+1)))
-        assert_equal(scanView(np.dtype('p,O,p,p,O,O'), 'O'),
-                     [psize, 4*psize, 5*psize])
-
-        # test partial overlap with missing field
-        assert_equal(scanView(mtype('p,-,p,p,-,-'), 'p'),
-                     [0] + list(range(2*psize, 3*psize+1)))
-
-        # test nested structures with objects:
-        nestedO = np.dtype([('f0', 'p'), ('f1', 'p,O,p')])
-        assert_equal(scanView(nestedO, 'p'), list(range(psize+1)) + [3*psize])
-        assert_equal(scanView(nestedO, 'O'), [2*psize])
-
-        # test nested structures with missing fields:
-        nestedM = np.dtype([('f0', 'p'), ('f1', mtype('p,-,p'))])
-        assert_equal(scanView(nestedM, 'p'), list(range(psize+1)) + [3*psize])
-
-        # test subarrays with objects
-        subarrayO = np.dtype('p,(2,3)O,p')
-        assert_equal(scanView(subarrayO, 'p'), [0, 7*psize])
-        assert_equal(scanView(subarrayO, 'O'),
-                     list(range(psize, 6*psize+1, psize)))
-
-        #test dtype with overlapping fields
-        overlapped = np.dtype({'names': ['f0', 'f1', 'f2', 'f3'],
-                            'formats': ['p', 'p', 'p', 'p'],
-                            'offsets': [0, 1, 3*psize-1, 3*psize],
-                            'itemsize': 4*psize})
-        assert_equal(scanView(overlapped, 'p'), [0, 1, 3*psize-1, 3*psize])
-
 
 class TestArrayPriority(TestCase):
     # This will go away when __array_priority__ is settled, meanwhile
author	Charles Harris <charlesr.harris@gmail.com>	2015-10-26 21:26:57 -0600
committer	Charles Harris <charlesr.harris@gmail.com>	2015-10-26 21:26:57 -0600
commit	522a0f749d1ac8d0acece68f9c65d46e96529407 (patch)
tree	8218e17e6ecfc29ff64343c2ad7a91e37fa1075c /numpy/core
parent	2962abccf92d8c4cf5552de2add81a460fd31c8a (diff)
parent	796a5f87c11fdc3a345fca5448b27a02856c2e4d (diff)
download	numpy-522a0f749d1ac8d0acece68f9c65d46e96529407.tar.gz