From 5657a6cd5ed1c54986f697660c6336d2fb2b1c21 Mon Sep 17 00:00:00 2001 From: Simon Gibbons Date: Thu, 9 Jun 2016 13:11:53 +0100 Subject: BUG: Fix segfaults in np.random.shuffle np.random.shuffle will allocate a buffer based on the size of the first element of an array of strings. If the first element is smaller than another in the array this buffer will overflow, causing a segfault when garbage is collected. Additionally if the array contains objects then one would be left in the buffer and have it's refcount erroniously decrimented on function exit, causing that object to be deallocated too early. To fix this we change the buffer to be an array of int8 of the the size of the array's dtype, which sidesteps both issues. Fixes #7710 --- numpy/random/mtrand/mtrand.pyx | 6 +++++- numpy/random/tests/test_regression.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/numpy/random/mtrand/mtrand.pyx b/numpy/random/mtrand/mtrand.pyx index f01aa6931..44ae956c8 100644 --- a/numpy/random/mtrand/mtrand.pyx +++ b/numpy/random/mtrand/mtrand.pyx @@ -5064,7 +5064,11 @@ cdef class RandomState: x_ptr = x.ctypes.data stride = x.strides[0] itemsize = x.dtype.itemsize - buf = np.empty_like(x[0]) # GC'd at function exit + # As the array x could contain python objects we use a buffer + # of bytes for the swaps to avoid leaving one of the objects + # within the buffer and erroneously decrementing it's refcount + # when the function exits. + buf = np.empty(itemsize, dtype=np.int8) # GC'd at function exit buf_ptr = buf.ctypes.data with self.lock: # We trick gcc into providing a specialized implementation for diff --git a/numpy/random/tests/test_regression.py b/numpy/random/tests/test_regression.py index 133a1aa5a..b50b6b260 100644 --- a/numpy/random/tests/test_regression.py +++ b/numpy/random/tests/test_regression.py @@ -113,5 +113,34 @@ class TestRegression(TestCase): assert_(c in a) assert_raises(ValueError, np.random.choice, a, p=probs*0.9) + def test_shuffle_of_array_of_different_length_strings(self): + # Test that permuting an array of different length strings + # will not cause a segfault on garbage collection + # Tests gh-7710 + np.random.seed(1234) + + a = np.array(['a', 'a' * 1000]) + + for _ in range(100): + np.random.shuffle(a) + + # Force Garbage Collection - should not segfault. + import gc + gc.collect() + + def test_shuffle_of_array_of_objects(self): + # Test that permuting an array of objects will not cause + # a segfault on garbage collection. + # See gh-7719 + np.random.seed(1234) + a = np.array([np.arange(1), np.arange(4)]) + + for _ in range(1000): + np.random.shuffle(a) + + # Force Garbage Collection - should not segfault. + import gc + gc.collect() + if __name__ == "__main__": run_module_suite() -- cgit v1.2.1