diff options
-rw-r--r-- | doc/release/1.8.0-notes.rst | 13 | ||||
-rw-r--r-- | numpy/core/src/multiarray/scalartypes.c.src | 31 | ||||
-rw-r--r-- | numpy/core/tests/test_multiarray.py | 16 |
3 files changed, 55 insertions, 5 deletions
diff --git a/doc/release/1.8.0-notes.rst b/doc/release/1.8.0-notes.rst index 288b68865..3422622b3 100644 --- a/doc/release/1.8.0-notes.rst +++ b/doc/release/1.8.0-notes.rst @@ -15,6 +15,12 @@ a view onto the original array, instead of producing a copy. selecting multiple fields out of an array also produces a view. +The hash function of numpy.void scalars has been changed. +Previously the pointer to the data was hashed as an integer. +Now, the hash function uses the tuple-hash algorithm to combine +the hash functions of the elements of the scalar, but only if +the scalar is read-only. + New features ============ @@ -25,9 +31,10 @@ Preliminary support for NA missing values similar to those in R has been implemented. This was done by adding an NA mask to an array sub-type .. note:: The NA API is *experimental*, and may undergo changes in future - versions of NumPy. The current implementation based on masks will likely be - supplemented by a second one based on bit-patterns, and it is possible that - a difference will be made between missing and ignored data. + versions of NumPy. The current implementation based on masks will + likely be supplemented by a second one based on bit-patterns, and it + is possible that a difference will be made between missing and + ignored data. While a significant amount of the NumPy functionality has been extended to support NA masks, not everything is yet supported. Here is an (incomplete) diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src index 12745d720..8f39b57c4 100644 --- a/numpy/core/src/multiarray/scalartypes.c.src +++ b/numpy/core/src/multiarray/scalartypes.c.src @@ -3070,11 +3070,38 @@ object_arrtype_hash(PyObject *obj) return PyObject_Hash(((PyObjectScalarObject *)obj)->obval); } -/* just hash the pointer */ +/* we used to just hash the pointer */ +/* now use tuplehash algorithm using voidtype_item to get the object +*/ static long void_arrtype_hash(PyObject *obj) { - return _Py_HashPointer((void *)(((PyVoidScalarObject *)obj)->obval)); + register long x, y; + register Py_ssize_t len, n; + register PyVoidScalarObject *p; + register PyObject *element; + long mult = 1000003L; + x = 0x345678L; + p = (PyVoidScalarObject *)obj; + /* Cannot hash mutable void scalars */ + if (p->flags & NPY_ARRAY_WRITEABLE) { + PyErr_SetString(PyExc_TypeError, "unhashable type: 'writeable void-scalar'"); + return -1; + } + len = voidtype_length(p); + for (n=0; n < len; n++) { + element = voidtype_item(p, n); + y = PyObject_Hash(element); + Py_DECREF(element); + if (y == -1) + return -1; + x = (x ^ y) * mult; + mult += (long)(82520L + len + len); + } + x += 97531L; + if (x == -1) + x = -2; + return x; } /*object arrtype getattro and setattro */ diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index b6f05237d..29e28ff26 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -2019,6 +2019,22 @@ class TestRecord(TestCase): assert_equal(collect_warning_types(subset['f1'].__setitem__, 0, 10), []) + def test_record_hash(self): + a = np.array([(1,2),(1,2)], dtype='i1,i2') + a.flags.writeable = False + b = np.array([(1,2),(3,4)], dtype=[('num1', 'i1'), ('num2', 'i2')]) + b.flags.writeable = False + c = np.array([(1,2),(3,4)], dtype='i1,i2') + c.flags.writeable = False + self.assertTrue(hash(a[0]) == hash(a[1])) + self.assertTrue(hash(a[0]) == hash(b[0])) + self.assertTrue(hash(a[0]) != hash(b[1])) + self.assertTrue(hash(c[0]) == hash(a[0]) and c[0] == a[0]) + + def test_record_no_hash(self): + a = np.array([(1,2),(1,2)], dtype='i1,i2') + self.assertRaises(TypeError, hash, a[0]) + class TestView(TestCase): def test_basic(self): x = np.array([(1,2,3,4),(5,6,7,8)],dtype=[('r',np.int8),('g',np.int8), |