summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/release/1.8.0-notes.rst13
-rw-r--r--numpy/core/src/multiarray/scalartypes.c.src31
-rw-r--r--numpy/core/tests/test_multiarray.py16
3 files changed, 55 insertions, 5 deletions
diff --git a/doc/release/1.8.0-notes.rst b/doc/release/1.8.0-notes.rst
index 288b68865..3422622b3 100644
--- a/doc/release/1.8.0-notes.rst
+++ b/doc/release/1.8.0-notes.rst
@@ -15,6 +15,12 @@ a view onto the original array, instead of producing a copy.
selecting multiple fields out of an array also produces a view.
+The hash function of numpy.void scalars has been changed.
+Previously the pointer to the data was hashed as an integer.
+Now, the hash function uses the tuple-hash algorithm to combine
+the hashes of the elements of the scalar, but only if the scalar
+is read-only; hashing a writeable scalar raises a TypeError.
+
New features
============
@@ -25,9 +31,10 @@ Preliminary support for NA missing values similar to those in R has
been implemented. This was done by adding an NA mask to an array sub-type
.. note:: The NA API is *experimental*, and may undergo changes in future
- versions of NumPy. The current implementation based on masks will likely be
- supplemented by a second one based on bit-patterns, and it is possible that
- a difference will be made between missing and ignored data.
+ versions of NumPy. The current implementation based on masks will
+ likely be supplemented by a second one based on bit-patterns, and it
+ is possible that a difference will be made between missing and
+ ignored data.
While a significant amount of the NumPy functionality has been extended to
support NA masks, not everything is yet supported. Here is an (incomplete)
diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src
index 12745d720..8f39b57c4 100644
--- a/numpy/core/src/multiarray/scalartypes.c.src
+++ b/numpy/core/src/multiarray/scalartypes.c.src
@@ -3070,11 +3070,38 @@ object_arrtype_hash(PyObject *obj)
return PyObject_Hash(((PyObjectScalarObject *)obj)->obval);
}
-/* just hash the pointer */
+/*
+ * Hash a void scalar.
+ *
+ * We used to just hash the pointer to the data.  Now the hashes of the
+ * objects returned by voidtype_item are combined with the tuplehash
+ * algorithm.
+ *
+ * A scalar whose flags include NPY_ARRAY_WRITEABLE is rejected with a
+ * TypeError.  Returns -1 on failure; a successful hash is never -1.
+ */
static long
void_arrtype_hash(PyObject *obj)
{
- return _Py_HashPointer((void *)(((PyVoidScalarObject *)obj)->obval));
+    PyVoidScalarObject *p = (PyVoidScalarObject *)obj;
+    /*
+     * The mixing is done in unsigned arithmetic so that wrap-around is
+     * well defined; signed overflow would be undefined behaviour.
+     */
+    unsigned long x = 0x345678UL;
+    unsigned long mult = 1000003UL;
+    Py_ssize_t len, n;
+
+    /* Cannot hash mutable void scalars */
+    if (p->flags & NPY_ARRAY_WRITEABLE) {
+        PyErr_SetString(PyExc_TypeError, "unhashable type: 'writeable void-scalar'");
+        return -1;
+    }
+    len = voidtype_length(p);
+    for (n = 0; n < len; n++) {
+        PyObject *element = voidtype_item(p, n);
+        long y;
+
+        /*
+         * Do not hash or DECREF a NULL element.
+         * NOTE(review): presumably voidtype_item sets an exception when
+         * it returns NULL -- confirm against its definition.
+         */
+        if (element == NULL) {
+            return -1;
+        }
+        y = PyObject_Hash(element);
+        Py_DECREF(element);
+        if (y == -1) {
+            return -1;
+        }
+        x = (x ^ (unsigned long)y) * mult;
+        mult += 82520UL + (unsigned long)len + (unsigned long)len;
+    }
+    x += 97531UL;
+    /* -1 is reserved for signalling an error, so map it to -2 */
+    if (x == (unsigned long)-1) {
+        x = (unsigned long)-2;
+    }
+    return (long)x;
}
/*object arrtype getattro and setattro */
diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py
index b6f05237d..29e28ff26 100644
--- a/numpy/core/tests/test_multiarray.py
+++ b/numpy/core/tests/test_multiarray.py
@@ -2019,6 +2019,22 @@ class TestRecord(TestCase):
assert_equal(collect_warning_types(subset['f1'].__setitem__, 0, 10),
[])
+    def test_record_hash(self):
+        # Record scalars taken from read-only arrays are hashable, and
+        # the hash is expected to follow the field values.
+        unnamed = np.array([(1,2),(1,2)], dtype='i1,i2')
+        unnamed.flags.writeable = False
+        named = np.array([(1,2),(3,4)], dtype=[('num1', 'i1'), ('num2', 'i2')])
+        named.flags.writeable = False
+        other = np.array([(1,2),(3,4)], dtype='i1,i2')
+        other.flags.writeable = False
+        # equal rows of the same array hash alike
+        self.assertTrue(hash(unnamed[0]) == hash(unnamed[1]))
+        # differing field names do not change the hash
+        self.assertTrue(hash(unnamed[0]) == hash(named[0]))
+        # differing values give a different hash
+        self.assertTrue(hash(unnamed[0]) != hash(named[1]))
+        # equal records from distinct arrays hash alike and compare equal
+        self.assertTrue(hash(other[0]) == hash(unnamed[0]))
+        self.assertTrue(other[0] == unnamed[0])
+
+    def test_record_no_hash(self):
+        # The array is left writeable here, so hashing one of its
+        # record scalars is expected to raise TypeError.
+        writeable = np.array([(1,2),(1,2)], dtype='i1,i2')
+        first_record = writeable[0]
+        self.assertRaises(TypeError, hash, first_record)
+
class TestView(TestCase):
def test_basic(self):
x = np.array([(1,2,3,4),(5,6,7,8)],dtype=[('r',np.int8),('g',np.int8),