diff options
Diffstat (limited to 'Objects/setobject.c')
-rw-r--r-- | Objects/setobject.c | 265 |
1 files changed, 155 insertions, 110 deletions
diff --git a/Objects/setobject.c b/Objects/setobject.c index d962c1e44a..d0fb4f14f4 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -29,7 +29,6 @@ #include "Python.h" #include "structmember.h" -#include "stringlib/eq.h" /* Object used as dummy key to fill deleted entries */ static PyObject _dummy_struct; @@ -52,9 +51,8 @@ static setentry * set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) { setentry *table = so->table; - setentry *freeslot = NULL; setentry *entry; - size_t perturb = hash; + size_t perturb; size_t mask = so->mask; size_t i = (size_t)hash & mask; /* Unsigned for defined overflow behavior */ size_t j; @@ -64,6 +62,8 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) if (entry->key == NULL) return entry; + perturb = hash; + while (1) { if (entry->hash == hash) { PyObject *startkey = entry->key; @@ -73,7 +73,7 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) return entry; if (PyUnicode_CheckExact(startkey) && PyUnicode_CheckExact(key) - && unicode_eq(startkey, key)) + && _PyUnicode_EQ(startkey, key)) return entry; Py_INCREF(startkey); cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); @@ -86,14 +86,12 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) return entry; mask = so->mask; /* help avoid a register spill */ } - if (entry->hash == -1 && freeslot == NULL) - freeslot = entry; if (i + LINEAR_PROBES <= mask) { for (j = 0 ; j < LINEAR_PROBES ; j++) { entry++; - if (entry->key == NULL) - goto found_null; + if (entry->hash == 0 && entry->key == NULL) + return entry; if (entry->hash == hash) { PyObject *startkey = entry->key; assert(startkey != dummy); @@ -101,7 +99,7 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) return entry; if (PyUnicode_CheckExact(startkey) && PyUnicode_CheckExact(key) - && unicode_eq(startkey, key)) + && _PyUnicode_EQ(startkey, key)) return entry; Py_INCREF(startkey); cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); @@ -114,7 +112,90 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) return entry; mask = so->mask; } - if (entry->hash == -1 && freeslot == NULL) + } + } + + perturb >>= PERTURB_SHIFT; + i = (i * 5 + 1 + perturb) & mask; + + entry = &table[i]; + if (entry->key == NULL) + return entry; + } +} + +static int set_table_resize(PySetObject *, Py_ssize_t); + +static int +set_insert_key(PySetObject *so, PyObject *key, Py_hash_t hash) +{ + setentry *table = so->table; + setentry *freeslot; + setentry *entry; + size_t perturb; + size_t mask = so->mask; + size_t i = (size_t)hash & mask; /* Unsigned for defined overflow behavior */ + size_t j; + int cmp; + + entry = &table[i]; + if (entry->key == NULL) + goto found_unused; + + freeslot = NULL; + perturb = hash; + + while (1) { + if (entry->hash == hash) { + PyObject *startkey = entry->key; + /* startkey cannot be a dummy because the dummy hash field is -1 */ + assert(startkey != dummy); + if (startkey == key) + goto found_active; + if (PyUnicode_CheckExact(startkey) + && PyUnicode_CheckExact(key) + && _PyUnicode_EQ(startkey, key)) + goto found_active; + Py_INCREF(startkey); + cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); + Py_DECREF(startkey); + if (cmp < 0) /* unlikely */ + return -1; + if (table != so->table || entry->key != startkey) /* unlikely */ + return set_insert_key(so, key, hash); + if (cmp > 0) /* likely */ + goto found_active; + mask = so->mask; /* help avoid a register spill */ + } + if (entry->hash == -1 && freeslot == NULL) + freeslot = entry; + + if (i + LINEAR_PROBES <= mask) { + for (j = 0 ; j < LINEAR_PROBES ; j++) { + entry++; + if (entry->hash == 0 && entry->key == NULL) + goto found_unused_or_dummy; + if (entry->hash == hash) { + PyObject *startkey = entry->key; + assert(startkey != dummy); + if (startkey == key) + goto found_active; + if (PyUnicode_CheckExact(startkey) + && PyUnicode_CheckExact(key) + && _PyUnicode_EQ(startkey, key)) + goto found_active; + Py_INCREF(startkey); + cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); + Py_DECREF(startkey); + if (cmp < 0) + return -1; + if (table != so->table || entry->key != startkey) + return set_insert_key(so, key, hash); + if (cmp > 0) + goto found_active; + mask = so->mask; + } + else if (entry->hash == -1 && freeslot == NULL) freeslot = entry; } } @@ -124,10 +205,30 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) entry = &table[i]; if (entry->key == NULL) - goto found_null; + goto found_unused_or_dummy; } - found_null: - return freeslot == NULL ? entry : freeslot; + + found_unused_or_dummy: + if (freeslot == NULL) + goto found_unused; + Py_INCREF(key); + so->used++; + freeslot->key = key; + freeslot->hash = hash; + return 0; + + found_unused: + Py_INCREF(key); + so->fill++; + so->used++; + entry->key = key; + entry->hash = hash; + if ((size_t)so->fill*3 < mask*2) + return 0; + return set_table_resize(so, so->used); + + found_active: + return 0; } /* @@ -172,38 +273,6 @@ set_insert_clean(PySetObject *so, PyObject *key, Py_hash_t hash) /* ======== End logic for probing the hash table ========================== */ /* ======================================================================== */ - -/* -Internal routine to insert a new key into the table. -Used by the public insert routine. -Eats a reference to key. -*/ -static int -set_insert_key(PySetObject *so, PyObject *key, Py_hash_t hash) -{ - setentry *entry; - - entry = set_lookkey(so, key, hash); - if (entry == NULL) - return -1; - if (entry->key == NULL) { - /* UNUSED */ - entry->key = key; - entry->hash = hash; - so->fill++; - so->used++; - } else if (entry->key == dummy) { - /* DUMMY */ - entry->key = key; - entry->hash = hash; - so->used++; - } else { - /* ACTIVE */ - Py_DECREF(key); - } - return 0; -} - /* Restructure the table by allocating a new table and reinserting all keys again. When entries have been deleted, the new table may @@ -220,6 +289,7 @@ set_table_resize(PySetObject *so, Py_ssize_t minused) setentry small_copy[PySet_MINSIZE]; assert(minused >= 0); + minused = (minused > 50000) ? minused * 2 : minused * 4; /* Find the smallest table size > minused. */ /* XXX speed-up with intrinsics */ @@ -295,31 +365,15 @@ set_table_resize(PySetObject *so, Py_ssize_t minused) return 0; } -/* CAUTION: set_add_key/entry() must guarantee it won't resize the table */ - static int set_add_entry(PySetObject *so, setentry *entry) { - Py_ssize_t n_used; - PyObject *key = entry->key; - Py_hash_t hash = entry->hash; - - assert(so->fill <= so->mask); /* at least one empty slot */ - n_used = so->used; - Py_INCREF(key); - if (set_insert_key(so, key, hash)) { - Py_DECREF(key); - return -1; - } - if (!(so->used > n_used && so->fill*3 >= (so->mask+1)*2)) - return 0; - return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4); + return set_insert_key(so, entry->key, entry->hash); } static int set_add_key(PySetObject *so, PyObject *key) { - setentry entry; Py_hash_t hash; if (!PyUnicode_CheckExact(key) || @@ -328,9 +382,7 @@ set_add_key(PySetObject *so, PyObject *key) if (hash == -1) return -1; } - entry.key = key; - entry.hash = hash; - return set_add_entry(so, &entry); + return set_insert_key(so, key, hash); } #define DISCARD_NOTFOUND 0 @@ -345,7 +397,7 @@ set_discard_entry(PySetObject *so, setentry *oldentry) entry = set_lookkey(so, oldentry->key, oldentry->hash); if (entry == NULL) return -1; - if (entry->key == NULL || entry->key == dummy) + if (entry->key == NULL) return DISCARD_NOTFOUND; old_key = entry->key; entry->key = dummy; @@ -563,8 +615,8 @@ set_merge(PySetObject *so, PyObject *otherset) * incrementally resizing as we insert new keys. Expect * that there will be no (or few) overlapping keys. */ - if ((so->fill + other->used)*3 >= (so->mask+1)*2) { - if (set_table_resize(so, (so->used + other->used)*2) != 0) + if ((so->fill + other->used)*3 >= so->mask*2) { + if (set_table_resize(so, so->used + other->used) != 0) return -1; } so_entry = so->table; @@ -603,11 +655,8 @@ set_merge(PySetObject *so, PyObject *otherset) for (i = 0; i <= other->mask; i++, other_entry++) { key = other_entry->key; if (key != NULL && key != dummy) { - Py_INCREF(key); - if (set_insert_key(so, key, other_entry->hash)) { - Py_DECREF(key); + if (set_insert_key(so, key, other_entry->hash)) return -1; - } } } return 0; @@ -623,13 +672,13 @@ set_contains_entry(PySetObject *so, setentry *entry) if (lu_entry == NULL) return -1; key = lu_entry->key; - return key != NULL && key != dummy; + return key != NULL; } static int set_contains_key(PySetObject *so, PyObject *key) { - setentry entry; + setentry *entry; Py_hash_t hash; if (!PyUnicode_CheckExact(key) || @@ -638,9 +687,10 @@ set_contains_key(PySetObject *so, PyObject *key) if (hash == -1) return -1; } - entry.key = key; - entry.hash = hash; - return set_contains_entry(so, &entry); + entry = set_lookkey(so, key, hash); + if (entry == NULL) + return -1; + return entry->key != NULL; } static PyObject * @@ -912,18 +962,14 @@ set_update_internal(PySetObject *so, PyObject *other) * incrementally resizing as we insert new keys. Expect * that there will be no (or few) overlapping keys. */ - if (dictsize == -1) + if (dictsize < 0) return -1; - if ((so->fill + dictsize)*3 >= (so->mask+1)*2) { - if (set_table_resize(so, (so->used + dictsize)*2) != 0) + if ((so->fill + dictsize)*3 >= so->mask*2) { + if (set_table_resize(so, so->used + dictsize) != 0) return -1; } while (_PyDict_Next(other, &pos, &key, &value, &hash)) { - setentry an_entry; - - an_entry.hash = hash; - an_entry.key = key; - if (set_add_entry(so, &an_entry)) + if (set_insert_key(so, key, hash)) return -1; } return 0; @@ -1223,7 +1269,7 @@ set_intersection(PySetObject *so, PyObject *other) while (set_next((PySetObject *)other, &pos, &entry)) { int rv = set_contains_entry(so, entry); - if (rv == -1) { + if (rv < 0) { Py_DECREF(result); return NULL; } @@ -1257,7 +1303,7 @@ set_intersection(PySetObject *so, PyObject *other) entry.hash = hash; entry.key = key; rv = set_contains_entry(so, &entry); - if (rv == -1) { + if (rv < 0) { Py_DECREF(it); Py_DECREF(result); Py_DECREF(key); @@ -1384,7 +1430,7 @@ set_isdisjoint(PySetObject *so, PyObject *other) } while (set_next((PySetObject *)other, &pos, &entry)) { int rv = set_contains_entry(so, entry); - if (rv == -1) + if (rv < 0) return NULL; if (rv) Py_RETURN_FALSE; @@ -1410,7 +1456,7 @@ set_isdisjoint(PySetObject *so, PyObject *other) entry.key = key; rv = set_contains_entry(so, &entry); Py_DECREF(key); - if (rv == -1) { + if (rv < 0) { Py_DECREF(it); return NULL; } @@ -1439,7 +1485,7 @@ set_difference_update_internal(PySetObject *so, PyObject *other) Py_ssize_t pos = 0; while (set_next((PySetObject *)other, &pos, &entry)) - if (set_discard_entry(so, entry) == -1) + if (set_discard_entry(so, entry) < 0) return -1; } else { PyObject *key, *it; @@ -1448,7 +1494,7 @@ set_difference_update_internal(PySetObject *so, PyObject *other) return -1; while ((key = PyIter_Next(it)) != NULL) { - if (set_discard_key(so, key) == -1) { + if (set_discard_key(so, key) < 0) { Py_DECREF(it); Py_DECREF(key); return -1; @@ -1459,10 +1505,10 @@ set_difference_update_internal(PySetObject *so, PyObject *other) if (PyErr_Occurred()) return -1; } - /* If more than 1/5 are dummies, then resize them away. */ - if ((so->fill - so->used) * 5 < so->mask) + /* If more than 1/4th are dummies, then resize them away. */ + if ((size_t)(so->fill - so->used) <= (size_t)so->mask / 4) return 0; - return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4); + return set_table_resize(so, so->used); } static PyObject * @@ -1489,7 +1535,7 @@ set_copy_and_difference(PySetObject *so, PyObject *other) result = set_copy(so); if (result == NULL) return NULL; - if (set_difference_update_internal((PySetObject *) result, other) != -1) + if (set_difference_update_internal((PySetObject *) result, other) == 0) return result; Py_DECREF(result); return NULL; @@ -1518,17 +1564,16 @@ set_difference(PySetObject *so, PyObject *other) if (PyDict_CheckExact(other)) { while (set_next(so, &pos, &entry)) { - setentry entrycopy; + PyObject *key = entry->key; + Py_hash_t hash = entry->hash; int rv; - entrycopy.hash = entry->hash; - entrycopy.key = entry->key; - rv = _PyDict_Contains(other, entry->key, entry->hash); + rv = _PyDict_Contains(other, key, hash); if (rv < 0) { Py_DECREF(result); return NULL; } if (!rv) { - if (set_add_entry((PySetObject *)result, &entrycopy)) { + if (set_insert_key((PySetObject *)result, key, hash)) { Py_DECREF(result); return NULL; } @@ -1540,7 +1585,7 @@ set_difference(PySetObject *so, PyObject *other) /* Iterate over so, checking for common elements in other. */ while (set_next(so, &pos, &entry)) { int rv = set_contains_entry((PySetObject *)other, entry); - if (rv == -1) { + if (rv < 0) { Py_DECREF(result); return NULL; } @@ -1624,7 +1669,7 @@ set_symmetric_difference_update(PySetObject *so, PyObject *other) an_entry.key = key; rv = set_discard_entry(so, &an_entry); - if (rv == -1) { + if (rv < 0) { Py_DECREF(key); return NULL; } @@ -1650,7 +1695,7 @@ set_symmetric_difference_update(PySetObject *so, PyObject *other) while (set_next(otherset, &pos, &entry)) { int rv = set_discard_entry(so, entry); - if (rv == -1) { + if (rv < 0) { Py_DECREF(otherset); return NULL; } @@ -1732,7 +1777,7 @@ set_issubset(PySetObject *so, PyObject *other) while (set_next(so, &pos, &entry)) { int rv = set_contains_entry((PySetObject *)other, entry); - if (rv == -1) + if (rv < 0) return NULL; if (!rv) Py_RETURN_FALSE; @@ -1823,7 +1868,7 @@ set_contains(PySetObject *so, PyObject *key) int rv; rv = set_contains_key(so, key); - if (rv == -1) { + if (rv < 0) { if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError)) return -1; PyErr_Clear(); @@ -1842,7 +1887,7 @@ set_direct_contains(PySetObject *so, PyObject *key) long result; result = set_contains(so, key); - if (result == -1) + if (result < 0) return NULL; return PyBool_FromLong(result); } @@ -1856,7 +1901,7 @@ set_remove(PySetObject *so, PyObject *key) int rv; rv = set_discard_key(so, key); - if (rv == -1) { + if (rv < 0) { if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError)) return NULL; PyErr_Clear(); @@ -1865,7 +1910,7 @@ set_remove(PySetObject *so, PyObject *key) return NULL; rv = set_discard_key(so, tmpkey); Py_DECREF(tmpkey); - if (rv == -1) + if (rv < 0) return NULL; } @@ -1888,7 +1933,7 @@ set_discard(PySetObject *so, PyObject *key) int rv; rv = set_discard_key(so, key); - if (rv == -1) { + if (rv < 0) { if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError)) return NULL; PyErr_Clear(); @@ -1897,7 +1942,7 @@ set_discard(PySetObject *so, PyObject *key) return NULL; rv = set_discard_key(so, tmpkey); Py_DECREF(tmpkey); - if (rv == -1) + if (rv < 0) return NULL; } Py_RETURN_NONE; |