summary | refs | log | tree | commit | diff
path: root/Objects/setobject.c
diff options
context:
space:
mode:
Diffstat (limited to 'Objects/setobject.c')
-rw-r--r-- Objects/setobject.c 199
1 files changed, 131 insertions, 68 deletions
diff --git a/Objects/setobject.c b/Objects/setobject.c
index ea5a24c516..6327a312c9 100644
--- a/Objects/setobject.c
+++ b/Objects/setobject.c
@@ -68,6 +68,11 @@ chaining would be substantial (100% with typical malloc overhead).
The initial probe index is computed as hash mod the table size. Subsequent
probe indices are computed as explained in Objects/dictobject.c.
+To improve cache locality, each probe is done in pairs.
+After the probe is examined, an adjacent entry is then examined as well.
+The likelihood is that an adjacent entry is in the same cache line and
+can be examined more cheaply than another probe elsewhere in memory.
+
All arithmetic on hash should ignore overflow.
Unlike the dictionary implementation, the lookkey functions can return
@@ -75,25 +80,53 @@ NULL if the rich comparison returns an error.
*/
static setentry *
-set_lookkey(PySetObject *so, PyObject *key, register Py_hash_t hash)
+set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
{
- register size_t i; /* Unsigned for defined overflow behavior. */
- register size_t perturb;
- register setentry *freeslot;
- register size_t mask = so->mask;
+ size_t i, j; /* Unsigned for defined overflow behavior. */
+ size_t perturb;
+ setentry *freeslot;
+ size_t mask = so->mask;
setentry *table = so->table;
- register setentry *entry;
- register int cmp;
+ setentry *entry;
+ int cmp;
PyObject *startkey;
i = (size_t)hash & mask;
entry = &table[i];
if (entry->key == NULL || entry->key == key)
return entry;
+ if (entry->hash == hash) {
+ startkey = entry->key;
+ Py_INCREF(startkey);
+ cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
+ Py_DECREF(startkey);
+ if (cmp < 0)
+ return NULL;
+ if (table == so->table && entry->key == startkey) {
+ if (cmp > 0)
+ return entry;
+ }
+ else {
+ /* Start over if the compare altered the set */
+ return set_lookkey(so, key, hash);
+ }
+ }
+ freeslot = (entry->key == dummy) ? entry : NULL;
- if (entry->key == dummy)
- freeslot = entry;
- else {
+ /* In the loop, key == dummy is by far (factor of 100s)
+ the least likely outcome, so test for that last. */
+ j = i;
+ perturb = hash;
+ while (1) {
+ j ^= 1;
+ entry = &table[j];
+ if (entry->key == NULL) {
+ if (freeslot != NULL)
+ entry = freeslot;
+ break;
+ }
+ if (entry->key == key)
+ break;
if (entry->hash == hash) {
startkey = entry->key;
Py_INCREF(startkey);
@@ -103,23 +136,20 @@ set_lookkey(PySetObject *so, PyObject *key, register Py_hash_t hash)
return NULL;
if (table == so->table && entry->key == startkey) {
if (cmp > 0)
- return entry;
+ break;
}
else {
- /* The compare did major nasty stuff to the
- * set: start over.
- */
return set_lookkey(so, key, hash);
}
}
- freeslot = NULL;
- }
+ if (entry->key == dummy && freeslot == NULL)
+ freeslot = entry;
- /* In the loop, key == dummy is by far (factor of 100s) the
- least likely outcome, so test for that last. */
- for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
- i = (i << 2) + i + perturb + 1;
- entry = &table[i & mask];
+ i = i * 5 + perturb + 1;
+ j = i & mask;
+ perturb >>= PERTURB_SHIFT;
+
+ entry = &table[j];
if (entry->key == NULL) {
if (freeslot != NULL)
entry = freeslot;
@@ -127,7 +157,7 @@ set_lookkey(PySetObject *so, PyObject *key, register Py_hash_t hash)
}
if (entry->key == key)
break;
- if (entry->hash == hash && entry->key != dummy) {
+ if (entry->hash == hash) {
startkey = entry->key;
Py_INCREF(startkey);
cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
@@ -139,14 +169,12 @@ set_lookkey(PySetObject *so, PyObject *key, register Py_hash_t hash)
break;
}
else {
- /* The compare did major nasty stuff to the
- * set: start over.
- */
return set_lookkey(so, key, hash);
}
}
- else if (entry->key == dummy && freeslot == NULL)
+ if (entry->key == dummy && freeslot == NULL)
freeslot = entry;
+
}
return entry;
}
@@ -157,14 +185,14 @@ set_lookkey(PySetObject *so, PyObject *key, register Py_hash_t hash)
* see if the comparison altered the table.
*/
static setentry *
-set_lookkey_unicode(PySetObject *so, PyObject *key, register Py_hash_t hash)
+set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash)
{
- register size_t i; /* Unsigned for defined overflow behavior. */
- register size_t perturb;
- register setentry *freeslot;
- register size_t mask = so->mask;
+ size_t i, j; /* Unsigned for defined overflow behavior. */
+ size_t perturb;
+ setentry *freeslot;
+ size_t mask = so->mask;
setentry *table = so->table;
- register setentry *entry;
+ setentry *entry;
/* Make sure this function doesn't have to handle non-unicode keys,
including subclasses of str; e.g., one reason to subclass
@@ -174,6 +202,7 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, register Py_hash_t hash)
so->lookup = set_lookkey;
return set_lookkey(so, key, hash);
}
+
i = (size_t)hash & mask;
entry = &table[i];
if (entry->key == NULL || entry->key == key)
@@ -186,11 +215,37 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, register Py_hash_t hash)
freeslot = NULL;
}
- /* In the loop, key == dummy is by far (factor of 100s) the
- least likely outcome, so test for that last. */
- for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
- i = (i << 2) + i + perturb + 1;
- entry = &table[i & mask];
+ entry = &table[i ^ 1];
+ if (entry->key == NULL)
+ return freeslot == NULL ? entry : freeslot;
+ if (entry->key == key
+ || (entry->hash == hash
+ && entry->key != dummy
+ && unicode_eq(entry->key, key)))
+ return entry;
+ if (entry->key == dummy && freeslot == NULL)
+ freeslot = entry;
+
+ j = i;
+ perturb = hash;
+ while (1) {
+ j ^= 1;
+ entry = &table[j];
+ if (entry->key == NULL)
+ return freeslot == NULL ? entry : freeslot;
+ if (entry->key == key
+ || (entry->hash == hash
+ && entry->key != dummy
+ && unicode_eq(entry->key, key)))
+ return entry;
+ if (entry->key == dummy && freeslot == NULL)
+ freeslot = entry;
+
+ i = i * 5 + perturb + 1;
+ j = i & mask;
+ perturb >>= PERTURB_SHIFT;
+
+ entry = &table[j];
if (entry->key == NULL)
return freeslot == NULL ? entry : freeslot;
if (entry->key == key
@@ -211,9 +266,9 @@ Used by the public insert routine.
Eats a reference to key.
*/
static int
-set_insert_key(register PySetObject *so, PyObject *key, Py_hash_t hash)
+set_insert_key(PySetObject *so, PyObject *key, Py_hash_t hash)
{
- register setentry *entry;
+ setentry *entry;
assert(so->lookup != NULL);
entry = so->lookup(so, key, hash);
@@ -247,19 +302,25 @@ Note that no refcounts are changed by this routine; if needed, the caller
is responsible for incref'ing `key`.
*/
static void
-set_insert_clean(register PySetObject *so, PyObject *key, Py_hash_t hash)
+set_insert_clean(PySetObject *so, PyObject *key, Py_hash_t hash)
{
- register size_t i;
- register size_t perturb;
- register size_t mask = (size_t)so->mask;
setentry *table = so->table;
- register setentry *entry;
+ setentry *entry;
+ size_t perturb = hash;
+ size_t mask = (size_t)so->mask;
+ size_t i, j;
- i = (size_t)hash & mask;
- entry = &table[i];
- for (perturb = hash; entry->key != NULL; perturb >>= PERTURB_SHIFT) {
- i = (i << 2) + i + perturb + 1;
- entry = &table[i & mask];
+ i = j = (size_t)hash & mask;
+ while (1) {
+ entry = &table[j];
+ if (entry->key == NULL)
+ break;
+ entry = &table[j ^ 1];
+ if (entry->key == NULL)
+ break;
+ i = i * 5 + perturb + 1;
+ j = i & mask;
+ perturb >>= PERTURB_SHIFT;
}
so->fill++;
entry->key = key;
@@ -280,6 +341,7 @@ set_table_resize(PySetObject *so, Py_ssize_t minused)
Py_ssize_t i;
int is_oldtable_malloced;
setentry small_copy[PySet_MINSIZE];
+ PyObject *dummy_entry;
assert(minused >= 0);
@@ -336,11 +398,12 @@ set_table_resize(PySetObject *so, Py_ssize_t minused)
/* Copy the data over; this is refcount-neutral for active entries;
dummy entries aren't copied over, of course */
+ dummy_entry = dummy;
for (entry = oldtable; i > 0; entry++) {
if (entry->key == NULL) {
/* UNUSED */
;
- } else if (entry->key == dummy) {
+ } else if (entry->key == dummy_entry) {
/* DUMMY */
--i;
assert(entry->key == dummy);
@@ -360,9 +423,9 @@ set_table_resize(PySetObject *so, Py_ssize_t minused)
/* CAUTION: set_add_key/entry() must guarantee it won't resize the table */
static int
-set_add_entry(register PySetObject *so, setentry *entry)
+set_add_entry(PySetObject *so, setentry *entry)
{
- register Py_ssize_t n_used;
+ Py_ssize_t n_used;
PyObject *key = entry->key;
Py_hash_t hash = entry->hash;
@@ -379,10 +442,10 @@ set_add_entry(register PySetObject *so, setentry *entry)
}
static int
-set_add_key(register PySetObject *so, PyObject *key)
+set_add_key(PySetObject *so, PyObject *key)
{
- register Py_hash_t hash;
- register Py_ssize_t n_used;
+ Py_hash_t hash;
+ Py_ssize_t n_used;
if (!PyUnicode_CheckExact(key) ||
(hash = ((PyASCIIObject *) key)->hash) == -1) {
@@ -407,7 +470,7 @@ set_add_key(register PySetObject *so, PyObject *key)
static int
set_discard_entry(PySetObject *so, setentry *oldentry)
-{ register setentry *entry;
+{ setentry *entry;
PyObject *old_key;
entry = (so->lookup)(so, oldentry->key, oldentry->hash);
@@ -426,8 +489,8 @@ set_discard_entry(PySetObject *so, setentry *oldentry)
static int
set_discard_key(PySetObject *so, PyObject *key)
{
- register Py_hash_t hash;
- register setentry *entry;
+ Py_hash_t hash;
+ setentry *entry;
PyObject *old_key;
assert (PyAnySet_Check(so));
@@ -533,7 +596,7 @@ set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr)
{
Py_ssize_t i;
Py_ssize_t mask;
- register setentry *table;
+ setentry *table;
assert (PyAnySet_Check(so));
i = *pos_ptr;
@@ -553,7 +616,7 @@ set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr)
static void
set_dealloc(PySetObject *so)
{
- register setentry *entry;
+ setentry *entry;
Py_ssize_t fill = so->fill;
PyObject_GC_UnTrack(so);
Py_TRASHCAN_SAFE_BEGIN(so)
@@ -632,8 +695,8 @@ set_merge(PySetObject *so, PyObject *otherset)
PySetObject *other;
PyObject *key;
Py_hash_t hash;
- register Py_ssize_t i;
- register setentry *entry;
+ Py_ssize_t i;
+ setentry *entry;
assert (PyAnySet_Check(so));
assert (PyAnySet_Check(otherset));
@@ -701,8 +764,8 @@ set_contains_entry(PySetObject *so, setentry *entry)
static PyObject *
set_pop(PySetObject *so)
{
- register Py_ssize_t i = 0;
- register setentry *entry;
+ Py_ssize_t i = 0;
+ setentry *entry;
PyObject *key;
assert (PyAnySet_Check(so));
@@ -869,8 +932,8 @@ static PyMethodDef setiter_methods[] = {
static PyObject *setiter_iternext(setiterobject *si)
{
PyObject *key;
- register Py_ssize_t i, mask;
- register setentry *entry;
+ Py_ssize_t i, mask;
+ setentry *entry;
PySetObject *so = si->si_set;
if (so == NULL)
@@ -1024,10 +1087,10 @@ PyDoc_STRVAR(update_doc,
static PyObject *
make_new_set(PyTypeObject *type, PyObject *iterable)
{
- register PySetObject *so = NULL;
+ PySetObject *so = NULL;
if (dummy == NULL) { /* Auto-initialize dummy */
- dummy = PyUnicode_FromString("<dummy key>");
+ dummy = _PyObject_New(&PyBaseObject_Type);
if (dummy == NULL)
return NULL;
}