diff options
Diffstat (limited to 'Objects/setobject.c')
| -rw-r--r-- | Objects/setobject.c | 615 | 
1 files changed, 337 insertions, 278 deletions
| diff --git a/Objects/setobject.c b/Objects/setobject.c index 4ef692db33..6dd403f30f 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -26,7 +26,6 @@  #include "Python.h"  #include "structmember.h" -#include "stringlib/eq.h"  /* Object used as dummy key to fill deleted entries */  static PyObject _dummy_struct; @@ -48,19 +47,20 @@ static PyObject _dummy_struct;  static setentry *  set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)  { -    setentry *table = so->table; -    setentry *freeslot = NULL; +    setentry *table;      setentry *entry; -    size_t perturb = hash; +    size_t perturb;      size_t mask = so->mask;      size_t i = (size_t)hash & mask; /* Unsigned for defined overflow behavior */      size_t j;      int cmp; -    entry = &table[i]; +    entry = &so->table[i];      if (entry->key == NULL)          return entry; +    perturb = hash; +      while (1) {          if (entry->hash == hash) {              PyObject *startkey = entry->key; @@ -70,8 +70,9 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)                  return entry;              if (PyUnicode_CheckExact(startkey)                  && PyUnicode_CheckExact(key) -                && unicode_eq(startkey, key)) +                && _PyUnicode_EQ(startkey, key))                  return entry; +            table = so->table;              Py_INCREF(startkey);              cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);              Py_DECREF(startkey); @@ -83,14 +84,12 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)                  return entry;              mask = so->mask;                 /* help avoid a register spill */          } -        if (entry->hash == -1 && freeslot == NULL) -            freeslot = entry;          if (i + LINEAR_PROBES <= mask) {              for (j = 0 ; j < LINEAR_PROBES ; j++) {                  entry++; -                if (entry->key == NULL) -                    goto found_null; +                if (entry->hash == 0 && entry->key == NULL) +                    return entry;                  if (entry->hash == hash) {                      PyObject *startkey = entry->key;                      assert(startkey != dummy); @@ -98,8 +97,9 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)                          return entry;                      if (PyUnicode_CheckExact(startkey)                          && PyUnicode_CheckExact(key) -                        && unicode_eq(startkey, key)) +                        && _PyUnicode_EQ(startkey, key))                          return entry; +                    table = so->table;                      Py_INCREF(startkey);                      cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);                      Py_DECREF(startkey); @@ -111,7 +111,104 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)                          return entry;                      mask = so->mask;                  } -                if (entry->hash == -1 && freeslot == NULL) +            } +        } + +        perturb >>= PERTURB_SHIFT; +        i = (i * 5 + 1 + perturb) & mask; + +        entry = &so->table[i]; +        if (entry->key == NULL) +            return entry; +    } +} + +static int set_table_resize(PySetObject *, Py_ssize_t); + +static int +set_add_entry(PySetObject *so, PyObject *key, Py_hash_t hash) +{ +    setentry *table; +    setentry *freeslot; +    setentry *entry; +    size_t perturb; +    size_t mask; +    size_t i;                       /* Unsigned for defined overflow behavior */ +    size_t j; +    int cmp; + +    /* Pre-increment is necessary to prevent arbitrary code in the rich +       comparison from deallocating the key just before the insertion. */ +    Py_INCREF(key); + +  restart: + +    mask = so->mask; +    i = (size_t)hash & mask; + +    entry = &so->table[i]; +    if (entry->key == NULL) +        goto found_unused; + +    freeslot = NULL; +    perturb = hash; + +    while (1) { +        if (entry->hash == hash) { +            PyObject *startkey = entry->key; +            /* startkey cannot be a dummy because the dummy hash field is -1 */ +            assert(startkey != dummy); +            if (startkey == key) +                goto found_active; +            if (PyUnicode_CheckExact(startkey) +                && PyUnicode_CheckExact(key) +                && _PyUnicode_EQ(startkey, key)) +                goto found_active; +            table = so->table; +            Py_INCREF(startkey); +            cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); +            Py_DECREF(startkey); +            if (cmp > 0)                                          /* likely */ +                goto found_active; +            if (cmp < 0) +                goto comparison_error; +            /* Continuing the search from the current entry only makes +               sense if the table and entry are unchanged; otherwise, +               we have to restart from the beginning */ +            if (table != so->table || entry->key != startkey) +                goto restart; +            mask = so->mask;                 /* help avoid a register spill */ +        } +        else if (entry->hash == -1 && freeslot == NULL) +            freeslot = entry; + +        if (i + LINEAR_PROBES <= mask) { +            for (j = 0 ; j < LINEAR_PROBES ; j++) { +                entry++; +                if (entry->hash == 0 && entry->key == NULL) +                    goto found_unused_or_dummy; +                if (entry->hash == hash) { +                    PyObject *startkey = entry->key; +                    assert(startkey != dummy); +                    if (startkey == key) +                        goto found_active; +                    if (PyUnicode_CheckExact(startkey) +                        && PyUnicode_CheckExact(key) +                        && _PyUnicode_EQ(startkey, key)) +                        goto found_active; +                    table = so->table; +                    Py_INCREF(startkey); +                    cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); +                    Py_DECREF(startkey); +                    if (cmp > 0) +                        goto found_active; +                    if (cmp < 0) +                        goto comparison_error; +                    if (table != so->table || entry->key != startkey) +                        goto restart; +                    mask = so->mask; +                } +                else if (entry->hash == -1 && freeslot == NULL)                      freeslot = entry;              }          } @@ -119,29 +216,51 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)          perturb >>= PERTURB_SHIFT;          i = (i * 5 + 1 + perturb) & mask; -        entry = &table[i]; +        entry = &so->table[i];          if (entry->key == NULL) -            goto found_null; +            goto found_unused_or_dummy;      } -  found_null: -    return freeslot == NULL ? entry : freeslot; + +  found_unused_or_dummy: +    if (freeslot == NULL) +        goto found_unused; +    so->used++; +    freeslot->key = key; +    freeslot->hash = hash; +    return 0; + +  found_unused: +    so->fill++; +    so->used++; +    entry->key = key; +    entry->hash = hash; +    if ((size_t)so->fill*3 < mask*2) +        return 0; +    return set_table_resize(so, so->used); + +  found_active: +    Py_DECREF(key); +    return 0; + +  comparison_error: +    Py_DECREF(key); +    return -1;  }  /*  Internal routine used by set_table_resize() to insert an item which is  known to be absent from the set.  This routine also assumes that  the set contains no deleted entries.  Besides the performance benefit, -using set_insert_clean() in set_table_resize() is dangerous (SF bug #1456209). -Note that no refcounts are changed by this routine; if needed, the caller -is responsible for incref'ing `key`. +there is also safety benefit since using set_add_entry() risks making +a callback in the middle of a set_table_resize(), see issue 1456209. +The caller is responsible for updating the key's reference count and +the setobject's fill and used fields.  */  static void -set_insert_clean(PySetObject *so, PyObject *key, Py_hash_t hash) +set_insert_clean(setentry *table, size_t mask, PyObject *key, Py_hash_t hash)  { -    setentry *table = so->table;      setentry *entry;      size_t perturb = hash; -    size_t mask = (size_t)so->mask;      size_t i = (size_t)hash & mask;      size_t j; @@ -162,45 +281,11 @@ set_insert_clean(PySetObject *so, PyObject *key, Py_hash_t hash)    found_null:      entry->key = key;      entry->hash = hash; -    so->fill++; -    so->used++;  }  /* ======== End logic for probing the hash table ========================== */  /* ======================================================================== */ - -/* -Internal routine to insert a new key into the table. -Used by the public insert routine. -Eats a reference to key. -*/ -static int -set_insert_key(PySetObject *so, PyObject *key, Py_hash_t hash) -{ -    setentry *entry; - -    entry = set_lookkey(so, key, hash); -    if (entry == NULL) -        return -1; -    if (entry->key == NULL) { -        /* UNUSED */ -        entry->key = key; -        entry->hash = hash; -        so->fill++; -        so->used++; -    } else if (entry->key == dummy) { -        /* DUMMY */ -        entry->key = key; -        entry->hash = hash; -        so->used++; -    } else { -        /* ACTIVE */ -        Py_DECREF(key); -    } -    return 0; -} -  /*  Restructure the table by allocating a new table and reinserting all  keys again.  When entries have been deleted, the new table may @@ -213,10 +298,13 @@ set_table_resize(PySetObject *so, Py_ssize_t minused)      setentry *oldtable, *newtable, *entry;      Py_ssize_t oldfill = so->fill;      Py_ssize_t oldused = so->used; +    Py_ssize_t oldmask = so->mask; +    size_t newmask;      int is_oldtable_malloced;      setentry small_copy[PySet_MINSIZE];      assert(minused >= 0); +    minused = (minused > 50000) ? minused * 2 : minused * 4;      /* Find the smallest table size > minused. */      /* XXX speed-up with intrinsics */ @@ -264,25 +352,24 @@ set_table_resize(PySetObject *so, Py_ssize_t minused)      /* Make the set empty, using the new table. */      assert(newtable != oldtable);      memset(newtable, 0, sizeof(setentry) * newsize); -    so->fill = 0; -    so->used = 0; +    so->fill = oldused; +    so->used = oldused;      so->mask = newsize - 1;      so->table = newtable;      /* Copy the data over; this is refcount-neutral for active entries;         dummy entries aren't copied over, of course */ +    newmask = (size_t)so->mask;      if (oldfill == oldused) { -        for (entry = oldtable; oldused > 0; entry++) { +        for (entry = oldtable; entry <= oldtable + oldmask; entry++) {              if (entry->key != NULL) { -                oldused--; -                set_insert_clean(so, entry->key, entry->hash); +                set_insert_clean(newtable, newmask, entry->key, entry->hash);              }          }      } else { -        for (entry = oldtable; oldused > 0; entry++) { +        for (entry = oldtable; entry <= oldtable + oldmask; entry++) {              if (entry->key != NULL && entry->key != dummy) { -                oldused--; -                set_insert_clean(so, entry->key, entry->hash); +                set_insert_clean(newtable, newmask, entry->key, entry->hash);              }          }      } @@ -292,31 +379,42 @@ set_table_resize(PySetObject *so, Py_ssize_t minused)      return 0;  } -/* CAUTION: set_add_key/entry() must guarantee it won't resize the table */ +static int +set_contains_entry(PySetObject *so, PyObject *key, Py_hash_t hash) +{ +    setentry *entry; + +    entry = set_lookkey(so, key, hash); +    if (entry != NULL) +        return entry->key != NULL; +    return -1; +} + +#define DISCARD_NOTFOUND 0 +#define DISCARD_FOUND 1  static int -set_add_entry(PySetObject *so, setentry *entry) +set_discard_entry(PySetObject *so, PyObject *key, Py_hash_t hash)  { -    Py_ssize_t n_used; -    PyObject *key = entry->key; -    Py_hash_t hash = entry->hash; +    setentry *entry; +    PyObject *old_key; -    assert(so->fill <= so->mask);  /* at least one empty slot */ -    n_used = so->used; -    Py_INCREF(key); -    if (set_insert_key(so, key, hash)) { -        Py_DECREF(key); +    entry = set_lookkey(so, key, hash); +    if (entry == NULL)          return -1; -    } -    if (!(so->used > n_used && so->fill*3 >= (so->mask+1)*2)) -        return 0; -    return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4); +    if (entry->key == NULL) +        return DISCARD_NOTFOUND; +    old_key = entry->key; +    entry->key = dummy; +    entry->hash = -1; +    so->used--; +    Py_DECREF(old_key); +    return DISCARD_FOUND;  }  static int  set_add_key(PySetObject *so, PyObject *key)  { -    setentry entry;      Py_hash_t hash;      if (!PyUnicode_CheckExact(key) || @@ -325,50 +423,35 @@ set_add_key(PySetObject *so, PyObject *key)          if (hash == -1)              return -1;      } -    entry.key = key; -    entry.hash = hash; -    return set_add_entry(so, &entry); +    return set_add_entry(so, key, hash);  } -#define DISCARD_NOTFOUND 0 -#define DISCARD_FOUND 1 -  static int -set_discard_entry(PySetObject *so, setentry *oldentry) +set_contains_key(PySetObject *so, PyObject *key)  { -    setentry *entry; -    PyObject *old_key; +    Py_hash_t hash; -    entry = set_lookkey(so, oldentry->key, oldentry->hash); -    if (entry == NULL) -        return -1; -    if (entry->key == NULL  ||  entry->key == dummy) -        return DISCARD_NOTFOUND; -    old_key = entry->key; -    entry->key = dummy; -    entry->hash = -1; -    so->used--; -    Py_DECREF(old_key); -    return DISCARD_FOUND; +    if (!PyUnicode_CheckExact(key) || +        (hash = ((PyASCIIObject *) key)->hash) == -1) { +        hash = PyObject_Hash(key); +        if (hash == -1) +            return -1; +    } +    return set_contains_entry(so, key, hash);  }  static int  set_discard_key(PySetObject *so, PyObject *key)  { -    setentry entry;      Py_hash_t hash; -    assert (PyAnySet_Check(so)); -      if (!PyUnicode_CheckExact(key) ||          (hash = ((PyASCIIObject *) key)->hash) == -1) {          hash = PyObject_Hash(key);          if (hash == -1)              return -1;      } -    entry.key = key; -    entry.hash = hash; -    return set_discard_entry(so, &entry); +    return set_discard_entry(so, key, hash);  }  static void @@ -449,20 +532,22 @@ set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr)  {      Py_ssize_t i;      Py_ssize_t mask; -    setentry *table; +    setentry *entry;      assert (PyAnySet_Check(so));      i = *pos_ptr;      assert(i >= 0); -    table = so->table;      mask = so->mask; -    while (i <= mask && (table[i].key == NULL || table[i].key == dummy)) +    entry = &so->table[i]; +    while (i <= mask && (entry->key == NULL || entry->key == dummy)) {          i++; +        entry++; +    }      *pos_ptr = i+1;      if (i > mask)          return 0; -    assert(table[i].key != NULL); -    *entry_ptr = &table[i]; +    assert(entry != NULL); +    *entry_ptr = entry;      return 1;  } @@ -560,8 +645,8 @@ set_merge(PySetObject *so, PyObject *otherset)       * incrementally resizing as we insert new keys.  Expect       * that there will be no (or few) overlapping keys.       */ -    if ((so->fill + other->used)*3 >= (so->mask+1)*2) { -       if (set_table_resize(so, (so->used + other->used)*2) != 0) +    if ((so->fill + other->used)*3 >= so->mask*2) { +       if (set_table_resize(so, so->used + other->used) != 0)             return -1;      }      so_entry = so->table; @@ -586,11 +671,15 @@ set_merge(PySetObject *so, PyObject *otherset)      /* If our table is empty, we can use set_insert_clean() */      if (so->fill == 0) { -        for (i = 0; i <= other->mask; i++, other_entry++) { +        setentry *newtable = so->table; +        size_t newmask = (size_t)so->mask; +        so->fill = other->used; +        so->used = other->used; +        for (i = other->mask + 1; i > 0 ; i--, other_entry++) {              key = other_entry->key;              if (key != NULL && key != dummy) {                  Py_INCREF(key); -                set_insert_clean(so, key, other_entry->hash); +                set_insert_clean(newtable, newmask, key, other_entry->hash);              }          }          return 0; @@ -601,46 +690,13 @@ set_merge(PySetObject *so, PyObject *otherset)          other_entry = &other->table[i];          key = other_entry->key;          if (key != NULL && key != dummy) { -            Py_INCREF(key); -            if (set_insert_key(so, key, other_entry->hash)) { -                Py_DECREF(key); +            if (set_add_entry(so, key, other_entry->hash))                  return -1; -            }          }      }      return 0;  } -static int -set_contains_entry(PySetObject *so, setentry *entry) -{ -    PyObject *key; -    setentry *lu_entry; - -    lu_entry = set_lookkey(so, entry->key, entry->hash); -    if (lu_entry == NULL) -        return -1; -    key = lu_entry->key; -    return key != NULL && key != dummy; -} - -static int -set_contains_key(PySetObject *so, PyObject *key) -{ -    setentry entry; -    Py_hash_t hash; - -    if (!PyUnicode_CheckExact(key) || -        (hash = ((PyASCIIObject *) key)->hash) == -1) { -        hash = PyObject_Hash(key); -        if (hash == -1) -            return -1; -    } -    entry.key = key; -    entry.hash = hash; -    return set_contains_entry(so, &entry); -} -  static PyObject *  set_pop(PySetObject *so)  { @@ -682,43 +738,64 @@ set_traverse(PySetObject *so, visitproc visit, void *arg)      return 0;  } -static Py_hash_t -frozenset_hash(PyObject *self) +/* Work to increase the bit dispersion for closely spaced hash values. +   This is important because some use cases have many combinations of a +   small number of elements with nearby hashes so that many distinct +   combinations collapse to only a handful of distinct hash values. */ + +static Py_uhash_t +_shuffle_bits(Py_uhash_t h)  { -    /* Most of the constants in this hash algorithm are randomly choosen -       large primes with "interesting bit patterns" and that passed -       tests for good collision statistics on a variety of problematic -       datasets such as: +    return ((h ^ 89869747UL) ^ (h << 16)) * 3644798167UL; +} -          ps = [] -          for r in range(21): -              ps += itertools.combinations(range(20), r) -          num_distinct_hashes = len({hash(frozenset(s)) for s in ps}) +/* Most of the constants in this hash algorithm are randomly chosen +   large primes with "interesting bit patterns" and that passed tests +   for good collision statistics on a variety of problematic datasets +   including powersets and graph structures (such as David Eppstein's +   graph recipes in Lib/test/test_set.py) */ -    */ +static Py_hash_t +frozenset_hash(PyObject *self) +{      PySetObject *so = (PySetObject *)self; -    Py_uhash_t h, hash = 1927868237UL; +    Py_uhash_t hash = 0;      setentry *entry; -    Py_ssize_t pos = 0;      if (so->hash != -1)          return so->hash; -    hash *= (Py_uhash_t)PySet_GET_SIZE(self) + 1; -    while (set_next(so, &pos, &entry)) { -        /* Work to increase the bit dispersion for closely spaced hash -           values.  This is important because some use cases have many -           combinations of a small number of elements with nearby -           hashes so that many distinct combinations collapse to only -           a handful of distinct hash values. */ -        h = entry->hash; -        hash ^= ((h ^ 89869747UL) ^ (h << 16)) * 3644798167UL; -    } -    /* Make the final result spread-out in a different pattern -       than the algorithm for tuples or other python objects. */ +    /* Xor-in shuffled bits from every entry's hash field because xor is +       commutative and a frozenset hash should be independent of order. + +       For speed, include null entries and dummy entries and then +       subtract out their effect afterwards so that the final hash +       depends only on active entries.  This allows the code to be +       vectorized by the compiler and it saves the unpredictable +       branches that would arise when trying to exclude null and dummy +       entries on every iteration. */ + +    for (entry = so->table; entry <= &so->table[so->mask]; entry++) +        hash ^= _shuffle_bits(entry->hash); + +    /* Remove the effect of an odd number of NULL entries */ +    if ((so->mask + 1 - so->fill) & 1) +        hash ^= _shuffle_bits(0); + +    /* Remove the effect of an odd number of dummy entries */ +    if ((so->fill - so->used) & 1) +        hash ^= _shuffle_bits(-1); + +    /* Factor in the number of active entries */ +    hash ^= ((Py_uhash_t)PySet_GET_SIZE(self) + 1) * 1927868237UL; + +    /* Disperse patterns arising in nested frozensets */      hash = hash * 69069U + 907133923UL; + +    /* -1 is reserved as an error code */      if (hash == (Py_uhash_t)-1)          hash = 590923713UL; +      so->hash = hash;      return hash;  } @@ -865,7 +942,7 @@ PyTypeObject PySetIter_Type = {      PyObject_GenericGetAttr,                    /* tp_getattro */      0,                                          /* tp_setattro */      0,                                          /* tp_as_buffer */ -    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */ +    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */      0,                                          /* tp_doc */      (traverseproc)setiter_traverse,             /* tp_traverse */      0,                                          /* tp_clear */ @@ -910,18 +987,14 @@ set_update_internal(PySetObject *so, PyObject *other)          * incrementally resizing as we insert new keys.  Expect          * that there will be no (or few) overlapping keys.          */ -        if (dictsize == -1) +        if (dictsize < 0)              return -1; -        if ((so->fill + dictsize)*3 >= (so->mask+1)*2) { -            if (set_table_resize(so, (so->used + dictsize)*2) != 0) +        if ((so->fill + dictsize)*3 >= so->mask*2) { +            if (set_table_resize(so, so->used + dictsize) != 0)                  return -1;          }          while (_PyDict_Next(other, &pos, &key, &value, &hash)) { -            setentry an_entry; - -            an_entry.hash = hash; -            an_entry.key = key; -            if (set_add_entry(so, &an_entry)) +            if (set_add_entry(so, key, hash))                  return -1;          }          return 0; @@ -970,9 +1043,8 @@ PyDoc_STRVAR(update_doc,  static PyObject *  make_new_set(PyTypeObject *type, PyObject *iterable)  { -    PySetObject *so = NULL; +    PySetObject *so; -    /* create PySetObject structure */      so = (PySetObject *)type->tp_alloc(type, 0);      if (so == NULL)          return NULL; @@ -1015,7 +1087,8 @@ frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)  {      PyObject *iterable = NULL, *result; -    if (type == &PyFrozenSet_Type && !_PyArg_NoKeywords("frozenset()", kwds)) +    if (kwds != NULL && type == &PyFrozenSet_Type +        && !_PyArg_NoKeywords("frozenset()", kwds))          return NULL;      if (!PyArg_UnpackTuple(args, type->tp_name, 0, 1, &iterable)) @@ -1042,24 +1115,9 @@ frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)      return emptyfrozenset;  } -int -PySet_ClearFreeList(void) -{ -    return 0; -} - -void -PySet_Fini(void) -{ -    Py_CLEAR(emptyfrozenset); -} -  static PyObject *  set_new(PyTypeObject *type, PyObject *args, PyObject *kwds)  { -    if (type == &PySet_Type && !_PyArg_NoKeywords("set()", kwds)) -        return NULL; -      return make_new_set(type, NULL);  } @@ -1201,6 +1259,8 @@ set_intersection(PySetObject *so, PyObject *other)  {      PySetObject *result;      PyObject *key, *it, *tmp; +    Py_hash_t hash; +    int rv;      if ((PyObject *)so == other)          return set_copy(so); @@ -1220,13 +1280,15 @@ set_intersection(PySetObject *so, PyObject *other)          }          while (set_next((PySetObject *)other, &pos, &entry)) { -            int rv = set_contains_entry(so, entry); -            if (rv == -1) { +            key = entry->key; +            hash = entry->hash; +            rv = set_contains_entry(so, key, hash); +            if (rv < 0) {                  Py_DECREF(result);                  return NULL;              }              if (rv) { -                if (set_add_entry(result, entry)) { +                if (set_add_entry(result, key, hash)) {                      Py_DECREF(result);                      return NULL;                  } @@ -1242,32 +1304,15 @@ set_intersection(PySetObject *so, PyObject *other)      }      while ((key = PyIter_Next(it)) != NULL) { -        int rv; -        setentry entry; -        Py_hash_t hash = PyObject_Hash(key); - -        if (hash == -1) { -            Py_DECREF(it); -            Py_DECREF(result); -            Py_DECREF(key); -            return NULL; -        } -        entry.hash = hash; -        entry.key = key; -        rv = set_contains_entry(so, &entry); -        if (rv == -1) { -            Py_DECREF(it); -            Py_DECREF(result); -            Py_DECREF(key); -            return NULL; -        } +        hash = PyObject_Hash(key); +        if (hash == -1) +            goto error; +        rv = set_contains_entry(so, key, hash); +        if (rv < 0) +            goto error;          if (rv) { -            if (set_add_entry(result, &entry)) { -                Py_DECREF(it); -                Py_DECREF(result); -                Py_DECREF(key); -                return NULL; -            } +            if (set_add_entry(result, key, hash)) +                goto error;          }          Py_DECREF(key);      } @@ -1277,6 +1322,11 @@ set_intersection(PySetObject *so, PyObject *other)          return NULL;      }      return (PyObject *)result; +  error: +    Py_DECREF(it); +    Py_DECREF(result); +    Py_DECREF(key); +    return NULL;  }  static PyObject * @@ -1363,6 +1413,7 @@ static PyObject *  set_isdisjoint(PySetObject *so, PyObject *other)  {      PyObject *key, *it, *tmp; +    int rv;      if ((PyObject *)so == other) {          if (PySet_GET_SIZE(so) == 0) @@ -1381,8 +1432,8 @@ set_isdisjoint(PySetObject *so, PyObject *other)              other = tmp;          }          while (set_next((PySetObject *)other, &pos, &entry)) { -            int rv = set_contains_entry(so, entry); -            if (rv == -1) +            rv = set_contains_entry(so, entry->key, entry->hash); +            if (rv < 0)                  return NULL;              if (rv)                  Py_RETURN_FALSE; @@ -1395,8 +1446,6 @@ set_isdisjoint(PySetObject *so, PyObject *other)          return NULL;      while ((key = PyIter_Next(it)) != NULL) { -        int rv; -        setentry entry;          Py_hash_t hash = PyObject_Hash(key);          if (hash == -1) { @@ -1404,11 +1453,9 @@ set_isdisjoint(PySetObject *so, PyObject *other)              Py_DECREF(it);              return NULL;          } -        entry.hash = hash; -        entry.key = key; -        rv = set_contains_entry(so, &entry); +        rv = set_contains_entry(so, key, hash);          Py_DECREF(key); -        if (rv == -1) { +        if (rv < 0) {              Py_DECREF(it);              return NULL;          } @@ -1437,7 +1484,7 @@ set_difference_update_internal(PySetObject *so, PyObject *other)          Py_ssize_t pos = 0;          while (set_next((PySetObject *)other, &pos, &entry)) -            if (set_discard_entry(so, entry) == -1) +            if (set_discard_entry(so, entry->key, entry->hash) < 0)                  return -1;      } else {          PyObject *key, *it; @@ -1446,7 +1493,7 @@ set_difference_update_internal(PySetObject *so, PyObject *other)              return -1;          while ((key = PyIter_Next(it)) != NULL) { -            if (set_discard_key(so, key) == -1) { +            if (set_discard_key(so, key) < 0) {                  Py_DECREF(it);                  Py_DECREF(key);                  return -1; @@ -1457,10 +1504,10 @@ set_difference_update_internal(PySetObject *so, PyObject *other)          if (PyErr_Occurred())              return -1;      } -    /* If more than 1/5 are dummies, then resize them away. */ -    if ((so->fill - so->used) * 5 < so->mask) +    /* If more than 1/4th are dummies, then resize them away. */ +    if ((size_t)(so->fill - so->used) <= (size_t)so->mask / 4)          return 0; -    return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4); +    return set_table_resize(so, so->used);  }  static PyObject * @@ -1487,7 +1534,7 @@ set_copy_and_difference(PySetObject *so, PyObject *other)      result = set_copy(so);      if (result == NULL)          return NULL; -    if (set_difference_update_internal((PySetObject *) result, other) != -1) +    if (set_difference_update_internal((PySetObject *) result, other) == 0)          return result;      Py_DECREF(result);      return NULL; @@ -1497,8 +1544,11 @@ static PyObject *  set_difference(PySetObject *so, PyObject *other)  {      PyObject *result; +    PyObject *key; +    Py_hash_t hash;      setentry *entry;      Py_ssize_t pos = 0; +    int rv;      if (!PyAnySet_Check(other)  && !PyDict_CheckExact(other)) {          return set_copy_and_difference(so, other); @@ -1516,17 +1566,15 @@ set_difference(PySetObject *so, PyObject *other)      if (PyDict_CheckExact(other)) {          while (set_next(so, &pos, &entry)) { -            setentry entrycopy; -            int rv; -            entrycopy.hash = entry->hash; -            entrycopy.key = entry->key; -            rv = _PyDict_Contains(other, entry->key, entry->hash); +            key = entry->key; +            hash = entry->hash; +            rv = _PyDict_Contains(other, key, hash);              if (rv < 0) {                  Py_DECREF(result);                  return NULL;              }              if (!rv) { -                if (set_add_entry((PySetObject *)result, &entrycopy)) { +                if (set_add_entry((PySetObject *)result, key, hash)) {                      Py_DECREF(result);                      return NULL;                  } @@ -1537,13 +1585,15 @@ set_difference(PySetObject *so, PyObject *other)      /* Iterate over so, checking for common elements in other. */      while (set_next(so, &pos, &entry)) { -        int rv = set_contains_entry((PySetObject *)other, entry); -        if (rv == -1) { +        key = entry->key; +        hash = entry->hash; +        rv = set_contains_entry((PySetObject *)other, key, hash); +        if (rv < 0) {              Py_DECREF(result);              return NULL;          }          if (!rv) { -            if (set_add_entry((PySetObject *)result, entry)) { +            if (set_add_entry((PySetObject *)result, key, hash)) {                  Py_DECREF(result);                  return NULL;              } @@ -1605,29 +1655,24 @@ set_symmetric_difference_update(PySetObject *so, PyObject *other)      PySetObject *otherset;      PyObject *key;      Py_ssize_t pos = 0; +    Py_hash_t hash;      setentry *entry; +    int rv;      if ((PyObject *)so == other)          return set_clear(so);      if (PyDict_CheckExact(other)) {          PyObject *value; -        int rv; -        Py_hash_t hash;          while (_PyDict_Next(other, &pos, &key, &value, &hash)) { -            setentry an_entry; -              Py_INCREF(key); -            an_entry.hash = hash; -            an_entry.key = key; - -            rv = set_discard_entry(so, &an_entry); -            if (rv == -1) { +            rv = set_discard_entry(so, key, hash); +            if (rv < 0) {                  Py_DECREF(key);                  return NULL;              }              if (rv == DISCARD_NOTFOUND) { -                if (set_add_entry(so, &an_entry)) { +                if (set_add_entry(so, key, hash)) {                      Py_DECREF(key);                      return NULL;                  } @@ -1647,13 +1692,15 @@ set_symmetric_difference_update(PySetObject *so, PyObject *other)      }      while (set_next(otherset, &pos, &entry)) { -        int rv = set_discard_entry(so, entry); -        if (rv == -1) { +        key = entry->key; +        hash = entry->hash; +        rv = set_discard_entry(so, key, hash); +        if (rv < 0) {              Py_DECREF(otherset);              return NULL;          }          if (rv == DISCARD_NOTFOUND) { -            if (set_add_entry(so, entry)) { +            if (set_add_entry(so, key, hash)) {                  Py_DECREF(otherset);                  return NULL;              } @@ -1715,6 +1762,7 @@ set_issubset(PySetObject *so, PyObject *other)  {      setentry *entry;      Py_ssize_t pos = 0; +    int rv;      if (!PyAnySet_Check(other)) {          PyObject *tmp, *result; @@ -1729,8 +1777,8 @@ set_issubset(PySetObject *so, PyObject *other)          Py_RETURN_FALSE;      while (set_next(so, &pos, &entry)) { -        int rv = set_contains_entry((PySetObject *)other, entry); -        if (rv == -1) +        rv = set_contains_entry((PySetObject *)other, entry->key, entry->hash); +        if (rv < 0)              return NULL;          if (!rv)              Py_RETURN_FALSE; @@ -1821,7 +1869,7 @@ set_contains(PySetObject *so, PyObject *key)      int rv;      rv = set_contains_key(so, key); -    if (rv == -1) { +    if (rv < 0) {          if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))              return -1;          PyErr_Clear(); @@ -1840,7 +1888,7 @@ set_direct_contains(PySetObject *so, PyObject *key)      long result;      result = set_contains(so, key); -    if (result == -1) +    if (result < 0)          return NULL;      return PyBool_FromLong(result);  } @@ -1854,7 +1902,7 @@ set_remove(PySetObject *so, PyObject *key)      int rv;      rv = set_discard_key(so, key); -    if (rv == -1) { +    if (rv < 0) {          if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))              return NULL;          PyErr_Clear(); @@ -1863,7 +1911,7 @@ set_remove(PySetObject *so, PyObject *key)              return NULL;          rv = set_discard_key(so, tmpkey);          Py_DECREF(tmpkey); -        if (rv == -1) +        if (rv < 0)              return NULL;      } @@ -1886,7 +1934,7 @@ set_discard(PySetObject *so, PyObject *key)      int rv;      rv = set_discard_key(so, key); -    if (rv == -1) { +    if (rv < 0) {          if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))              return NULL;          PyErr_Clear(); @@ -1895,7 +1943,7 @@ set_discard(PySetObject *so, PyObject *key)              return NULL;          rv = set_discard_key(so, tmpkey);          Py_DECREF(tmpkey); -        if (rv == -1) +        if (rv < 0)              return NULL;      }      Py_RETURN_NONE; @@ -1949,13 +1997,12 @@ set_init(PySetObject *self, PyObject *args, PyObject *kwds)  {      PyObject *iterable = NULL; -    if (!PyAnySet_Check(self)) -        return -1; -    if (PySet_Check(self) && !_PyArg_NoKeywords("set()", kwds)) +    if (kwds != NULL && !_PyArg_NoKeywords("set()", kwds))          return -1;      if (!PyArg_UnpackTuple(args, Py_TYPE(self)->tp_name, 0, 1, &iterable))          return -1; -    set_clear_internal(self); +    if (self->fill) +        set_clear_internal(self);      self->hash = -1;      if (iterable == NULL)          return 0; @@ -2122,7 +2169,7 @@ static PyMethodDef frozenset_methods[] = {       copy_doc},      {"difference",      (PyCFunction)set_difference_multi,      METH_VARARGS,       difference_doc}, -    {"intersection",(PyCFunction)set_intersection_multi,        METH_VARARGS, +    {"intersection",    (PyCFunction)set_intersection_multi,    METH_VARARGS,       intersection_doc},      {"isdisjoint",      (PyCFunction)set_isdisjoint,    METH_O,       isdisjoint_doc}, @@ -2193,7 +2240,7 @@ PyTypeObject PyFrozenSet_Type = {      (traverseproc)set_traverse,         /* tp_traverse */      (inquiry)set_clear_internal,        /* tp_clear */      (richcmpfunc)set_richcompare,       /* tp_richcompare */ -    offsetof(PySetObject, weakreflist),         /* tp_weaklistoffset */ +    offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */      (getiterfunc)set_iter,              /* tp_iter */      0,                                  /* tp_iternext */      frozenset_methods,                  /* tp_methods */ @@ -2277,6 +2324,18 @@ PySet_Add(PyObject *anyset, PyObject *key)  }  int +PySet_ClearFreeList(void) +{ +    return 0; +} + +void +PySet_Fini(void) +{ +    Py_CLEAR(emptyfrozenset); +} + +int  _PySet_NextEntry(PyObject *set, Py_ssize_t *pos, PyObject **key, Py_hash_t *hash)  {      setentry *entry; | 
