diff options
| author | Inada Naoki <songofacandy@gmail.com> | 2020-08-07 14:08:55 +0900 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-08-07 14:08:55 +0900 | 
| commit | d9323a8c6e07071a59dc4c910661db33236c01b2 (patch) | |
| tree | 113cc103733582d9bc58875aba732f886481fa62 /Objects/dictobject.c | |
| parent | 5f0769a7529fcbc28190864f098f192053a10747 (diff) | |
| download | cpython-git-d9323a8c6e07071a59dc4c910661db33236c01b2.tar.gz | |
bpo-41493: Refactoring dictresize (GH-21751)
Split newsize calculation into new function. dictresize() now accepts exact newsize.
Diffstat (limited to 'Objects/dictobject.c')
| -rw-r--r-- | Objects/dictobject.c | 67 | 
1 files changed, 41 insertions, 26 deletions
| diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 1b7ae06d82..6c3fc62d2e 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -111,6 +111,7 @@ converting the dict to the combined table.  #define PyDict_MINSIZE 8  #include "Python.h" +#include "pycore_bitutils.h" // _Py_bit_length  #include "pycore_gc.h"       // _PyObject_GC_IS_TRACKED()  #include "pycore_object.h"   // _PyObject_GC_TRACK()  #include "pycore_pyerrors.h" // _PyErr_Fetch() @@ -236,7 +237,7 @@ lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key,  static Py_ssize_t lookdict_split(PyDictObject *mp, PyObject *key,                                   Py_hash_t hash, PyObject **value_addr); -static int dictresize(PyDictObject *mp, Py_ssize_t minused); +static int dictresize(PyDictObject *mp, Py_ssize_t newsize);  static PyObject* dict_iter(PyDictObject *dict); @@ -411,18 +412,40 @@ dictkeys_set_index(PyDictKeysObject *keys, Py_ssize_t i, Py_ssize_t ix)   */  #define USABLE_FRACTION(n) (((n) << 1)/3) -/* ESTIMATE_SIZE is reverse function of USABLE_FRACTION. +/* Find the smallest dk_size >= minsize. */ +static inline Py_ssize_t +calculate_keysize(Py_ssize_t minsize) +{ +#if SIZEOF_LONG == SIZEOF_SIZE_T +    minsize = (minsize | PyDict_MINSIZE) - 1; +    return 1LL << _Py_bit_length(minsize | (PyDict_MINSIZE-1)); +#elif defined(_MSC_VER) +    // On 64bit Windows, sizeof(long) == 4. +    minsize = (minsize | PyDict_MINSIZE) - 1; +    unsigned long msb; +    _BitScanReverse64(&msb, (uint64_t)minsize); +    return 1LL << (msb + 1); +#else +    Py_ssize_t size; +    for (size = PyDict_MINSIZE; +            size < minsize && size > 0; +            size <<= 1) +        ; +    return size; +#endif +} + +/* estimate_keysize is reverse function of USABLE_FRACTION. + *   * This can be used to reserve enough size to insert n entries without   * resizing.   */ -#define ESTIMATE_SIZE(n)  (((n)*3+1) >> 1) +static inline Py_ssize_t +estimate_keysize(Py_ssize_t n) +{ +    return calculate_keysize((n*3 + 1) / 2); +} -/* Alternative fraction that is otherwise close enough to 2n/3 to make - * little difference. 8 * 2/3 == 8 * 5/8 == 5. 16 * 2/3 == 16 * 5/8 == 10. - * 32 * 2/3 = 21, 32 * 5/8 = 20. - * Its advantage is that it is faster to compute on machines with slow division. - * #define USABLE_FRACTION(n) (((n) >> 1) + ((n) >> 2) - ((n) >> 3)) - */  /* GROWTH_RATE. Growth rate upon hitting maximum load.   * Currently set to used*3. @@ -1036,7 +1059,7 @@ find_empty_slot(PyDictKeysObject *keys, Py_hash_t hash)  static int  insertion_resize(PyDictObject *mp)  { -    return dictresize(mp, GROWTH_RATE(mp)); +    return dictresize(mp, calculate_keysize(GROWTH_RATE(mp)));  }  /* @@ -1194,22 +1217,19 @@ After resizing a table is always combined,  but can be resplit by make_keys_shared().  */  static int -dictresize(PyDictObject *mp, Py_ssize_t minsize) +dictresize(PyDictObject *mp, Py_ssize_t newsize)  { -    Py_ssize_t newsize, numentries; +    Py_ssize_t numentries;      PyDictKeysObject *oldkeys;      PyObject **oldvalues;      PyDictKeyEntry *oldentries, *newentries; -    /* Find the smallest table size > minused. */ -    for (newsize = PyDict_MINSIZE; -         newsize < minsize && newsize > 0; -         newsize <<= 1) -        ;      if (newsize <= 0) {          PyErr_NoMemory();          return -1;      } +    assert(IS_POWER_OF_2(newsize)); +    assert(newsize >= PyDict_MINSIZE);      oldkeys = mp->ma_keys; @@ -1355,13 +1375,8 @@ _PyDict_NewPresized(Py_ssize_t minused)          newsize = max_presize;      }      else { -        Py_ssize_t minsize = ESTIMATE_SIZE(minused); -        newsize = PyDict_MINSIZE*2; -        while (newsize < minsize) { -            newsize <<= 1; -        } +        newsize = estimate_keysize(minused);      } -    assert(IS_POWER_OF_2(newsize));      new_keys = new_keys_object(newsize);      if (new_keys == NULL) @@ -1930,7 +1945,7 @@ _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value)              PyObject *key;              Py_hash_t hash; -            if (dictresize(mp, ESTIMATE_SIZE(PyDict_GET_SIZE(iterable)))) { +            if (dictresize(mp, estimate_keysize(PyDict_GET_SIZE(iterable)))) {                  Py_DECREF(d);                  return NULL;              } @@ -1949,7 +1964,7 @@ _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value)              PyObject *key;              Py_hash_t hash; -            if (dictresize(mp, ESTIMATE_SIZE(PySet_GET_SIZE(iterable)))) { +            if (dictresize(mp, estimate_keysize(PySet_GET_SIZE(iterable)))) {                  Py_DECREF(d);                  return NULL;              } @@ -2558,7 +2573,7 @@ dict_merge(PyObject *a, PyObject *b, int override)           * that there will be no (or few) overlapping keys.           */          if (USABLE_FRACTION(mp->ma_keys->dk_size) < other->ma_used) { -            if (dictresize(mp, ESTIMATE_SIZE(mp->ma_used + other->ma_used))) { +            if (dictresize(mp, estimate_keysize(mp->ma_used + other->ma_used))) {                 return -1;              }          } | 
