diff options
Diffstat (limited to 'Objects/dictobject.c')
| -rw-r--r-- | Objects/dictobject.c | 224 |
1 files changed, 115 insertions, 109 deletions
diff --git a/Objects/dictobject.c b/Objects/dictobject.c index d8ab91fb12..2a01f70b71 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -10,8 +10,9 @@ This implements the dictionary's hashtable. -As of Python 3.6, this is compact and ordered. Basic idea is described here. -https://morepypy.blogspot.com/2015/01/faster-more-memory-efficient-and-more.html +As of Python 3.6, this is compact and ordered. Basic idea is described here: +* https://mail.python.org/pipermail/python-dev/2012-December/123028.html +* https://morepypy.blogspot.com/2015/01/faster-more-memory-efficient-and-more.html layout: @@ -388,7 +389,7 @@ dk_set_index(PyDictKeysObject *keys, Py_ssize_t i, Py_ssize_t ix) * This can be used to reserve enough size to insert n entries without * resizing. */ -#define ESTIMATE_SIZE(n) (((n)*3) >> 1) +#define ESTIMATE_SIZE(n) (((n)*3+1) >> 1) /* Alternative fraction that is otherwise close enough to 2n/3 to make * little difference. 8 * 2/3 == 8 * 5/8 == 5. 16 * 2/3 == 16 * 5/8 == 10. @@ -682,7 +683,7 @@ the <dummy> value. For both, when the key isn't found a DKIX_EMPTY is returned. hashpos returns where the key index should be inserted. */ -static Py_ssize_t +static Py_ssize_t _Py_HOT_FUNCTION lookdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject ***value_addr, Py_ssize_t *hashpos) { @@ -797,7 +798,7 @@ top: } /* Specialized version for string-only keys */ -static Py_ssize_t +static Py_ssize_t _Py_HOT_FUNCTION lookdict_unicode(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject ***value_addr, Py_ssize_t *hashpos) { @@ -872,7 +873,7 @@ lookdict_unicode(PyDictObject *mp, PyObject *key, /* Faster version of lookdict_unicode when it is known that no <dummy> keys * will be present. */ -static Py_ssize_t +static Py_ssize_t _Py_HOT_FUNCTION lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject ***value_addr, Py_ssize_t *hashpos) @@ -940,7 +941,7 @@ lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key, * Split tables only contain unicode keys and no dummy keys, * so algorithm is the same as lookdict_unicode_nodummy. */ -static Py_ssize_t +static Py_ssize_t _Py_HOT_FUNCTION lookdict_split(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject ***value_addr, Py_ssize_t *hashpos) { @@ -1197,41 +1198,21 @@ insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value) } /* -Internal routine used by dictresize() to insert an item which is -known to be absent from the dict. This routine also assumes that -the dict contains no deleted entries. Besides the performance benefit, -using insertdict() in dictresize() is dangerous (SF bug #1456209). -Note that no refcounts are changed by this routine; if needed, the caller -is responsible for incref'ing `key` and `value`. -Neither mp->ma_used nor k->dk_usable are modified by this routine; the caller -must set them correctly +Internal routine used by dictresize() to buid a hashtable of entries. */ static void -insertdict_clean(PyDictObject *mp, PyObject *key, Py_hash_t hash, - PyObject *value) +build_indices(PyDictKeysObject *keys, PyDictKeyEntry *ep, Py_ssize_t n) { - size_t i; - PyDictKeysObject *k = mp->ma_keys; - size_t mask = (size_t)DK_SIZE(k)-1; - PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys); - PyDictKeyEntry *ep; - - assert(k->dk_lookup != NULL); - assert(value != NULL); - assert(key != NULL); - assert(PyUnicode_CheckExact(key) || k->dk_lookup == lookdict); - i = hash & mask; - for (size_t perturb = hash; dk_get_index(k, i) != DKIX_EMPTY;) { - perturb >>= PERTURB_SHIFT; - i = mask & ((i << 2) + i + perturb + 1); + size_t mask = (size_t)DK_SIZE(keys) - 1; + for (Py_ssize_t ix = 0; ix != n; ix++, ep++) { + Py_hash_t hash = ep->me_hash; + size_t i = hash & mask; + for (size_t perturb = hash; dk_get_index(keys, i) != DKIX_EMPTY;) { + perturb >>= PERTURB_SHIFT; + i = mask & ((i << 2) + i + perturb + 1); + } + dk_set_index(keys, i, ix); } - ep = &ep0[k->dk_nentries]; - assert(ep->me_value == NULL); - dk_set_index(k, i, k->dk_nentries); - k->dk_nentries++; - ep->me_key = key; - ep->me_hash = hash; - ep->me_value = value; } /* @@ -1247,10 +1228,10 @@ but can be resplit by make_keys_shared(). static int dictresize(PyDictObject *mp, Py_ssize_t minused) { - Py_ssize_t i, newsize; + Py_ssize_t newsize, numentries; PyDictKeysObject *oldkeys; PyObject **oldvalues; - PyDictKeyEntry *ep0; + PyDictKeyEntry *oldentries, *newentries; /* Find the smallest table size > minused. */ for (newsize = PyDict_MINSIZE; @@ -1261,8 +1242,14 @@ dictresize(PyDictObject *mp, Py_ssize_t minused) PyErr_NoMemory(); return -1; } + oldkeys = mp->ma_keys; - oldvalues = mp->ma_values; + + /* NOTE: Current odict checks mp->ma_keys to detect resize happen. + * So we can't reuse oldkeys even if oldkeys->dk_size == newsize. + * TODO: Try reusing oldkeys when reimplement odict. + */ + /* Allocate a new table. */ mp->ma_keys = new_keys_object(newsize); if (mp->ma_keys == NULL) { @@ -1271,42 +1258,59 @@ dictresize(PyDictObject *mp, Py_ssize_t minused) } if (oldkeys->dk_lookup == lookdict) mp->ma_keys->dk_lookup = lookdict; - mp->ma_values = NULL; - ep0 = DK_ENTRIES(oldkeys); - /* Main loop below assumes we can transfer refcount to new keys - * and that value is stored in me_value. - * Increment ref-counts and copy values here to compensate - * This (resizing a split table) should be relatively rare */ + + numentries = mp->ma_used; + oldentries = DK_ENTRIES(oldkeys); + newentries = DK_ENTRIES(mp->ma_keys); + oldvalues = mp->ma_values; if (oldvalues != NULL) { - for (i = 0; i < oldkeys->dk_nentries; i++) { - if (oldvalues[i] != NULL) { - Py_INCREF(ep0[i].me_key); - ep0[i].me_value = oldvalues[i]; - } - } - } - /* Main loop */ - for (i = 0; i < oldkeys->dk_nentries; i++) { - PyDictKeyEntry *ep = &ep0[i]; - if (ep->me_value != NULL) { - insertdict_clean(mp, ep->me_key, ep->me_hash, ep->me_value); + /* Convert split table into new combined table. + * We must incref keys; we can transfer values. + * Note that values of split table is always dense. + */ + for (Py_ssize_t i = 0; i < numentries; i++) { + assert(oldvalues[i] != NULL); + PyDictKeyEntry *ep = &oldentries[i]; + PyObject *key = ep->me_key; + Py_INCREF(key); + newentries[i].me_key = key; + newentries[i].me_hash = ep->me_hash; + newentries[i].me_value = oldvalues[i]; } - } - mp->ma_keys->dk_usable -= mp->ma_used; - if (oldvalues != NULL) { - /* NULL out me_value slot in oldkeys, in case it was shared */ - for (i = 0; i < oldkeys->dk_nentries; i++) - ep0[i].me_value = NULL; + DK_DECREF(oldkeys); + mp->ma_values = NULL; if (oldvalues != empty_values) { free_values(oldvalues); } } - else { + else { // combined table. + if (oldkeys->dk_nentries == numentries) { + memcpy(newentries, oldentries, numentries * sizeof(PyDictKeyEntry)); + } + else { + PyDictKeyEntry *ep = oldentries; + for (Py_ssize_t i = 0; i < numentries; i++) { + while (ep->me_value == NULL) + ep++; + newentries[i] = *ep++; + } + } + assert(oldkeys->dk_lookup != lookdict_split); assert(oldkeys->dk_refcnt == 1); - DK_DEBUG_DECREF PyObject_FREE(oldkeys); + if (oldkeys->dk_size == PyDict_MINSIZE && + numfreekeys < PyDict_MAXFREELIST) { + DK_DEBUG_DECREF keys_free_list[numfreekeys++] = oldkeys; + } + else { + DK_DEBUG_DECREF PyObject_FREE(oldkeys); + } } + + build_indices(mp->ma_keys, newentries, numentries); + mp->ma_keys->dk_usable -= numentries; + mp->ma_keys->dk_nentries = numentries; return 0; } @@ -1357,12 +1361,26 @@ make_keys_shared(PyObject *op) PyObject * _PyDict_NewPresized(Py_ssize_t minused) { + const Py_ssize_t max_presize = 128 * 1024; Py_ssize_t newsize; PyDictKeysObject *new_keys; - for (newsize = PyDict_MINSIZE; - newsize <= minused && newsize > 0; - newsize <<= 1) - ; + + /* There are no strict guarantee that returned dict can contain minused + * items without resize. So we create medium size dict instead of very + * large dict or MemoryError. + */ + if (minused > USABLE_FRACTION(max_presize)) { + newsize = max_presize; + } + else { + Py_ssize_t minsize = ESTIMATE_SIZE(minused); + newsize = PyDict_MINSIZE; + while (newsize < minsize) { + newsize <<= 1; + } + } + assert(IS_POWER_OF_2(newsize)); + new_keys = new_keys_object(newsize); if (new_keys == NULL) return NULL; @@ -1405,7 +1423,7 @@ PyDict_GetItem(PyObject *op, PyObject *key) Let's just hope that no exception occurs then... This must be _PyThreadState_Current and not PyThreadState_GET() because in debug mode, the latter complains if tstate is NULL. */ - tstate = _PyThreadState_UncheckedGet(); + tstate = PyThreadState_GET(); if (tstate != NULL && tstate->curexc_type != NULL) { /* preserve the existing exception */ PyObject *err_type, *err_value, *err_tb; @@ -1687,7 +1705,7 @@ int _PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue, Py_hash_t *phash) { - Py_ssize_t i, n; + Py_ssize_t i; PyDictObject *mp; PyDictKeyEntry *entry_ptr; PyObject *value; @@ -1696,21 +1714,18 @@ _PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, return 0; mp = (PyDictObject *)op; i = *ppos; - n = mp->ma_keys->dk_nentries; - if ((size_t)i >= (size_t)n) - return 0; if (mp->ma_values) { - PyObject **value_ptr = &mp->ma_values[i]; - while (i < n && *value_ptr == NULL) { - value_ptr++; - i++; - } - if (i >= n) + if (i < 0 || i >= mp->ma_used) return 0; + /* values of split table is always dense */ entry_ptr = &DK_ENTRIES(mp->ma_keys)[i]; - value = *value_ptr; + value = mp->ma_values[i]; + assert(value != NULL); } else { + Py_ssize_t n = mp->ma_keys->dk_nentries; + if (i < 0 || i >= n) + return 0; entry_ptr = &DK_ENTRIES(mp->ma_keys)[i]; while (i < n && entry_ptr->me_value == NULL) { entry_ptr++; @@ -3375,7 +3390,7 @@ static PyObject* dictiter_iternextkey(dictiterobject *di) { PyObject *key; - Py_ssize_t i, n; + Py_ssize_t i; PyDictKeysObject *k; PyDictObject *d = di->di_dict; @@ -3392,18 +3407,15 @@ dictiter_iternextkey(dictiterobject *di) i = di->di_pos; k = d->ma_keys; - n = k->dk_nentries; + assert(i >= 0); if (d->ma_values) { - PyObject **value_ptr = &d->ma_values[i]; - while (i < n && *value_ptr == NULL) { - value_ptr++; - i++; - } - if (i >= n) + if (i >= d->ma_used) goto fail; key = DK_ENTRIES(k)[i].me_key; + assert(d->ma_values[i] != NULL); } else { + Py_ssize_t n = k->dk_nentries; PyDictKeyEntry *entry_ptr = &DK_ENTRIES(k)[i]; while (i < n && entry_ptr->me_value == NULL) { entry_ptr++; @@ -3461,7 +3473,7 @@ static PyObject * dictiter_iternextvalue(dictiterobject *di) { PyObject *value; - Py_ssize_t i, n; + Py_ssize_t i; PyDictObject *d = di->di_dict; if (d == NULL) @@ -3476,18 +3488,15 @@ dictiter_iternextvalue(dictiterobject *di) } i = di->di_pos; - n = d->ma_keys->dk_nentries; + assert(i >= 0); if (d->ma_values) { - PyObject **value_ptr = &d->ma_values[i]; - while (i < n && *value_ptr == NULL) { - value_ptr++; - i++; - } - if (i >= n) + if (i >= d->ma_used) goto fail; - value = *value_ptr; + value = d->ma_values[i]; + assert(value != NULL); } else { + Py_ssize_t n = d->ma_keys->dk_nentries; PyDictKeyEntry *entry_ptr = &DK_ENTRIES(d->ma_keys)[i]; while (i < n && entry_ptr->me_value == NULL) { entry_ptr++; @@ -3545,7 +3554,7 @@ static PyObject * dictiter_iternextitem(dictiterobject *di) { PyObject *key, *value, *result = di->di_result; - Py_ssize_t i, n; + Py_ssize_t i; PyDictObject *d = di->di_dict; if (d == NULL) @@ -3560,19 +3569,16 @@ dictiter_iternextitem(dictiterobject *di) } i = di->di_pos; - n = d->ma_keys->dk_nentries; + assert(i >= 0); if (d->ma_values) { - PyObject **value_ptr = &d->ma_values[i]; - while (i < n && *value_ptr == NULL) { - value_ptr++; - i++; - } - if (i >= n) + if (i >= d->ma_used) goto fail; key = DK_ENTRIES(d->ma_keys)[i].me_key; - value = *value_ptr; + value = d->ma_values[i]; + assert(value != NULL); } else { + Py_ssize_t n = d->ma_keys->dk_nentries; PyDictKeyEntry *entry_ptr = &DK_ENTRIES(d->ma_keys)[i]; while (i < n && entry_ptr->me_value == NULL) { entry_ptr++; |
