From c6b292cdeee689f0bfac6c1e2c2d4e4e01fa8d9e Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 8 Jun 2020 16:30:33 +0200 Subject: bpo-29882: Add _Py_popcount32() function (GH-20518) * Rename pycore_byteswap.h to pycore_bitutils.h. * Move popcount_digit() to pycore_bitutils.h as _Py_popcount32(). * _Py_popcount32() uses GCC and clang builtin function if available. * Add unit tests to _Py_popcount32(). --- Python/hamt.c | 25 +++---------------------- 1 file changed, 3 insertions(+), 22 deletions(-) (limited to 'Python') diff --git a/Python/hamt.c b/Python/hamt.c index 8801c5ea41..e272e8808f 100644 --- a/Python/hamt.c +++ b/Python/hamt.c @@ -1,5 +1,6 @@ #include "Python.h" +#include "pycore_bitutils.h" // _Py_popcount32 #include "pycore_hamt.h" #include "pycore_object.h" // _PyObject_GC_TRACK() #include <stddef.h> // offsetof() @@ -433,30 +434,10 @@ hamt_bitpos(int32_t hash, uint32_t shift) return (uint32_t)1 << hamt_mask(hash, shift); } -static inline uint32_t -hamt_bitcount(uint32_t i) -{ - /* We could use native popcount instruction but that would - require to either add configure flags to enable SSE4.2 - support or to detect it dynamically. Otherwise, we have - a risk of CPython not working properly on older hardware. - - In practice, there's no observable difference in - performance between using a popcount instruction or the - following fallback code. - - The algorithm is copied from: - https://graphics.stanford.edu/~seander/bithacks.html - */ - i = i - ((i >> 1) & 0x55555555); - i = (i & 0x33333333) + ((i >> 2) & 0x33333333); - return (((i + (i >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24; -} - static inline uint32_t hamt_bitindex(uint32_t bitmap, uint32_t bit) { - return hamt_bitcount(bitmap & (bit - 1)); + return (uint32_t)_Py_popcount32(bitmap & (bit - 1)); } @@ -820,7 +801,7 @@ hamt_node_bitmap_assoc(PyHamtNode_Bitmap *self, else { /* There was no key before with the same (shift,hash). 
*/ - uint32_t n = hamt_bitcount(self->b_bitmap); + uint32_t n = (uint32_t)_Py_popcount32(self->b_bitmap); if (n >= 16) { /* When we have a situation where we want to store more -- cgit v1.2.1