diff options
author | Julian Taylor <jtaylor.debian@googlemail.com> | 2013-10-16 19:49:47 +0200 |
---|---|---|
committer | Julian Taylor <jtaylor.debian@googlemail.com> | 2013-10-17 01:01:59 +0200 |
commit | 7d4ea165817fc613c79bb92ccb3844df94d1beed (patch) | |
tree | 708b52ceaefb48fc41343cf0d81ac8fdd9fab4f0 /numpy/core/setup_common.py | |
parent | 935017cbc3a336f66a110b53d2b595fffc8adbd7 (diff) | |
download | numpy-7d4ea165817fc613c79bb92ccb3844df94d1beed.tar.gz |
ENH: improve npy_memchr(p, 0) by using __builtin_ctz (tzcnt on x86)
Improves sparse mask performance by about a factor of three; the worst
case of no consecutive mask elements slows down by about 10%-15%.
Diffstat (limited to 'numpy/core/setup_common.py')
-rw-r--r-- | numpy/core/setup_common.py | 1 |
1 file changed, 1 insertion, 0 deletions
```diff
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py
index bad3607fa..4633aef84 100644
--- a/numpy/core/setup_common.py
+++ b/numpy/core/setup_common.py
@@ -116,6 +116,7 @@ OPTIONAL_INTRINSICS = [("__builtin_isnan", '5.'),
                        ("__builtin_bswap32", '5u'),
                        ("__builtin_bswap64", '5u'),
                        ("__builtin_expect", '5, 0'),
+                       ("__builtin_ctz", '5'),
                        ("_mm_load_ps", '(float*)0', "xmmintrin.h"), # SSE
                        ("_mm_load_pd", '(double*)0', "emmintrin.h"), # SSE2
                        ]
```