summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Qiyu8 <fangchunlin@huawei.com> 2020-12-16 16:23:54 +0800
committer Qiyu8 <fangchunlin@huawei.com> 2020-12-16 16:23:54 +0800
commit b156231e40e280658dd1d6582d1d9734e0f56b09 (patch)
tree f7e9be11e4971c14c131a236c556d3b33942443a
parent b5c5ad8e542413d1273c5a27fe14f439cabe869c (diff)
download numpy-b156231e40e280658dd1d6582d1d9734e0f56b09.tar.gz
Add benchmark test case for little bit order (bitorder="little").
-rw-r--r-- benchmarks/benchmarks/bench_core.py 3
-rw-r--r-- numpy/core/src/multiarray/compiled_base.c 3
2 files changed, 5 insertions, 1 deletion
diff --git a/benchmarks/benchmarks/bench_core.py b/benchmarks/benchmarks/bench_core.py
index 0c2a18c15..1c028542d 100644
--- a/benchmarks/benchmarks/bench_core.py
+++ b/benchmarks/benchmarks/bench_core.py
@@ -165,6 +165,9 @@ class PackBits(Benchmark):
def time_packbits(self, dtype):
np.packbits(self.d)
+ def time_packbits_little(self, dtype):
+ np.packbits(self.d, bitorder="little")
+
def time_packbits_axis0(self, dtype):
np.packbits(self.d2, axis=0)
diff --git a/numpy/core/src/multiarray/compiled_base.c b/numpy/core/src/multiarray/compiled_base.c
index f09a1de32..6ae4dda6b 100644
--- a/numpy/core/src/multiarray/compiled_base.c
+++ b/numpy/core/src/multiarray/compiled_base.c
@@ -1502,6 +1502,7 @@ pack_inner(const char *inptr,
npy_intp vn_out = n_out - (remain ? 1 : 0);
const int vstep = npyv_nlanes_u64;
const int vstepx4 = vstep * 4;
+ const int isAligned = npy_is_aligned(outptr, sizeof(npy_uint64));
vn_out -= (vn_out & (vstep - 1));
for (; index <= vn_out - vstepx4; index += vstepx4, inptr += npyv_nlanes_u8 * 4) {
npyv_u8 v0 = npyv_load_u8((const npy_uint8*)inptr);
@@ -1520,7 +1521,7 @@ pack_inner(const char *inptr,
bb[2] = npyv_tobits_b8(npyv_cmpneq_u8(v2, v_zero));
bb[3] = npyv_tobits_b8(npyv_cmpneq_u8(v3, v_zero));
if(out_stride == 1 &&
- (!NPY_STRONG_ALIGNMENT || npy_is_aligned(outptr, sizeof(npy_uint64)))) {
+ (!NPY_STRONG_ALIGNMENT || isAligned)) {
npy_uint64 *ptr64 = (npy_uint64*)outptr;
#if NPY_SIMD_WIDTH == 16
npy_uint64 bcomp = bb[0] | (bb[1] << 16) | (bb[2] << 32) | (bb[3] << 48);