diff options
-rw-r--r-- | benchmarks/benchmarks/bench_core.py | 6 | ||||
-rw-r--r-- | numpy/core/multiarray.py | 4 | ||||
-rw-r--r-- | numpy/core/src/multiarray/compiled_base.c | 101 |
3 files changed, 89 insertions, 22 deletions
diff --git a/benchmarks/benchmarks/bench_core.py b/benchmarks/benchmarks/bench_core.py index 194ce3218..9e409dd91 100644 --- a/benchmarks/benchmarks/bench_core.py +++ b/benchmarks/benchmarks/bench_core.py @@ -162,12 +162,18 @@ class UnpackBits(Benchmark): def time_unpackbits(self): np.unpackbits(self.d) + def time_unpackbits_little(self): + np.unpackbits(self.d, bitorder="little") + def time_unpackbits_axis0(self): np.unpackbits(self.d2, axis=0) def time_unpackbits_axis1(self): np.unpackbits(self.d2, axis=1) + def time_unpackbits_axis1_little(self): + np.unpackbits(self.d2, bitorder="little", axis=1) + class Indices(Benchmark): def time_indices(self): diff --git a/numpy/core/multiarray.py b/numpy/core/multiarray.py index 78fec1aab..8006dd9b5 100644 --- a/numpy/core/multiarray.py +++ b/numpy/core/multiarray.py @@ -1116,7 +1116,7 @@ def putmask(a, mask, values): @array_function_from_c_func_and_dispatcher(_multiarray_umath.packbits) def packbits(a, axis=None, bitorder='big'): """ - packbits(a, axis=None) + packbits(a, axis=None, bitorder='big') Packs the elements of a binary-valued array into bits in a uint8 array. @@ -1174,7 +1174,7 @@ def packbits(a, axis=None, bitorder='big'): @array_function_from_c_func_and_dispatcher(_multiarray_umath.unpackbits) def unpackbits(a, axis=None, count=None, bitorder='big'): """ - unpackbits(a, axis=None, count=None) + unpackbits(a, axis=None, count=None, bitorder='big') Unpacks elements of a uint8 array into a binary-valued output array. diff --git a/numpy/core/src/multiarray/compiled_base.c b/numpy/core/src/multiarray/compiled_base.c index 25dc6951c..dc79bfa09 100644 --- a/numpy/core/src/multiarray/compiled_base.c +++ b/numpy/core/src/multiarray/compiled_base.c @@ -1703,6 +1703,15 @@ fail: static PyObject * unpack_bits(PyObject *input, int axis, PyObject *count_obj, char order) { + static int unpack_init = 0; + /* + * lookuptable for bitorder big as it has been around longer + * bitorder little is handled via byteswapping in the loop + */ + static union { + npy_uint8 bytes[8]; + npy_uint64 uint64; + } unpack_lookup_big[256]; PyArrayObject *inp; PyArrayObject *new = NULL; PyArrayObject *out = NULL; @@ -1788,6 +1797,22 @@ unpack_bits(PyObject *input, int axis, PyObject *count_obj, char order) goto fail; } + /* + * setup lookup table under GIL, 256 8 byte blocks representing 8 bits + * expanded to 1/0 bytes + */ + if (unpack_init == 0) { + npy_intp j; + for (j=0; j < 256; j++) { + npy_intp k; + for (k=0; k < 8; k++) { + npy_uint8 v = (j & (1 << k)) == (1 << k); + unpack_lookup_big[j].bytes[7 - k] = v; + } + } + unpack_init = 1; + } + count = PyArray_DIM(new, axis) * 8; if (outdims[axis] > count) { in_n = count / 8; @@ -1810,39 +1835,75 @@ unpack_bits(PyObject *input, int axis, PyObject *count_obj, char order) unsigned const char *inptr = PyArray_ITER_DATA(it); char *outptr = PyArray_ITER_DATA(ot); - if (order == 'b') { - for (index = 0; index < in_n; index++) { - for (i = 0; i < 8; i++) { - *outptr = ((*inptr & (128 >> i)) != 0); - outptr += out_stride; + if (out_stride == 1) { + /* for unity stride we can just copy out of the lookup table */ + if (order == 'b') { + for (index = 0; index < in_n; index++) { + npy_uint64 v = unpack_lookup_big[*inptr].uint64; + memcpy(outptr, &v, 8); + outptr += 8; + inptr += in_stride; + } + } + else { + for (index = 0; index < in_n; index++) { + npy_uint64 v = unpack_lookup_big[*inptr].uint64; + if (order != 'b') { + v = npy_bswap8(v); + } + memcpy(outptr, &v, 8); + outptr += 8; + inptr += in_stride; } - inptr += in_stride; } /* Clean up the tail portion */ - for (i = 0; i < in_tail; i++) { - *outptr = ((*inptr & (128 >> i)) != 0); - outptr += out_stride; + if (in_tail) { + npy_uint64 v = unpack_lookup_big[*inptr].uint64; + if (order != 'b') { + v = npy_bswap8(v); + } + memcpy(outptr, &v, in_tail); + } + /* Add padding */ + else if (out_pad) { + memset(outptr, 0, out_pad); } } else { - for (index = 0; index < in_n; index++) { - for (i = 0; i < 8; i++) { + if (order == 'b') { + for (index = 0; index < in_n; index++) { + for (i = 0; i < 8; i++) { + *outptr = ((*inptr & (128 >> i)) != 0); + outptr += out_stride; + } + inptr += in_stride; + } + /* Clean up the tail portion */ + for (i = 0; i < in_tail; i++) { + *outptr = ((*inptr & (128 >> i)) != 0); + outptr += out_stride; + } + } + else { + for (index = 0; index < in_n; index++) { + for (i = 0; i < 8; i++) { + *outptr = ((*inptr & (1 << i)) != 0); + outptr += out_stride; + } + inptr += in_stride; + } + /* Clean up the tail portion */ + for (i = 0; i < in_tail; i++) { *outptr = ((*inptr & (1 << i)) != 0); outptr += out_stride; } - inptr += in_stride; } - /* Clean up the tail portion */ - for (i = 0; i < in_tail; i++) { - *outptr = ((*inptr & (1 << i)) != 0); + /* Add padding */ + for (index = 0; index < out_pad; index++) { + *outptr = 0; outptr += out_stride; } } - /* Add padding */ - for (index = 0; index < out_pad; index++) { - *outptr = 0; - outptr += out_stride; - } PyArray_ITER_NEXT(it); PyArray_ITER_NEXT(ot); |