summary refs log tree commit diff
diff options
context:
space:
mode:
author Matti Picus <matti.picus@gmail.com> 2019-05-21 21:56:16 +0300
committer GitHub <noreply@github.com> 2019-05-21 21:56:16 +0300
commit 8eb30b49fbd9d7a0fc270c27ecf86eb70d64ac62 (patch)
tree 44e0f6a3ad3eda188d43984e5ade67048805ace5
parent 504b287bdf4745256044f336f17c88ddcb0175dd (diff)
parent 82eab1c0ad35c5e91ec50e74609763fa7367e4d7 (diff)
download numpy-8eb30b49fbd9d7a0fc270c27ecf86eb70d64ac62.tar.gz
Merge pull request #13541 from juliantaylor/restore-unpack-performance
ENH: restore unpack bit lookup table
-rw-r--r-- benchmarks/benchmarks/bench_core.py 6
-rw-r--r-- numpy/core/multiarray.py 4
-rw-r--r-- numpy/core/src/multiarray/compiled_base.c 101
3 files changed, 89 insertions, 22 deletions
diff --git a/benchmarks/benchmarks/bench_core.py b/benchmarks/benchmarks/bench_core.py
index 194ce3218..9e409dd91 100644
--- a/benchmarks/benchmarks/bench_core.py
+++ b/benchmarks/benchmarks/bench_core.py
@@ -162,12 +162,18 @@ class UnpackBits(Benchmark):
def time_unpackbits(self):
np.unpackbits(self.d)
+ def time_unpackbits_little(self):
+ np.unpackbits(self.d, bitorder="little")
+
def time_unpackbits_axis0(self):
np.unpackbits(self.d2, axis=0)
def time_unpackbits_axis1(self):
np.unpackbits(self.d2, axis=1)
+ def time_unpackbits_axis1_little(self):
+ np.unpackbits(self.d2, bitorder="little", axis=1)
+
class Indices(Benchmark):
def time_indices(self):
diff --git a/numpy/core/multiarray.py b/numpy/core/multiarray.py
index 78fec1aab..8006dd9b5 100644
--- a/numpy/core/multiarray.py
+++ b/numpy/core/multiarray.py
@@ -1116,7 +1116,7 @@ def putmask(a, mask, values):
@array_function_from_c_func_and_dispatcher(_multiarray_umath.packbits)
def packbits(a, axis=None, bitorder='big'):
"""
- packbits(a, axis=None)
+ packbits(a, axis=None, bitorder='big')
Packs the elements of a binary-valued array into bits in a uint8 array.
@@ -1174,7 +1174,7 @@ def packbits(a, axis=None, bitorder='big'):
@array_function_from_c_func_and_dispatcher(_multiarray_umath.unpackbits)
def unpackbits(a, axis=None, count=None, bitorder='big'):
"""
- unpackbits(a, axis=None, count=None)
+ unpackbits(a, axis=None, count=None, bitorder='big')
Unpacks elements of a uint8 array into a binary-valued output array.
diff --git a/numpy/core/src/multiarray/compiled_base.c b/numpy/core/src/multiarray/compiled_base.c
index 25dc6951c..dc79bfa09 100644
--- a/numpy/core/src/multiarray/compiled_base.c
+++ b/numpy/core/src/multiarray/compiled_base.c
@@ -1703,6 +1703,15 @@ fail:
static PyObject *
unpack_bits(PyObject *input, int axis, PyObject *count_obj, char order)
{
+ static int unpack_init = 0;
+ /*
+ * lookuptable for bitorder big as it has been around longer
+ * bitorder little is handled via byteswapping in the loop
+ */
+ static union {
+ npy_uint8 bytes[8];
+ npy_uint64 uint64;
+ } unpack_lookup_big[256];
PyArrayObject *inp;
PyArrayObject *new = NULL;
PyArrayObject *out = NULL;
@@ -1788,6 +1797,22 @@ unpack_bits(PyObject *input, int axis, PyObject *count_obj, char order)
goto fail;
}
+ /*
+ * setup lookup table under GIL, 256 8 byte blocks representing 8 bits
+ * expanded to 1/0 bytes
+ */
+ if (unpack_init == 0) {
+ npy_intp j;
+ for (j=0; j < 256; j++) {
+ npy_intp k;
+ for (k=0; k < 8; k++) {
+ npy_uint8 v = (j & (1 << k)) == (1 << k);
+ unpack_lookup_big[j].bytes[7 - k] = v;
+ }
+ }
+ unpack_init = 1;
+ }
+
count = PyArray_DIM(new, axis) * 8;
if (outdims[axis] > count) {
in_n = count / 8;
@@ -1810,39 +1835,75 @@ unpack_bits(PyObject *input, int axis, PyObject *count_obj, char order)
unsigned const char *inptr = PyArray_ITER_DATA(it);
char *outptr = PyArray_ITER_DATA(ot);
- if (order == 'b') {
- for (index = 0; index < in_n; index++) {
- for (i = 0; i < 8; i++) {
- *outptr = ((*inptr & (128 >> i)) != 0);
- outptr += out_stride;
+ if (out_stride == 1) {
+ /* for unity stride we can just copy out of the lookup table */
+ if (order == 'b') {
+ for (index = 0; index < in_n; index++) {
+ npy_uint64 v = unpack_lookup_big[*inptr].uint64;
+ memcpy(outptr, &v, 8);
+ outptr += 8;
+ inptr += in_stride;
+ }
+ }
+ else {
+ for (index = 0; index < in_n; index++) {
+ npy_uint64 v = unpack_lookup_big[*inptr].uint64;
+ if (order != 'b') {
+ v = npy_bswap8(v);
+ }
+ memcpy(outptr, &v, 8);
+ outptr += 8;
+ inptr += in_stride;
}
- inptr += in_stride;
}
/* Clean up the tail portion */
- for (i = 0; i < in_tail; i++) {
- *outptr = ((*inptr & (128 >> i)) != 0);
- outptr += out_stride;
+ if (in_tail) {
+ npy_uint64 v = unpack_lookup_big[*inptr].uint64;
+ if (order != 'b') {
+ v = npy_bswap8(v);
+ }
+ memcpy(outptr, &v, in_tail);
+ }
+ /* Add padding */
+ else if (out_pad) {
+ memset(outptr, 0, out_pad);
}
}
else {
- for (index = 0; index < in_n; index++) {
- for (i = 0; i < 8; i++) {
+ if (order == 'b') {
+ for (index = 0; index < in_n; index++) {
+ for (i = 0; i < 8; i++) {
+ *outptr = ((*inptr & (128 >> i)) != 0);
+ outptr += out_stride;
+ }
+ inptr += in_stride;
+ }
+ /* Clean up the tail portion */
+ for (i = 0; i < in_tail; i++) {
+ *outptr = ((*inptr & (128 >> i)) != 0);
+ outptr += out_stride;
+ }
+ }
+ else {
+ for (index = 0; index < in_n; index++) {
+ for (i = 0; i < 8; i++) {
+ *outptr = ((*inptr & (1 << i)) != 0);
+ outptr += out_stride;
+ }
+ inptr += in_stride;
+ }
+ /* Clean up the tail portion */
+ for (i = 0; i < in_tail; i++) {
*outptr = ((*inptr & (1 << i)) != 0);
outptr += out_stride;
}
- inptr += in_stride;
}
- /* Clean up the tail portion */
- for (i = 0; i < in_tail; i++) {
- *outptr = ((*inptr & (1 << i)) != 0);
+ /* Add padding */
+ for (index = 0; index < out_pad; index++) {
+ *outptr = 0;
outptr += out_stride;
}
}
- /* Add padding */
- for (index = 0; index < out_pad; index++) {
- *outptr = 0;
- outptr += out_stride;
- }
PyArray_ITER_NEXT(it);
PyArray_ITER_NEXT(ot);