Merge pull request #10855 from madphysicist/unpack-count

ENH: Adding a count parameter to np.unpackbits
author: Sebastian Berg <sebastian@sipsolutions.net> 2019-02-25 20:48:35 +0100
committer: GitHub <noreply@github.com> 2019-02-25 20:48:35 +0100
commit: 269d9855216e7c66708b1e2c6f5da7e5f39c70c1 (patch)
tree: 6172cc59784bce2be5c67b1d98d87f7adeda0c22 /numpy
parent: 345b2f6e048061359e0fa0322e6b7b986cfd6291 (diff)
parent: df00dbf6e5d2c4957da9f3e0c171bda292009c2f (diff)
download: numpy-269d9855216e7c66708b1e2c6f5da7e5f39c70c1.tar.gz
4 files changed, 177 insertions, 125 deletions
diff --git a/numpy/core/_add_newdocs.py b/numpy/core/_add_newdocs.py
index 9cc8f562a..2f5a48ed8 100644
--- a/numpy/core/_add_newdocs.py
+++ b/numpy/core/_add_newdocs.py
@@ -4526,97 +4526,6 @@ add_newdoc('numpy.core.umath', '_add_newdoc_ufunc',
     and then throwing away the ufunc.
     """)
 
-add_newdoc('numpy.core.multiarray', 'packbits',
-    """
-    packbits(myarray, axis=None)
-
-    Packs the elements of a binary-valued array into bits in a uint8 array.
-
-    The result is padded to full bytes by inserting zero bits at the end.
-
-    Parameters
-    ----------
-    myarray : array_like
-        An array of integers or booleans whose elements should be packed to
-        bits.
-    axis : int, optional
-        The dimension over which bit-packing is done.
-        ``None`` implies packing the flattened array.
-
-    Returns
-    -------
-    packed : ndarray
-        Array of type uint8 whose elements represent bits corresponding to the
-        logical (0 or nonzero) value of the input elements. The shape of
-        `packed` has the same number of dimensions as the input (unless `axis`
-        is None, in which case the output is 1-D).
-
-    See Also
-    --------
-    unpackbits: Unpacks elements of a uint8 array into a binary-valued output
-                array.
-
-    Examples
-    --------
-    >>> a = np.array([[[1,0,1],
-    ...                [0,1,0]],
-    ...               [[1,1,0],
-    ...                [0,0,1]]])
-    >>> b = np.packbits(a, axis=-1)
-    >>> b
-    array([[[160],
-            [ 64]],
-           [[192],
-            [ 32]]], dtype=uint8)
-
-    Note that in binary 160 = 1010 0000, 64 = 0100 0000, 192 = 1100 0000,
-    and 32 = 0010 0000.
-
-    """)
-
-add_newdoc('numpy.core.multiarray', 'unpackbits',
-    """
-    unpackbits(myarray, axis=None)
-
-    Unpacks elements of a uint8 array into a binary-valued output array.
-
-    Each element of `myarray` represents a bit-field that should be unpacked
-    into a binary-valued output array. The shape of the output array is either
-    1-D (if `axis` is None) or the same shape as the input array with unpacking
-    done along the axis specified.
-
-    Parameters
-    ----------
-    myarray : ndarray, uint8 type
-       Input array.
-    axis : int, optional
-        The dimension over which bit-unpacking is done.
-        ``None`` implies unpacking the flattened array.
-
-    Returns
-    -------
-    unpacked : ndarray, uint8 type
-       The elements are binary-valued (0 or 1).
-
-    See Also
-    --------
-    packbits : Packs the elements of a binary-valued array into bits in a uint8
-               array.
-
-    Examples
-    --------
-    >>> a = np.array([[2], [7], [23]], dtype=np.uint8)
-    >>> a
-    array([[ 2],
-           [ 7],
-           [23]], dtype=uint8)
-    >>> b = np.unpackbits(a, axis=1)
-    >>> b
-    array([[0, 0, 0, 0, 0, 0, 1, 0],
-           [0, 0, 0, 0, 0, 1, 1, 1],
-           [0, 0, 0, 1, 0, 1, 1, 1]], dtype=uint8)
-
-    """)
 
 add_newdoc('numpy.core._multiarray_tests', 'format_float_OSprintf_g',
     """
diff --git a/numpy/core/multiarray.py b/numpy/core/multiarray.py
index f2d7a9487..54b3a3e5e 100644
--- a/numpy/core/multiarray.py
+++ b/numpy/core/multiarray.py
@@ -1107,9 +1107,9 @@ def putmask(a, mask, values):
 
 
 @array_function_from_c_func_and_dispatcher(_multiarray_umath.packbits)
-def packbits(myarray, axis=None):
+def packbits(a, axis=None):
     """
-    packbits(myarray, axis=None)
+    packbits(a, axis=None)
 
     Packs the elements of a binary-valued array into bits in a uint8 array.
 
@@ -1117,7 +1117,7 @@ def packbits(myarray, axis=None):
 
     Parameters
     ----------
-    myarray : array_like
+    a : array_like
         An array of integers or booleans whose elements should be packed to
         bits.
     axis : int, optional
@@ -1154,28 +1154,39 @@ def packbits(myarray, axis=None):
     and 32 = 0010 0000.
 
     """
-    return (myarray,)
+    return (a,)
 
 
 @array_function_from_c_func_and_dispatcher(_multiarray_umath.unpackbits)
-def unpackbits(myarray, axis=None):
+def unpackbits(a, axis=None, count=None):
     """
-    unpackbits(myarray, axis=None)
+    unpackbits(a, axis=None, count=None)
 
     Unpacks elements of a uint8 array into a binary-valued output array.
 
-    Each element of `myarray` represents a bit-field that should be unpacked
-    into a binary-valued output array. The shape of the output array is either
-    1-D (if `axis` is None) or the same shape as the input array with unpacking
-    done along the axis specified.
+    Each element of `a` represents a bit-field that should be unpacked
+    into a binary-valued output array. The shape of the output array is
+    either 1-D (if `axis` is ``None``) or the same shape as the input
+    array with unpacking done along the axis specified.
 
     Parameters
     ----------
-    myarray : ndarray, uint8 type
+    a : ndarray, uint8 type
        Input array.
     axis : int, optional
         The dimension over which bit-unpacking is done.
         ``None`` implies unpacking the flattened array.
+    count : int or None, optional
+        The number of elements to unpack along `axis`, provided as a way
+        of undoing the effect of packing a size that is not a multiple
+        of eight. A non-negative number means to only unpack `count`
+        bits. A negative number means to trim off that many bits from
+        the end. ``None`` means to unpack the entire array (the
+        default). Counts larger than the available number of bits will
+        add zero padding to the output. The magnitude of a negative
+        count must not exceed the available number of bits.
+
+        .. versionadded:: 1.17.0
 
     Returns
     -------
@@ -1184,8 +1195,8 @@ def unpackbits(myarray, axis=None):
 
     See Also
     --------
-    packbits : Packs the elements of a binary-valued array into bits in a uint8
-               array.
+    packbits : Packs the elements of a binary-valued array into bits in
+               a uint8 array.
 
     Examples
     --------
@@ -1199,9 +1210,27 @@ def unpackbits(myarray, axis=None):
     array([[0, 0, 0, 0, 0, 0, 1, 0],
            [0, 0, 0, 0, 0, 1, 1, 1],
            [0, 0, 0, 1, 0, 1, 1, 1]], dtype=uint8)
+    >>> c = np.unpackbits(a, axis=1, count=-3)
+    >>> c
+    array([[0, 0, 0, 0, 0],
+           [0, 0, 0, 0, 0],
+           [0, 0, 0, 1, 0]], dtype=uint8)
+
+    >>> p = np.packbits(b, axis=0)
+    >>> np.unpackbits(p, axis=0)
+    array([[0, 0, 0, 0, 0, 0, 1, 0],
+           [0, 0, 0, 0, 0, 1, 1, 1],
+           [0, 0, 0, 1, 0, 1, 1, 1],
+           [0, 0, 0, 0, 0, 0, 0, 0],
+           [0, 0, 0, 0, 0, 0, 0, 0],
+           [0, 0, 0, 0, 0, 0, 0, 0],
+           [0, 0, 0, 0, 0, 0, 0, 0],
+           [0, 0, 0, 0, 0, 0, 0, 0]], dtype=uint8)
+    >>> np.array_equal(b, np.unpackbits(p, axis=0, count=b.shape[0]))
+    True
 
     """
-    return (myarray,)
+    return (a,)
 
 
 @array_function_from_c_func_and_dispatcher(_multiarray_umath.shares_memory)
diff --git a/numpy/core/src/multiarray/compiled_base.c b/numpy/core/src/multiarray/compiled_base.c
index 0ffd8d2bf..2da274658 100644
--- a/numpy/core/src/multiarray/compiled_base.c
+++ b/numpy/core/src/multiarray/compiled_base.c
@@ -1691,7 +1691,7 @@ fail:
 }
 
 static PyObject *
-unpack_bits(PyObject *input, int axis)
+unpack_bits(PyObject *input, int axis, PyObject *count_obj)
 {
     static int unpack_init = 0;
     static char unpack_lookup[256][8];
@@ -1701,7 +1701,7 @@ unpack_bits(PyObject *input, int axis)
     npy_intp outdims[NPY_MAXDIMS];
     int i;
     PyArrayIterObject *it, *ot;
-    npy_intp n_in, in_stride, out_stride;
+    npy_intp count, in_n, in_tail, out_pad, in_stride, out_stride;
     NPY_BEGIN_THREADS_DEF;
 
     inp = (PyArrayObject *)PyArray_FROM_O(input);
@@ -1730,20 +1730,37 @@ unpack_bits(PyObject *input, int axis)
 
         newdim.ptr = &shape;
         temp = (PyArrayObject *)PyArray_Newshape(new, &newdim, NPY_CORDER);
+        Py_DECREF(new);
         if (temp == NULL) {
-            goto fail;
+            return NULL;
         }
-        Py_DECREF(new);
         new = temp;
     }
 
     /* Setup output shape */
-    for (i=0; i<PyArray_NDIM(new); i++) {
+    for (i = 0; i < PyArray_NDIM(new); i++) {
         outdims[i] = PyArray_DIM(new, i);
     }
 
     /* Multiply axis dimension by 8 */
-    outdims[axis] <<= 3;
+    outdims[axis] *= 8;
+    if (count_obj != Py_None) {
+        count = PyArray_PyIntAsIntp(count_obj);
+        if (error_converting(count)) {
+            goto fail;
+        }
+        if (count < 0) {
+            outdims[axis] += count;
+            if (outdims[axis] < 0) {
+                PyErr_Format(PyExc_ValueError,
+                             "-count larger than number of elements");
+                goto fail;
+            }
+        }
+        else {
+            outdims[axis] = count;
+        }
+    }
 
     /* Create output array */
     out = (PyArrayObject *)PyArray_NewFromDescr(
@@ -1753,6 +1770,7 @@ unpack_bits(PyObject *input, int axis)
     if (out == NULL) {
         goto fail;
     }
+
     /* Setup iterators to iterate over all but given axis */
     it = (PyArrayIterObject *)PyArray_IterAllButAxis((PyObject *)new, &axis);
     ot = (PyArrayIterObject *)PyArray_IterAllButAxis((PyObject *)out, &axis);
@@ -1784,12 +1802,23 @@ unpack_bits(PyObject *input, int axis)
         unpack_init = 1;
     }
 
-    NPY_BEGIN_THREADS_THRESHOLDED(PyArray_DIM(new, axis));
+    count = PyArray_DIM(new, axis) * 8;
+    if (outdims[axis] > count) {
+        in_n = count / 8;
+        in_tail = 0;
+        out_pad = outdims[axis] - count;
+    }
+    else {
+        in_n = outdims[axis] / 8;
+        in_tail = outdims[axis] % 8;
+        out_pad = 0;
+    }
 
-    n_in = PyArray_DIM(new, axis);
     in_stride = PyArray_STRIDE(new, axis);
     out_stride = PyArray_STRIDE(out, axis);
 
+    NPY_BEGIN_THREADS_THRESHOLDED(PyArray_Size((PyObject *)out) / 8);
+
     while (PyArray_ITER_NOTDONE(it)) {
         npy_intp index;
         unsigned const char *inptr = PyArray_ITER_DATA(it);
@@ -1797,23 +1826,42 @@ unpack_bits(PyObject *input, int axis)
 
         if (out_stride == 1) {
             /* for unity stride we can just copy out of the lookup table */
-            for (index = 0; index < n_in; index++) {
+            for (index = 0; index < in_n; index++) {
                 memcpy(outptr, unpack_lookup[*inptr], 8);
                 outptr += 8;
                 inptr += in_stride;
             }
+            /* Clean up the tail portion */
+            if (in_tail) {
+                memcpy(outptr, unpack_lookup[*inptr], in_tail);
+            }
+            /* Add padding */
+            else if (out_pad) {
+                memset(outptr, 0, out_pad);
+            }
         }
         else {
-            for (index = 0; index < n_in; index++) {
-                unsigned char mask = 128;
+            unsigned char mask;
 
-                for (i = 0; i < 8; i++) {
+            for (index = 0; index < in_n; index++) {
+                for (mask = 128; mask; mask >>= 1) {
                     *outptr = ((mask & (*inptr)) != 0);
                     outptr += out_stride;
-                    mask >>= 1;
                 }
                 inptr += in_stride;
             }
+            /* Clean up the tail portion */
+            mask = 128;
+            for (i = 0; i < in_tail; i++) {
+                *outptr = ((mask & (*inptr)) != 0);
+                outptr += out_stride;
+                mask >>= 1;
+            }
+            /* Add padding */
+            for (index = 0; index < out_pad; index++) {
+                *outptr = 0;
+                outptr += out_stride;
+            }
         }
         PyArray_ITER_NEXT(it);
         PyArray_ITER_NEXT(ot);
@@ -1838,25 +1886,28 @@ io_pack(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *kwds)
 {
     PyObject *obj;
     int axis = NPY_MAXDIMS;
-    static char *kwlist[] = {"in", "axis", NULL};
+    static char *kwlist[] = {"a", "axis", NULL};
 
-    if (!PyArg_ParseTupleAndKeywords( args, kwds, "O|O&:pack" , kwlist,
-                &obj, PyArray_AxisConverter, &axis)) {
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O&:pack" , kwlist,
+                                     &obj, PyArray_AxisConverter, &axis)) {
         return NULL;
     }
     return pack_bits(obj, axis);
 }
 
+
 NPY_NO_EXPORT PyObject *
 io_unpack(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *kwds)
 {
     PyObject *obj;
     int axis = NPY_MAXDIMS;
-    static char *kwlist[] = {"in", "axis", NULL};
+    PyObject *count = Py_None;
+    static char *kwlist[] = {"a", "axis", "count", NULL};
 
-    if (!PyArg_ParseTupleAndKeywords( args, kwds, "O|O&:unpack" , kwlist,
-                &obj, PyArray_AxisConverter, &axis)) {
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O&O:unpack" , kwlist,
+                                     &obj, PyArray_AxisConverter, &axis,
+                                     &count)) {
         return NULL;
     }
-    return unpack_bits(obj, axis);
+    return unpack_bits(obj, axis, count);
 }
diff --git a/numpy/lib/tests/test_packbits.py b/numpy/lib/tests/test_packbits.py
index fde5c37f2..00d5ca827 100644
--- a/numpy/lib/tests/test_packbits.py
+++ b/numpy/lib/tests/test_packbits.py
@@ -266,3 +266,66 @@ def test_unpackbits_large():
     assert_array_equal(np.packbits(np.unpackbits(d, axis=1), axis=1), d)
     d = d.T.copy()
     assert_array_equal(np.packbits(np.unpackbits(d, axis=0), axis=0), d)
+
+
+def test_unpackbits_count():
+    """Check that ``count`` makes unpackbits an exact inverse of packbits."""
+    x = np.array([
+        [1, 0, 1, 0, 0, 1, 0],
+        [0, 1, 1, 1, 0, 0, 0],
+        [0, 0, 1, 0, 0, 1, 1],
+        [1, 1, 0, 0, 0, 1, 1],
+        [1, 0, 1, 0, 1, 0, 1],
+        [0, 0, 1, 1, 1, 0, 0],
+        [0, 1, 0, 1, 0, 1, 0],
+    ], dtype=np.uint8)
+
+    padded1 = np.zeros(57, dtype=np.uint8)  # 56 unpacked bits + 1 extra zero
+    padded1[:49] = x.ravel()
+
+    packed = np.packbits(x)
+    for count in range(58):
+        unpacked = np.unpackbits(packed, count=count)
+        assert_equal(unpacked.dtype, np.uint8)
+        assert_array_equal(unpacked, padded1[:count])
+    for count in range(-1, -57, -1):
+        unpacked = np.unpackbits(packed, count=count)
+        assert_equal(unpacked.dtype, np.uint8)
+        # count -1 because padded1 has 57 instead of 56 elements
+        assert_array_equal(unpacked, padded1[:count-1])
+    for kwargs in [{}, {'count': None}]:
+        unpacked = np.unpackbits(packed, **kwargs)
+        assert_equal(unpacked.dtype, np.uint8)
+        assert_array_equal(unpacked, padded1[:-1])
+    assert_raises(ValueError, np.unpackbits, packed, count=-57)
+
+    padded2 = np.zeros((9, 9), dtype=np.uint8)  # 8 unpacked bits + 1 extra
+    padded2[:7, :7] = x
+
+    packed0 = np.packbits(x, axis=0)
+    packed1 = np.packbits(x, axis=1)
+    for count in range(10):
+        unpacked0 = np.unpackbits(packed0, axis=0, count=count)
+        assert_equal(unpacked0.dtype, np.uint8)
+        assert_array_equal(unpacked0, padded2[:count, :x.shape[1]])
+        unpacked1 = np.unpackbits(packed1, axis=1, count=count)
+        assert_equal(unpacked1.dtype, np.uint8)
+        assert_array_equal(unpacked1, padded2[:x.shape[0], :count])
+    for count in range(-1, -9, -1):
+        unpacked0 = np.unpackbits(packed0, axis=0, count=count)
+        assert_equal(unpacked0.dtype, np.uint8)
+        # count -1 because one extra zero of padding
+        assert_array_equal(unpacked0, padded2[:count-1, :x.shape[1]])
+        unpacked1 = np.unpackbits(packed1, axis=1, count=count)
+        assert_equal(unpacked1.dtype, np.uint8)
+        assert_array_equal(unpacked1, padded2[:x.shape[0], :count-1])
+    for kwargs in [{}, {'count': None}]:
+        unpacked0 = np.unpackbits(packed0, axis=0, **kwargs)
+        assert_equal(unpacked0.dtype, np.uint8)
+        assert_array_equal(unpacked0, padded2[:-1, :x.shape[1]])
+        unpacked1 = np.unpackbits(packed1, axis=1, **kwargs)
+        assert_equal(unpacked1.dtype, np.uint8)
+        assert_array_equal(unpacked1, padded2[:x.shape[0], :-1])
+    assert_raises(ValueError, np.unpackbits, packed0, axis=0, count=-9)
+    assert_raises(ValueError, np.unpackbits, packed1, axis=1, count=-9)
+
author	Sebastian Berg <sebastian@sipsolutions.net>	2019-02-25 20:48:35 +0100
committer	GitHub <noreply@github.com>	2019-02-25 20:48:35 +0100
commit	269d9855216e7c66708b1e2c6f5da7e5f39c70c1 (patch)
tree	6172cc59784bce2be5c67b1d98d87f7adeda0c22 /numpy
parent	345b2f6e048061359e0fa0322e6b7b986cfd6291 (diff)
parent	df00dbf6e5d2c4957da9f3e0c171bda292009c2f (diff)
download	numpy-269d9855216e7c66708b1e2c6f5da7e5f39c70c1.tar.gz