diff options
author | Mark Wiebe <mwwiebe@gmail.com> | 2011-01-16 01:48:42 -0800 |
---|---|---|
committer | Mark Wiebe <mwwiebe@gmail.com> | 2011-01-16 01:48:42 -0800 |
commit | 165d98d9a574aaba367f20b4fcd8dec7fda0bafb (patch) | |
tree | e95edbebc6ba31a11481974c5d50ade444338a9d /numpy | |
parent | 1604e898911755ee29f829ee2ae1ffcc80c3dade (diff) | |
download | numpy-165d98d9a574aaba367f20b4fcd8dec7fda0bafb.tar.gz |
ENH: core: Switch nonzero to use the iterator as an example, add count_nonzero function
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/add_newdocs.py | 29 | ||||
-rw-r--r-- | numpy/core/code_generators/numpy_api.py | 1 | ||||
-rw-r--r-- | numpy/core/fromnumeric.py | 2 | ||||
-rw-r--r-- | numpy/core/numeric.py | 5 | ||||
-rw-r--r-- | numpy/core/src/multiarray/item_selection.c | 254 | ||||
-rw-r--r-- | numpy/core/src/multiarray/multiarraymodule.c | 26 | ||||
-rw-r--r-- | numpy/core/src/multiarray/new_iterator.c.src | 8 | ||||
-rw-r--r-- | numpy/core/tests/test_numeric.py | 43 | ||||
-rw-r--r-- | numpy/ma/core.py | 2 |
9 files changed, 316 insertions, 54 deletions
diff --git a/numpy/add_newdocs.py b/numpy/add_newdocs.py index baf5285f0..2faf3ba16 100644 --- a/numpy/add_newdocs.py +++ b/numpy/add_newdocs.py @@ -481,6 +481,35 @@ add_newdoc('numpy.core.multiarray', 'zeros', """) +add_newdoc('numpy.core.multiarray', 'count_nonzero', + """ + count_nonzero(a) + + Counts the number of non-zero values in the array ``a``. + + Parameters + ---------- + a : array_like + The array for which to count non-zeros. + + Returns + ------- + count : int + Number of non-zero values in the array. + + See Also + -------- + nonzero : Return the coordinates of all the non-zero values. + + Examples + -------- + >>> np.count_nonzero(np.eye(4)) + 4 + + >>> np.count_nonzero([[0,1,7,0,0],[3,0,0,2,19]]) + 5 + """) + add_newdoc('numpy.core.multiarray','set_typeDict', """set_typeDict(dict) diff --git a/numpy/core/code_generators/numpy_api.py b/numpy/core/code_generators/numpy_api.py index 907167862..74ce72ab8 100644 --- a/numpy/core/code_generators/numpy_api.py +++ b/numpy/core/code_generators/numpy_api.py @@ -293,6 +293,7 @@ multiarray_funcs_api = { 'NpyIter_DebugPrint': 259, 'NpyIter_IterationNeedsAPI': 260, 'PyArray_CastingConverter': 261, + 'PyArray_CountNonzero': 262, } ufunc_types_api = { diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py index e97e50c87..76b74e223 100644 --- a/numpy/core/fromnumeric.py +++ b/numpy/core/fromnumeric.py @@ -1136,6 +1136,8 @@ def nonzero(a): array. ndarray.nonzero : Equivalent ndarray method. + count_nonzero : + Counts the number of non-zero elements in the input array. Examples -------- diff --git a/numpy/core/numeric.py b/numpy/core/numeric.py index 62fbd4186..a65c62a08 100644 --- a/numpy/core/numeric.py +++ b/numpy/core/numeric.py @@ -1,6 +1,6 @@ __all__ = ['newaxis', 'ndarray', 'flatiter', 'newiter', 'nested_iters', 'ufunc', - 'arange', 'array', 'zeros', 'empty', 'broadcast', 'dtype', - 'fromstring', 'fromfile', 'frombuffer', + 'arange', 'array', 'zeros', 'count_nonzero', 'empty', 'broadcast', + 'dtype', 'fromstring', 'fromfile', 'frombuffer', 'int_asbuffer', 'where', 'argwhere', 'concatenate', 'fastCopyAndTranspose', 'lexsort', 'set_numeric_ops', 'can_cast', @@ -197,6 +197,7 @@ newaxis = None arange = multiarray.arange array = multiarray.array zeros = multiarray.zeros +count_nonzero = multiarray.count_nonzero empty = multiarray.empty fromstring = multiarray.fromstring fromiter = multiarray.fromiter diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c index 660c6d9fd..96e19fe28 100644 --- a/numpy/core/src/multiarray/item_selection.c +++ b/numpy/core/src/multiarray/item_selection.c @@ -1682,75 +1682,225 @@ PyArray_Compress(PyArrayObject *self, PyObject *condition, int axis, } /*NUMPY_API + * Counts the number of non-zero elements in the array + * + * Returns -1 on error. + */ +NPY_NO_EXPORT npy_intp +PyArray_CountNonzero(PyArrayObject *self) +{ + int ndim = PyArray_NDIM(self); + PyArray_NonzeroFunc *nonzero = self->descr->f->nonzero; + char *data; + npy_intp stride, count; + npy_intp nonzero_count = 0; + + NpyIter *iter; + NpyIter_IterNext_Fn iternext; + char **dataptr; + npy_intp *strideptr, *innersizeptr; + + /* If it's a trivial one-dimensional loop, don't use an iterator */ + if (ndim <= 1 || PyArray_CHKFLAGS(self, NPY_CONTIGUOUS) || + PyArray_CHKFLAGS(self, NPY_FORTRAN)) { + data = PyArray_BYTES(self); + stride = (ndim == 0) ? 0 : (PyArray_CHKFLAGS(self, NPY_FORTRAN) ? + PyArray_STRIDE(self, 0) : PyArray_STRIDE(self, ndim-1)); + count = PyArray_SIZE(self); + + while (count--) { + if (nonzero(data, self)) { + ++nonzero_count; + } + data += stride; + } + + return nonzero_count; + } + + /* + * If the array has size zero, return zero (the iterator rejects + * size zero arrays) + */ + if (PyArray_SIZE(self) == 0) { + return 0; + } + + /* Otherwise create and use an iterator to count the nonzeros */ + iter = NpyIter_New(self, NPY_ITER_READONLY| + NPY_ITER_NO_INNER_ITERATION| + NPY_ITER_REFS_OK, + NPY_KEEPORDER, NPY_NO_CASTING, + NULL, 0, NULL, 0); + if (iter == NULL) { + return -1; + } + + /* Get the pointers for inner loop iteration */ + iternext = NpyIter_GetIterNext(iter, NULL); + if (iternext == NULL) { + NpyIter_Deallocate(iter); + return -1; + } + dataptr = NpyIter_GetDataPtrArray(iter); + strideptr = NpyIter_GetInnerStrideArray(iter); + innersizeptr = NpyIter_GetInnerLoopSizePtr(iter); + + /* Iterate over all the elements to count the nonzeros */ + do { + data = *dataptr; + stride = *strideptr; + count = *innersizeptr; + + while (count--) { + if (nonzero(data, self)) { + ++nonzero_count; + } + data += stride; + } + + data = *dataptr; + } while(iternext(iter)); + + NpyIter_Deallocate(iter); + + return nonzero_count; +} + +/*NUMPY_API * Nonzero + * + * TODO: In NumPy 2.0, should make the iteration order a parameter. */ NPY_NO_EXPORT PyObject * PyArray_Nonzero(PyArrayObject *self) { - int n = self->nd, j; - intp count = 0, i, size; - PyArrayIterObject *it = NULL; - PyObject *ret = NULL, *item; - intp *dptr[MAX_DIMS]; - - it = (PyArrayIterObject *)PyArray_IterNew((PyObject *)self); - if (it == NULL) { + int i, ndim = PyArray_NDIM(self); + PyArrayObject *ret = NULL; + PyObject *ret_tuple; + npy_intp ret_dims[2]; + PyArray_NonzeroFunc *nonzero = self->descr->f->nonzero; + char *data; + npy_intp stride, count; + npy_intp nonzero_count = PyArray_CountNonzero(self); + npy_intp *coords; + + NpyIter *iter; + NpyIter_IterNext_Fn iternext; + NpyIter_GetCoords_Fn getcoords; + char **dataptr; + npy_intp *innersizeptr; + + /* Allocate the result as a 2D array */ + ret_dims[0] = nonzero_count; + ret_dims[1] = (ndim == 0) ? 1 : ndim; + ret = (PyArrayObject *)PyArray_New(&PyArray_Type, 2, ret_dims, + NPY_INTP, NULL, NULL, 0, 0, + NULL); + if (ret == NULL) { return NULL; } - /* One pass through 'self', counting the non-zero elements */ - size = it->size; - for (i = 0; i < size; i++) { - if (self->descr->f->nonzero(it->dataptr, self)) { - count++; + + /* If it's a one-dimensional result, don't use an iterator */ + if (ndim <= 1) { + npy_intp i; + + coords = (npy_intp *)PyArray_DATA(ret); + data = PyArray_BYTES(self); + stride = (ndim == 0) ? 0 : PyArray_STRIDE(self, 0); + count = (ndim == 0) ? 1 : PyArray_DIM(self, 0); + + for (i = 0; i < count; ++i) { + if (nonzero(data, self)) { + *coords++ = i; + } + data += stride; } - PyArray_ITER_NEXT(it); + + goto finish; } - PyArray_ITER_RESET(it); - /* Allocate the tuple of coordinates */ - ret = PyTuple_New(n); - if (ret == NULL) { - goto fail; + /* Build an iterator with coordinates, in C order */ + iter = NpyIter_New(self, NPY_ITER_READONLY| + NPY_ITER_COORDS| + NPY_ITER_REFS_OK, + NPY_CORDER, NPY_NO_CASTING, + NULL, 0, NULL, 0); + + if (iter == NULL) { + Py_DECREF(ret); + return NULL; } - for (j = 0; j < n; j++) { - item = PyArray_New(Py_TYPE(self), 1, &count, - PyArray_INTP, NULL, NULL, 0, 0, - (PyObject *)self); - if (item == NULL) { - goto fail; - } - PyTuple_SET_ITEM(ret, j, item); - dptr[j] = (intp *)PyArray_DATA(item); - } - /* A second pass through 'self', recording the indices */ - if (n == 1) { - for (i = 0; i < size; i++) { - if (self->descr->f->nonzero(it->dataptr, self)) { - *(dptr[0])++ = i; - } - PyArray_ITER_NEXT(it); + + /* Get the pointers for inner loop iteration */ + iternext = NpyIter_GetIterNext(iter, NULL); + if (iternext == NULL) { + NpyIter_Deallocate(iter); + Py_DECREF(ret); + return NULL; + } + getcoords = NpyIter_GetGetCoords(iter, NULL); + if (getcoords == NULL) { + NpyIter_Deallocate(iter); + Py_DECREF(ret); + return NULL; + } + dataptr = NpyIter_GetDataPtrArray(iter); + innersizeptr = NpyIter_GetInnerLoopSizePtr(iter); + + coords = (npy_intp *)PyArray_DATA(ret); + + /* Get the coordinates for each non-zero element */ + do { + if (nonzero(*dataptr, self)) { + getcoords(iter, coords); + coords += ndim; } + } while(iternext(iter)); + + NpyIter_Deallocate(iter); + +finish: + /* Treat zero-dimensional as shape (1,) */ + if (ndim == 0) { + ndim = 1; + } + + ret_tuple = PyTuple_New(ndim); + if (ret_tuple == NULL) { + Py_DECREF(ret); + return NULL; + } + + /* Create views into ret, one for each dimension */ + if (ndim == 1) { + /* Directly switch to one dimensions (dimension 1 is 1 anyway) */ + ret->nd = 1; + PyTuple_SET_ITEM(ret_tuple, i, (PyObject *)ret); } else { - /* reset contiguous so that coordinates gets updated */ - it->contiguous = 0; - for (i = 0; i < size; i++) { - if (self->descr->f->nonzero(it->dataptr, self)) { - for (j = 0; j < n; j++) { - *(dptr[j])++ = it->coordinates[j]; - } + for (i = 0; i < ndim; ++i) { + npy_intp stride = ndim*NPY_SIZEOF_INTP; + PyArrayObject *view; + + view = (PyArrayObject *)PyArray_New(Py_TYPE(self), 1, + &nonzero_count, + NPY_INTP, &stride, + PyArray_BYTES(ret) + i*NPY_SIZEOF_INTP, + 0, 0, (PyObject *)self); + if (view == NULL) { + Py_DECREF(ret); + Py_DECREF(ret_tuple); + return NULL; } - PyArray_ITER_NEXT(it); + Py_INCREF(ret); + view->base = (PyObject *)ret; + PyTuple_SET_ITEM(ret_tuple, i, (PyObject *)view); } - } - Py_DECREF(it); - return ret; - - fail: - Py_XDECREF(ret); - Py_XDECREF(it); - return NULL; + Py_DECREF(ret); + } + return ret_tuple; } diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index f03f5deb4..95f706389 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -1725,6 +1725,29 @@ array_zeros(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kwds) } static PyObject * +array_count_nonzero(PyObject *NPY_UNUSED(self), PyObject *args) +{ + PyObject *array_in; + PyArrayObject *array; + npy_intp count; + + if (!PyArg_ParseTuple(args, "O", &array_in)) { + return NULL; + } + + array = PyArray_FromAny(array_in, NULL, 0, 0, 0, NULL); + if (array == NULL) { + return NULL; + } + + count = PyArray_CountNonzero(array); + + Py_DECREF(array); + + return (count == -1) ? NULL : PyInt_FromSsize_t(count); +} + +static PyObject * array_fromstring(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *keywds) { char *data; @@ -2744,6 +2767,9 @@ static struct PyMethodDef array_module_methods[] = { {"zeros", (PyCFunction)array_zeros, METH_VARARGS|METH_KEYWORDS, NULL}, + {"count_nonzero", + (PyCFunction)array_count_nonzero, + METH_VARARGS, NULL}, {"empty", (PyCFunction)array_empty, METH_VARARGS|METH_KEYWORDS, NULL}, diff --git a/numpy/core/src/multiarray/new_iterator.c.src b/numpy/core/src/multiarray/new_iterator.c.src index f11fd7a60..4536f9c3f 100644 --- a/numpy/core/src/multiarray/new_iterator.c.src +++ b/numpy/core/src/multiarray/new_iterator.c.src @@ -552,8 +552,16 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, */ if (ndim > 1 && !(itflags&NPY_ITFLAG_HASCOORDS)) { npyiter_coalesce_axes(iter); + /* + * The operation may have changed the layout, so we have to + * get the internal pointers again. + */ itflags = NIT_ITFLAGS(iter); ndim = NIT_NDIM(iter); + op = NIT_OBJECTS(iter); + op_dtype = NIT_DTYPES(iter); + op_itflags = NIT_OPITFLAGS(iter); + op_dataptr = NIT_RESETDATAPTR(iter); } /* diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py index f3ca44404..1f1d4f9b8 100644 --- a/numpy/core/tests/test_numeric.py +++ b/numpy/core/tests/test_numeric.py @@ -409,6 +409,49 @@ class TestFromiter(TestCase): self.assertTrue(alltrue(a == expected,axis=0)) self.assertTrue(alltrue(a20 == expected[:20],axis=0)) +class TestNonzero(TestCase): + def test_nonzero_trivial(self): + assert_equal(np.count_nonzero(array([])), 0) + assert_equal(np.nonzero(array([])), ([],)) + + assert_equal(np.count_nonzero(array(0)), 0) + assert_equal(np.nonzero(array(0)), ([],)) + assert_equal(np.count_nonzero(array(1)), 1) + assert_equal(np.nonzero(array(1)), ([0],)) + + def test_nonzero_onedim(self): + x = array([1,0,2,-1,0,0,8]) + assert_equal(np.count_nonzero(x), 4) + assert_equal(np.nonzero(x), ([0, 2, 3, 6],)) + + x = array([(1,2),(0,0),(1,1),(-1,3),(0,7)], + dtype=[('a','i4'),('b','i2')]) + assert_equal(np.count_nonzero(x['a']), 3) + assert_equal(np.count_nonzero(x['b']), 4) + assert_equal(np.nonzero(x['a']), ([0,2,3],)) + assert_equal(np.nonzero(x['b']), ([0,2,3,4],)) + + def test_nonzero_twodim(self): + x = array([[0,1,0],[2,0,3]]) + assert_equal(np.count_nonzero(x), 3) + assert_equal(np.nonzero(x), ([0,1,1],[1,0,2])) + + x = np.eye(3) + assert_equal(np.count_nonzero(x), 3) + assert_equal(np.nonzero(x), ([0,1,2],[0,1,2])) + + x = array([[(0,1),(0,0),(1,11)], + [(1,1),(1,0),(0,0)], + [(0,0),(1,5),(0,1)]], dtype=[('a','f4'),('b','u1')]) + assert_equal(np.count_nonzero(x['a']), 4) + assert_equal(np.count_nonzero(x['b']), 5) + assert_equal(np.nonzero(x['a']), ([0,1,1,2],[2,0,1,1])) + assert_equal(np.nonzero(x['b']), ([0,0,1,2,2],[0,2,0,1,2])) + + assert_equal(np.count_nonzero(x['a'].T), 4) + assert_equal(np.count_nonzero(x['b'].T), 5) + assert_equal(np.nonzero(x['a'].T), ([2,0,1,1],[0,1,1,2])) + assert_equal(np.nonzero(x['b'].T), ([0,2,0,1,2],[0,0,1,2,2])) class TestIndex(TestCase): def test_boolean(self): diff --git a/numpy/ma/core.py b/numpy/ma/core.py index a945789df..6873bef6c 100644 --- a/numpy/ma/core.py +++ b/numpy/ma/core.py @@ -4314,6 +4314,8 @@ class MaskedArray(ndarray): array. ndarray.nonzero : Equivalent ndarray method. + count_nonzero : + Counts the number of non-zero elements in the input array. Examples -------- |