diff options
author | jaimefrio <jaime.frio@gmail.com> | 2014-02-11 22:41:42 -0800 |
---|---|---|
committer | jaimefrio <jaime.frio@gmail.com> | 2014-02-11 22:42:07 -0800 |
commit | 9dcdb57792d9b03144b55ebe84bc97b32af6c1db (patch) | |
tree | 7bcf10645fb7fc65de282d8edbade449f9232755 /numpy/lib/src/_compiled_base.c | |
parent | 688b243552cc98d8e1be791eeda3fba81bfbe807 (diff) | |
download | numpy-9dcdb57792d9b03144b55ebe84bc97b32af6c1db.tar.gz |
ENH: single pass over array in `bincount` to determine output size
`bincount` checks its input array to make sure there are no negative entries,
and to determine the size of the output array. This is done by calling two
different functions, each having to loop over the whole array. This PR
adds a new function, `minmax`, that computes the minimum and maximum of the
array in a single pass over it. This leads to speed-ups peaking at 1.5x,
with typical values for large arrays around 1.15x - 1.25x.
A full benchmark summary of the new implementation, including a supposedly
more efficient algorithm that turned out to run slower, can be found [here](https://gist.github.com/jaimefrio/8743836).
Diffstat (limited to 'numpy/lib/src/_compiled_base.c')
-rw-r--r-- | numpy/lib/src/_compiled_base.c | 48 |
1 files changed, 18 insertions, 30 deletions
diff --git a/numpy/lib/src/_compiled_base.c b/numpy/lib/src/_compiled_base.c index c91855e81..7ac4bcc5b 100644 --- a/numpy/lib/src/_compiled_base.c +++ b/numpy/lib/src/_compiled_base.c @@ -105,37 +105,26 @@ check_array_monotonic(const double *a, npy_int lena) } } -/* find the index of the maximum element of an integer array */ -static npy_intp -mxx (npy_intp *i , npy_intp len) +/* Find the minimum and maximum of an integer array */ +static void +minmax(const npy_intp *data, npy_intp data_len, npy_intp *mn, npy_intp *mx) { - npy_intp mx = 0, max = i[0]; - npy_intp j; + npy_intp min; + npy_intp max = min = *data; - for ( j = 1; j < len; j ++ ) { - if ( i [j] > max ) { - max = i [j]; - mx = j; + while (--data_len) { + const npy_intp val = *(++data); + if (val < min) { + min = val; + } + else if (val > max) { + max = val; } } - return mx; -} -/* find the index of the minimum element of an integer array */ -static npy_intp -mnx (npy_intp *i , npy_intp len) -{ - npy_intp mn = 0, min = i [0]; - npy_intp j; - - for ( j = 1; j < len; j ++ ) - if ( i [j] < min ) - {min = i [j]; - mn = j;} - return mn; + *mn = min; + *mx = max; } - - /* * arr_bincount is registered as bincount. * @@ -155,7 +144,7 @@ arr_bincount(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *kwds) PyArray_Descr *type; PyObject *list = NULL, *weight=Py_None, *mlength=Py_None; PyArrayObject *lst=NULL, *ans=NULL, *wts=NULL; - npy_intp *numbers, *ians, len , mxi, mni, ans_size, minlength; + npy_intp *numbers, *ians, len , mx, mn, ans_size, minlength; int i; double *weights , *dans; static char *kwlist[] = {"list", "weights", "minlength", NULL}; @@ -188,14 +177,13 @@ arr_bincount(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *kwds) } numbers = (npy_intp *) PyArray_DATA(lst); - mxi = mxx(numbers, len); - mni = mnx(numbers, len); - if (numbers[mni] < 0) { + minmax(numbers, len, &mn, &mx); + if (mn < 0) { PyErr_SetString(PyExc_ValueError, "The first argument of bincount must be non-negative"); goto fail; } - ans_size = numbers [mxi] + 1; + ans_size = mx + 1; if (mlength != Py_None) { if (!(minlength = PyArray_PyIntAsIntp(mlength))) { goto fail; |