diff options
author | Charles Harris <charlesr.harris@gmail.com> | 2013-08-16 15:08:53 -0700 |
---|---|---|
committer | Charles Harris <charlesr.harris@gmail.com> | 2013-08-16 15:08:53 -0700 |
commit | d4af6123a609b062af7ed23e106b6bfffd482e7f (patch) | |
tree | ecfbced2ba34d1e2e74029cd40044c16b92455d3 | |
parent | ec418015d1a8e947feb3ca2868d976ee252782ec (diff) | |
parent | acef718f40a30188c1379c13cc49c920d9e7c303 (diff) | |
download | numpy-d4af6123a609b062af7ed23e106b6bfffd482e7f.tar.gz |
Merge pull request #2821 from ContinuumIO/ufunc-fancy-indexing3
in place fancy indexing for ufuncs
-rw-r--r-- | doc/release/1.8.0-notes.rst | 10 | ||||
-rw-r--r-- | doc/source/reference/ufuncs.rst | 7 | ||||
-rw-r--r-- | numpy/add_newdocs.py | 53 | ||||
-rw-r--r-- | numpy/core/src/multiarray/mapping.c | 10 | ||||
-rw-r--r-- | numpy/core/src/umath/ufunc_object.c | 311 | ||||
-rw-r--r-- | numpy/core/tests/test_ufunc.py | 135 |
6 files changed, 524 insertions, 2 deletions
diff --git a/doc/release/1.8.0-notes.rst b/doc/release/1.8.0-notes.rst index 1cc8c0c08..1e7d00a74 100644 --- a/doc/release/1.8.0-notes.rst +++ b/doc/release/1.8.0-notes.rst @@ -185,6 +185,16 @@ New nan aware statistical functions are added. In these functions the results are what would be obtained if nan values were ommited from all computations. +In place fancy indexing for ufuncs +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The function ``at`` has been added to ufunc objects to allow in place +ufuncs with no buffering when fancy indexing is used. For example, the +following will increment the first and second items in the array, and will +increment the third item twice: +numpy.add.at(array, [0, 1, 2, 2], 1) + +This is similar to doing array[[0, 1, 2, 2]] += 1 + C-API ~~~~~ diff --git a/doc/source/reference/ufuncs.rst b/doc/source/reference/ufuncs.rst index 2154bca37..acf9bf330 100644 --- a/doc/source/reference/ufuncs.rst +++ b/doc/source/reference/ufuncs.rst @@ -417,6 +417,12 @@ an integer (or Boolean) data-type and smaller than the size of the :class:`int_` data type, it will be internally upcast to the :class:`int_` (or :class:`uint`) data-type. +Ufuncs also have a fifth method that allows in place operations to be +performed using fancy indexing. No buffering is used on the dimensions where +fancy indexing is used, so the fancy index can list an item more than once and +the operation will be performed on the result of the previous operation for +that item. + .. index:: pair: ufunc; methods @@ -427,6 +433,7 @@ an integer (or Boolean) data-type and smaller than the size of the ufunc.accumulate ufunc.reduceat ufunc.outer + ufunc.at .. warning:: diff --git a/numpy/add_newdocs.py b/numpy/add_newdocs.py index 5bb800488..65a416a9f 100644 --- a/numpy/add_newdocs.py +++ b/numpy/add_newdocs.py @@ -5815,6 +5815,59 @@ add_newdoc('numpy.core', 'ufunc', ('outer', """)) +add_newdoc('numpy.core', 'ufunc', ('at', + """ + at(a, indices, b=None) + + Performs unbuffered in place operation on operand 'a' for elements + specified by 'indices'. For addition ufunc, this method is equivalent to + `a[indices] += b`, except that results are accumulated for elements that + are indexed more than once. For example, `a[[0,0]] += 1` will only + increment the first element once because of buffering, whereas + `add.at(a, [0,0], 1)` will increment the first element twice. + + Parameters + ---------- + a : array_like + The array to perform in place operation on. + indices : array_like or tuple + Array like index object or slice object for indexing into first + operand. If first operand has multiple dimensions, indices can be a + tuple of array like index objects or slice objects. + b : array_like + Second operand for ufuncs requiring two operands. Operand must be + broadcastable over first operand after indexing or slicing. + + Examples + -------- + Set items 0 and 1 to their negative values: + + >>> a = np.array([1, 2, 3, 4]) + >>> np.negative.at(a, [0, 1]) + >>> print(a) + array([-1, -2, 3, 4]) + + :: + + Increment items 0 and 1, and increment item 2 twice: + + >>> a = np.array([1, 2, 3, 4]) + >>> np.add.at(a, [0, 1, 2, 2], 1) + >>> print(a) + array([2, 3, 5, 4]) + + :: + + Add items 0 and 1 in first array to second array, + and store results in first array: + + >>> a = np.array([1, 2, 3, 4]) + >>> b = np.array([1, 2]) + >>> np.add.at(a, [0, 1], b) + >>> print(a) + array([2, 4, 3, 4]) + + """)) ############################################################################## # diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c index 25f3dad9b..4f33afa1e 100644 --- a/numpy/core/src/multiarray/mapping.c +++ b/numpy/core/src/multiarray/mapping.c @@ -1662,7 +1662,9 @@ PyArray_MapIterReset(PyArrayMapIterObject *mit) mit->index = 0; - copyswap = PyArray_DESCR(mit->iters[0]->ao)->f->copyswap; + if (mit->numiter > 0) { + copyswap = PyArray_DESCR(mit->iters[0]->ao)->f->copyswap; + } if (mit->subspace != NULL) { memcpy(coord, mit->bscoord, sizeof(npy_intp)*PyArray_NDIM(mit->ait->ao)); @@ -1712,7 +1714,11 @@ PyArray_MapIterNext(PyArrayMapIterObject *mit) if (mit->index >= mit->size) { return; } - copyswap = PyArray_DESCR(mit->iters[0]->ao)->f->copyswap; + + if (mit->numiter > 0) { + copyswap = PyArray_DESCR(mit->iters[0]->ao)->f->copyswap; + } + /* Sub-space iteration */ if (mit->subspace != NULL) { PyArray_ITER_NEXT(mit->subspace); diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index 7ec5b977f..fff2e158f 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -4849,6 +4849,314 @@ ufunc_reduceat(PyUFuncObject *ufunc, PyObject *args, PyObject *kwds) return PyUFunc_GenericReduction(ufunc, args, kwds, UFUNC_REDUCEAT); } +/* + * Call ufunc only on selected array items and store result in first operand. + * For add ufunc, method call is equivalent to op1[idx] += op2 with no + * buffering of the first operand. + * Arguments: + * op1 - First operand to ufunc + * idx - Indices that are applied to first operand. Equivalent to op1[idx]. + * op2 - Second operand to ufunc (if needed). Must be able to broadcast + * over first operand. + */ +static PyObject * +ufunc_at(PyUFuncObject *ufunc, PyObject *args) +{ + PyObject *op1 = NULL; + PyObject *idx = NULL; + PyObject *op2 = NULL; + PyArrayObject *op1_array = NULL; + PyArrayObject *op2_array = NULL; + PyArrayMapIterObject *iter = NULL; + PyArrayIterObject *iter2 = NULL; + PyArray_Descr *dtypes[3] = {NULL, NULL, NULL}; + PyArrayObject *operands[3] = {NULL, NULL, NULL}; + PyArrayObject *array_operands[3] = {NULL, NULL, NULL}; + npy_intp dims[1] = {1}; + + int needs_api; + NPY_BEGIN_THREADS_DEF; + + PyUFuncGenericFunction innerloop; + void *innerloopdata; + npy_intp count[1], stride[1]; + int i; + int nop; + + NpyIter *iter_buffer; + NpyIter_IterNextFunc *iternext; + npy_uint32 op_flags[NPY_MAXARGS]; + int buffersize; + int errormask = 0; + PyObject *errobj = NULL; + + if (ufunc->nin > 2) { + PyErr_SetString(PyExc_ValueError, + "Only unary and binary ufuncs supported at this time"); + return NULL; + } + + if (!PyArg_ParseTuple(args, "OO|O", &op1, &idx, &op2)) { + return NULL; + } + + if (ufunc->nin == 2 && op2 == NULL) { + PyErr_SetString(PyExc_ValueError, + "second operand needed for ufunc"); + return NULL; + } + + if (!PyArray_Check(op1)) { + PyErr_SetString(PyExc_TypeError, + "first operand must be array"); + return NULL; + } + + op1_array = op1; + + iter = PyArray_MapIterArray(op1_array, idx); + if (iter == NULL) { + goto fail; + } + + /* Create second operand from number array if needed. */ + if (op2 != NULL) { + op2_array = (PyArrayObject *)PyArray_FromAny(op2, NULL, + 0, 0, 0, NULL); + if (op2_array == NULL) { + goto fail; + } + + /* + * May need to swap axes so that second operand is + * iterated over correctly + */ + if ((iter->subspace != NULL) && (iter->consec)) { + PyArray_MapIterSwapAxes(iter, &op2_array, 0); + if (op2_array == NULL) { + goto fail; + } + } + + /* + * Create array iter object for second operand that + * "matches" the map iter object for the first operand. + * Then we can just iterate over the first and second + * operands at the same time and not have to worry about + * picking the correct elements from each operand to apply + * the ufunc to. + */ + if ((iter2 = (PyArrayIterObject *)\ + PyArray_BroadcastToShape((PyObject *)op2_array, + iter->dimensions, iter->nd))==NULL) { + goto fail; + } + } + + /* + * Create dtypes array for either one or two input operands. + * The output operand is set to the first input operand + */ + dtypes[0] = PyArray_DESCR(op1_array); + operands[0] = op1_array; + if (op2_array != NULL) { + dtypes[1] = PyArray_DESCR(op2_array); + dtypes[2] = dtypes[0]; + operands[1] = op2_array; + operands[2] = op1_array; + nop = 3; + } + else { + dtypes[1] = dtypes[0]; + dtypes[2] = NULL; + operands[1] = op1_array; + operands[2] = NULL; + nop = 2; + } + + if (ufunc->type_resolver(ufunc, NPY_UNSAFE_CASTING, + operands, NULL, dtypes) < 0) { + goto fail; + } + if (ufunc->legacy_inner_loop_selector(ufunc, dtypes, + &innerloop, &innerloopdata, &needs_api) < 0) { + goto fail; + } + + count[0] = 1; + stride[0] = 1; + + Py_INCREF(PyArray_DESCR(op1_array)); + array_operands[0] = PyArray_NewFromDescr(&PyArray_Type, + PyArray_DESCR(op1_array), + 1, dims, NULL, iter->dataptr, + NPY_ARRAY_WRITEABLE, NULL); + if (iter2 != NULL) { + Py_INCREF(PyArray_DESCR(op2_array)); + array_operands[1] = PyArray_NewFromDescr(&PyArray_Type, + PyArray_DESCR(op2_array), + 1, dims, NULL, + PyArray_ITER_DATA(iter2), + NPY_ARRAY_WRITEABLE, NULL); + Py_INCREF(PyArray_DESCR(op1_array)); + array_operands[2] = PyArray_NewFromDescr(&PyArray_Type, + PyArray_DESCR(op1_array), + 1, dims, NULL, + iter->dataptr, + NPY_ARRAY_WRITEABLE, NULL); + } + else { + Py_INCREF(PyArray_DESCR(op1_array)); + array_operands[1] = PyArray_NewFromDescr(&PyArray_Type, + PyArray_DESCR(op1_array), + 1, dims, NULL, + iter->dataptr, + NPY_ARRAY_WRITEABLE, NULL); + array_operands[2] = NULL; + } + + /* Set up the flags */ + op_flags[0] = NPY_ITER_READONLY| + NPY_ITER_ALIGNED; + + if (iter2 != NULL) { + op_flags[1] = NPY_ITER_READONLY| + NPY_ITER_ALIGNED; + op_flags[2] = NPY_ITER_WRITEONLY| + NPY_ITER_ALIGNED| + NPY_ITER_ALLOCATE| + NPY_ITER_NO_BROADCAST| + NPY_ITER_NO_SUBTYPE; + } + else { + op_flags[1] = NPY_ITER_WRITEONLY| + NPY_ITER_ALIGNED| + NPY_ITER_ALLOCATE| + NPY_ITER_NO_BROADCAST| + NPY_ITER_NO_SUBTYPE; + } + + PyUFunc_GetPyValues(ufunc->name, &buffersize, &errormask, &errobj); + + /* + * Create NpyIter object to "iterate" over single element of each input + * operand. This is an easy way to reuse the NpyIter logic for dealing + * with certain cases like casting operands to correct dtype. On each + * iteration over the MapIterArray object created above, we'll take the + * current data pointers from that and reset this NpyIter object using + * those data pointers, and then trigger a buffer copy. The buffer data + * pointers from the NpyIter object will then be passed to the inner loop + * function. + */ + iter_buffer = NpyIter_AdvancedNew(nop, array_operands, + NPY_ITER_EXTERNAL_LOOP| + NPY_ITER_REFS_OK| + NPY_ITER_ZEROSIZE_OK| + NPY_ITER_BUFFERED| + NPY_ITER_GROWINNER| + NPY_ITER_DELAY_BUFALLOC, + NPY_KEEPORDER, NPY_UNSAFE_CASTING, + op_flags, dtypes, + -1, NULL, NULL, buffersize); + + if (iter_buffer == NULL) { + goto fail; + } + + needs_api = needs_api | NpyIter_IterationNeedsAPI(iter_buffer); + + iternext = NpyIter_GetIterNext(iter_buffer, NULL); + if (iternext == NULL) { + NpyIter_Deallocate(iter_buffer); + goto fail; + } + + if (!needs_api) { + NPY_BEGIN_THREADS; + } + + /* + * Iterate over first and second operands and call ufunc + * for each pair of inputs + */ + i = iter->size; + while (i > 0) + { + char *dataptr[3]; + char **buffer_dataptr; + + /* + * Set up data pointers for either one or two input operands. + * The output data pointer points to the first operand data. + */ + dataptr[0] = iter->dataptr; + if (iter2 != NULL) { + dataptr[1] = PyArray_ITER_DATA(iter2); + dataptr[2] = iter->dataptr; + } + else { + dataptr[1] = iter->dataptr; + dataptr[2] = NULL; + } + + /* Reset NpyIter data pointers which will trigger a buffer copy */ + NpyIter_ResetBasePointers(iter_buffer, dataptr, NULL); + buffer_dataptr = NpyIter_GetDataPtrArray(iter_buffer); + + innerloop(buffer_dataptr, count, stride, innerloopdata); + + if (needs_api && PyErr_Occurred()) { + break; + } + + /* + * Call to iternext triggers copy from buffer back to output array + * after innerloop puts result in buffer. + */ + iternext(iter_buffer); + + PyArray_MapIterNext(iter); + if (iter2 != NULL) { + PyArray_ITER_NEXT(iter2); + } + + i--; + } + + if (!needs_api) { + NPY_END_THREADS; + } + + NpyIter_Deallocate(iter_buffer); + + Py_XDECREF(op2_array); + Py_XDECREF(iter); + Py_XDECREF(iter2); + Py_XDECREF(array_operands[0]); + Py_XDECREF(array_operands[1]); + Py_XDECREF(array_operands[2]); + Py_XDECREF(errobj); + + if (needs_api && PyErr_Occurred()) { + return NULL; + } + else { + Py_RETURN_NONE; + } + +fail: + + Py_XDECREF(op2_array); + Py_XDECREF(iter); + Py_XDECREF(iter2); + Py_XDECREF(array_operands[0]); + Py_XDECREF(array_operands[1]); + Py_XDECREF(array_operands[2]); + Py_XDECREF(errobj); + + return NULL; +} + static struct PyMethodDef ufunc_methods[] = { {"reduce", @@ -4863,6 +5171,9 @@ static struct PyMethodDef ufunc_methods[] = { {"outer", (PyCFunction)ufunc_outer, METH_VARARGS | METH_KEYWORDS, NULL}, + {"at", + (PyCFunction)ufunc_at, + METH_VARARGS, NULL}, {NULL, NULL, 0, NULL} /* sentinel */ }; diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py index eadb767ed..2368e5809 100644 --- a/numpy/core/tests/test_ufunc.py +++ b/numpy/core/tests/test_ufunc.py @@ -865,5 +865,140 @@ class TestUfunc(TestCase): assert_(MyThing.rmul_count == 1, MyThing.rmul_count) assert_(MyThing.getitem_count <= 2, MyThing.getitem_count) + def test_inplace_fancy_indexing(self): + + a = np.arange(10) + np.add.at(a, [2,5,2], 1) + assert_equal(a, [0, 1, 4, 3, 4, 6, 6, 7, 8, 9]) + + a = np.arange(10) + b = np.array([100,100,100]) + np.add.at(a, [2,5,2], b) + assert_equal(a, [0, 1, 202, 3, 4, 105, 6, 7, 8, 9]) + + a = np.arange(9).reshape(3,3) + b = np.array([[100,100,100],[200,200,200],[300,300,300]]) + np.add.at(a, (slice(None), [1,2,1]), b) + assert_equal(a, [[0,201,102], [3,404,205], [6,607,308]]) + + a = np.arange(27).reshape(3,3,3) + b = np.array([100,200,300]) + np.add.at(a, (slice(None), slice(None), [1,2,1]), b) + assert_equal(a, + [[[0,401,202], + [3,404,205], + [6,407,208]], + + [[9, 410,211], + [12,413,214], + [15,416,217]], + + [[18,419,220], + [21,422,223], + [24,425,226]]]) + + a = np.arange(9).reshape(3,3) + b = np.array([[100,100,100],[200,200,200],[300,300,300]]) + np.add.at(a, ([1,2,1], slice(None)), b) + assert_equal(a, [[0,1,2], [403,404,405], [206,207,208]]) + + a = np.arange(27).reshape(3,3,3) + b = np.array([100,200,300]) + np.add.at(a, (slice(None), [1,2,1], slice(None)), b) + assert_equal(a, + [[[0, 1, 2 ], + [203,404,605], + [106,207,308]], + + [[9, 10, 11 ], + [212,413,614], + [115,216,317]], + + [[18, 19, 20 ], + [221,422,623], + [124,225,326]]]) + + a = np.arange(9).reshape(3,3) + b = np.array([100,200,300]) + np.add.at(a, (0, [1,2,1]), b) + assert_equal(a, [[0,401,202], [3,4,5], [6,7,8]]) + + a = np.arange(27).reshape(3,3,3) + b = np.array([100,200,300]) + np.add.at(a, ([1,2,1], 0, slice(None)), b) + assert_equal(a, + [[[0, 1, 2], + [3, 4, 5], + [6, 7, 8]], + + [[209,410,611], + [12, 13, 14], + [15, 16, 17]], + + [[118,219,320], + [21, 22, 23], + [24, 25, 26]]]) + + a = np.arange(27).reshape(3,3,3) + b = np.array([100,200,300]) + np.add.at(a, (slice(None), slice(None), slice(None)), b) + assert_equal(a, + [[[100,201,302], + [103,204,305], + [106,207,308]], + + [[109,210,311], + [112,213,314], + [115,216,317]], + + [[118,219,320], + [121,222,323], + [124,225,326]]]) + + a = np.arange(10) + np.negative.at(a, [2,5,2]) + assert_equal(a, [0, 1, 2, 3, 4, -5, 6, 7, 8, 9]) + + # Test 0-dim array + a = np.array(0) + np.add.at(a, (), 1) + assert_equal(a, 1) + + assert_raises(IndexError, np.add.at, a, 0, 1) + assert_raises(IndexError, np.add.at, a, [], 1) + + # Test mixed dtypes + a = np.arange(10) + np.power.at(a, [1,2,3,2], 3.5) + assert_equal(a, np.array([0, 1, 4414, 46, 4, 5, 6, 7, 8, 9])) + + # Test boolean indexing and boolean ufuncs + a = np.arange(10) + index = a % 2 == 0 + np.equal.at(a, index, [0, 2, 4, 6, 8]) + assert_equal(a, [1, 1, 1, 3, 1, 5, 1, 7, 1, 9]) + + # Test unary operator + a = np.arange(10, dtype='u4') + np.invert.at(a, [2, 5, 2]) + assert_equal(a, [0, 1, 2, 3, 4, 5 ^ 0xffffffff, 6, 7, 8, 9]) + + # Test empty subspace + orig = np.arange(4) + a = orig[:,None][:,0:0] + np.add.at(a, [0,1], 3) + assert_array_equal(orig, np.arange(4)) + + # Test with swapped byte order + index = np.array([1,2,1], np.dtype('i').newbyteorder()) + values = np.array([1,2,3,4], np.dtype('f').newbyteorder()) + np.add.at(values, index, 3) + assert_array_equal(values, [1,8,6,4]) + + # Test exception thrown + values = np.array(['a', 1], dtype=np.object) + self.assertRaises(TypeError, np.add.at, values, [0,1], 1) + assert_array_equal(values, np.array(['a', 1], dtype=np.object)) + if __name__ == "__main__": run_module_suite() |