diff options
author | Sebastian Berg <sebastian@sipsolutions.net> | 2013-10-22 20:08:54 +0200 |
---|---|---|
committer | Sebastian Berg <sebastian@sipsolutions.net> | 2014-02-06 17:51:59 +0100 |
commit | 96dbf1bc7378cd67d690de628ad78016610058bb (patch) | |
tree | 3bc6ef7f7601e5f241a687e9c5135c4e7ddd06a9 | |
parent | 13b44ee04a924aeccfb67fc986fa68d221bd86e2 (diff) | |
download | numpy-96dbf1bc7378cd67d690de628ad78016610058bb.tar.gz |
ENH: Use template for inner loops and use stransfer
-rw-r--r-- | numpy/core/src/multiarray/lowlevel_strided_loops.c.src | 252 | ||||
-rw-r--r-- | numpy/core/src/multiarray/mapping.c | 333 | ||||
-rw-r--r-- | numpy/core/src/private/lowlevel_strided_loops.h | 6 |
3 files changed, 297 insertions, 294 deletions
diff --git a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src index 638ae4d1e..abc200baa 100644 --- a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src +++ b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src @@ -1353,3 +1353,255 @@ PyArray_TransferMaskedStridedToNDim(npy_intp ndim, } } } + + +/***************************************************************************/ +/****************** Mapiter (Advanced indexing) Get/Set ********************/ +/***************************************************************************/ + +/**begin repeat + * #name = set, get# + * #isget = 0, 1# + */ + +NPY_NO_EXPORT int +mapiter_@name@(PyArrayMapIterObject *mit) +{ + npy_intp *counter, count; + npy_intp indval; + int i, is_aligned; + char *self_ptr; + + /* Cached mit info */ + int numiter = mit->numiter; + int needs_api = mit->needs_api; + /* Constant information */ + npy_intp fancy_dims[NPY_MAXDIMS]; + npy_intp fancy_strides[NPY_MAXDIMS]; + int iteraxes[NPY_MAXDIMS]; + + char *baseoffset = mit->baseoffset; + char **outer_ptrs = mit->outer_ptrs; + npy_intp *outer_strides = mit->outer_strides; + PyArrayObject *array= mit->array; + + /* Fill constant information */ + for (i = 0; i < numiter; i++) { + fancy_dims[i] = mit->fancy_dims[i]; + fancy_strides[i] = mit->fancy_strides[i]; + iteraxes[i] = mit->iteraxes[i]; + } + + /* + * Alignment information (swapping is never needed, since we buffer), + * could also check extra_op is buffered, but it should rarely matter. + */ + + is_aligned = PyArray_ISALIGNED(array) && PyArray_ISALIGNED(mit->extra_op); + + if (mit->size == 0) { + return 0; + } + + if (mit->subspace_iter == NULL) { + /* + * Item by item copy situation, the operand is buffered + * so use copyswap. 
+ */ + PyArray_CopySwapFunc *copyswap = PyArray_DESCR(array)->f->copyswap; + + /* We have only one iterator handling everything */ + counter = NpyIter_GetInnerLoopSizePtr(mit->outer); + + /************ Optimized inner loops without subspace *************/ + +/**begin repeat1 + * #numiter = 1, numiter# + */ + if (numiter == @numiter@) { + + /* Optimization for aligned types that do not need the api */ + switch ((is_aligned && !needs_api) ? PyArray_ITEMSIZE(array) : 0) { + +/**begin repeat2 + * #elsize = 1, 2, 4, 8, 0# + * #copytype = npy_uint8, npy_uint16, npy_uint32, npy_uint64, 0# + */ + +#if @elsize@ + case @elsize@: +#else + default: +#endif + do { +#if !@isget@ + /* + * When the API is needed the casting might fail + * TODO: (only if buffering is enabled). + */ + if (needs_api && PyErr_Occurred()) { + return -1; + } +#endif + count = *counter; + while (count--) { + self_ptr = baseoffset; + for (i=0; i < @numiter@; i++) { + indval = *((npy_intp*)outer_ptrs[i]); +#if @isget@ + if (check_and_adjust_index(&indval, fancy_dims[i], + iteraxes[i]) < 0 ) { + return -1; + } +#else + if (indval < 0) { + indval += fancy_dims[i]; + } +#endif + self_ptr += indval * fancy_strides[i]; + + /* advance indexing arrays */ + outer_ptrs[i] += outer_strides[i]; + } + +#if @isget@ +#if @elsize@ + *(@copytype@ *)(outer_ptrs[i]) = *(@copytype@ *)self_ptr; +#else + copyswap(outer_ptrs[i], self_ptr, 0, array); +#endif + +#else /* !@isget@ */ +#if @elsize@ + *(@copytype@ *)self_ptr = *(@copytype@ *)(outer_ptrs[i]); +#else + copyswap(self_ptr, outer_ptrs[i], 0, array); +#endif +#endif + /* advance extra operand */ + outer_ptrs[i] += outer_strides[i]; + } + } while (mit->outer_next(mit->outer)); + + break; + +/**end repeat2**/ + } + } +/**end repeat1**/ + } + + /******************* Nested Iteration Situation *******************/ + else { + char *subspace_baseptrs[2]; + char **subspace_ptrs = mit->subspace_ptrs; + npy_intp *subspace_strides = mit->subspace_strides; + int skip = 0; + + /* Use 
strided transfer functions for the inner loop */ + PyArray_StridedUnaryOp *stransfer = NULL; + NpyAuxData *transferdata = NULL; + npy_intp fixed_strides[2]; + +#if @isget@ + npy_intp src_itemsize = PyArray_ITEMSIZE(array); +#else + npy_intp src_itemsize = PyArray_ITEMSIZE(array); +#endif + + /* + * Get a dtype transfer function, since there are no + * buffers, this is safe. + */ + NpyIter_GetInnerFixedStrideArray(mit->subspace_iter, fixed_strides); + if (PyArray_GetDTypeTransferFunction(is_aligned, +#if @isget@ + fixed_strides[0], fixed_strides[1], +#else + fixed_strides[1], fixed_strides[0], +#endif + PyArray_DESCR(array), PyArray_DESCR(mit->extra_op), + 0, + &stransfer, &transferdata, + &needs_api) != NPY_SUCCEED) { + return -1; + } + + counter = NpyIter_GetInnerLoopSizePtr(mit->subspace_iter); + if (*counter == PyArray_NDIM(mit->subspace)) { + skip = 1; + } + + do { + self_ptr = baseoffset; + for (i=0; i < numiter; i++) { + indval = *((npy_intp*)outer_ptrs[i]); +#if @isget@ + if (check_and_adjust_index(&indval, fancy_dims[i], + iteraxes[i]) < 0 ) { + return -1; + } +#else + if (indval < 0) { + indval += fancy_dims[i]; + } +#endif + + self_ptr += indval * fancy_strides[i]; + } + + /* + * Resetting is slow, so skip if the subspace iteration has + * only a single inner loop. + */ + if (!skip) { + subspace_baseptrs[0] = self_ptr; + subspace_baseptrs[1] = mit->extra_op_ptrs[0]; + + /* (can't really fail, since no buffering necessary) */ + if (!NpyIter_ResetBasePointers(mit->subspace_iter, + subspace_baseptrs, NULL)) { + NPY_AUXDATA_FREE(transferdata); + return -1; + } + } + else { + subspace_ptrs[0] = self_ptr; + subspace_ptrs[1] = mit->extra_op_ptrs[0]; + } + +#if !@isget@ + /* + * When the API is needed the casting might fail + * TODO: Could only check if casting is unsafe, or even just + * not at all... 
+ */ + if (needs_api && PyErr_Occurred()) { + NPY_AUXDATA_FREE(transferdata); + return -1; + } +#endif + + do { + +#if @isget@ + stransfer(subspace_ptrs[1], subspace_strides[1], + subspace_ptrs[0], subspace_strides[0], + *counter, src_itemsize, transferdata); +#else + stransfer(subspace_ptrs[0], subspace_strides[0], + subspace_ptrs[1], subspace_strides[1], + *counter, src_itemsize, transferdata); +#endif + } while (mit->subspace_next(mit->subspace_iter)); + + mit->extra_op_next(mit->extra_op_iter); + } while (mit->outer_next(mit->outer)); + + NPY_AUXDATA_FREE(transferdata); + } + + return 0; +} + +/**end repeat**/ diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c index 020acd821..cc30f3d45 100644 --- a/numpy/core/src/multiarray/mapping.c +++ b/numpy/core/src/multiarray/mapping.c @@ -216,294 +216,6 @@ PyArray_MapIterSwapAxes(PyArrayMapIterObject *mit, PyArrayObject **ret, int getm } -static int -PyArray_GetMap(PyArrayMapIterObject *mit) -{ - PyArray_CopySwapFunc *copyswap; - PyArray_CopySwapNFunc *copyswapn; - npy_intp *counter, count; - npy_intp indval; - int i; - char *self_ptr; - - /* Cached mit info */ - int numiter = mit->numiter; - int needs_api = mit->needs_api; - /* Constant information */ - npy_intp fancy_dims[NPY_MAXDIMS]; - npy_intp fancy_strides[NPY_MAXDIMS]; - int iteraxes[NPY_MAXDIMS]; - - char *baseoffset = mit->baseoffset; - char **outer_ptrs = mit->outer_ptrs; - npy_intp *outer_strides = mit->outer_strides; - PyArrayObject *extra_op = mit->extra_op; - - /* Fill constant information */ - for (i = 0; i < numiter; i++) { - fancy_dims[i] = mit->fancy_dims[i]; - fancy_strides[i] = mit->fancy_strides[i]; - iteraxes[i] = mit->iteraxes[i]; - } - - if (mit->size == 0) { - return 0; - } - - copyswap = PyArray_DESCR(mit->array)->f->copyswap; - copyswapn = PyArray_DESCR(mit->array)->f->copyswapn; - - PyArray_MapIterReset(mit); - - if (mit->subspace_iter == NULL) { - /* We have only one iterator handling everything */ - 
counter = NpyIter_GetInnerLoopSizePtr(mit->outer); - - if (mit->numiter == 1) { - do { - count = *counter; - while (count--) { - self_ptr = baseoffset; - indval = *((npy_intp*)outer_ptrs[0]); - if (check_and_adjust_index(&indval, fancy_dims[0], - iteraxes[0]) < 0 ) { - return -1; - } - self_ptr += indval * fancy_strides[0]; - - /* advance indexing arrays */ - outer_ptrs[0] += outer_strides[0]; - - /* TODO: Can optimize with memcpy! */ - copyswap(outer_ptrs[1], self_ptr, 0, extra_op); - - /* advance extra operand */ - outer_ptrs[1] += outer_strides[1]; - } - } while (mit->outer_next(mit->outer)); - } - else { - do { - count = *counter; - while (count--) { - self_ptr = baseoffset; - for (i=0; i < numiter; i++) { - indval = *((npy_intp*)outer_ptrs[i]); - if (check_and_adjust_index(&indval, fancy_dims[i], - iteraxes[i]) < 0 ) { - return -1; - } - self_ptr += indval * fancy_strides[i]; - - /* advance indexing arrays */ - outer_ptrs[i] += outer_strides[i]; - } - - /* TODO: Can optimize with memcpy! 
*/ - copyswap(outer_ptrs[i], self_ptr, 0, extra_op); - - /* advance extra operand */ - outer_ptrs[i] += outer_strides[i]; - } - } while (mit->outer_next(mit->outer)); - } - } - else { - /* We have a nested iter situation */ - char *subspace_baseptrs[2]; - char **subspace_ptrs = mit->subspace_ptrs; - npy_intp *subspace_strides = mit->subspace_strides; - - do { - self_ptr = baseoffset; - for (i=0; i < numiter; i++) { - indval = *((npy_intp*)outer_ptrs[i]); - if (check_and_adjust_index(&indval, fancy_dims[i], - iteraxes[i]) < 0 ) { - return -1; - } - - self_ptr += indval * fancy_strides[i]; - } - - subspace_baseptrs[0] = self_ptr; - subspace_baseptrs[1] = mit->extra_op_ptrs[0]; - - /* (can't really fail, since no buffering necessary) */ - if (!NpyIter_ResetBasePointers(mit->subspace_iter, - subspace_baseptrs, NULL)) { - return -1; - } - - counter = NpyIter_GetInnerLoopSizePtr(mit->subspace_iter); - - do { - copyswapn(subspace_ptrs[1], subspace_strides[1], - subspace_ptrs[0], subspace_strides[0], - *counter, 0, extra_op); - } while (mit->subspace_next(mit->subspace_iter)); - - mit->extra_op_next(mit->extra_op_iter); - } while (mit->outer_next(mit->outer)); - } - - return 0; -} - - -static int -PyArray_SetMap(PyArrayMapIterObject *mit) -{ - PyArray_CopySwapFunc *copyswap; - PyArray_CopySwapNFunc *copyswapn; - npy_intp *counter, count; - npy_intp indval; - int i; - char *self_ptr; - - /* Cached mit info */ - int numiter = mit->numiter; - int needs_api = mit->needs_api; - /* Constant information */ - npy_intp fancy_dims[NPY_MAXDIMS]; - npy_intp fancy_strides[NPY_MAXDIMS]; - int iteraxes[NPY_MAXDIMS]; - - char *baseoffset = mit->baseoffset; - char **outer_ptrs = mit->outer_ptrs; - npy_intp *outer_strides = mit->outer_strides; - PyArrayObject *extra_op = mit->extra_op; - - /* Fill constant information */ - for (i = 0; i < numiter; i++) { - fancy_dims[i] = mit->fancy_dims[i]; - fancy_strides[i] = mit->fancy_strides[i]; - iteraxes[i] = mit->iteraxes[i]; - } - - if (mit->size 
== 0) { - return 0; - } - - copyswap = PyArray_DESCR(mit->array)->f->copyswap; - copyswapn = PyArray_DESCR(mit->array)->f->copyswapn; - - PyArray_MapIterReset(mit); - - if (mit->subspace_iter == NULL) { - /* We have only one iterator handling everything */ - counter = NpyIter_GetInnerLoopSizePtr(mit->outer); - - if (mit->numiter == 1) { - do { - if (needs_api && PyErr_Occurred()) { - /* - * NOTE: this check is in principle on necessary if - * buffering is used. - */ - return -1; - } - - count = *counter; - while (count--) { - self_ptr = baseoffset; - indval = *((npy_intp*)outer_ptrs[0]); - if (indval < 0) { - indval += fancy_dims[0]; - } - self_ptr += indval * fancy_strides[0]; - - /* advance indexing arrays */ - outer_ptrs[0] += outer_strides[0]; - - /* TODO: Can optimize with memcpy! */ - copyswap(self_ptr, outer_ptrs[1], 0, extra_op); - - /* advance extra operand */ - outer_ptrs[1] += outer_strides[1]; - } - } while (mit->outer_next(mit->outer)); - } - else { - do { - if (needs_api && PyErr_Occurred()) { - return -1; - } - - count = *counter; - while (count--) { - self_ptr = baseoffset; - for (i=0; i < numiter; i++) { - indval = *((npy_intp*)outer_ptrs[i]); - if (indval < 0) { - indval += fancy_dims[i]; - } - self_ptr += indval * fancy_strides[i]; - - /* advance indexing arrays */ - outer_ptrs[i] += outer_strides[i]; - } - - /* TODO: Can optimize with memcpy! 
*/ - copyswap(self_ptr, outer_ptrs[i], 0, extra_op); - - /* advance extra operand */ - outer_ptrs[i] += outer_strides[i]; - } - } while (mit->outer_next(mit->outer)); - } - } - else { - /* We have a nested iter situation */ - char *subspace_baseptrs[2]; - char **subspace_ptrs = mit->subspace_ptrs; - npy_intp *subspace_strides = mit->subspace_strides; - - do { - self_ptr = baseoffset; - for (i=0; i < numiter; i++) { - indval = *((npy_intp*)outer_ptrs[i]); - if (indval < 0) { - indval += fancy_dims[i]; - } - - self_ptr += indval * fancy_strides[i]; - } - - subspace_baseptrs[0] = self_ptr; - subspace_baseptrs[1] = mit->extra_op_ptrs[0]; - - /* (can't really fail, since no buffering necessary) */ - if (!NpyIter_ResetBasePointers(mit->subspace_iter, - subspace_baseptrs, NULL)) { - return -1; - } - if (needs_api && PyErr_Occurred()) { - /* - * NOTE: this check is in principle on necessary if - * buffering is used, in principle could *not* - * use buffering for the subspace case and instead - * directly use the strided dtype transfer function. - */ - return -1; - } - - counter = NpyIter_GetInnerLoopSizePtr(mit->subspace_iter); - - do { - copyswapn(subspace_ptrs[0], subspace_strides[0], - subspace_ptrs[1], subspace_strides[1], - *counter, 0, extra_op); - } while (mit->subspace_next(mit->subspace_iter)); - - mit->extra_op_next(mit->extra_op_iter); - } while (mit->outer_next(mit->outer)); - } - - return 0; -} - - /* * This function handles all index preparations with the exception * of field access. It fills the array of index_info structs correctly. @@ -1505,6 +1217,18 @@ array_ass_boolean_subscript(PyArrayObject *self, NpyIter_Deallocate(iter); } + if (needs_api) { + /* + * FIXME?: most assignment operations stop after the first occurrence + * of an error. Boolean does not currently, but should at least + * report the error. 
(This is only relevant for things like str->int + * casts which call into python) + */ + if (PyErr_Occurred()) { + return -1; + } + } + return 0; } @@ -1685,7 +1409,12 @@ array_subscript(PyArrayObject *self, PyObject *op) goto finish_view; } - if (PyArray_GetMap(mit) < 0) { + /* Reset the outer iterator */ + if (NpyIter_Reset(mit->outer, NULL) < 0) { + goto finish_view; + } + + if (mapiter_get(mit) < 0) { /* TODO: Check if safe for object types. */ Py_DECREF(mit); goto finish_view; @@ -1944,9 +1673,7 @@ array_ass_sub(PyArrayObject *self, PyObject *ind, PyObject *op) mit = (PyArrayMapIterObject *)PyArray_MapIterNew(indices, index_num, index_type, ndim, fancy_ndim, self, - view, - NPY_ITER_BUFFERED | - NPY_ITER_GROWINNER, + view, 0, NPY_ITER_WRITEONLY, ((tmp_arr == NULL) ? NPY_ITER_READWRITE : @@ -1966,6 +1693,11 @@ array_ass_sub(PyArrayObject *self, PyObject *ind, PyObject *op) } } + /* Can now reset the outer iterator (delayed bufalloc) */ + if (NpyIter_Reset(mit->outer, NULL) < 0) { + goto fail; + } + if (PyArray_MapIterCheckIndices(mit) < 0) { Py_DECREF((PyObject *)mit); goto fail; @@ -1976,7 +1708,7 @@ array_ass_sub(PyArrayObject *self, PyObject *ind, PyObject *op) * not care about safe casting. */ - if (PyArray_SetMap(mit) < 0) { + if (mapiter_set(mit) < 0) { goto fail; } @@ -2483,6 +2215,9 @@ PyArray_MapIterCheckIndices(PyArrayMapIterObject *mit) * be NULL (if extra_op_flags is not 0). * * @return A new MapIter (PyObject *) which still requires binding or NULL. + * + * NOTE: The outer iteration (and subspace if requested buffered) is + * created with DELAY_BUFALLOC. It must be reset before usage! 
*/ NPY_NO_EXPORT PyObject * PyArray_MapIterNew(npy_index_info *indices , int index_num, int index_type, @@ -2903,7 +2638,17 @@ PyArray_MapIterNew(npy_index_info *indices , int index_num, int index_type, index_arrays[1] = extra_op; op_axes[1] = &single_op_axis[mit->nd_fancy]; - dtypes[1] = extra_op_dtype; + + /* + * Buffering is never used here, but in case someone plugs it in + * somewhere else, set the type correctly then. + */ + if ((subspace_iter_flags & NPY_ITER_BUFFERED)) { + dtypes[1] = extra_op_dtype; + } + else { + dtypes[1] = NULL; + } op_flags[1] = extra_op_flags; } else { diff --git a/numpy/core/src/private/lowlevel_strided_loops.h b/numpy/core/src/private/lowlevel_strided_loops.h index f2e5d02c3..a2cfc8653 100644 --- a/numpy/core/src/private/lowlevel_strided_loops.h +++ b/numpy/core/src/private/lowlevel_strided_loops.h @@ -326,6 +326,12 @@ PyArray_TransferMaskedStridedToNDim(npy_intp ndim, PyArray_MaskedStridedUnaryOp *stransfer, NpyAuxData *data); +NPY_NO_EXPORT int +mapiter_get(PyArrayMapIterObject *mit); + +NPY_NO_EXPORT int +mapiter_set(PyArrayMapIterObject *mit); + /* * Prepares shape and strides for a simple raw array iteration. * This sorts the strides into FORTRAN order, reverses any negative |