diff options
author | Sebastian Berg <sebastian@sipsolutions.net> | 2013-10-25 03:40:49 +0200 |
---|---|---|
committer | Sebastian Berg <sebastian@sipsolutions.net> | 2014-02-06 17:51:59 +0100 |
commit | fa35a252de2e7ba30276ca30ab67775d4f2e9383 (patch) | |
tree | 04a930e89a3d08fdfa000b5d90b24013d1055bfd | |
parent | 020ec9612e522f2bc6dffb54da00cc4919457541 (diff) | |
download | numpy-fa35a252de2e7ba30276ca30ab67775d4f2e9383.tar.gz |
ENH: Add trivial loop special cases for fancy indexing
This optimizes simple contiguous loops for a single fancy
index into a 1-d array and also for general index checking
-rw-r--r-- | numpy/core/src/multiarray/lowlevel_strided_loops.c.src | 95 | ||||
-rw-r--r-- | numpy/core/src/multiarray/mapping.c | 110 | ||||
-rw-r--r-- | numpy/core/src/private/lowlevel_strided_loops.h | 8 |
3 files changed, 207 insertions, 6 deletions
diff --git a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src index abc200baa..d05c69096 100644 --- a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src +++ b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src @@ -1365,6 +1365,100 @@ PyArray_TransferMaskedStridedToNDim(npy_intp ndim, */ NPY_NO_EXPORT int +mapiter_trivial_@name@(PyArrayObject *self, PyArrayObject *ind, + PyArrayObject *result) +{ + char *base_ptr, *self_ptr, *ind_ptr, *result_ptr; + npy_intp self_stride, ind_stride, result_stride; + npy_intp fancy_dim = PyArray_DIM(self, 0); + + npy_intp itersize; + npy_intp indval; + + int is_aligned = PyArray_ISALIGNED(self) && PyArray_ISALIGNED(result); + int needs_api = PyDataType_REFCHK(PyArray_DESCR(self)); + + PyArray_CopySwapFunc *copyswap = PyArray_DESCR(self)->f->copyswap; + + base_ptr = PyArray_BYTES(self); + self_stride = PyArray_STRIDE(self, 0); + + PyArray_PREPARE_TRIVIAL_PAIR_ITERATION(ind, result, itersize, + ind_ptr, result_ptr, + ind_stride, result_stride) + +#if !@isget@ + /* Check the indices beforehand */ + while (itersize--) { + indval = *((npy_intp*)ind_ptr); + if (check_and_adjust_index(&indval, fancy_dim, 1) < 0 ) { + return -1; + } + ind_ptr += ind_stride; + } + + /* + * Reset the ind_ptr and itersize, due to broadcasting it is always + * the size of ind. + */ + ind_ptr = PyArray_BYTES(ind); + itersize = PyArray_SIZE(ind); +#endif + + /* Optimization for aligned types that do not need the api */ + switch ((is_aligned && !needs_api) ? PyArray_ITEMSIZE(self) : 0) { + +/**begin repeat1 + * #elsize = 1, 2, 4, 8, 0# + * #copytype = npy_uint8, npy_uint16, npy_uint32, npy_uint64, 0# + */ + +#if @elsize@ + case @elsize@: +#else + default: +#endif + while (itersize--) { + indval = *((npy_intp*)ind_ptr); +#if @isget@ + if (check_and_adjust_index(&indval, fancy_dim, 1) < 0 ) { + return -1; + } +#else + if (indval < 0) { + indval += fancy_dim; + } +#endif + self_ptr = base_ptr + indval * self_stride; + +#if @isget@ +#if @elsize@ + *(@copytype@ *)result_ptr = *(@copytype@ *)self_ptr; +#else + copyswap(result_ptr, self_ptr, 0, self); +#endif + +#else /* !@isget@ */ +#if @elsize@ + *(@copytype@ *)self_ptr = *(@copytype@ *)result_ptr; +#else + copyswap(self_ptr, result_ptr, 0, self); +#endif +#endif + + ind_ptr += ind_stride; + result_ptr += result_stride; + } + break; + +/**end repeat1**/ + } + + return 0; +} + + +NPY_NO_EXPORT int mapiter_@name@(PyArrayMapIterObject *mit) { npy_intp *counter, count; @@ -1487,6 +1581,7 @@ mapiter_@name@(PyArrayMapIterObject *mit) /**end repeat2**/ } + return 0; } /**end repeat1**/ } diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c index 868de929a..bf148cc3a 100644 --- a/numpy/core/src/multiarray/mapping.c +++ b/numpy/core/src/multiarray/mapping.c @@ -1365,7 +1365,7 @@ array_subscript(PyArrayObject *self, PyObject *op) goto finish; } - /* If it is only a single ellipsis, just return self */ + /* If it is only a single ellipsis, just return a view */ else if (index_type == HAS_ELLIPSIS) { /* * TODO: Should this be a view or not? The only reason not would be @@ -1406,6 +1406,46 @@ array_subscript(PyArrayObject *self, PyObject *op) goto finish; } + /* + * Special case for very simple 1-d fancy indexing, which however + * is quite common. This saves not only a lot of setup time in the + * iterator, but also is faster (must be exactly fancy because + * we don't support 0-d booleans here) + */ + if (index_type == HAS_FANCY && + index_num == 1) { + /* The array being indexed has one dimension and it is a fancy index */ + PyArrayObject *ind = indices[0].object; + + /* Check if the index is simple enough */ + if (PyArray_TRIVIALLY_ITERABLE(ind) && + PyArray_DESCR(ind)->type_num == NPY_INTP && + PyArray_ISALIGNED(ind) && PyArray_ISNBO(PyArray_DESCR(ind))) { + + Py_INCREF(PyArray_DESCR(self)); + result = PyArray_NewFromDescr(&PyArray_Type, + PyArray_DESCR(self), + PyArray_NDIM(ind), + PyArray_SHAPE(ind), + NULL, NULL, + /* Same order as indices */ + PyArray_ISFORTRAN(ind) ? + NPY_ARRAY_F_CONTIGUOUS : 0, + NULL); + if (result == NULL) { + goto finish; + } + + if (mapiter_trivial_get(self, ind, (PyArrayObject *)result) < 0) { + Py_DECREF(result); + result = NULL; + goto finish; + } + + goto wrap_out_array; + } + } + /* fancy indexing has to be used. And view is the subspace. */ PyArrayMapIterObject * mit; mit = (PyArrayMapIterObject *)PyArray_MapIterNew(indices, index_num, @@ -1439,6 +1479,7 @@ array_subscript(PyArrayObject *self, PyObject *op) Py_DECREF(mit); + wrap_out_array: if (!PyArray_CheckExact(self)) { /* * Need to create a new array as if the old one never existed. @@ -1668,6 +1709,39 @@ array_ass_sub(PyArrayObject *self, PyObject *ind, PyObject *op) tmp_arr = (PyArrayObject *)op; } + /* + * Special case for very simple 1-d fancy indexing, which however + * is quite common. This saves not only a lot of setup time in the + * iterator, but also is faster (must be exactly fancy because + * we don't support 0-d booleans here) + */ + if (index_type == HAS_FANCY && + index_num == 1 && tmp_arr) { + /* The array being indexed has one dimension and it is a fancy index */ + PyArrayObject *ind = indices[0].object; + + /* Check if the type is equivalent */ + if (PyArray_EquivTypes(PyArray_DESCR(self), + PyArray_DESCR(tmp_arr)) && + /* + * Either they are equivalent, or the values must + * be a scalar + */ + (PyArray_EQUIVALENTLY_ITERABLE(ind, tmp_arr) || + (PyArray_NDIM(tmp_arr) == 0 && + PyArray_TRIVIALLY_ITERABLE(tmp_arr))) && + /* Check the type/alginment of the index */ + PyArray_DESCR(ind)->type_num == NPY_INTP && + PyArray_ISALIGNED(ind) && PyArray_ISNBO(PyArray_DESCR(ind))) { + + /* trivial_set checks the index for us */ + if (mapiter_trivial_set(self, ind, tmp_arr) < 0) { + goto fail; + } + goto success; + } + } + /* fancy indexing has to be used. And view is the subspace. */ /* @@ -2139,6 +2213,7 @@ mapiter_fill_info(PyArrayMapIterObject *mit, npy_index_info *indices, NPY_NO_EXPORT int PyArray_MapIterCheckIndices(PyArrayMapIterObject *mit) { + PyArrayObject *op; NpyIter *op_iter; NpyIter_IterNextFunc *op_iternext; npy_intp outer_dim, indval; @@ -2155,15 +2230,41 @@ PyArray_MapIterCheckIndices(PyArrayMapIterObject *mit) } intp_type = PyArray_DescrFromType(NPY_INTP); + for (i=0; i < mit->numiter; i++) { - op_iter = NpyIter_New(NpyIter_GetOperandArray(mit->outer)[i], + op = NpyIter_GetOperandArray(mit->outer)[i]; + + outer_dim = mit->fancy_dims[i]; + outer_axis = mit->iteraxes[i]; + + /* See if it is possible to just trivially iterate the array */ + if (PyArray_TRIVIALLY_ITERABLE(op) && + PyArray_DESCR(op)->type_num == NPY_INTP && + PyArray_ISALIGNED(op) && PyArray_ISNBO(PyArray_DESCR(op))) { + char *data; + npy_intp stride; + + PyArray_PREPARE_TRIVIAL_ITERATION(op, itersize, data, stride); + + while (itersize--) { + indval = *((npy_intp*)data); + if (check_and_adjust_index(&indval, + outer_dim, outer_axis) < 0 ) { + return -1; + } + data += stride; + } + continue; + } + + /* Use NpyIter if the trivial iteration is not possible */ + op_iter = NpyIter_New(op, NPY_ITER_BUFFERED | NPY_ITER_NBO | NPY_ITER_ALIGNED | NPY_ITER_EXTERNAL_LOOP | NPY_ITER_GROWINNER | NPY_ITER_READONLY, NPY_KEEPORDER, NPY_SAFE_CASTING, intp_type); if (op_iter == NULL) { - /* Should be impossible */ Py_DECREF(intp_type); return -1; } @@ -2175,9 +2276,6 @@ PyArray_MapIterCheckIndices(PyArrayMapIterObject *mit) return -1; } - outer_dim = mit->fancy_dims[i]; - outer_axis = mit->iteraxes[i]; - iterptr = NpyIter_GetDataPtrArray(op_iter); iterstride = NpyIter_GetInnerStrideArray(op_iter); do { diff --git a/numpy/core/src/private/lowlevel_strided_loops.h b/numpy/core/src/private/lowlevel_strided_loops.h index a2cfc8653..c74cd901a 100644 --- a/numpy/core/src/private/lowlevel_strided_loops.h +++ b/numpy/core/src/private/lowlevel_strided_loops.h @@ -327,6 +327,14 @@ PyArray_TransferMaskedStridedToNDim(npy_intp ndim, NpyAuxData *data); NPY_NO_EXPORT int +mapiter_trivial_get(PyArrayObject *self, PyArrayObject *ind, + PyArrayObject *result); + +NPY_NO_EXPORT int +mapiter_trivial_set(PyArrayObject *self, PyArrayObject *ind, + PyArrayObject *result); + +NPY_NO_EXPORT int mapiter_get(PyArrayMapIterObject *mit); NPY_NO_EXPORT int |