diff options
author | Mark Wiebe <mwwiebe@gmail.com> | 2011-07-10 11:45:53 -0500 |
---|---|---|
committer | Charles Harris <charlesr.harris@gmail.com> | 2011-07-11 09:15:32 -0600 |
commit | a99a19437e1fe8547a6a002a4b247875fd9d482b (patch) | |
tree | 8462b06bdf8c5916b70b1b3f7b3d37e7ffc68872 /numpy | |
parent | 0ce8c51ef2f25c54c34c7c47c5ccbf0ce81cecac (diff) | |
download | numpy-a99a19437e1fe8547a6a002a4b247875fd9d482b.tar.gz |
ENH: core: Performance optimization in CopyInto, when both casting and broadcasting
When both casting and broadcasting occur, and the total size of 'src' is
no larger than the default NumPy buffer size, this change tells the iterator
to make a copy of 'src' with the 'dst' dtype, so the cast is done once on the
small copy and most of the remaining operations are simple copies with no cast.
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/core/src/multiarray/ctors.c | 51 | ||||
-rw-r--r-- | numpy/core/tests/test_api.py | 2 |
2 files changed, 45 insertions, 8 deletions
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index 9388c41a7..6100fe7ee 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -2799,7 +2799,9 @@ PyArray_CopyInto(PyArrayObject *dst, PyArrayObject *src) else { PyArrayObject *op[2]; npy_uint32 op_flags[2]; + PyArray_Descr *op_dtypes_values[2], **op_dtypes = NULL; NpyIter *iter; + npy_intp src_size; NpyIter_IterNextFunc *iternext; char **dataptr; @@ -2811,7 +2813,7 @@ PyArray_CopyInto(PyArrayObject *dst, PyArrayObject *src) op[0] = dst; op[1] = src; /* - * TODO: In NumPy 2.0, renable NPY_ITER_NO_BROADCAST. This + * TODO: In NumPy 2.0, reenable NPY_ITER_NO_BROADCAST. This * was removed during NumPy 1.6 testing for compatibility * with NumPy 1.5, as per Travis's -10 veto power. */ @@ -2819,14 +2821,30 @@ PyArray_CopyInto(PyArrayObject *dst, PyArrayObject *src) op_flags[0] = NPY_ITER_WRITEONLY; op_flags[1] = NPY_ITER_READONLY; + /* + * If 'src' is being broadcast to 'dst', and it is smaller + * than the default NumPy buffer size, allow the iterator to + * make a copy of 'src' with the 'dst' dtype if necessary. + * + * This is a performance operation, to allow fewer casts followed + * by more plain copies. 
+ */ + src_size = PyArray_SIZE(src); + if (src_size <= NPY_BUFSIZE && src_size < PyArray_SIZE(dst)) { + op_flags[1] |= NPY_ITER_COPY; + op_dtypes = op_dtypes_values; + op_dtypes_values[0] = NULL; + op_dtypes_values[1] = PyArray_DESCR(dst); + } + iter = NpyIter_MultiNew(2, op, NPY_ITER_EXTERNAL_LOOP| NPY_ITER_REFS_OK| NPY_ITER_ZEROSIZE_OK, NPY_KEEPORDER, - NPY_NO_CASTING, + NPY_UNSAFE_CASTING, op_flags, - NULL); + op_dtypes); if (iter == NULL) { return -1; } @@ -2852,7 +2870,7 @@ PyArray_CopyInto(PyArrayObject *dst, PyArrayObject *src) if (PyArray_GetDTypeTransferFunction( PyArray_ISALIGNED(src) && PyArray_ISALIGNED(dst), stride[1], stride[0], - PyArray_DESCR(src), PyArray_DESCR(dst), + NpyIter_GetDescrArray(iter)[1], PyArray_DESCR(dst), 0, &stransfer, &transferdata, &needs_api) != NPY_SUCCEED) { @@ -2983,7 +3001,9 @@ PyArray_MaskedCopyInto(PyArrayObject *dst, PyArrayObject *src, else { PyArrayObject *op[3]; npy_uint32 op_flags[3]; + PyArray_Descr *op_dtypes_values[3], **op_dtypes = NULL; NpyIter *iter; + npy_intp src_size; NpyIter_IterNextFunc *iternext; char **dataptr; @@ -3005,14 +3025,31 @@ PyArray_MaskedCopyInto(PyArrayObject *dst, PyArrayObject *src, op_flags[1] = NPY_ITER_READONLY; op_flags[2] = NPY_ITER_READONLY; + /* + * If 'src' is being broadcast to 'dst', and it is smaller + * than the default NumPy buffer size, allow the iterator to + * make a copy of 'src' with the 'dst' dtype if necessary. + * + * This is a performance operation, to allow fewer casts followed + * by more plain copies. 
+ */ + src_size = PyArray_SIZE(src); + if (src_size <= NPY_BUFSIZE && src_size < PyArray_SIZE(dst)) { + op_flags[1] |= NPY_ITER_COPY; + op_dtypes = op_dtypes_values; + op_dtypes_values[0] = NULL; + op_dtypes_values[1] = PyArray_DESCR(dst); + op_dtypes_values[2] = NULL; + } + iter = NpyIter_MultiNew(3, op, NPY_ITER_EXTERNAL_LOOP| NPY_ITER_REFS_OK| NPY_ITER_ZEROSIZE_OK, NPY_KEEPORDER, - NPY_NO_CASTING, + NPY_UNSAFE_CASTING, op_flags, - NULL); + op_dtypes); if (iter == NULL) { return -1; } @@ -3038,7 +3075,7 @@ PyArray_MaskedCopyInto(PyArrayObject *dst, PyArrayObject *src, if (PyArray_GetMaskedDTypeTransferFunction( PyArray_ISALIGNED(src) && PyArray_ISALIGNED(dst), stride[1], stride[0], stride[2], - PyArray_DESCR(src), + NpyIter_GetDescrArray(iter)[1], PyArray_DESCR(dst), PyArray_DESCR(mask), 0, diff --git a/numpy/core/tests/test_api.py b/numpy/core/tests/test_api.py index 7ebcb932b..d2d8241f2 100644 --- a/numpy/core/tests/test_api.py +++ b/numpy/core/tests/test_api.py @@ -110,7 +110,7 @@ def test_copyto(): assert_raises(TypeError, np.copyto, a, 3.5, where=[True,False,True]) # Lists of integer 0's and 1's is ok too - np.copyto(a, 4, where=[[0,1,1], [1,0,0]]) + np.copyto(a, 4.0, casting='unsafe', where=[[0,1,1], [1,0,0]]) assert_equal(a, [[3,4,4], [4,1,3]]) # Overlapping copy with mask should work |