| author | Mark Wiebe <mwiebe@enthought.com> | 2011-07-29 11:55:00 -0500 |
|---|---|---|
| committer | Charles Harris <charlesr.harris@gmail.com> | 2011-08-27 07:26:48 -0600 |
| commit | d0d73c2544400acdd7296ce6ba0d0a5f8e534ed7 (patch) | |
| tree | 55634c8527d7b63a3386adbe7a04fd0703da5a93 /numpy | |
| parent | 4839dfa7bccdc89d6b8950ad702c23e48e19a3ba (diff) | |
| download | numpy-d0d73c2544400acdd7296ce6ba0d0a5f8e534ed7.tar.gz | |
ENH: missingdata: Write NA mask support into PyArray_CopyInto
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/core/src/multiarray/conversion_utils.c | 4 |
-rw-r--r-- | numpy/core/src/multiarray/ctors.c | 317 |
2 files changed, 313 insertions, 8 deletions
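
The change below makes PyArray_CopyInto aware of NA masks: a masked source may be copied into an unmasked destination only when it contains no actual NA values, and a masked destination gets its mask filled in (all-exposed when the source is unmasked, copied from the source mask otherwise). As a minimal sketch of how a caller might exercise this, assuming the missingdata-branch C API (PyArray_HASMASKNA, PyArray_AllocateMaskNA, and PyArray_CopyInto are taken from the diff below; this API never shipped in a released NumPy, so treat it as illustrative only):

    #include <Python.h>
    #include <numpy/arrayobject.h>

    /*
     * Sketch: copy 'src' (which may carry an NA mask) into 'dst',
     * giving 'dst' an NA mask first if 'src' has one. Mirrors the
     * logic this commit adds to PyArray_MoveInto/PyArray_CopyInto;
     * missingdata branch only, not buildable against modern NumPy.
     */
    static int
    copy_preserving_na(PyArrayObject *dst, PyArrayObject *src)
    {
        if (PyArray_HASMASKNA(src) && !PyArray_HASMASKNA(dst)) {
            /*
             * Give dst an NA mask so NAs can be represented; the
             * (1, 0) arguments follow the PyArray_AllocateMaskNA(tmp, 1, 0)
             * call in the PyArray_MoveInto hunk below.
             */
            if (PyArray_AllocateMaskNA(dst, 1, 0) < 0) {
                return -1;
            }
        }
        /*
         * With matching mask support, the masked->masked path copies
         * both the data and the NA mask. Without the allocation above,
         * this call would fail with "Cannot assign NA value to an array
         * which does not support NAs" whenever src actually contains an NA.
         */
        return PyArray_CopyInto(dst, src);
    }
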
diff --git a/numpy/core/src/multiarray/conversion_utils.c b/numpy/core/src/multiarray/conversion_utils.c
index 74d7e1192..461bd565f 100644
--- a/numpy/core/src/multiarray/conversion_utils.c
+++ b/numpy/core/src/multiarray/conversion_utils.c
@@ -405,7 +405,7 @@ PyArray_PyIntAsInt(PyObject *o)
         goto finish;
     }
 #endif
-    if (Py_TYPE(o)->tp_as_number != NULL && \
+    if (Py_TYPE(o)->tp_as_number != NULL &&
         Py_TYPE(o)->tp_as_number->nb_int != NULL) {
         obj = Py_TYPE(o)->tp_as_number->nb_int(o);
         if (obj == NULL) {
@@ -415,7 +415,7 @@ PyArray_PyIntAsInt(PyObject *o)
         Py_DECREF(obj);
     }
 #if !defined(NPY_PY3K)
-    else if (Py_TYPE(o)->tp_as_number != NULL && \
+    else if (Py_TYPE(o)->tp_as_number != NULL &&
         Py_TYPE(o)->tp_as_number->nb_long != NULL) {
         obj = Py_TYPE(o)->tp_as_number->nb_long(o);
         if (obj == NULL) {
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index 47b021e08..cd9d6a612 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -478,8 +478,11 @@ PyArray_MoveInto(PyArrayObject *dst, PyArrayObject *src)
      * it for most cases. It may still incorrectly handle copying of
      * partially-overlapping data elements, where the data pointer was offset
      * by a fraction of the element size.
+     *
+     * For NA masked arrays, we always use the overlapping check and
+     * copy to handle this.
      */
-    if ((PyArray_NDIM(dst) == 1 &&
+    if ((!PyArray_HASMASKNA(dst) && PyArray_NDIM(dst) == 1 &&
             PyArray_NDIM(src) == 1 &&
             PyArray_STRIDE(dst, 0) > 0 &&
             PyArray_STRIDE(src, 0) > 0) ||
@@ -498,6 +501,15 @@ PyArray_MoveInto(PyArrayObject *dst, PyArrayObject *src)
         if (tmp == NULL) {
             return -1;
         }
+
+        /* Make the temporary copy have an NA mask if necessary */
+        if (PyArray_HASMASKNA(src)) {
+            if (PyArray_AllocateMaskNA(tmp, 1, 0) < 0) {
+                Py_DECREF(tmp);
+                return -1;
+            }
+        }
+
         ret = PyArray_CopyInto(tmp, src);
         if (ret == 0) {
             ret = PyArray_CopyInto(dst, tmp);
@@ -2984,8 +2996,7 @@ PyArray_CopyAnyInto(PyArrayObject *dst, PyArrayObject *src)
 NPY_NO_EXPORT int
 PyArray_CopyInto(PyArrayObject *dst, PyArrayObject *src)
 {
-    PyArray_StridedTransferFn *stransfer = NULL;
-    NpyAuxData *transferdata = NULL;
+    int src_has_maskna, dst_has_maskna;
     NPY_BEGIN_THREADS_DEF;
 
     if (!PyArray_ISWRITEABLE(dst)) {
@@ -2994,8 +3005,27 @@ PyArray_CopyInto(PyArrayObject *dst, PyArrayObject *src)
         return -1;
     }
 
-    if (PyArray_NDIM(dst) >= PyArray_NDIM(src) &&
+    src_has_maskna = PyArray_HASMASKNA(src);
+    dst_has_maskna = PyArray_HASMASKNA(dst);
+    /* Can't copy an NA to an array which doesn't support it */
+    if (src_has_maskna && !dst_has_maskna) {
+        if (PyArray_ContainsNA(src)) {
+            PyErr_SetString(PyExc_ValueError,
+                    "Cannot assign NA value to an array which "
+                    "does not support NAs");
+            return -1;
+        }
+        /* If there are no actual NAs, allow the copy */
+        else {
+            src_has_maskna = 0;
+        }
+    }
+
+    /* Special case for simple strides and no NA mask */
+    if (!dst_has_maskna && PyArray_NDIM(dst) >= PyArray_NDIM(src) &&
                     PyArray_TRIVIALLY_ITERABLE_PAIR(dst, src)) {
+        PyArray_StridedTransferFn *stransfer = NULL;
+        NpyAuxData *transferdata = NULL;
         char *dst_data, *src_data;
         npy_intp count, dst_stride, src_stride, src_itemsize;
@@ -3044,7 +3074,10 @@ PyArray_CopyInto(PyArrayObject *dst, PyArrayObject *src)
 
         return PyErr_Occurred() ? -1 : 0;
     }
-    else {
+    /* Copying unmasked into unmasked */
+    else if (!dst_has_maskna) {
+        PyArray_StridedTransferFn *stransfer = NULL;
+        NpyAuxData *transferdata = NULL;
         PyArrayObject *op[2];
         npy_uint32 op_flags[2];
         PyArray_Descr *op_dtypes_values[2], **op_dtypes = NULL;
@@ -3067,7 +3100,11 @@ PyArray_CopyInto(PyArrayObject *dst, PyArrayObject *src)
          */
         /*op_flags[0] = NPY_ITER_WRITEONLY|NPY_ITER_NO_BROADCAST;*/
         op_flags[0] = NPY_ITER_WRITEONLY;
-        op_flags[1] = NPY_ITER_READONLY;
+        /*
+         * If src has an NA mask, it was already confirmed to
+         * contain no NA values, so ignoring the NA mask is fine.
+         */
+        op_flags[1] = NPY_ITER_READONLY | NPY_ITER_IGNORE_MASKNA;
 
         /*
          * If 'src' is being broadcast to 'dst', and it is smaller
@@ -3148,6 +3185,274 @@ PyArray_CopyInto(PyArrayObject *dst, PyArrayObject *src)
 
         return PyErr_Occurred() ? -1 : 0;
     }
+    /* Copying non NA-masked into NA-masked */
+    else if (!src_has_maskna) {
+        PyArray_StridedTransferFn *stransfer = NULL;
+        NpyAuxData *transferdata = NULL;
+        PyArrayObject *op[2];
+        npy_uint32 op_flags[2];
+        PyArray_Descr *op_dtypes_values[2], **op_dtypes = NULL;
+        NpyIter *iter;
+        npy_intp src_size;
+
+        NpyIter_IterNextFunc *iternext;
+        char **dataptr;
+        npy_intp *stride;
+        npy_intp *countptr;
+        npy_intp src_itemsize;
+        int needs_api;
+
+        op[0] = dst;
+        op[1] = src;
+        /*
+         * TODO: In NumPy 2.0, reenable NPY_ITER_NO_BROADCAST. This
+         * was removed during NumPy 1.6 testing for compatibility
+         * with NumPy 1.5, as per Travis's -10 veto power.
+         */
+        /*op_flags[0] = NPY_ITER_WRITEONLY|NPY_ITER_NO_BROADCAST|NPY_ITER_USE_MASKNA;*/
+        op_flags[0] = NPY_ITER_WRITEONLY | NPY_ITER_USE_MASKNA;
+        op_flags[1] = NPY_ITER_READONLY;
+
+        /*
+         * If 'src' is being broadcast to 'dst', and it is smaller
+         * than the default NumPy buffer size, allow the iterator to
+         * make a copy of 'src' with the 'dst' dtype if necessary.
+         *
+         * This is a performance operation, to allow fewer casts followed
+         * by more plain copies.
+         */
+        src_size = PyArray_SIZE(src);
+        if (src_size <= NPY_BUFSIZE && src_size < PyArray_SIZE(dst)) {
+            op_flags[1] |= NPY_ITER_COPY;
+            op_dtypes = op_dtypes_values;
+            op_dtypes_values[0] = NULL;
+            op_dtypes_values[1] = PyArray_DESCR(dst);
+        }
+
+        iter = NpyIter_MultiNew(2, op,
+                        NPY_ITER_EXTERNAL_LOOP|
+                        NPY_ITER_REFS_OK|
+                        NPY_ITER_ZEROSIZE_OK,
+                        NPY_KEEPORDER,
+                        NPY_UNSAFE_CASTING,
+                        op_flags,
+                        op_dtypes);
+        if (iter == NULL) {
+            return -1;
+        }
+
+        iternext = NpyIter_GetIterNext(iter, NULL);
+        if (iternext == NULL) {
+            NpyIter_Deallocate(iter);
+            return -1;
+        }
+        dataptr = NpyIter_GetDataPtrArray(iter);
+        stride = NpyIter_GetInnerStrideArray(iter);
+        countptr = NpyIter_GetInnerLoopSizePtr(iter);
+        src_itemsize = PyArray_DESCR(src)->elsize;
+
+        needs_api = NpyIter_IterationNeedsAPI(iter);
+
+        /*
+         * Because buffering is disabled in the iterator, the inner loop
+         * strides will be the same throughout the iteration loop. Thus,
+         * we can pass them to this function to take advantage of
+         * contiguous strides, etc.
+         */
+        if (PyArray_GetDTypeTransferFunction(
+                        PyArray_ISALIGNED(src) && PyArray_ISALIGNED(dst),
+                        stride[1], stride[0],
+                        NpyIter_GetDescrArray(iter)[1], PyArray_DESCR(dst),
+                        0,
+                        &stransfer, &transferdata,
+                        &needs_api) != NPY_SUCCEED) {
+            NpyIter_Deallocate(iter);
+            return -1;
+        }
+
+
+        if (NpyIter_GetIterSize(iter) != 0) {
+            /* Because buffering is disabled, this stride will be fixed */
+            npy_intp maskna_stride = stride[2];
+            if (!needs_api) {
+                NPY_BEGIN_THREADS;
+            }
+
+            /* Specialize for contiguous mask stride */
+            if (maskna_stride == 1) {
+                do {
+                    char *maskna_ptr = dataptr[2];
+                    npy_intp count = *countptr;
+
+                    stransfer(dataptr[0], stride[0],
+                                dataptr[1], stride[1],
+                                count, src_itemsize, transferdata);
+                    memset(maskna_ptr, 1, count);
+                } while(iternext(iter));
+            }
+            else {
+                do {
+                    char *maskna_ptr = dataptr[2];
+                    npy_intp count = *countptr;
+
+                    stransfer(dataptr[0], stride[0],
+                                dataptr[1], stride[1],
+                                count, src_itemsize, transferdata);
+                    while (count-- != 0) {
+                        *maskna_ptr = 1;
+                        maskna_ptr += maskna_stride;
+                    }
+                } while(iternext(iter));
+            }
+
+            if (!needs_api) {
+                NPY_END_THREADS;
+            }
+        }
+
+        NPY_AUXDATA_FREE(transferdata);
+        NpyIter_Deallocate(iter);
+
+        return PyErr_Occurred() ? -1 : 0;
+    }
+    /* Copying NA-masked into NA-masked */
+    else {
+        PyArray_MaskedStridedTransferFn *stransfer = NULL;
+        NpyAuxData *transferdata = NULL;
+        PyArrayObject *op[2];
+        npy_uint32 op_flags[2];
+        PyArray_Descr *op_dtypes_values[2], **op_dtypes = NULL;
+        NpyIter *iter;
+        npy_intp src_size;
+
+        NpyIter_IterNextFunc *iternext;
+        char **dataptr;
+        npy_intp *stride;
+        npy_intp *countptr;
+        npy_intp src_itemsize;
+        int needs_api;
+
+        op[0] = dst;
+        op[1] = src;
+        /*
+         * TODO: In NumPy 2.0, reenable NPY_ITER_NO_BROADCAST. This
+         * was removed during NumPy 1.6 testing for compatibility
+         * with NumPy 1.5, as per Travis's -10 veto power.
+         */
+        /*op_flags[0] = NPY_ITER_WRITEONLY|NPY_ITER_NO_BROADCAST|NPY_ITER_USE_MASKNA;*/
+        op_flags[0] = NPY_ITER_WRITEONLY | NPY_ITER_USE_MASKNA;
+        op_flags[1] = NPY_ITER_READONLY | NPY_ITER_USE_MASKNA;
+
+        /*
+         * If 'src' is being broadcast to 'dst', and it is smaller
+         * than the default NumPy buffer size, allow the iterator to
+         * make a copy of 'src' with the 'dst' dtype if necessary.
+         *
+         * This is a performance operation, to allow fewer casts followed
+         * by more plain copies.
+         */
+        src_size = PyArray_SIZE(src);
+        if (src_size <= NPY_BUFSIZE && src_size < PyArray_SIZE(dst)) {
+            op_flags[1] |= NPY_ITER_COPY;
+            op_dtypes = op_dtypes_values;
+            op_dtypes_values[0] = NULL;
+            op_dtypes_values[1] = PyArray_DESCR(dst);
+        }
+
+        iter = NpyIter_MultiNew(2, op,
+                        NPY_ITER_EXTERNAL_LOOP|
+                        NPY_ITER_REFS_OK|
+                        NPY_ITER_ZEROSIZE_OK,
+                        NPY_KEEPORDER,
+                        NPY_UNSAFE_CASTING,
+                        op_flags,
+                        op_dtypes);
+        if (iter == NULL) {
+            return -1;
+        }
+
+        iternext = NpyIter_GetIterNext(iter, NULL);
+        if (iternext == NULL) {
+            NpyIter_Deallocate(iter);
+            return -1;
+        }
+        dataptr = NpyIter_GetDataPtrArray(iter);
+        stride = NpyIter_GetInnerStrideArray(iter);
+        countptr = NpyIter_GetInnerLoopSizePtr(iter);
+        src_itemsize = PyArray_DESCR(src)->elsize;
+
+        needs_api = NpyIter_IterationNeedsAPI(iter);
+
+        /*
+         * Because buffering is disabled in the iterator, the inner loop
+         * strides will be the same throughout the iteration loop. Thus,
+         * we can pass them to this function to take advantage of
+         * contiguous strides, etc.
+         */
+        if (PyArray_GetMaskedDTypeTransferFunction(
+                        PyArray_ISALIGNED(src) && PyArray_ISALIGNED(dst),
+                        stride[1], stride[0], stride[3],
+                        NpyIter_GetDescrArray(iter)[1],
+                        PyArray_DESCR(dst),
+                        PyArray_MASKNA_DTYPE(src),
+                        0,
+                        &stransfer, &transferdata,
+                        &needs_api) != NPY_SUCCEED) {
+            NpyIter_Deallocate(iter);
+            return -1;
+        }
+
+
+        if (NpyIter_GetIterSize(iter) != 0) {
+            /* Because buffering is disabled, this stride will be fixed */
+            npy_intp dst_maskna_stride = stride[2];
+            npy_intp src_maskna_stride = stride[3];
+            if (!needs_api) {
+                NPY_BEGIN_THREADS;
+            }
+
+            /* Specialize for contiguous mask stride */
+            if (src_maskna_stride == 1 && dst_maskna_stride == 1) {
+                do {
+                    char *dst_maskna_ptr = dataptr[2];
+                    char *src_maskna_ptr = dataptr[3];
+                    npy_intp count = *countptr;
+
+                    stransfer(dataptr[0], stride[0],
+                                dataptr[1], stride[1],
+                                (npy_mask *)src_maskna_ptr, src_maskna_stride,
+                                count, src_itemsize, transferdata);
+                    memcpy(dst_maskna_ptr, src_maskna_ptr, count);
+                } while(iternext(iter));
+            }
+            else {
+                do {
+                    char *dst_maskna_ptr = dataptr[2];
+                    char *src_maskna_ptr = dataptr[3];
+                    npy_intp count = *countptr;
+
+                    stransfer(dataptr[0], stride[0],
+                                dataptr[1], stride[1],
+                                (npy_mask *)src_maskna_ptr, src_maskna_stride,
+                                count, src_itemsize, transferdata);
+                    while (count-- != 0) {
+                        *dst_maskna_ptr = *src_maskna_ptr;
+                        src_maskna_ptr += src_maskna_stride;
+                        dst_maskna_ptr += dst_maskna_stride;
+                    }
+                } while(iternext(iter));
+            }
+
+            if (!needs_api) {
+                NPY_END_THREADS;
+            }
+        }
+
+        NPY_AUXDATA_FREE(transferdata);
+        NpyIter_Deallocate(iter);
+
+        return PyErr_Occurred() ? -1 : 0;
+    }
 }
 
 /*NUMPY_API
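
For reference, the masked transfer functions obtained from PyArray_GetMaskedDTypeTransferFunction follow the call signature visible in the masked-to-masked loop above. Conceptually they behave like the sketch below (a hypothetical body for illustration; the real functions are generated and specialized per dtype, and the assumption that a mask byte value of 1 means "exposed, not NA" is inferred from the memset(maskna_ptr, 1, count) in the unmasked-to-masked path):

    #include <string.h>
    #include <numpy/arrayobject.h>

    /*
     * Illustrative stand-in for a PyArray_MaskedStridedTransferFn:
     * copy only the elements whose mask byte marks them as exposed
     * (non-NA), leaving masked-out destination elements untouched.
     */
    static void
    masked_copy_sketch(char *dst, npy_intp dst_stride,
                       char *src, npy_intp src_stride,
                       npy_mask *mask, npy_intp mask_stride,
                       npy_intp count, npy_intp itemsize,
                       NpyAuxData *transferdata)
    {
        (void)transferdata;  /* unused in this trivial sketch */
        while (count-- > 0) {
            /* Low bit set => value is exposed, i.e. not NA (assumption) */
            if (*mask & 0x01) {
                memcpy(dst, src, itemsize);
            }
            dst += dst_stride;
            src += src_stride;
            mask = (npy_mask *)((char *)mask + mask_stride);
        }
    }

This division of labor also explains why the masked-to-masked loop in the diff propagates the mask separately (via memcpy for contiguous masks, or the strided byte loop otherwise): the transfer function moves the data elements under mask control, while the mask bytes themselves are copied by the caller.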