diff options
author | Mark Wiebe <mwwiebe@gmail.com> | 2011-08-16 16:54:16 -0700 |
---|---|---|
committer | Charles Harris <charlesr.harris@gmail.com> | 2011-08-27 07:26:55 -0600 |
commit | 9194b3af704df71aa9b1ff2f53f169848d0f9dc7 (patch) | |
tree | 1e6624032156dfa190e1adba79cc76e5700f19ce | |
parent | 99a21efff4b1f2292dc370c7c9c7c58f10385f2a (diff) | |
download | numpy-9194b3af704df71aa9b1ff2f53f169848d0f9dc7.tar.gz |
ENH: missingdata: Rewrite PyArray_Concatenate to work with NA masks
It should also have less memory usage for heterogeneous inputs,
because it no longer makes extra copies in that case.
-rw-r--r-- | doc/release/2.0.0-notes.rst | 3 | ||||
-rw-r--r-- | doc/source/reference/c-api.array.rst | 26 | ||||
-rw-r--r-- | numpy/add_newdocs.py | 7 | ||||
-rw-r--r-- | numpy/core/shape_base.py | 7 | ||||
-rw-r--r-- | numpy/core/src/multiarray/convert_datatype.c | 8 | ||||
-rw-r--r-- | numpy/core/src/multiarray/ctors.c | 1 | ||||
-rw-r--r-- | numpy/core/src/multiarray/dtype_transfer.c | 8 | ||||
-rw-r--r-- | numpy/core/src/multiarray/multiarraymodule.c | 482 | ||||
-rw-r--r-- | numpy/core/src/multiarray/na_mask.c | 3 | ||||
-rw-r--r-- | numpy/core/src/multiarray/nditer_constr.c | 2 | ||||
-rw-r--r-- | numpy/core/src/multiarray/shape.c | 123 | ||||
-rw-r--r-- | numpy/core/src/multiarray/shape.h | 14 | ||||
-rw-r--r-- | numpy/core/src/umath/ufunc_object.c | 7 | ||||
-rw-r--r-- | numpy/core/tests/test_maskna.py | 23 | ||||
-rw-r--r-- | numpy/core/tests/test_shape_base.py | 2 |
15 files changed, 528 insertions, 188 deletions
diff --git a/doc/release/2.0.0-notes.rst b/doc/release/2.0.0-notes.rst index d5aa99639..ddedf85de 100644 --- a/doc/release/2.0.0-notes.rst +++ b/doc/release/2.0.0-notes.rst @@ -29,9 +29,12 @@ What works with NA: * Array methods: + ndarray.clip, ndarray.min, ndarray.max, ndarray.sum, ndarray.prod, ndarray.conjugate, ndarray.diagonal + + numpy.concatenate What doesn't work with NA: * Fancy indexing, such as with lists and partial boolean masks. + * ndarray.flat and any other methods that use the old iterator + mechanism instead of the newer nditer. * UFunc.reduce of multi-dimensional arrays, with skipna=True and a ufunc that doesn't have an identity. * UFunc.accumulate, UFunc.reduceat. diff --git a/doc/source/reference/c-api.array.rst b/doc/source/reference/c-api.array.rst index 498b2a448..369360407 100644 --- a/doc/source/reference/c-api.array.rst +++ b/doc/source/reference/c-api.array.rst @@ -1667,18 +1667,20 @@ Conversion copied into every location. A -1 is returned if an error occurs, otherwise 0 is returned. -.. cfunction:: PyObject* PyArray_View(PyArrayObject* self, PyArray_Descr* dtype) - - Equivalent to :meth:`ndarray.view` (*self*, *dtype*). Return a new view of - the array *self* as possibly a different data-type, *dtype*. If - *dtype* is ``NULL``, then the returned array will have the same - data type as *self*. The new data-type must be consistent with - the size of *self*. Either the itemsizes must be identical, or - *self* must be single-segment and the total number of bytes must - be the same. In the latter case the dimensions of the returned - array will be altered in the last (or first for Fortran-style - contiguous arrays) dimension. The data area of the returned array - and self is exactly the same. +.. cfunction:: PyObject* PyArray_View(PyArrayObject* self, PyArray_Descr* dtype, PyTypeObject *ptype) + + Equivalent to :meth:`ndarray.view` (*self*, *dtype*). Return a new + view of the array *self* as possibly a different data-type, *dtype*, + and different array subclass *ptype*. + + If *dtype* is ``NULL``, then the returned array will have the same + data type as *self*. The new data-type must be consistent with the + size of *self*. Either the itemsizes must be identical, or *self* must + be single-segment and the total number of bytes must be the same. + In the latter case the dimensions of the returned array will be + altered in the last (or first for Fortran-style contiguous arrays) + dimension. The data area of the returned array and self is exactly + the same. Shape Manipulation diff --git a/numpy/add_newdocs.py b/numpy/add_newdocs.py index d992a7122..82f59c6b0 100644 --- a/numpy/add_newdocs.py +++ b/numpy/add_newdocs.py @@ -3702,7 +3702,7 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('put', add_newdoc('numpy.core.multiarray', 'copyto', """ - copyto(dst, src, casting='same_kind', where=None) + copyto(dst, src, casting='same_kind', where=None, preservena=False) Copies values from `src` into `dst`, broadcasting as necessary. Raises a TypeError if the casting rule is violated, and if @@ -3725,10 +3725,13 @@ add_newdoc('numpy.core.multiarray', 'copyto', * 'same_kind' means only safe casts or casts within a kind, like float64 to float32, are allowed. * 'unsafe' means any data conversions may be done. - where : array_like of bool + where : array_like of bool, optional A boolean array which is broadcasted to match the dimensions of `dst`, and selects elements to copy from `src` to `dst` wherever it contains the value True. + preservena : bool, optional + If set to True, leaves any NA values in `dst` untouched. This + is similar to the "hard mask" feature in numpy.ma. """) diff --git a/numpy/core/shape_base.py b/numpy/core/shape_base.py index 93de19299..8a4c80e27 100644 --- a/numpy/core/shape_base.py +++ b/numpy/core/shape_base.py @@ -267,5 +267,10 @@ def hstack(tup): [3, 4]]) """ - return _nx.concatenate(map(atleast_1d,tup),1) + arrs = map(atleast_1d,tup) + # As a special case, dimension 0 of 1-dimensional arrays is "horizontal" + if arrs[0].ndim == 1: + return _nx.concatenate(arrs, 0) + else: + return _nx.concatenate(arrs, 1) diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c index 0d72807ae..19b01cce9 100644 --- a/numpy/core/src/multiarray/convert_datatype.c +++ b/numpy/core/src/multiarray/convert_datatype.c @@ -1331,7 +1331,13 @@ NPY_NO_EXPORT PyArray_Descr * PyArray_MinScalarType(PyArrayObject *arr) { PyArray_Descr *dtype = PyArray_DESCR(arr); - if (PyArray_NDIM(arr) > 0 || !PyTypeNum_ISNUMBER(dtype->type_num)) { + /* + * If the array isn't a numeric scalar or is a scalar but with + * its value masked out, just return the array's dtype. + */ + if (PyArray_NDIM(arr) > 0 || !PyTypeNum_ISNUMBER(dtype->type_num) || + (PyArray_HASMASKNA(arr) && !NpyMaskValue_IsExposed( + (npy_mask)*PyArray_MASKNA_DATA(arr)))) { Py_INCREF(dtype); return dtype; } diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index 48d81d75b..bcd33afd8 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -1175,6 +1175,7 @@ PyArray_NewLikeArray(PyArrayObject *prototype, NPY_ORDER order, int idim; PyArray_CreateSortedStridePerm(PyArray_NDIM(prototype), + PyArray_SHAPE(prototype), PyArray_STRIDES(prototype), strideperm); diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c index 39a8ba1bc..943859ae5 100644 --- a/numpy/core/src/multiarray/dtype_transfer.c +++ b/numpy/core/src/multiarray/dtype_transfer.c @@ -3922,7 +3922,7 @@ PyArray_PrepareOneRawArrayIter(int ndim, npy_intp *shape, } /* Sort the axes based on the destination strides */ - PyArray_CreateSortedStridePerm(ndim, strides, strideperm); + PyArray_CreateSortedStridePerm(ndim, shape, strides, strideperm); for (i = 0; i < ndim; ++i) { int iperm = strideperm[ndim - i - 1].perm; out_shape[i] = shape[iperm]; @@ -4052,7 +4052,7 @@ PyArray_PrepareTwoRawArrayIter(int ndim, npy_intp *shape, } /* Sort the axes based on the destination strides */ - PyArray_CreateSortedStridePerm(ndim, stridesA, strideperm); + PyArray_CreateSortedStridePerm(ndim, shape, stridesA, strideperm); for (i = 0; i < ndim; ++i) { int iperm = strideperm[ndim - i - 1].perm; out_shape[i] = shape[iperm]; @@ -4186,7 +4186,7 @@ PyArray_PrepareThreeRawArrayIter(int ndim, npy_intp *shape, } /* Sort the axes based on the destination strides */ - PyArray_CreateSortedStridePerm(ndim, stridesA, strideperm); + PyArray_CreateSortedStridePerm(ndim, shape, stridesA, strideperm); for (i = 0; i < ndim; ++i) { int iperm = strideperm[ndim - i - 1].perm; out_shape[i] = shape[iperm]; @@ -4324,7 +4324,7 @@ PyArray_PrepareFourRawArrayIter(int ndim, npy_intp *shape, } /* Sort the axes based on the destination strides */ - PyArray_CreateSortedStridePerm(ndim, stridesA, strideperm); + PyArray_CreateSortedStridePerm(ndim, shape, stridesA, strideperm); for (i = 0; i < ndim; ++i) { int iperm = strideperm[ndim - i - 1].perm; out_shape[i] = shape[iperm]; diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index b4c212871..d23ecef83 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -48,6 +48,8 @@ NPY_NO_EXPORT int NPY_NUMUSERTYPES = 0; #include "datetime_strings.h" #include "datetime_busday.h" #include "datetime_busdaycal.h" +#include "shape.h" +#include "ctors.h" #include "na_singleton.h" #include "na_mask.h" @@ -61,7 +63,7 @@ NPY_NO_EXPORT double PyArray_GetPriority(PyObject *obj, double default_) { PyObject *ret; - double priority = PyArray_PRIORITY; + double priority = NPY_PRIORITY; if (PyArray_CheckExact(obj)) return priority; @@ -202,7 +204,7 @@ PyArray_AsCArray(PyObject **op, void *ptr, npy_intp *dims, int nd, break; case 2: n = PyArray_DIMS(ap)[0]; - ptr2 = (char **)_pya_malloc(n * sizeof(char *)); + ptr2 = (char **)PyArray_malloc(n * sizeof(char *)); if (!ptr2) { goto fail; } @@ -214,7 +216,7 @@ PyArray_AsCArray(PyObject **op, void *ptr, npy_intp *dims, int nd, case 3: n = PyArray_DIMS(ap)[0]; m = PyArray_DIMS(ap)[1]; - ptr3 = (char ***)_pya_malloc(n*(m+1) * sizeof(char *)); + ptr3 = (char ***)PyArray_malloc(n*(m+1) * sizeof(char *)); if (!ptr3) { goto fail; } @@ -294,177 +296,354 @@ PyArray_Free(PyObject *op, void *ptr) return -1; } if (PyArray_NDIM(ap) >= 2) { - _pya_free(ptr); + PyArray_free(ptr); } Py_DECREF(ap); return 0; } -static PyObject * -_swap_and_concat(PyObject *op, int axis, int n) +/* + * Concatenates a list of ndarrays. + */ +NPY_NO_EXPORT PyArrayObject * +PyArray_ConcatenateArrays(int narrays, PyArrayObject **arrays, int axis) { - PyObject *newtup = NULL; - PyObject *otmp, *arr; - int i; + PyTypeObject *subtype = &PyArray_Type; + double priority = NPY_PRIORITY; + int iarrays, idim, ndim; + npy_intp shape[NPY_MAXDIMS], s, strides[NPY_MAXDIMS]; + int strideperm[NPY_MAXDIMS]; + PyArray_Descr *dtype = NULL; + PyArrayObject *ret = NULL; + PyArrayObject_fieldaccess *sliding_view = NULL; + int has_maskna; - newtup = PyTuple_New(n); - if (newtup == NULL) { + if (narrays <= 0) { + PyErr_SetString(PyExc_ValueError, + "need at least one array to concatenate"); return NULL; } - for (i = 0; i < n; i++) { - otmp = PySequence_GetItem(op, i); - arr = PyArray_FROM_O(otmp); - Py_DECREF(otmp); - if (arr == NULL) { - goto fail; + + /* All the arrays must have the same 'ndim' */ + ndim = PyArray_NDIM(arrays[0]); + + if (ndim == 0) { + PyErr_SetString(PyExc_ValueError, + "zero-dimensional arrays cannot be concatenated"); + return NULL; + } + + /* Handle standard Python negative indexing */ + if (axis < 0) { + axis += narrays; + } + if (axis < 0 || axis >= ndim) { + PyErr_SetString(PyExc_IndexError, + "axis out of bounds"); + return NULL; + } + + /* + * Figure out the final concatenated shape starting from the first + * array's shape. Also check whether any of the inputs have an + * NA mask. + */ + memcpy(shape, PyArray_SHAPE(arrays[0]), ndim * sizeof(shape[0])); + has_maskna = PyArray_HASMASKNA(arrays[0]); + for (iarrays = 1; iarrays < narrays; ++iarrays) { + npy_intp *arr_shape; + + if (PyArray_NDIM(arrays[iarrays]) != ndim) { + PyErr_SetString(PyExc_ValueError, + "all the input arrays must have same " + "number of dimensions"); + return NULL; } - otmp = PyArray_SwapAxes((PyArrayObject *)arr, axis, 0); - Py_DECREF(arr); - if (otmp == NULL) { - goto fail; + arr_shape = PyArray_SHAPE(arrays[iarrays]); + + for (idim = 0; idim < ndim; ++idim) { + /* Build up the size of the concatenation axis */ + if (idim == axis) { + shape[idim] += arr_shape[idim]; + } + /* Validate that the rest of the dimensions match */ + else if (shape[idim] != arr_shape[idim]) { + PyErr_SetString(PyExc_ValueError, + "all the input array dimensions " + "except for the concatenation axis " + "must match exactly"); + return NULL; + } + } + + has_maskna = has_maskna || PyArray_HASMASKNA(arrays[iarrays]); + } + + /* Get the priority subtype for the array */ + for (iarrays = 0; iarrays < narrays; ++iarrays) { + if (Py_TYPE(arrays[iarrays]) != subtype) { + double pr = PyArray_GetPriority((PyObject *)(arrays[iarrays]), 0.0); + if (pr > priority) { + priority = pr; + subtype = Py_TYPE(arrays[iarrays]); + } } - PyTuple_SET_ITEM(newtup, i, otmp); } - otmp = PyArray_Concatenate(newtup, 0); - Py_DECREF(newtup); - if (otmp == NULL) { + + /* Get the resulting dtype from combining all the arrays */ + dtype = PyArray_ResultType(narrays, arrays, 0, NULL); + if (dtype == NULL) { return NULL; } - arr = PyArray_SwapAxes((PyArrayObject *)otmp, axis, 0); - Py_DECREF(otmp); - return arr; - fail: - Py_DECREF(newtup); - return NULL; + /* + * Figure out the permutation to apply to the strides to match + * the memory layout of the input arrays, using ambiguity + * resolution rules matching that of the NpyIter. + */ + PyArray_CreateMultiSortedStridePerm(narrays, arrays, ndim, strideperm); + s = dtype->elsize; + for (idim = ndim-1; idim >= 0; --idim) { + int iperm = strideperm[idim]; + strides[iperm] = s; + s *= shape[iperm]; + } + + /* Allocate the array for the result. This steals the 'dtype' reference. */ + ret = (PyArrayObject *)PyArray_NewFromDescr(subtype, + dtype, + ndim, + shape, + strides, + NULL, + 0, + NULL); + if (ret == NULL) { + return NULL; + } + + /* Add an NA mask if required */ + if (has_maskna) { + if (PyArray_AllocateMaskNA(ret, 1, 0, 1) < 0) { + Py_DECREF(ret); + return NULL; + } + } + + /* + * Create a view which slides through ret for assigning the + * successive input arrays. + */ + sliding_view = (PyArrayObject_fieldaccess *)PyArray_View(ret, + NULL, &PyArray_Type); + if (sliding_view == NULL) { + Py_DECREF(ret); + return NULL; + } + for (iarrays = 0; iarrays < narrays; ++iarrays) { + /* Set the dimension to match the input array's */ + sliding_view->dimensions[axis] = PyArray_SHAPE(arrays[iarrays])[axis]; + + /* Copy the data for this array */ + if (PyArray_AssignArray((PyArrayObject *)sliding_view, arrays[iarrays], + NULL, NPY_SAME_KIND_CASTING, 0, NULL) < 0) { + Py_DECREF(sliding_view); + Py_DECREF(ret); + return NULL; + } + + /* Slide to the start of the next window */ + sliding_view->data += sliding_view->dimensions[axis] * + sliding_view->strides[axis]; + if (has_maskna) { + sliding_view->maskna_data += sliding_view->dimensions[axis] * + sliding_view->maskna_strides[axis]; + } + } + + Py_DECREF(sliding_view); + return ret; +} + +/* + * Concatenates a list of ndarrays, flattening each in C-order. + */ +NPY_NO_EXPORT PyArrayObject * +PyArray_ConcatenateFlattenedArrays(int narrays, PyArrayObject **arrays, + NPY_ORDER order) +{ + PyTypeObject *subtype = &PyArray_Type; + double priority = NPY_PRIORITY; + int iarrays; + npy_intp shape[2], strides[2]; + PyArray_Descr *dtype = NULL; + PyArrayObject *ret = NULL; + PyArrayObject_fieldaccess *sliding_view = NULL; + int has_maskna; + + if (narrays <= 0) { + PyErr_SetString(PyExc_ValueError, + "need at least one array to concatenate"); + return NULL; + } + + /* All the arrays must have the same total number of elements */ + shape[0] = narrays; + shape[1] = PyArray_SIZE(arrays[0]); + + /* + * Figure out the final concatenated shape starting from the first + * array's shape. Also check whether any of the inputs have an + * NA mask. + */ + has_maskna = PyArray_HASMASKNA(arrays[0]); + for (iarrays = 1; iarrays < narrays; ++iarrays) { + if (PyArray_SIZE(arrays[iarrays]) != shape[1]) { + PyErr_SetString(PyExc_ValueError, + "all the input arrays must have same " + "number of elements"); + return NULL; + } + + has_maskna = has_maskna || PyArray_HASMASKNA(arrays[iarrays]); + } + + /* Get the priority subtype for the array */ + for (iarrays = 0; iarrays < narrays; ++iarrays) { + if (Py_TYPE(arrays[iarrays]) != subtype) { + double pr = PyArray_GetPriority((PyObject *)(arrays[iarrays]), 0.0); + if (pr > priority) { + priority = pr; + subtype = Py_TYPE(arrays[iarrays]); + } + } + } + + /* Get the resulting dtype from combining all the arrays */ + dtype = PyArray_ResultType(narrays, arrays, 0, NULL); + if (dtype == NULL) { + return NULL; + } + + strides[1] = dtype->elsize; + strides[2] = strides[1] * shape[1]; + + /* Allocate the array for the result. This steals the 'dtype' reference. */ + ret = (PyArrayObject *)PyArray_NewFromDescr(subtype, + dtype, + 2, + shape, + strides, + NULL, + 0, + NULL); + if (ret == NULL) { + return NULL; + } + + /* Add an NA mask if required */ + if (has_maskna) { + if (PyArray_AllocateMaskNA(ret, 1, 0, 1) < 0) { + Py_DECREF(ret); + return NULL; + } + } + + /* + * Create a view which slides through ret for assigning the + * successive input arrays. + */ + sliding_view = (PyArrayObject_fieldaccess *)PyArray_View(ret, + NULL, &PyArray_Type); + if (sliding_view == NULL) { + Py_DECREF(ret); + return NULL; + } + /* Each array gets flattened into one slot along 'axis' */ + sliding_view->dimensions[0] = 1; + for (iarrays = 0; iarrays < narrays; ++iarrays) { + + /* Copy the data for this array */ + if (PyArray_CopyAsFlat((PyArrayObject *)sliding_view, arrays[iarrays], + order) < 0) { + Py_DECREF(sliding_view); + Py_DECREF(ret); + return NULL; + } + + /* Slide to the start of the next window */ + sliding_view->data += sliding_view->strides[0]; + if (has_maskna) { + sliding_view->maskna_data += sliding_view->maskna_strides[0]; + } + } + + Py_DECREF(sliding_view); + return ret; } + /*NUMPY_API * Concatenate * * Concatenate an arbitrary Python sequence into an array. * op is a python object supporting the sequence interface. * Its elements will be concatenated together to form a single - * multidimensional array. If axis is MAX_DIMS or bigger, then + * multidimensional array. If axis is NPY_MAXDIMS or bigger, then * each sequence object will be flattened before concatenation */ NPY_NO_EXPORT PyObject * PyArray_Concatenate(PyObject *op, int axis) { - PyArrayObject *ret, **mps; - PyObject *otmp; - int i, n, tmp, nd = 0, new_dim; - char *data; - PyTypeObject *subtype; - double prior1, prior2; - npy_intp numbytes; + int iarrays, narrays; + PyArrayObject **arrays; + PyArrayObject *ret; - n = PySequence_Length(op); - if (n == -1) { + /* Convert the input list into arrays */ + narrays = PySequence_Size(op); + if (narrays < 0) { return NULL; } - if (n == 0) { - PyErr_SetString(PyExc_ValueError, - "concatenation of zero-length sequences is "\ - "impossible"); + arrays = PyArray_malloc(narrays * sizeof(arrays[0])); + if (arrays == NULL) { + PyErr_NoMemory(); return NULL; } - - if ((axis < 0) || ((0 < axis) && (axis < MAX_DIMS))) { - return _swap_and_concat(op, axis, n); - } - mps = PyArray_ConvertToCommonType(op, &n); - if (mps == NULL) { - return NULL; - } - - /* - * Make sure these arrays are legal to concatenate. - * Must have same dimensions except d0 - */ - prior1 = PyArray_PRIORITY; - subtype = &PyArray_Type; - ret = NULL; - for (i = 0; i < n; i++) { - if (axis >= MAX_DIMS) { - otmp = PyArray_Ravel(mps[i],0); - Py_DECREF(mps[i]); - mps[i] = (PyArrayObject *)otmp; - } - if (Py_TYPE(mps[i]) != subtype) { - prior2 = PyArray_GetPriority((PyObject *)(mps[i]), 0.0); - if (prior2 > prior1) { - prior1 = prior2; - subtype = Py_TYPE(mps[i]); - } - } - } - - new_dim = 0; - for (i = 0; i < n; i++) { - if (mps[i] == NULL) { + for (iarrays = 0; iarrays < narrays; ++iarrays) { + PyObject *item = PySequence_GetItem(op, iarrays); + if (item == NULL) { + narrays = iarrays; goto fail; } - if (i == 0) { - nd = PyArray_NDIM(mps[i]); - } - else { - if (nd != PyArray_NDIM(mps[i])) { - PyErr_SetString(PyExc_ValueError, - "arrays must have same "\ - "number of dimensions"); - goto fail; - } - if (!PyArray_CompareLists(PyArray_DIMS(mps[0])+1, - PyArray_DIMS(mps[i])+1, - nd-1)) { - PyErr_SetString(PyExc_ValueError, - "array dimensions must "\ - "agree except for d_0"); - goto fail; - } - } - if (nd == 0) { - PyErr_SetString(PyExc_ValueError, - "0-d arrays can't be concatenated"); + arrays[iarrays] = (PyArrayObject *)PyArray_FromAny(item, NULL, + 0, 0, NPY_ARRAY_ALLOWNA, NULL); + Py_DECREF(item); + if (arrays[iarrays] == NULL) { + narrays = iarrays; goto fail; } - new_dim += PyArray_DIMS(mps[i])[0]; } - tmp = PyArray_DIMS(mps[0])[0]; - PyArray_DIMS(mps[0])[0] = new_dim; - Py_INCREF(PyArray_DESCR(mps[0])); - ret = (PyArrayObject *)PyArray_NewFromDescr(subtype, - PyArray_DESCR(mps[0]), nd, - PyArray_DIMS(mps[0]), - NULL, NULL, 0, - (PyObject *)ret); - PyArray_DIMS(mps[0])[0] = tmp; - if (ret == NULL) { - goto fail; + if (axis >= NPY_MAXDIMS) { + ret = PyArray_ConcatenateFlattenedArrays(narrays, arrays, NPY_CORDER); } - data = PyArray_DATA(ret); - for (i = 0; i < n; i++) { - numbytes = PyArray_NBYTES(mps[i]); - memcpy(data, PyArray_DATA(mps[i]), numbytes); - data += numbytes; + else { + ret = PyArray_ConcatenateArrays(narrays, arrays, axis); } - PyArray_INCREF(ret); - for (i = 0; i < n; i++) { - Py_XDECREF(mps[i]); + for (iarrays = 0; iarrays < narrays; ++iarrays) { + Py_DECREF(arrays[iarrays]); } - PyDataMem_FREE(mps); + return (PyObject *)ret; - fail: - Py_XDECREF(ret); - for (i = 0; i < n; i++) { - Py_XDECREF(mps[i]); +fail: + /* 'narrays' was set to how far we got in the conversion */ + for (iarrays = 0; iarrays < narrays; ++iarrays) { + Py_DECREF(arrays[iarrays]); } - PyDataMem_FREE(mps); + return NULL; } @@ -652,7 +831,7 @@ PyArray_InnerProduct(PyObject *op1, PyObject *op2) int typenum, nd, axis; npy_intp is1, is2, os; char *op; - npy_intp dimensions[MAX_DIMS]; + npy_intp dimensions[NPY_MAXDIMS]; PyArray_DotFunc *dot; PyArray_Descr *typec; NPY_BEGIN_THREADS_DEF; @@ -761,7 +940,7 @@ PyArray_MatrixProduct2(PyObject *op1, PyObject *op2, PyArrayObject* out) int typenum, nd, axis, matchDim; npy_intp is1, is2, os; char *op; - npy_intp dimensions[MAX_DIMS]; + npy_intp dimensions[NPY_MAXDIMS]; PyArray_DotFunc *dot; PyArray_Descr *typec; NPY_BEGIN_THREADS_DEF; @@ -1510,8 +1689,8 @@ PyArray_EquivTypenums(int typenum1, int typenum2) static PyObject * _prepend_ones(PyArrayObject *arr, int nd, int ndmin) { - npy_intp newdims[MAX_DIMS]; - npy_intp newstrides[MAX_DIMS]; + npy_intp newdims[NPY_MAXDIMS]; + npy_intp newstrides[NPY_MAXDIMS]; int i, k, num; PyArrayObject *ret; PyArray_Descr *dtype; @@ -1723,7 +1902,7 @@ array_copyto(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kwds) goto fail; } wheremask = (PyArrayObject *)PyArray_FromAny(wheremask_in, - dtype, 0, 0, 0, NULL); + dtype, 0, 0, NPY_ARRAY_ALLOWNA, NULL); if (wheremask == NULL) { goto fail; } @@ -1879,7 +2058,7 @@ array_scalar(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kwds) } else { if (obj == NULL) { - dptr = _pya_malloc(typecode->elsize); + dptr = PyArray_malloc(typecode->elsize); if (dptr == NULL) { return PyErr_NoMemory(); } @@ -1904,7 +2083,7 @@ array_scalar(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kwds) /* free dptr which contains zeros */ if (alloc) { - _pya_free(dptr); + PyArray_free(dptr); } return ret; } @@ -2716,7 +2895,7 @@ static PyObject * array_set_datetimeparse_function(PyObject *NPY_UNUSED(self), PyObject *NPY_UNUSED(args), PyObject *NPY_UNUSED(kwds)) { - PyErr_SetString(PyExc_RuntimeError, "This function is to be removed"); + PyErr_SetString(PyExc_RuntimeError, "This function has been removed"); return NULL; } @@ -2823,7 +3002,7 @@ array_can_cast_safely(PyObject *NPY_UNUSED(self), PyObject *args, PyArray_IsPythonNumber(from_obj)) { PyArrayObject *arr; arr = (PyArrayObject *)PyArray_FromAny(from_obj, - NULL, 0, 0, 0, NULL); + NULL, 0, 0, NPY_ARRAY_ALLOWNA, NULL); if (arr == NULL) { goto finish; } @@ -2885,7 +3064,8 @@ array_min_scalar_type(PyObject *NPY_UNUSED(dummy), PyObject *args) return NULL; } - array = (PyArrayObject *)PyArray_FromAny(array_in, NULL, 0, 0, 0, NULL); + array = (PyArrayObject *)PyArray_FromAny(array_in, + NULL, 0, 0, NPY_ARRAY_ALLOWNA, NULL); if (array == NULL) { return NULL; } @@ -2930,7 +3110,7 @@ array_result_type(PyObject *NPY_UNUSED(dummy), PyObject *args) goto finish; } arr[narr] = (PyArrayObject *)PyArray_FromAny(obj, - NULL, 0, 0, 0, NULL); + NULL, 0, 0, NPY_ARRAY_ALLOWNA, NULL); if (arr[narr] == NULL) { goto finish; } @@ -3027,17 +3207,17 @@ _SigSegv_Handler(int signum) } #endif -#define _test_code() { \ - test = *((char*)memptr); \ - if (!ro) { \ - *((char *)memptr) = '\0'; \ - *((char *)memptr) = test; \ - } \ - test = *((char*)memptr+size-1); \ - if (!ro) { \ - *((char *)memptr+size-1) = '\0'; \ - *((char *)memptr+size-1) = test; \ - } \ +#define _test_code() { \ + test = *((char*)memptr); \ + if (!ro) { \ + *((char *)memptr) = '\0'; \ + *((char *)memptr) = test; \ + } \ + test = *((char*)memptr+size-1); \ + if (!ro) { \ + *((char *)memptr+size-1) = '\0'; \ + *((char *)memptr+size-1) = test; \ + } \ } static PyObject * @@ -3901,7 +4081,7 @@ PyMODINIT_FUNC initmultiarray(void) { if (!d) { goto err; } - PyArray_Type.tp_free = _pya_free; + PyArray_Type.tp_free = PyArray_free; if (PyType_Ready(&PyArray_Type) < 0) { return RETVAL; } @@ -3911,7 +4091,7 @@ PyMODINIT_FUNC initmultiarray(void) { PyArrayIter_Type.tp_iter = PyObject_SelfIter; NpyIter_Type.tp_iter = PyObject_SelfIter; PyArrayMultiIter_Type.tp_iter = PyObject_SelfIter; - PyArrayMultiIter_Type.tp_free = _pya_free; + PyArrayMultiIter_Type.tp_free = PyArray_free; if (PyType_Ready(&PyArrayIter_Type) < 0) { return RETVAL; } diff --git a/numpy/core/src/multiarray/na_mask.c b/numpy/core/src/multiarray/na_mask.c index 6d7677d5e..0cb05beab 100644 --- a/numpy/core/src/multiarray/na_mask.c +++ b/numpy/core/src/multiarray/na_mask.c @@ -294,7 +294,8 @@ PyArray_AllocateMaskNA(PyArrayObject *arr, shape = fa->dimensions; /* This causes the NA mask and data memory orderings to match */ - PyArray_CreateSortedStridePerm(fa->nd, fa->strides, strideperm); + PyArray_CreateSortedStridePerm(fa->nd, fa->dimensions, + fa->strides, strideperm); stride = maskna_dtype->elsize; for (i = fa->nd-1; i >= 0; --i) { npy_intp i_perm = strideperm[i].perm; diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c index 07efba65c..93c11b60d 100644 --- a/numpy/core/src/multiarray/nditer_constr.c +++ b/numpy/core/src/multiarray/nditer_constr.c @@ -2369,7 +2369,7 @@ npyiter_reverse_axis_ordering(NpyIter *iter) NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_IDENTPERM; } -static npy_intp +static NPY_INLINE npy_intp intp_abs(npy_intp x) { return (x < 0) ? -x : x; diff --git a/numpy/core/src/multiarray/shape.c b/numpy/core/src/multiarray/shape.c index 4f84fe347..b49b4d988 100644 --- a/numpy/core/src/multiarray/shape.c +++ b/numpy/core/src/multiarray/shape.c @@ -876,10 +876,7 @@ int _npy_stride_sort_item_comparator(const void *a, const void *b) bstride = -bstride; } - if (astride > bstride) { - return -1; - } - else if (astride == bstride) { + if (astride == bstride || astride == 0 || bstride == 0) { /* * Make the qsort stable by next comparing the perm order. * (Note that two perm entries will never be equal) @@ -888,6 +885,9 @@ int _npy_stride_sort_item_comparator(const void *a, const void *b) bperm = ((npy_stride_sort_item *)b)->perm; return (aperm < bperm) ? -1 : 1; } + if (astride > bstride) { + return -1; + } else { return 1; } @@ -901,22 +901,123 @@ int _npy_stride_sort_item_comparator(const void *a, const void *b) * [(2, 12), (0, 4), (1, -2)]. */ NPY_NO_EXPORT void -PyArray_CreateSortedStridePerm(int ndim, npy_intp * strides, - npy_stride_sort_item *strideperm) +PyArray_CreateSortedStridePerm(int ndim, npy_intp *shape, + npy_intp *strides, + npy_stride_sort_item *out_strideperm) { int i; /* Set up the strideperm values */ for (i = 0; i < ndim; ++i) { - strideperm[i].perm = i; - strideperm[i].stride = strides[i]; + out_strideperm[i].perm = i; + if (shape[i] == 1) { + out_strideperm[i].stride = 0; + } + else { + out_strideperm[i].stride = strides[i]; + } } /* Sort them */ - qsort(strideperm, ndim, sizeof(npy_stride_sort_item), + qsort(out_strideperm, ndim, sizeof(npy_stride_sort_item), &_npy_stride_sort_item_comparator); } +static NPY_INLINE npy_intp +intp_abs(npy_intp x) +{ + return (x < 0) ? -x : x; +} + + + +/* + * Creates a sorted stride perm matching the KEEPORDER behavior + * of the NpyIter object. Because this operates based on multiple + * input strides, the 'stride' member of the npy_stride_sort_item + * would be useless and we simply argsort a list of indices instead. + * + * The caller should have already validated that 'ndim' matches for + * every array in the arrays list. + */ +NPY_NO_EXPORT void +PyArray_CreateMultiSortedStridePerm(int narrays, PyArrayObject **arrays, + int ndim, int *out_strideperm) +{ + int i0, i1, ipos, j0, j1, iarrays; + + /* Initialize the strideperm values to the identity. */ + for (i0 = 0; i0 < ndim; ++i0) { + out_strideperm[i0] = i0; + } + + /* + * This is the same as the custom stable insertion sort in + * the NpyIter object, but sorting in the reverse order as + * in the iterator. The iterator sorts from smallest stride + * to biggest stride (Fortran order), whereas here we sort + * from biggest stride to smallest stride (C order). + */ + for (i0 = 1; i0 < ndim; ++i0) { + + ipos = i0; + j0 = out_strideperm[i0]; + + for (i1 = i0 - 1; i1 >= 0; --i1) { + int ambig = 1, shouldswap = 0; + + j1 = out_strideperm[i1]; + + for (iarrays = 0; iarrays < narrays; ++iarrays) { + if (PyArray_SHAPE(arrays[iarrays])[j0] != 1 && + PyArray_SHAPE(arrays[iarrays])[j1] != 1) { + if (intp_abs(PyArray_STRIDES(arrays[iarrays])[j0]) <= + intp_abs(PyArray_STRIDES(arrays[iarrays])[j1])) { + /* + * Set swap even if it's not ambiguous already, + * because in the case of conflicts between + * different operands, C-order wins. + */ + shouldswap = 0; + } + else { + /* Only set swap if it's still ambiguous */ + if (ambig) { + shouldswap = 1; + } + } + + /* + * A comparison has been done, so it's + * no longer ambiguous + */ + ambig = 0; + } + } + /* + * If the comparison was unambiguous, either shift + * 'ipos' to 'i1' or stop looking for an insertion point + */ + if (!ambig) { + if (shouldswap) { + ipos = i1; + } + else { + break; + } + } + } + + /* Insert out_strideperm[i0] into the right place */ + if (ipos != i0) { + for (i1 = i0; i1 > ipos; --i1) { + out_strideperm[i1] = out_strideperm[i1-1]; + } + out_strideperm[ipos] = j0; + } + } +} + /*NUMPY_API * Ravel * Returns a contiguous array @@ -953,8 +1054,8 @@ PyArray_Ravel(PyArrayObject *a, NPY_ORDER order) npy_intp stride; int i, ndim = PyArray_NDIM(a); - PyArray_CreateSortedStridePerm(PyArray_NDIM(a), PyArray_STRIDES(a), - strideperm); + PyArray_CreateSortedStridePerm(PyArray_NDIM(a), PyArray_SHAPE(a), + PyArray_STRIDES(a), strideperm); stride = PyArray_DESCR(a)->elsize; for (i = ndim-1; i >= 0; --i) { diff --git a/numpy/core/src/multiarray/shape.h b/numpy/core/src/multiarray/shape.h index cffd847e2..a293254a7 100644 --- a/numpy/core/src/multiarray/shape.h +++ b/numpy/core/src/multiarray/shape.h @@ -8,4 +8,18 @@ NPY_NO_EXPORT PyObject * build_shape_string(npy_intp n, npy_intp *vals); +/* + * Creates a sorted stride perm matching the KEEPORDER behavior + * of the NpyIter object. Because this operates based on multiple + * input strides, the 'stride' member of the npy_stride_sort_item + * would be useless and we simply argsort a list of indices instead. + * + * The caller should have already validated that 'ndim' matches for + * every array in the arrays list. + */ +NPY_NO_EXPORT void +PyArray_CreateMultiSortedStridePerm(int narrays, PyArrayObject **arrays, + int ndim, int *out_strideperm); + + #endif diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index ee644569a..e4d5edca3 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -2582,13 +2582,12 @@ allocate_reduce_result(PyArrayObject *arr, npy_bool *axis_flags, int idim, ndim = PyArray_NDIM(arr); if (dtype == NULL) { - dtype = PyArray_DESCR(arr); + dtype = PyArray_DTYPE(arr); Py_INCREF(dtype); } - PyArray_CreateSortedStridePerm(PyArray_NDIM(arr), - PyArray_STRIDES(arr), - strideperm); + PyArray_CreateSortedStridePerm(PyArray_NDIM(arr), PyArray_SHAPE(arr), + PyArray_STRIDES(arr), strideperm); /* Build the new strides and shape */ stride = dtype->elsize; diff --git a/numpy/core/tests/test_maskna.py b/numpy/core/tests/test_maskna.py index 85bcde99c..8c0f9272a 100644 --- a/numpy/core/tests/test_maskna.py +++ b/numpy/core/tests/test_maskna.py @@ -920,5 +920,28 @@ def test_array_maskna_diagonal(): res = a.diagonal(3) assert_equal(res, []) +def test_array_maskna_concatenate(): + # np.concatenate + a = np.arange(6, maskna=True, dtype='i4').reshape(2,3) + a[1,0] = np.NA + + b = np.array([[12],[13]], dtype='i4') + res = np.concatenate([a, b], axis=1) + assert_equal(np.isna(res), [[0,0,0,0], [1,0,0,0]]) + assert_equal(res[~np.isna(res)], [0,1,2,12,4,5,13]) + assert_equal(res.strides, (16, 4)) + + b = np.array([[10, np.NA, 11]], maskna=True, dtype='i4') + res = np.concatenate([a,b], axis=0) + assert_equal(np.isna(res), [[0,0,0], [1,0,0], [0,1,0]]) + assert_equal(res[~np.isna(res)], [0,1,2,4,5,10,11]) + assert_equal(res.strides, (12, 4)) + + b = np.array([[np.NA, 10]], order='F', maskna=True, dtype='i4') + res = np.concatenate([a.T, b], axis=0) + assert_equal(np.isna(res), [[0,1], [0,0], [0,0], [1,0]]) + assert_equal(res[~np.isna(res)], [0,1,4,2,5,10]) + assert_equal(res.strides, (4, 16)) + if __name__ == "__main__": run_module_suite() diff --git a/numpy/core/tests/test_shape_base.py b/numpy/core/tests/test_shape_base.py index f282dbe25..17a9cbbde 100644 --- a/numpy/core/tests/test_shape_base.py +++ b/numpy/core/tests/test_shape_base.py @@ -141,3 +141,5 @@ class TestVstack(TestCase): desired = array([[1,2],[1,2]]) assert_array_equal(res,desired) +if __name__ == "__main__": + run_module_suite() |