summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMark Wiebe <mwwiebe@gmail.com>2011-08-16 16:54:16 -0700
committerCharles Harris <charlesr.harris@gmail.com>2011-08-27 07:26:55 -0600
commit9194b3af704df71aa9b1ff2f53f169848d0f9dc7 (patch)
tree1e6624032156dfa190e1adba79cc76e5700f19ce
parent99a21efff4b1f2292dc370c7c9c7c58f10385f2a (diff)
downloadnumpy-9194b3af704df71aa9b1ff2f53f169848d0f9dc7.tar.gz
ENH: missingdata: Rewrite PyArray_Concatenate to work with NA masks
It should also have less memory usage for heterogeneous inputs, because it no longer makes extra copies in that case.
-rw-r--r--doc/release/2.0.0-notes.rst3
-rw-r--r--doc/source/reference/c-api.array.rst26
-rw-r--r--numpy/add_newdocs.py7
-rw-r--r--numpy/core/shape_base.py7
-rw-r--r--numpy/core/src/multiarray/convert_datatype.c8
-rw-r--r--numpy/core/src/multiarray/ctors.c1
-rw-r--r--numpy/core/src/multiarray/dtype_transfer.c8
-rw-r--r--numpy/core/src/multiarray/multiarraymodule.c482
-rw-r--r--numpy/core/src/multiarray/na_mask.c3
-rw-r--r--numpy/core/src/multiarray/nditer_constr.c2
-rw-r--r--numpy/core/src/multiarray/shape.c123
-rw-r--r--numpy/core/src/multiarray/shape.h14
-rw-r--r--numpy/core/src/umath/ufunc_object.c7
-rw-r--r--numpy/core/tests/test_maskna.py23
-rw-r--r--numpy/core/tests/test_shape_base.py2
15 files changed, 528 insertions, 188 deletions
diff --git a/doc/release/2.0.0-notes.rst b/doc/release/2.0.0-notes.rst
index d5aa99639..ddedf85de 100644
--- a/doc/release/2.0.0-notes.rst
+++ b/doc/release/2.0.0-notes.rst
@@ -29,9 +29,12 @@ What works with NA:
* Array methods:
+ ndarray.clip, ndarray.min, ndarray.max, ndarray.sum, ndarray.prod,
ndarray.conjugate, ndarray.diagonal
+ + numpy.concatenate
What doesn't work with NA:
* Fancy indexing, such as with lists and partial boolean masks.
+ * ndarray.flat and any other methods that use the old iterator
+ mechanism instead of the newer nditer.
* UFunc.reduce of multi-dimensional arrays, with skipna=True and a ufunc
that doesn't have an identity.
* UFunc.accumulate, UFunc.reduceat.
diff --git a/doc/source/reference/c-api.array.rst b/doc/source/reference/c-api.array.rst
index 498b2a448..369360407 100644
--- a/doc/source/reference/c-api.array.rst
+++ b/doc/source/reference/c-api.array.rst
@@ -1667,18 +1667,20 @@ Conversion
copied into every location. A -1 is returned if an error occurs,
otherwise 0 is returned.
-.. cfunction:: PyObject* PyArray_View(PyArrayObject* self, PyArray_Descr* dtype)
-
- Equivalent to :meth:`ndarray.view` (*self*, *dtype*). Return a new view of
- the array *self* as possibly a different data-type, *dtype*. If
- *dtype* is ``NULL``, then the returned array will have the same
- data type as *self*. The new data-type must be consistent with
- the size of *self*. Either the itemsizes must be identical, or
- *self* must be single-segment and the total number of bytes must
- be the same. In the latter case the dimensions of the returned
- array will be altered in the last (or first for Fortran-style
- contiguous arrays) dimension. The data area of the returned array
- and self is exactly the same.
+.. cfunction:: PyObject* PyArray_View(PyArrayObject* self, PyArray_Descr* dtype, PyTypeObject *ptype)
+
+ Equivalent to :meth:`ndarray.view` (*self*, *dtype*). Return a new
+ view of the array *self* as possibly a different data-type, *dtype*,
+ and different array subclass *ptype*.
+
+ If *dtype* is ``NULL``, then the returned array will have the same
+ data type as *self*. The new data-type must be consistent with the
+ size of *self*. Either the itemsizes must be identical, or *self* must
+ be single-segment and the total number of bytes must be the same.
+ In the latter case the dimensions of the returned array will be
+ altered in the last (or first for Fortran-style contiguous arrays)
+ dimension. The data area of the returned array and self is exactly
+ the same.
Shape Manipulation
diff --git a/numpy/add_newdocs.py b/numpy/add_newdocs.py
index d992a7122..82f59c6b0 100644
--- a/numpy/add_newdocs.py
+++ b/numpy/add_newdocs.py
@@ -3702,7 +3702,7 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('put',
add_newdoc('numpy.core.multiarray', 'copyto',
"""
- copyto(dst, src, casting='same_kind', where=None)
+ copyto(dst, src, casting='same_kind', where=None, preservena=False)
Copies values from `src` into `dst`, broadcasting as necessary.
Raises a TypeError if the casting rule is violated, and if
@@ -3725,10 +3725,13 @@ add_newdoc('numpy.core.multiarray', 'copyto',
* 'same_kind' means only safe casts or casts within a kind,
like float64 to float32, are allowed.
* 'unsafe' means any data conversions may be done.
- where : array_like of bool
+ where : array_like of bool, optional
A boolean array which is broadcasted to match the dimensions
of `dst`, and selects elements to copy from `src` to `dst`
wherever it contains the value True.
+ preservena : bool, optional
+ If set to True, leaves any NA values in `dst` untouched. This
+ is similar to the "hard mask" feature in numpy.ma.
""")
diff --git a/numpy/core/shape_base.py b/numpy/core/shape_base.py
index 93de19299..8a4c80e27 100644
--- a/numpy/core/shape_base.py
+++ b/numpy/core/shape_base.py
@@ -267,5 +267,10 @@ def hstack(tup):
[3, 4]])
"""
- return _nx.concatenate(map(atleast_1d,tup),1)
+ arrs = map(atleast_1d,tup)
+ # As a special case, dimension 0 of 1-dimensional arrays is "horizontal"
+ if arrs[0].ndim == 1:
+ return _nx.concatenate(arrs, 0)
+ else:
+ return _nx.concatenate(arrs, 1)
diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c
index 0d72807ae..19b01cce9 100644
--- a/numpy/core/src/multiarray/convert_datatype.c
+++ b/numpy/core/src/multiarray/convert_datatype.c
@@ -1331,7 +1331,13 @@ NPY_NO_EXPORT PyArray_Descr *
PyArray_MinScalarType(PyArrayObject *arr)
{
PyArray_Descr *dtype = PyArray_DESCR(arr);
- if (PyArray_NDIM(arr) > 0 || !PyTypeNum_ISNUMBER(dtype->type_num)) {
+ /*
+ * If the array isn't a numeric scalar or is a scalar but with
+ * its value masked out, just return the array's dtype.
+ */
+ if (PyArray_NDIM(arr) > 0 || !PyTypeNum_ISNUMBER(dtype->type_num) ||
+ (PyArray_HASMASKNA(arr) && !NpyMaskValue_IsExposed(
+ (npy_mask)*PyArray_MASKNA_DATA(arr)))) {
Py_INCREF(dtype);
return dtype;
}
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index 48d81d75b..bcd33afd8 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -1175,6 +1175,7 @@ PyArray_NewLikeArray(PyArrayObject *prototype, NPY_ORDER order,
int idim;
PyArray_CreateSortedStridePerm(PyArray_NDIM(prototype),
+ PyArray_SHAPE(prototype),
PyArray_STRIDES(prototype),
strideperm);
diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c
index 39a8ba1bc..943859ae5 100644
--- a/numpy/core/src/multiarray/dtype_transfer.c
+++ b/numpy/core/src/multiarray/dtype_transfer.c
@@ -3922,7 +3922,7 @@ PyArray_PrepareOneRawArrayIter(int ndim, npy_intp *shape,
}
/* Sort the axes based on the destination strides */
- PyArray_CreateSortedStridePerm(ndim, strides, strideperm);
+ PyArray_CreateSortedStridePerm(ndim, shape, strides, strideperm);
for (i = 0; i < ndim; ++i) {
int iperm = strideperm[ndim - i - 1].perm;
out_shape[i] = shape[iperm];
@@ -4052,7 +4052,7 @@ PyArray_PrepareTwoRawArrayIter(int ndim, npy_intp *shape,
}
/* Sort the axes based on the destination strides */
- PyArray_CreateSortedStridePerm(ndim, stridesA, strideperm);
+ PyArray_CreateSortedStridePerm(ndim, shape, stridesA, strideperm);
for (i = 0; i < ndim; ++i) {
int iperm = strideperm[ndim - i - 1].perm;
out_shape[i] = shape[iperm];
@@ -4186,7 +4186,7 @@ PyArray_PrepareThreeRawArrayIter(int ndim, npy_intp *shape,
}
/* Sort the axes based on the destination strides */
- PyArray_CreateSortedStridePerm(ndim, stridesA, strideperm);
+ PyArray_CreateSortedStridePerm(ndim, shape, stridesA, strideperm);
for (i = 0; i < ndim; ++i) {
int iperm = strideperm[ndim - i - 1].perm;
out_shape[i] = shape[iperm];
@@ -4324,7 +4324,7 @@ PyArray_PrepareFourRawArrayIter(int ndim, npy_intp *shape,
}
/* Sort the axes based on the destination strides */
- PyArray_CreateSortedStridePerm(ndim, stridesA, strideperm);
+ PyArray_CreateSortedStridePerm(ndim, shape, stridesA, strideperm);
for (i = 0; i < ndim; ++i) {
int iperm = strideperm[ndim - i - 1].perm;
out_shape[i] = shape[iperm];
diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c
index b4c212871..d23ecef83 100644
--- a/numpy/core/src/multiarray/multiarraymodule.c
+++ b/numpy/core/src/multiarray/multiarraymodule.c
@@ -48,6 +48,8 @@ NPY_NO_EXPORT int NPY_NUMUSERTYPES = 0;
#include "datetime_strings.h"
#include "datetime_busday.h"
#include "datetime_busdaycal.h"
+#include "shape.h"
+#include "ctors.h"
#include "na_singleton.h"
#include "na_mask.h"
@@ -61,7 +63,7 @@ NPY_NO_EXPORT double
PyArray_GetPriority(PyObject *obj, double default_)
{
PyObject *ret;
- double priority = PyArray_PRIORITY;
+ double priority = NPY_PRIORITY;
if (PyArray_CheckExact(obj))
return priority;
@@ -202,7 +204,7 @@ PyArray_AsCArray(PyObject **op, void *ptr, npy_intp *dims, int nd,
break;
case 2:
n = PyArray_DIMS(ap)[0];
- ptr2 = (char **)_pya_malloc(n * sizeof(char *));
+ ptr2 = (char **)PyArray_malloc(n * sizeof(char *));
if (!ptr2) {
goto fail;
}
@@ -214,7 +216,7 @@ PyArray_AsCArray(PyObject **op, void *ptr, npy_intp *dims, int nd,
case 3:
n = PyArray_DIMS(ap)[0];
m = PyArray_DIMS(ap)[1];
- ptr3 = (char ***)_pya_malloc(n*(m+1) * sizeof(char *));
+ ptr3 = (char ***)PyArray_malloc(n*(m+1) * sizeof(char *));
if (!ptr3) {
goto fail;
}
@@ -294,177 +296,354 @@ PyArray_Free(PyObject *op, void *ptr)
return -1;
}
if (PyArray_NDIM(ap) >= 2) {
- _pya_free(ptr);
+ PyArray_free(ptr);
}
Py_DECREF(ap);
return 0;
}
-static PyObject *
-_swap_and_concat(PyObject *op, int axis, int n)
+/*
+ * Concatenates a list of ndarrays.
+ */
+NPY_NO_EXPORT PyArrayObject *
+PyArray_ConcatenateArrays(int narrays, PyArrayObject **arrays, int axis)
{
- PyObject *newtup = NULL;
- PyObject *otmp, *arr;
- int i;
+ PyTypeObject *subtype = &PyArray_Type;
+ double priority = NPY_PRIORITY;
+ int iarrays, idim, ndim;
+ npy_intp shape[NPY_MAXDIMS], s, strides[NPY_MAXDIMS];
+ int strideperm[NPY_MAXDIMS];
+ PyArray_Descr *dtype = NULL;
+ PyArrayObject *ret = NULL;
+ PyArrayObject_fieldaccess *sliding_view = NULL;
+ int has_maskna;
- newtup = PyTuple_New(n);
- if (newtup == NULL) {
+ if (narrays <= 0) {
+ PyErr_SetString(PyExc_ValueError,
+ "need at least one array to concatenate");
return NULL;
}
- for (i = 0; i < n; i++) {
- otmp = PySequence_GetItem(op, i);
- arr = PyArray_FROM_O(otmp);
- Py_DECREF(otmp);
- if (arr == NULL) {
- goto fail;
+
+ /* All the arrays must have the same 'ndim' */
+ ndim = PyArray_NDIM(arrays[0]);
+
+ if (ndim == 0) {
+ PyErr_SetString(PyExc_ValueError,
+ "zero-dimensional arrays cannot be concatenated");
+ return NULL;
+ }
+
+ /* Handle standard Python negative indexing */
+ if (axis < 0) {
+ axis += narrays;
+ }
+ if (axis < 0 || axis >= ndim) {
+ PyErr_SetString(PyExc_IndexError,
+ "axis out of bounds");
+ return NULL;
+ }
+
+ /*
+ * Figure out the final concatenated shape starting from the first
+ * array's shape. Also check whether any of the inputs have an
+ * NA mask.
+ */
+ memcpy(shape, PyArray_SHAPE(arrays[0]), ndim * sizeof(shape[0]));
+ has_maskna = PyArray_HASMASKNA(arrays[0]);
+ for (iarrays = 1; iarrays < narrays; ++iarrays) {
+ npy_intp *arr_shape;
+
+ if (PyArray_NDIM(arrays[iarrays]) != ndim) {
+ PyErr_SetString(PyExc_ValueError,
+ "all the input arrays must have same "
+ "number of dimensions");
+ return NULL;
}
- otmp = PyArray_SwapAxes((PyArrayObject *)arr, axis, 0);
- Py_DECREF(arr);
- if (otmp == NULL) {
- goto fail;
+ arr_shape = PyArray_SHAPE(arrays[iarrays]);
+
+ for (idim = 0; idim < ndim; ++idim) {
+ /* Build up the size of the concatenation axis */
+ if (idim == axis) {
+ shape[idim] += arr_shape[idim];
+ }
+ /* Validate that the rest of the dimensions match */
+ else if (shape[idim] != arr_shape[idim]) {
+ PyErr_SetString(PyExc_ValueError,
+ "all the input array dimensions "
+ "except for the concatenation axis "
+ "must match exactly");
+ return NULL;
+ }
+ }
+
+ has_maskna = has_maskna || PyArray_HASMASKNA(arrays[iarrays]);
+ }
+
+ /* Get the priority subtype for the array */
+ for (iarrays = 0; iarrays < narrays; ++iarrays) {
+ if (Py_TYPE(arrays[iarrays]) != subtype) {
+ double pr = PyArray_GetPriority((PyObject *)(arrays[iarrays]), 0.0);
+ if (pr > priority) {
+ priority = pr;
+ subtype = Py_TYPE(arrays[iarrays]);
+ }
}
- PyTuple_SET_ITEM(newtup, i, otmp);
}
- otmp = PyArray_Concatenate(newtup, 0);
- Py_DECREF(newtup);
- if (otmp == NULL) {
+
+ /* Get the resulting dtype from combining all the arrays */
+ dtype = PyArray_ResultType(narrays, arrays, 0, NULL);
+ if (dtype == NULL) {
return NULL;
}
- arr = PyArray_SwapAxes((PyArrayObject *)otmp, axis, 0);
- Py_DECREF(otmp);
- return arr;
- fail:
- Py_DECREF(newtup);
- return NULL;
+ /*
+ * Figure out the permutation to apply to the strides to match
+ * the memory layout of the input arrays, using ambiguity
+ * resolution rules matching that of the NpyIter.
+ */
+ PyArray_CreateMultiSortedStridePerm(narrays, arrays, ndim, strideperm);
+ s = dtype->elsize;
+ for (idim = ndim-1; idim >= 0; --idim) {
+ int iperm = strideperm[idim];
+ strides[iperm] = s;
+ s *= shape[iperm];
+ }
+
+ /* Allocate the array for the result. This steals the 'dtype' reference. */
+ ret = (PyArrayObject *)PyArray_NewFromDescr(subtype,
+ dtype,
+ ndim,
+ shape,
+ strides,
+ NULL,
+ 0,
+ NULL);
+ if (ret == NULL) {
+ return NULL;
+ }
+
+ /* Add an NA mask if required */
+ if (has_maskna) {
+ if (PyArray_AllocateMaskNA(ret, 1, 0, 1) < 0) {
+ Py_DECREF(ret);
+ return NULL;
+ }
+ }
+
+ /*
+ * Create a view which slides through ret for assigning the
+ * successive input arrays.
+ */
+ sliding_view = (PyArrayObject_fieldaccess *)PyArray_View(ret,
+ NULL, &PyArray_Type);
+ if (sliding_view == NULL) {
+ Py_DECREF(ret);
+ return NULL;
+ }
+ for (iarrays = 0; iarrays < narrays; ++iarrays) {
+ /* Set the dimension to match the input array's */
+ sliding_view->dimensions[axis] = PyArray_SHAPE(arrays[iarrays])[axis];
+
+ /* Copy the data for this array */
+ if (PyArray_AssignArray((PyArrayObject *)sliding_view, arrays[iarrays],
+ NULL, NPY_SAME_KIND_CASTING, 0, NULL) < 0) {
+ Py_DECREF(sliding_view);
+ Py_DECREF(ret);
+ return NULL;
+ }
+
+ /* Slide to the start of the next window */
+ sliding_view->data += sliding_view->dimensions[axis] *
+ sliding_view->strides[axis];
+ if (has_maskna) {
+ sliding_view->maskna_data += sliding_view->dimensions[axis] *
+ sliding_view->maskna_strides[axis];
+ }
+ }
+
+ Py_DECREF(sliding_view);
+ return ret;
+}
+
+/*
+ * Concatenates a list of ndarrays, flattening each in C-order.
+ */
+NPY_NO_EXPORT PyArrayObject *
+PyArray_ConcatenateFlattenedArrays(int narrays, PyArrayObject **arrays,
+ NPY_ORDER order)
+{
+ PyTypeObject *subtype = &PyArray_Type;
+ double priority = NPY_PRIORITY;
+ int iarrays;
+ npy_intp shape[2], strides[2];
+ PyArray_Descr *dtype = NULL;
+ PyArrayObject *ret = NULL;
+ PyArrayObject_fieldaccess *sliding_view = NULL;
+ int has_maskna;
+
+ if (narrays <= 0) {
+ PyErr_SetString(PyExc_ValueError,
+ "need at least one array to concatenate");
+ return NULL;
+ }
+
+ /* All the arrays must have the same total number of elements */
+ shape[0] = narrays;
+ shape[1] = PyArray_SIZE(arrays[0]);
+
+ /*
+ * Figure out the final concatenated shape starting from the first
+ * array's shape. Also check whether any of the inputs have an
+ * NA mask.
+ */
+ has_maskna = PyArray_HASMASKNA(arrays[0]);
+ for (iarrays = 1; iarrays < narrays; ++iarrays) {
+ if (PyArray_SIZE(arrays[iarrays]) != shape[1]) {
+ PyErr_SetString(PyExc_ValueError,
+ "all the input arrays must have same "
+ "number of elements");
+ return NULL;
+ }
+
+ has_maskna = has_maskna || PyArray_HASMASKNA(arrays[iarrays]);
+ }
+
+ /* Get the priority subtype for the array */
+ for (iarrays = 0; iarrays < narrays; ++iarrays) {
+ if (Py_TYPE(arrays[iarrays]) != subtype) {
+ double pr = PyArray_GetPriority((PyObject *)(arrays[iarrays]), 0.0);
+ if (pr > priority) {
+ priority = pr;
+ subtype = Py_TYPE(arrays[iarrays]);
+ }
+ }
+ }
+
+ /* Get the resulting dtype from combining all the arrays */
+ dtype = PyArray_ResultType(narrays, arrays, 0, NULL);
+ if (dtype == NULL) {
+ return NULL;
+ }
+
+ strides[1] = dtype->elsize;
+ strides[2] = strides[1] * shape[1];
+
+ /* Allocate the array for the result. This steals the 'dtype' reference. */
+ ret = (PyArrayObject *)PyArray_NewFromDescr(subtype,
+ dtype,
+ 2,
+ shape,
+ strides,
+ NULL,
+ 0,
+ NULL);
+ if (ret == NULL) {
+ return NULL;
+ }
+
+ /* Add an NA mask if required */
+ if (has_maskna) {
+ if (PyArray_AllocateMaskNA(ret, 1, 0, 1) < 0) {
+ Py_DECREF(ret);
+ return NULL;
+ }
+ }
+
+ /*
+ * Create a view which slides through ret for assigning the
+ * successive input arrays.
+ */
+ sliding_view = (PyArrayObject_fieldaccess *)PyArray_View(ret,
+ NULL, &PyArray_Type);
+ if (sliding_view == NULL) {
+ Py_DECREF(ret);
+ return NULL;
+ }
+ /* Each array gets flattened into one slot along 'axis' */
+ sliding_view->dimensions[0] = 1;
+ for (iarrays = 0; iarrays < narrays; ++iarrays) {
+
+ /* Copy the data for this array */
+ if (PyArray_CopyAsFlat((PyArrayObject *)sliding_view, arrays[iarrays],
+ order) < 0) {
+ Py_DECREF(sliding_view);
+ Py_DECREF(ret);
+ return NULL;
+ }
+
+ /* Slide to the start of the next window */
+ sliding_view->data += sliding_view->strides[0];
+ if (has_maskna) {
+ sliding_view->maskna_data += sliding_view->maskna_strides[0];
+ }
+ }
+
+ Py_DECREF(sliding_view);
+ return ret;
}
+
/*NUMPY_API
* Concatenate
*
* Concatenate an arbitrary Python sequence into an array.
* op is a python object supporting the sequence interface.
* Its elements will be concatenated together to form a single
- * multidimensional array. If axis is MAX_DIMS or bigger, then
+ * multidimensional array. If axis is NPY_MAXDIMS or bigger, then
* each sequence object will be flattened before concatenation
*/
NPY_NO_EXPORT PyObject *
PyArray_Concatenate(PyObject *op, int axis)
{
- PyArrayObject *ret, **mps;
- PyObject *otmp;
- int i, n, tmp, nd = 0, new_dim;
- char *data;
- PyTypeObject *subtype;
- double prior1, prior2;
- npy_intp numbytes;
+ int iarrays, narrays;
+ PyArrayObject **arrays;
+ PyArrayObject *ret;
- n = PySequence_Length(op);
- if (n == -1) {
+ /* Convert the input list into arrays */
+ narrays = PySequence_Size(op);
+ if (narrays < 0) {
return NULL;
}
- if (n == 0) {
- PyErr_SetString(PyExc_ValueError,
- "concatenation of zero-length sequences is "\
- "impossible");
+ arrays = PyArray_malloc(narrays * sizeof(arrays[0]));
+ if (arrays == NULL) {
+ PyErr_NoMemory();
return NULL;
}
-
- if ((axis < 0) || ((0 < axis) && (axis < MAX_DIMS))) {
- return _swap_and_concat(op, axis, n);
- }
- mps = PyArray_ConvertToCommonType(op, &n);
- if (mps == NULL) {
- return NULL;
- }
-
- /*
- * Make sure these arrays are legal to concatenate.
- * Must have same dimensions except d0
- */
- prior1 = PyArray_PRIORITY;
- subtype = &PyArray_Type;
- ret = NULL;
- for (i = 0; i < n; i++) {
- if (axis >= MAX_DIMS) {
- otmp = PyArray_Ravel(mps[i],0);
- Py_DECREF(mps[i]);
- mps[i] = (PyArrayObject *)otmp;
- }
- if (Py_TYPE(mps[i]) != subtype) {
- prior2 = PyArray_GetPriority((PyObject *)(mps[i]), 0.0);
- if (prior2 > prior1) {
- prior1 = prior2;
- subtype = Py_TYPE(mps[i]);
- }
- }
- }
-
- new_dim = 0;
- for (i = 0; i < n; i++) {
- if (mps[i] == NULL) {
+ for (iarrays = 0; iarrays < narrays; ++iarrays) {
+ PyObject *item = PySequence_GetItem(op, iarrays);
+ if (item == NULL) {
+ narrays = iarrays;
goto fail;
}
- if (i == 0) {
- nd = PyArray_NDIM(mps[i]);
- }
- else {
- if (nd != PyArray_NDIM(mps[i])) {
- PyErr_SetString(PyExc_ValueError,
- "arrays must have same "\
- "number of dimensions");
- goto fail;
- }
- if (!PyArray_CompareLists(PyArray_DIMS(mps[0])+1,
- PyArray_DIMS(mps[i])+1,
- nd-1)) {
- PyErr_SetString(PyExc_ValueError,
- "array dimensions must "\
- "agree except for d_0");
- goto fail;
- }
- }
- if (nd == 0) {
- PyErr_SetString(PyExc_ValueError,
- "0-d arrays can't be concatenated");
+ arrays[iarrays] = (PyArrayObject *)PyArray_FromAny(item, NULL,
+ 0, 0, NPY_ARRAY_ALLOWNA, NULL);
+ Py_DECREF(item);
+ if (arrays[iarrays] == NULL) {
+ narrays = iarrays;
goto fail;
}
- new_dim += PyArray_DIMS(mps[i])[0];
}
- tmp = PyArray_DIMS(mps[0])[0];
- PyArray_DIMS(mps[0])[0] = new_dim;
- Py_INCREF(PyArray_DESCR(mps[0]));
- ret = (PyArrayObject *)PyArray_NewFromDescr(subtype,
- PyArray_DESCR(mps[0]), nd,
- PyArray_DIMS(mps[0]),
- NULL, NULL, 0,
- (PyObject *)ret);
- PyArray_DIMS(mps[0])[0] = tmp;
- if (ret == NULL) {
- goto fail;
+ if (axis >= NPY_MAXDIMS) {
+ ret = PyArray_ConcatenateFlattenedArrays(narrays, arrays, NPY_CORDER);
}
- data = PyArray_DATA(ret);
- for (i = 0; i < n; i++) {
- numbytes = PyArray_NBYTES(mps[i]);
- memcpy(data, PyArray_DATA(mps[i]), numbytes);
- data += numbytes;
+ else {
+ ret = PyArray_ConcatenateArrays(narrays, arrays, axis);
}
- PyArray_INCREF(ret);
- for (i = 0; i < n; i++) {
- Py_XDECREF(mps[i]);
+ for (iarrays = 0; iarrays < narrays; ++iarrays) {
+ Py_DECREF(arrays[iarrays]);
}
- PyDataMem_FREE(mps);
+
return (PyObject *)ret;
- fail:
- Py_XDECREF(ret);
- for (i = 0; i < n; i++) {
- Py_XDECREF(mps[i]);
+fail:
+ /* 'narrays' was set to how far we got in the conversion */
+ for (iarrays = 0; iarrays < narrays; ++iarrays) {
+ Py_DECREF(arrays[iarrays]);
}
- PyDataMem_FREE(mps);
+
return NULL;
}
@@ -652,7 +831,7 @@ PyArray_InnerProduct(PyObject *op1, PyObject *op2)
int typenum, nd, axis;
npy_intp is1, is2, os;
char *op;
- npy_intp dimensions[MAX_DIMS];
+ npy_intp dimensions[NPY_MAXDIMS];
PyArray_DotFunc *dot;
PyArray_Descr *typec;
NPY_BEGIN_THREADS_DEF;
@@ -761,7 +940,7 @@ PyArray_MatrixProduct2(PyObject *op1, PyObject *op2, PyArrayObject* out)
int typenum, nd, axis, matchDim;
npy_intp is1, is2, os;
char *op;
- npy_intp dimensions[MAX_DIMS];
+ npy_intp dimensions[NPY_MAXDIMS];
PyArray_DotFunc *dot;
PyArray_Descr *typec;
NPY_BEGIN_THREADS_DEF;
@@ -1510,8 +1689,8 @@ PyArray_EquivTypenums(int typenum1, int typenum2)
static PyObject *
_prepend_ones(PyArrayObject *arr, int nd, int ndmin)
{
- npy_intp newdims[MAX_DIMS];
- npy_intp newstrides[MAX_DIMS];
+ npy_intp newdims[NPY_MAXDIMS];
+ npy_intp newstrides[NPY_MAXDIMS];
int i, k, num;
PyArrayObject *ret;
PyArray_Descr *dtype;
@@ -1723,7 +1902,7 @@ array_copyto(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kwds)
goto fail;
}
wheremask = (PyArrayObject *)PyArray_FromAny(wheremask_in,
- dtype, 0, 0, 0, NULL);
+ dtype, 0, 0, NPY_ARRAY_ALLOWNA, NULL);
if (wheremask == NULL) {
goto fail;
}
@@ -1879,7 +2058,7 @@ array_scalar(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kwds)
}
else {
if (obj == NULL) {
- dptr = _pya_malloc(typecode->elsize);
+ dptr = PyArray_malloc(typecode->elsize);
if (dptr == NULL) {
return PyErr_NoMemory();
}
@@ -1904,7 +2083,7 @@ array_scalar(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kwds)
/* free dptr which contains zeros */
if (alloc) {
- _pya_free(dptr);
+ PyArray_free(dptr);
}
return ret;
}
@@ -2716,7 +2895,7 @@ static PyObject *
array_set_datetimeparse_function(PyObject *NPY_UNUSED(self),
PyObject *NPY_UNUSED(args), PyObject *NPY_UNUSED(kwds))
{
- PyErr_SetString(PyExc_RuntimeError, "This function is to be removed");
+ PyErr_SetString(PyExc_RuntimeError, "This function has been removed");
return NULL;
}
@@ -2823,7 +3002,7 @@ array_can_cast_safely(PyObject *NPY_UNUSED(self), PyObject *args,
PyArray_IsPythonNumber(from_obj)) {
PyArrayObject *arr;
arr = (PyArrayObject *)PyArray_FromAny(from_obj,
- NULL, 0, 0, 0, NULL);
+ NULL, 0, 0, NPY_ARRAY_ALLOWNA, NULL);
if (arr == NULL) {
goto finish;
}
@@ -2885,7 +3064,8 @@ array_min_scalar_type(PyObject *NPY_UNUSED(dummy), PyObject *args)
return NULL;
}
- array = (PyArrayObject *)PyArray_FromAny(array_in, NULL, 0, 0, 0, NULL);
+ array = (PyArrayObject *)PyArray_FromAny(array_in,
+ NULL, 0, 0, NPY_ARRAY_ALLOWNA, NULL);
if (array == NULL) {
return NULL;
}
@@ -2930,7 +3110,7 @@ array_result_type(PyObject *NPY_UNUSED(dummy), PyObject *args)
goto finish;
}
arr[narr] = (PyArrayObject *)PyArray_FromAny(obj,
- NULL, 0, 0, 0, NULL);
+ NULL, 0, 0, NPY_ARRAY_ALLOWNA, NULL);
if (arr[narr] == NULL) {
goto finish;
}
@@ -3027,17 +3207,17 @@ _SigSegv_Handler(int signum)
}
#endif
-#define _test_code() { \
- test = *((char*)memptr); \
- if (!ro) { \
- *((char *)memptr) = '\0'; \
- *((char *)memptr) = test; \
- } \
- test = *((char*)memptr+size-1); \
- if (!ro) { \
- *((char *)memptr+size-1) = '\0'; \
- *((char *)memptr+size-1) = test; \
- } \
+#define _test_code() { \
+ test = *((char*)memptr); \
+ if (!ro) { \
+ *((char *)memptr) = '\0'; \
+ *((char *)memptr) = test; \
+ } \
+ test = *((char*)memptr+size-1); \
+ if (!ro) { \
+ *((char *)memptr+size-1) = '\0'; \
+ *((char *)memptr+size-1) = test; \
+ } \
}
static PyObject *
@@ -3901,7 +4081,7 @@ PyMODINIT_FUNC initmultiarray(void) {
if (!d) {
goto err;
}
- PyArray_Type.tp_free = _pya_free;
+ PyArray_Type.tp_free = PyArray_free;
if (PyType_Ready(&PyArray_Type) < 0) {
return RETVAL;
}
@@ -3911,7 +4091,7 @@ PyMODINIT_FUNC initmultiarray(void) {
PyArrayIter_Type.tp_iter = PyObject_SelfIter;
NpyIter_Type.tp_iter = PyObject_SelfIter;
PyArrayMultiIter_Type.tp_iter = PyObject_SelfIter;
- PyArrayMultiIter_Type.tp_free = _pya_free;
+ PyArrayMultiIter_Type.tp_free = PyArray_free;
if (PyType_Ready(&PyArrayIter_Type) < 0) {
return RETVAL;
}
diff --git a/numpy/core/src/multiarray/na_mask.c b/numpy/core/src/multiarray/na_mask.c
index 6d7677d5e..0cb05beab 100644
--- a/numpy/core/src/multiarray/na_mask.c
+++ b/numpy/core/src/multiarray/na_mask.c
@@ -294,7 +294,8 @@ PyArray_AllocateMaskNA(PyArrayObject *arr,
shape = fa->dimensions;
/* This causes the NA mask and data memory orderings to match */
- PyArray_CreateSortedStridePerm(fa->nd, fa->strides, strideperm);
+ PyArray_CreateSortedStridePerm(fa->nd, fa->dimensions,
+ fa->strides, strideperm);
stride = maskna_dtype->elsize;
for (i = fa->nd-1; i >= 0; --i) {
npy_intp i_perm = strideperm[i].perm;
diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c
index 07efba65c..93c11b60d 100644
--- a/numpy/core/src/multiarray/nditer_constr.c
+++ b/numpy/core/src/multiarray/nditer_constr.c
@@ -2369,7 +2369,7 @@ npyiter_reverse_axis_ordering(NpyIter *iter)
NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_IDENTPERM;
}
-static npy_intp
+static NPY_INLINE npy_intp
intp_abs(npy_intp x)
{
return (x < 0) ? -x : x;
diff --git a/numpy/core/src/multiarray/shape.c b/numpy/core/src/multiarray/shape.c
index 4f84fe347..b49b4d988 100644
--- a/numpy/core/src/multiarray/shape.c
+++ b/numpy/core/src/multiarray/shape.c
@@ -876,10 +876,7 @@ int _npy_stride_sort_item_comparator(const void *a, const void *b)
bstride = -bstride;
}
- if (astride > bstride) {
- return -1;
- }
- else if (astride == bstride) {
+ if (astride == bstride || astride == 0 || bstride == 0) {
/*
* Make the qsort stable by next comparing the perm order.
* (Note that two perm entries will never be equal)
@@ -888,6 +885,9 @@ int _npy_stride_sort_item_comparator(const void *a, const void *b)
bperm = ((npy_stride_sort_item *)b)->perm;
return (aperm < bperm) ? -1 : 1;
}
+ if (astride > bstride) {
+ return -1;
+ }
else {
return 1;
}
@@ -901,22 +901,123 @@ int _npy_stride_sort_item_comparator(const void *a, const void *b)
* [(2, 12), (0, 4), (1, -2)].
*/
NPY_NO_EXPORT void
-PyArray_CreateSortedStridePerm(int ndim, npy_intp * strides,
- npy_stride_sort_item *strideperm)
+PyArray_CreateSortedStridePerm(int ndim, npy_intp *shape,
+ npy_intp *strides,
+ npy_stride_sort_item *out_strideperm)
{
int i;
/* Set up the strideperm values */
for (i = 0; i < ndim; ++i) {
- strideperm[i].perm = i;
- strideperm[i].stride = strides[i];
+ out_strideperm[i].perm = i;
+ if (shape[i] == 1) {
+ out_strideperm[i].stride = 0;
+ }
+ else {
+ out_strideperm[i].stride = strides[i];
+ }
}
/* Sort them */
- qsort(strideperm, ndim, sizeof(npy_stride_sort_item),
+ qsort(out_strideperm, ndim, sizeof(npy_stride_sort_item),
&_npy_stride_sort_item_comparator);
}
+static NPY_INLINE npy_intp
+intp_abs(npy_intp x)
+{
+ return (x < 0) ? -x : x;
+}
+
+
+
+/*
+ * Creates a sorted stride perm matching the KEEPORDER behavior
+ * of the NpyIter object. Because this operates based on multiple
+ * input strides, the 'stride' member of the npy_stride_sort_item
+ * would be useless and we simply argsort a list of indices instead.
+ *
+ * The caller should have already validated that 'ndim' matches for
+ * every array in the arrays list.
+ */
+NPY_NO_EXPORT void
+PyArray_CreateMultiSortedStridePerm(int narrays, PyArrayObject **arrays,
+ int ndim, int *out_strideperm)
+{
+ int i0, i1, ipos, j0, j1, iarrays;
+
+ /* Initialize the strideperm values to the identity. */
+ for (i0 = 0; i0 < ndim; ++i0) {
+ out_strideperm[i0] = i0;
+ }
+
+ /*
+ * This is the same as the custom stable insertion sort in
+ * the NpyIter object, but sorting in the reverse order as
+ * in the iterator. The iterator sorts from smallest stride
+ * to biggest stride (Fortran order), whereas here we sort
+ * from biggest stride to smallest stride (C order).
+ */
+ for (i0 = 1; i0 < ndim; ++i0) {
+
+ ipos = i0;
+ j0 = out_strideperm[i0];
+
+ for (i1 = i0 - 1; i1 >= 0; --i1) {
+ int ambig = 1, shouldswap = 0;
+
+ j1 = out_strideperm[i1];
+
+ for (iarrays = 0; iarrays < narrays; ++iarrays) {
+ if (PyArray_SHAPE(arrays[iarrays])[j0] != 1 &&
+ PyArray_SHAPE(arrays[iarrays])[j1] != 1) {
+ if (intp_abs(PyArray_STRIDES(arrays[iarrays])[j0]) <=
+ intp_abs(PyArray_STRIDES(arrays[iarrays])[j1])) {
+ /*
+ * Set swap even if it's not ambiguous already,
+ * because in the case of conflicts between
+ * different operands, C-order wins.
+ */
+ shouldswap = 0;
+ }
+ else {
+ /* Only set swap if it's still ambiguous */
+ if (ambig) {
+ shouldswap = 1;
+ }
+ }
+
+ /*
+ * A comparison has been done, so it's
+ * no longer ambiguous
+ */
+ ambig = 0;
+ }
+ }
+ /*
+ * If the comparison was unambiguous, either shift
+ * 'ipos' to 'i1' or stop looking for an insertion point
+ */
+ if (!ambig) {
+ if (shouldswap) {
+ ipos = i1;
+ }
+ else {
+ break;
+ }
+ }
+ }
+
+ /* Insert out_strideperm[i0] into the right place */
+ if (ipos != i0) {
+ for (i1 = i0; i1 > ipos; --i1) {
+ out_strideperm[i1] = out_strideperm[i1-1];
+ }
+ out_strideperm[ipos] = j0;
+ }
+ }
+}
+
/*NUMPY_API
* Ravel
* Returns a contiguous array
@@ -953,8 +1054,8 @@ PyArray_Ravel(PyArrayObject *a, NPY_ORDER order)
npy_intp stride;
int i, ndim = PyArray_NDIM(a);
- PyArray_CreateSortedStridePerm(PyArray_NDIM(a), PyArray_STRIDES(a),
- strideperm);
+ PyArray_CreateSortedStridePerm(PyArray_NDIM(a), PyArray_SHAPE(a),
+ PyArray_STRIDES(a), strideperm);
stride = PyArray_DESCR(a)->elsize;
for (i = ndim-1; i >= 0; --i) {
diff --git a/numpy/core/src/multiarray/shape.h b/numpy/core/src/multiarray/shape.h
index cffd847e2..a293254a7 100644
--- a/numpy/core/src/multiarray/shape.h
+++ b/numpy/core/src/multiarray/shape.h
@@ -8,4 +8,18 @@
NPY_NO_EXPORT PyObject *
build_shape_string(npy_intp n, npy_intp *vals);
+/*
+ * Creates a sorted stride perm matching the KEEPORDER behavior
+ * of the NpyIter object. Because this operates based on multiple
+ * input strides, the 'stride' member of the npy_stride_sort_item
+ * would be useless and we simply argsort a list of indices instead.
+ *
+ * The caller should have already validated that 'ndim' matches for
+ * every array in the arrays list.
+ */
+NPY_NO_EXPORT void
+PyArray_CreateMultiSortedStridePerm(int narrays, PyArrayObject **arrays,
+ int ndim, int *out_strideperm);
+
+
#endif
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index ee644569a..e4d5edca3 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -2582,13 +2582,12 @@ allocate_reduce_result(PyArrayObject *arr, npy_bool *axis_flags,
int idim, ndim = PyArray_NDIM(arr);
if (dtype == NULL) {
- dtype = PyArray_DESCR(arr);
+ dtype = PyArray_DTYPE(arr);
Py_INCREF(dtype);
}
- PyArray_CreateSortedStridePerm(PyArray_NDIM(arr),
- PyArray_STRIDES(arr),
- strideperm);
+ PyArray_CreateSortedStridePerm(PyArray_NDIM(arr), PyArray_SHAPE(arr),
+ PyArray_STRIDES(arr), strideperm);
/* Build the new strides and shape */
stride = dtype->elsize;
diff --git a/numpy/core/tests/test_maskna.py b/numpy/core/tests/test_maskna.py
index 85bcde99c..8c0f9272a 100644
--- a/numpy/core/tests/test_maskna.py
+++ b/numpy/core/tests/test_maskna.py
@@ -920,5 +920,28 @@ def test_array_maskna_diagonal():
res = a.diagonal(3)
assert_equal(res, [])
+def test_array_maskna_concatenate():
+ # np.concatenate
+ a = np.arange(6, maskna=True, dtype='i4').reshape(2,3)
+ a[1,0] = np.NA
+
+ b = np.array([[12],[13]], dtype='i4')
+ res = np.concatenate([a, b], axis=1)
+ assert_equal(np.isna(res), [[0,0,0,0], [1,0,0,0]])
+ assert_equal(res[~np.isna(res)], [0,1,2,12,4,5,13])
+ assert_equal(res.strides, (16, 4))
+
+ b = np.array([[10, np.NA, 11]], maskna=True, dtype='i4')
+ res = np.concatenate([a,b], axis=0)
+ assert_equal(np.isna(res), [[0,0,0], [1,0,0], [0,1,0]])
+ assert_equal(res[~np.isna(res)], [0,1,2,4,5,10,11])
+ assert_equal(res.strides, (12, 4))
+
+ b = np.array([[np.NA, 10]], order='F', maskna=True, dtype='i4')
+ res = np.concatenate([a.T, b], axis=0)
+ assert_equal(np.isna(res), [[0,1], [0,0], [0,0], [1,0]])
+ assert_equal(res[~np.isna(res)], [0,1,4,2,5,10])
+ assert_equal(res.strides, (4, 16))
+
if __name__ == "__main__":
run_module_suite()
diff --git a/numpy/core/tests/test_shape_base.py b/numpy/core/tests/test_shape_base.py
index f282dbe25..17a9cbbde 100644
--- a/numpy/core/tests/test_shape_base.py
+++ b/numpy/core/tests/test_shape_base.py
@@ -141,3 +141,5 @@ class TestVstack(TestCase):
desired = array([[1,2],[1,2]])
assert_array_equal(res,desired)
+if __name__ == "__main__":
+ run_module_suite()