ENH: missingdata: Rewrite PyArray_Concatenate to work with NA masks

It should also have less memory usage for heterogeneous inputs, because it no longer makes extra copies in that case.
author: Mark Wiebe <mwwiebe@gmail.com> 2011-08-16 16:54:16 -0700
committer: Charles Harris <charlesr.harris@gmail.com> 2011-08-27 07:26:55 -0600
commit: 9194b3af704df71aa9b1ff2f53f169848d0f9dc7 (patch)
tree: 1e6624032156dfa190e1adba79cc76e5700f19ce
parent: 99a21efff4b1f2292dc370c7c9c7c58f10385f2a (diff)
download: numpy-9194b3af704df71aa9b1ff2f53f169848d0f9dc7.tar.gz
15 files changed, 528 insertions, 188 deletions
diff --git a/doc/release/2.0.0-notes.rst b/doc/release/2.0.0-notes.rst
index d5aa99639..ddedf85de 100644
--- a/doc/release/2.0.0-notes.rst
+++ b/doc/release/2.0.0-notes.rst
@@ -29,9 +29,12 @@ What works with NA:
     * Array methods:
        + ndarray.clip, ndarray.min, ndarray.max, ndarray.sum, ndarray.prod,
          ndarray.conjugate, ndarray.diagonal
+       + numpy.concatenate
 
 What doesn't work with NA:
     * Fancy indexing, such as with lists and partial boolean masks.
+    * ndarray.flat and any other methods that use the old iterator
+      mechanism instead of the newer nditer.
     * UFunc.reduce of multi-dimensional arrays, with skipna=True and a ufunc
       that doesn't have an identity.
     * UFunc.accumulate, UFunc.reduceat.
diff --git a/doc/source/reference/c-api.array.rst b/doc/source/reference/c-api.array.rst
index 498b2a448..369360407 100644
--- a/doc/source/reference/c-api.array.rst
+++ b/doc/source/reference/c-api.array.rst
@@ -1667,18 +1667,20 @@ Conversion
     copied into every location. A -1 is returned if an error occurs,
     otherwise 0 is returned.
 
-.. cfunction:: PyObject* PyArray_View(PyArrayObject* self, PyArray_Descr* dtype)
-
-    Equivalent to :meth:`ndarray.view` (*self*, *dtype*). Return a new view of
-    the array *self* as possibly a different data-type, *dtype*. If
-    *dtype* is ``NULL``, then the returned array will have the same
-    data type as *self*. The new data-type must be consistent with
-    the size of *self*. Either the itemsizes must be identical, or
-    *self* must be single-segment and the total number of bytes must
-    be the same.  In the latter case the dimensions of the returned
-    array will be altered in the last (or first for Fortran-style
-    contiguous arrays) dimension. The data area of the returned array
-    and self is exactly the same.
+.. cfunction:: PyObject* PyArray_View(PyArrayObject* self, PyArray_Descr* dtype, PyTypeObject *ptype)
+
+    Equivalent to :meth:`ndarray.view` (*self*, *dtype*). Return a new
+    view of the array *self* as possibly a different data-type, *dtype*,
+    and different array subclass *ptype*.
+
+    If *dtype* is ``NULL``, then the returned array will have the same
+    data type as *self*. The new data-type must be consistent with the
+    size of *self*. Either the itemsizes must be identical, or *self* must
+    be single-segment and the total number of bytes must be the same.
+    In the latter case the dimensions of the returned array will be
+    altered in the last (or first for Fortran-style contiguous arrays)
+    dimension. The data area of the returned array and self is exactly
+    the same.
 
 
 Shape Manipulation
diff --git a/numpy/add_newdocs.py b/numpy/add_newdocs.py
index d992a7122..82f59c6b0 100644
--- a/numpy/add_newdocs.py
+++ b/numpy/add_newdocs.py
@@ -3702,7 +3702,7 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('put',
 
 add_newdoc('numpy.core.multiarray', 'copyto',
     """
-    copyto(dst, src, casting='same_kind', where=None)
+    copyto(dst, src, casting='same_kind', where=None, preservena=False)
 
     Copies values from `src` into `dst`, broadcasting as necessary.
     Raises a TypeError if the casting rule is violated, and if
@@ -3725,10 +3725,13 @@ add_newdoc('numpy.core.multiarray', 'copyto',
           * 'same_kind' means only safe casts or casts within a kind,
             like float64 to float32, are allowed.
           * 'unsafe' means any data conversions may be done.
-    where : array_like of bool
+    where : array_like of bool, optional
         A boolean array which is broadcasted to match the dimensions
         of `dst`, and selects elements to copy from `src` to `dst`
         wherever it contains the value True.
+    preservena : bool, optional
+        If set to True, leaves any NA values in `dst` untouched. This
+        is similar to the "hard mask" feature in numpy.ma.
 
     """)
 
diff --git a/numpy/core/shape_base.py b/numpy/core/shape_base.py
index 93de19299..8a4c80e27 100644
--- a/numpy/core/shape_base.py
+++ b/numpy/core/shape_base.py
@@ -267,5 +267,10 @@ def hstack(tup):
            [3, 4]])
 
     """
-    return _nx.concatenate(map(atleast_1d,tup),1)
+    arrs = map(atleast_1d,tup)
+    # As a special case, dimension 0 of 1-dimensional arrays is "horizontal"
+    if arrs[0].ndim == 1:
+        return _nx.concatenate(arrs, 0)
+    else:
+        return _nx.concatenate(arrs, 1)
 
diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c
index 0d72807ae..19b01cce9 100644
--- a/numpy/core/src/multiarray/convert_datatype.c
+++ b/numpy/core/src/multiarray/convert_datatype.c
@@ -1331,7 +1331,13 @@ NPY_NO_EXPORT PyArray_Descr *
 PyArray_MinScalarType(PyArrayObject *arr)
 {
     PyArray_Descr *dtype = PyArray_DESCR(arr);
-    if (PyArray_NDIM(arr) > 0 || !PyTypeNum_ISNUMBER(dtype->type_num)) {
+    /*
+     * If the array isn't a numeric scalar or is a scalar but with
+     * its value masked out, just return the array's dtype.
+     */
+    if (PyArray_NDIM(arr) > 0 || !PyTypeNum_ISNUMBER(dtype->type_num) ||
+                    (PyArray_HASMASKNA(arr) && !NpyMaskValue_IsExposed(
+                                    (npy_mask)*PyArray_MASKNA_DATA(arr)))) {
         Py_INCREF(dtype);
         return dtype;
     }
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index 48d81d75b..bcd33afd8 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -1175,6 +1175,7 @@ PyArray_NewLikeArray(PyArrayObject *prototype, NPY_ORDER order,
         int idim;
 
         PyArray_CreateSortedStridePerm(PyArray_NDIM(prototype),
+                                        PyArray_SHAPE(prototype),
                                         PyArray_STRIDES(prototype),
                                         strideperm);
 
diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c
index 39a8ba1bc..943859ae5 100644
--- a/numpy/core/src/multiarray/dtype_transfer.c
+++ b/numpy/core/src/multiarray/dtype_transfer.c
@@ -3922,7 +3922,7 @@ PyArray_PrepareOneRawArrayIter(int ndim, npy_intp *shape,
     }
 
     /* Sort the axes based on the destination strides */
-    PyArray_CreateSortedStridePerm(ndim, strides, strideperm);
+    PyArray_CreateSortedStridePerm(ndim, shape, strides, strideperm);
     for (i = 0; i < ndim; ++i) {
         int iperm = strideperm[ndim - i - 1].perm;
         out_shape[i] = shape[iperm];
@@ -4052,7 +4052,7 @@ PyArray_PrepareTwoRawArrayIter(int ndim, npy_intp *shape,
     }
 
     /* Sort the axes based on the destination strides */
-    PyArray_CreateSortedStridePerm(ndim, stridesA, strideperm);
+    PyArray_CreateSortedStridePerm(ndim, shape, stridesA, strideperm);
     for (i = 0; i < ndim; ++i) {
         int iperm = strideperm[ndim - i - 1].perm;
         out_shape[i] = shape[iperm];
@@ -4186,7 +4186,7 @@ PyArray_PrepareThreeRawArrayIter(int ndim, npy_intp *shape,
     }
 
     /* Sort the axes based on the destination strides */
-    PyArray_CreateSortedStridePerm(ndim, stridesA, strideperm);
+    PyArray_CreateSortedStridePerm(ndim, shape, stridesA, strideperm);
     for (i = 0; i < ndim; ++i) {
         int iperm = strideperm[ndim - i - 1].perm;
         out_shape[i] = shape[iperm];
@@ -4324,7 +4324,7 @@ PyArray_PrepareFourRawArrayIter(int ndim, npy_intp *shape,
     }
 
     /* Sort the axes based on the destination strides */
-    PyArray_CreateSortedStridePerm(ndim, stridesA, strideperm);
+    PyArray_CreateSortedStridePerm(ndim, shape, stridesA, strideperm);
     for (i = 0; i < ndim; ++i) {
         int iperm = strideperm[ndim - i - 1].perm;
         out_shape[i] = shape[iperm];
diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c
index b4c212871..d23ecef83 100644
--- a/numpy/core/src/multiarray/multiarraymodule.c
+++ b/numpy/core/src/multiarray/multiarraymodule.c
@@ -48,6 +48,8 @@ NPY_NO_EXPORT int NPY_NUMUSERTYPES = 0;
 #include "datetime_strings.h"
 #include "datetime_busday.h"
 #include "datetime_busdaycal.h"
+#include "shape.h"
+#include "ctors.h"
 #include "na_singleton.h"
 #include "na_mask.h"
 
@@ -61,7 +63,7 @@ NPY_NO_EXPORT double
 PyArray_GetPriority(PyObject *obj, double default_)
 {
     PyObject *ret;
-    double priority = PyArray_PRIORITY;
+    double priority = NPY_PRIORITY;
 
     if (PyArray_CheckExact(obj))
         return priority;
@@ -202,7 +204,7 @@ PyArray_AsCArray(PyObject **op, void *ptr, npy_intp *dims, int nd,
         break;
     case 2:
         n = PyArray_DIMS(ap)[0];
-        ptr2 = (char **)_pya_malloc(n * sizeof(char *));
+        ptr2 = (char **)PyArray_malloc(n * sizeof(char *));
         if (!ptr2) {
             goto fail;
         }
@@ -214,7 +216,7 @@ PyArray_AsCArray(PyObject **op, void *ptr, npy_intp *dims, int nd,
     case 3:
         n = PyArray_DIMS(ap)[0];
         m = PyArray_DIMS(ap)[1];
-        ptr3 = (char ***)_pya_malloc(n*(m+1) * sizeof(char *));
+        ptr3 = (char ***)PyArray_malloc(n*(m+1) * sizeof(char *));
         if (!ptr3) {
             goto fail;
         }
@@ -294,177 +296,354 @@ PyArray_Free(PyObject *op, void *ptr)
         return -1;
     }
     if (PyArray_NDIM(ap) >= 2) {
-        _pya_free(ptr);
+        PyArray_free(ptr);
     }
     Py_DECREF(ap);
     return 0;
 }
 
 
-static PyObject *
-_swap_and_concat(PyObject *op, int axis, int n)
+/*
+ * Concatenates a list of ndarrays.
+ */
+NPY_NO_EXPORT PyArrayObject *
+PyArray_ConcatenateArrays(int narrays, PyArrayObject **arrays, int axis)
 {
-    PyObject *newtup = NULL;
-    PyObject *otmp, *arr;
-    int i;
+    PyTypeObject *subtype = &PyArray_Type;
+    double priority = NPY_PRIORITY;
+    int iarrays, idim, ndim;
+    npy_intp shape[NPY_MAXDIMS], s, strides[NPY_MAXDIMS];
+    int strideperm[NPY_MAXDIMS];
+    PyArray_Descr *dtype = NULL;
+    PyArrayObject *ret = NULL;
+    PyArrayObject_fieldaccess *sliding_view = NULL;
+    int has_maskna;
 
-    newtup = PyTuple_New(n);
-    if (newtup == NULL) {
+    if (narrays <= 0) {
+        PyErr_SetString(PyExc_ValueError,
+                        "need at least one array to concatenate");
         return NULL;
     }
-    for (i = 0; i < n; i++) {
-        otmp = PySequence_GetItem(op, i);
-        arr = PyArray_FROM_O(otmp);
-        Py_DECREF(otmp);
-        if (arr == NULL) {
-            goto fail;
+
+    /* All the arrays must have the same 'ndim' */
+    ndim = PyArray_NDIM(arrays[0]);
+
+    if (ndim == 0) {
+        PyErr_SetString(PyExc_ValueError,
+                        "zero-dimensional arrays cannot be concatenated");
+        return NULL;
+    }
+
+    /* Handle standard Python negative indexing */
+    if (axis < 0) {
+        axis += narrays;
+    }
+    if (axis < 0 || axis >= ndim) {
+        PyErr_SetString(PyExc_IndexError,
+                        "axis out of bounds");
+        return NULL;
+    }
+
+    /*
+     * Figure out the final concatenated shape starting from the first
+     * array's shape. Also check whether any of the inputs have an
+     * NA mask.
+     */
+    memcpy(shape, PyArray_SHAPE(arrays[0]), ndim * sizeof(shape[0]));
+    has_maskna = PyArray_HASMASKNA(arrays[0]);
+    for (iarrays = 1; iarrays < narrays; ++iarrays) {
+        npy_intp *arr_shape;
+
+        if (PyArray_NDIM(arrays[iarrays]) != ndim) {
+            PyErr_SetString(PyExc_ValueError,
+                            "all the input arrays must have same "
+                            "number of dimensions");
+            return NULL;
         }
-        otmp = PyArray_SwapAxes((PyArrayObject *)arr, axis, 0);
-        Py_DECREF(arr);
-        if (otmp == NULL) {
-            goto fail;
+        arr_shape = PyArray_SHAPE(arrays[iarrays]);
+
+        for (idim = 0; idim < ndim; ++idim) {
+            /* Build up the size of the concatenation axis */
+            if (idim == axis) {
+                shape[idim] += arr_shape[idim];
+            }
+            /* Validate that the rest of the dimensions match */
+            else if (shape[idim] != arr_shape[idim]) {
+                PyErr_SetString(PyExc_ValueError,
+                                "all the input array dimensions "
+                                "except for the concatenation axis "
+                                "must match exactly");
+                return NULL;
+            }
+        }
+
+        has_maskna = has_maskna || PyArray_HASMASKNA(arrays[iarrays]);
+    }
+
+    /* Get the priority subtype for the array */
+    for (iarrays = 0; iarrays < narrays; ++iarrays) {
+        if (Py_TYPE(arrays[iarrays]) != subtype) {
+            double pr = PyArray_GetPriority((PyObject *)(arrays[iarrays]), 0.0);
+            if (pr > priority) {
+                priority = pr;
+                subtype = Py_TYPE(arrays[iarrays]);
+            }
         }
-        PyTuple_SET_ITEM(newtup, i, otmp);
     }
-    otmp = PyArray_Concatenate(newtup, 0);
-    Py_DECREF(newtup);
-    if (otmp == NULL) {
+
+    /* Get the resulting dtype from combining all the arrays */
+    dtype = PyArray_ResultType(narrays, arrays, 0, NULL);
+    if (dtype == NULL) {
         return NULL;
     }
-    arr = PyArray_SwapAxes((PyArrayObject *)otmp, axis, 0);
-    Py_DECREF(otmp);
-    return arr;
 
- fail:
-    Py_DECREF(newtup);
-    return NULL;
+    /*
+     * Figure out the permutation to apply to the strides to match
+     * the memory layout of the input arrays, using ambiguity
+     * resolution rules matching that of the NpyIter.
+     */
+    PyArray_CreateMultiSortedStridePerm(narrays, arrays, ndim, strideperm);
+    s = dtype->elsize;
+    for (idim = ndim-1; idim >= 0; --idim) {
+        int iperm = strideperm[idim];
+        strides[iperm] = s;
+        s *= shape[iperm];
+    }
+
+    /* Allocate the array for the result. This steals the 'dtype' reference. */
+    ret = (PyArrayObject *)PyArray_NewFromDescr(subtype,
+                                                    dtype,
+                                                    ndim,
+                                                    shape,
+                                                    strides,
+                                                    NULL,
+                                                    0,
+                                                    NULL);
+    if (ret == NULL) {
+        return NULL;
+    }
+
+    /* Add an NA mask if required */
+    if (has_maskna) {
+        if (PyArray_AllocateMaskNA(ret, 1, 0, 1) < 0) {
+            Py_DECREF(ret);
+            return NULL;
+        }
+    }
+
+    /*
+     * Create a view which slides through ret for assigning the
+     * successive input arrays.
+     */
+    sliding_view = (PyArrayObject_fieldaccess *)PyArray_View(ret,
+                                                        NULL, &PyArray_Type);
+    if (sliding_view == NULL) {
+        Py_DECREF(ret);
+        return NULL;
+    }
+    for (iarrays = 0; iarrays < narrays; ++iarrays) {
+        /* Set the dimension to match the input array's */
+        sliding_view->dimensions[axis] = PyArray_SHAPE(arrays[iarrays])[axis];
+
+        /* Copy the data for this array */
+        if (PyArray_AssignArray((PyArrayObject *)sliding_view, arrays[iarrays],
+                            NULL, NPY_SAME_KIND_CASTING, 0, NULL) < 0) {
+            Py_DECREF(sliding_view);
+            Py_DECREF(ret);
+            return NULL;
+        }
+
+        /* Slide to the start of the next window */
+        sliding_view->data += sliding_view->dimensions[axis] *
+                                 sliding_view->strides[axis];
+        if (has_maskna) {
+            sliding_view->maskna_data += sliding_view->dimensions[axis] *
+                                     sliding_view->maskna_strides[axis];
+        }
+    }
+
+    Py_DECREF(sliding_view);
+    return ret;
+}
+
+/*
+ * Concatenates a list of ndarrays, flattening each in C-order.
+ */
+NPY_NO_EXPORT PyArrayObject *
+PyArray_ConcatenateFlattenedArrays(int narrays, PyArrayObject **arrays,
+                                    NPY_ORDER order)
+{
+    PyTypeObject *subtype = &PyArray_Type;
+    double priority = NPY_PRIORITY;
+    int iarrays;
+    npy_intp shape[2], strides[2];
+    PyArray_Descr *dtype = NULL;
+    PyArrayObject *ret = NULL;
+    PyArrayObject_fieldaccess *sliding_view = NULL;
+    int has_maskna;
+
+    if (narrays <= 0) {
+        PyErr_SetString(PyExc_ValueError,
+                        "need at least one array to concatenate");
+        return NULL;
+    }
+
+    /* All the arrays must have the same total number of elements */
+    shape[0] = narrays;
+    shape[1] = PyArray_SIZE(arrays[0]);
+
+    /*
+     * Figure out the final concatenated shape starting from the first
+     * array's shape. Also check whether any of the inputs have an
+     * NA mask.
+     */
+    has_maskna = PyArray_HASMASKNA(arrays[0]);
+    for (iarrays = 1; iarrays < narrays; ++iarrays) {
+        if (PyArray_SIZE(arrays[iarrays]) != shape[1]) {
+            PyErr_SetString(PyExc_ValueError,
+                            "all the input arrays must have same "
+                            "number of elements");
+            return NULL;
+        }
+
+        has_maskna = has_maskna || PyArray_HASMASKNA(arrays[iarrays]);
+    }
+
+    /* Get the priority subtype for the array */
+    for (iarrays = 0; iarrays < narrays; ++iarrays) {
+        if (Py_TYPE(arrays[iarrays]) != subtype) {
+            double pr = PyArray_GetPriority((PyObject *)(arrays[iarrays]), 0.0);
+            if (pr > priority) {
+                priority = pr;
+                subtype = Py_TYPE(arrays[iarrays]);
+            }
+        }
+    }
+
+    /* Get the resulting dtype from combining all the arrays */
+    dtype = PyArray_ResultType(narrays, arrays, 0, NULL);
+    if (dtype == NULL) {
+        return NULL;
+    }
+
+    strides[1] = dtype->elsize;
+    strides[2] = strides[1] * shape[1];
+
+    /* Allocate the array for the result. This steals the 'dtype' reference. */
+    ret = (PyArrayObject *)PyArray_NewFromDescr(subtype,
+                                                    dtype,
+                                                    2,
+                                                    shape,
+                                                    strides,
+                                                    NULL,
+                                                    0,
+                                                    NULL);
+    if (ret == NULL) {
+        return NULL;
+    }
+
+    /* Add an NA mask if required */
+    if (has_maskna) {
+        if (PyArray_AllocateMaskNA(ret, 1, 0, 1) < 0) {
+            Py_DECREF(ret);
+            return NULL;
+        }
+    }
+
+    /*
+     * Create a view which slides through ret for assigning the
+     * successive input arrays.
+     */
+    sliding_view = (PyArrayObject_fieldaccess *)PyArray_View(ret,
+                                                        NULL, &PyArray_Type);
+    if (sliding_view == NULL) {
+        Py_DECREF(ret);
+        return NULL;
+    }
+    /* Each array gets flattened into one slot along 'axis' */
+    sliding_view->dimensions[0] = 1;
+    for (iarrays = 0; iarrays < narrays; ++iarrays) {
+
+        /* Copy the data for this array */
+        if (PyArray_CopyAsFlat((PyArrayObject *)sliding_view, arrays[iarrays],
+                            order) < 0) {
+            Py_DECREF(sliding_view);
+            Py_DECREF(ret);
+            return NULL;
+        }
+
+        /* Slide to the start of the next window */
+        sliding_view->data += sliding_view->strides[0];
+        if (has_maskna) {
+            sliding_view->maskna_data += sliding_view->maskna_strides[0];
+        }
+    }
+
+    Py_DECREF(sliding_view);
+    return ret;
 }
 
+
 /*NUMPY_API
  * Concatenate
  *
  * Concatenate an arbitrary Python sequence into an array.
  * op is a python object supporting the sequence interface.
  * Its elements will be concatenated together to form a single
- * multidimensional array. If axis is MAX_DIMS or bigger, then
+ * multidimensional array. If axis is NPY_MAXDIMS or bigger, then
  * each sequence object will be flattened before concatenation
 */
 NPY_NO_EXPORT PyObject *
 PyArray_Concatenate(PyObject *op, int axis)
 {
-    PyArrayObject *ret, **mps;
-    PyObject *otmp;
-    int i, n, tmp, nd = 0, new_dim;
-    char *data;
-    PyTypeObject *subtype;
-    double prior1, prior2;
-    npy_intp numbytes;
+    int iarrays, narrays;
+    PyArrayObject **arrays;
+    PyArrayObject *ret;
 
-    n = PySequence_Length(op);
-    if (n == -1) {
+    /* Convert the input list into arrays */
+    narrays = PySequence_Size(op);
+    if (narrays < 0) {
         return NULL;
     }
-    if (n == 0) {
-        PyErr_SetString(PyExc_ValueError,
-                        "concatenation of zero-length sequences is "\
-                        "impossible");
+    arrays = PyArray_malloc(narrays * sizeof(arrays[0]));
+    if (arrays == NULL) {
+        PyErr_NoMemory();
         return NULL;
     }
-
-    if ((axis < 0) || ((0 < axis) && (axis < MAX_DIMS))) {
-        return _swap_and_concat(op, axis, n);
-    }
-    mps = PyArray_ConvertToCommonType(op, &n);
-    if (mps == NULL) {
-        return NULL;
-    }
-
-    /*
-     * Make sure these arrays are legal to concatenate.
-     * Must have same dimensions except d0
-     */
-    prior1 = PyArray_PRIORITY;
-    subtype = &PyArray_Type;
-    ret = NULL;
-    for (i = 0; i < n; i++) {
-        if (axis >= MAX_DIMS) {
-            otmp = PyArray_Ravel(mps[i],0);
-            Py_DECREF(mps[i]);
-            mps[i] = (PyArrayObject *)otmp;
-        }
-        if (Py_TYPE(mps[i]) != subtype) {
-            prior2 = PyArray_GetPriority((PyObject *)(mps[i]), 0.0);
-            if (prior2 > prior1) {
-                prior1 = prior2;
-                subtype = Py_TYPE(mps[i]);
-            }
-        }
-    }
-
-    new_dim = 0;
-    for (i = 0; i < n; i++) {
-        if (mps[i] == NULL) {
+    for (iarrays = 0; iarrays < narrays; ++iarrays) {
+        PyObject *item = PySequence_GetItem(op, iarrays);
+        if (item == NULL) {
+            narrays = iarrays;
             goto fail;
         }
-        if (i == 0) {
-            nd = PyArray_NDIM(mps[i]);
-        }
-        else {
-            if (nd != PyArray_NDIM(mps[i])) {
-                PyErr_SetString(PyExc_ValueError,
-                                "arrays must have same "\
-                                "number of dimensions");
-                goto fail;
-            }
-            if (!PyArray_CompareLists(PyArray_DIMS(mps[0])+1,
-                                      PyArray_DIMS(mps[i])+1,
-                                      nd-1)) {
-                PyErr_SetString(PyExc_ValueError,
-                                "array dimensions must "\
-                                "agree except for d_0");
-                goto fail;
-            }
-        }
-        if (nd == 0) {
-            PyErr_SetString(PyExc_ValueError,
-                            "0-d arrays can't be concatenated");
+        arrays[iarrays] = (PyArrayObject *)PyArray_FromAny(item, NULL,
+                                            0, 0, NPY_ARRAY_ALLOWNA, NULL);
+        Py_DECREF(item);
+        if (arrays[iarrays] == NULL) {
+            narrays = iarrays;
             goto fail;
         }
-        new_dim += PyArray_DIMS(mps[i])[0];
     }
-    tmp = PyArray_DIMS(mps[0])[0];
-    PyArray_DIMS(mps[0])[0] = new_dim;
-    Py_INCREF(PyArray_DESCR(mps[0]));
-    ret = (PyArrayObject *)PyArray_NewFromDescr(subtype,
-                                                PyArray_DESCR(mps[0]), nd,
-                                                PyArray_DIMS(mps[0]),
-                                                NULL, NULL, 0,
-                                                (PyObject *)ret);
-    PyArray_DIMS(mps[0])[0] = tmp;
 
-    if (ret == NULL) {
-        goto fail;
+    if (axis >= NPY_MAXDIMS) {
+        ret = PyArray_ConcatenateFlattenedArrays(narrays, arrays, NPY_CORDER);
     }
-    data = PyArray_DATA(ret);
-    for (i = 0; i < n; i++) {
-        numbytes = PyArray_NBYTES(mps[i]);
-        memcpy(data, PyArray_DATA(mps[i]), numbytes);
-        data += numbytes;
+    else {
+        ret = PyArray_ConcatenateArrays(narrays, arrays, axis);
     }
 
-    PyArray_INCREF(ret);
-    for (i = 0; i < n; i++) {
-        Py_XDECREF(mps[i]);
+    for (iarrays = 0; iarrays < narrays; ++iarrays) {
+        Py_DECREF(arrays[iarrays]);
     }
-    PyDataMem_FREE(mps);
+
     return (PyObject *)ret;
 
- fail:
-    Py_XDECREF(ret);
-    for (i = 0; i < n; i++) {
-        Py_XDECREF(mps[i]);
+fail:
+    /* 'narrays' was set to how far we got in the conversion */
+    for (iarrays = 0; iarrays < narrays; ++iarrays) {
+        Py_DECREF(arrays[iarrays]);
     }
-    PyDataMem_FREE(mps);
+
     return NULL;
 }
 
@@ -652,7 +831,7 @@ PyArray_InnerProduct(PyObject *op1, PyObject *op2)
     int typenum, nd, axis;
     npy_intp is1, is2, os;
     char *op;
-    npy_intp dimensions[MAX_DIMS];
+    npy_intp dimensions[NPY_MAXDIMS];
     PyArray_DotFunc *dot;
     PyArray_Descr *typec;
     NPY_BEGIN_THREADS_DEF;
@@ -761,7 +940,7 @@ PyArray_MatrixProduct2(PyObject *op1, PyObject *op2, PyArrayObject* out)
     int typenum, nd, axis, matchDim;
     npy_intp is1, is2, os;
     char *op;
-    npy_intp dimensions[MAX_DIMS];
+    npy_intp dimensions[NPY_MAXDIMS];
     PyArray_DotFunc *dot;
     PyArray_Descr *typec;
     NPY_BEGIN_THREADS_DEF;
@@ -1510,8 +1689,8 @@ PyArray_EquivTypenums(int typenum1, int typenum2)
 static PyObject *
 _prepend_ones(PyArrayObject *arr, int nd, int ndmin)
 {
-    npy_intp newdims[MAX_DIMS];
-    npy_intp newstrides[MAX_DIMS];
+    npy_intp newdims[NPY_MAXDIMS];
+    npy_intp newstrides[NPY_MAXDIMS];
     int i, k, num;
     PyArrayObject *ret;
     PyArray_Descr *dtype;
@@ -1723,7 +1902,7 @@ array_copyto(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kwds)
             goto fail;
         }
         wheremask = (PyArrayObject *)PyArray_FromAny(wheremask_in,
-                                                dtype, 0, 0, 0, NULL);
+                                        dtype, 0, 0, NPY_ARRAY_ALLOWNA, NULL);
         if (wheremask == NULL) {
             goto fail;
         }
@@ -1879,7 +2058,7 @@ array_scalar(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kwds)
     }
     else {
         if (obj == NULL) {
-            dptr = _pya_malloc(typecode->elsize);
+            dptr = PyArray_malloc(typecode->elsize);
             if (dptr == NULL) {
                 return PyErr_NoMemory();
             }
@@ -1904,7 +2083,7 @@ array_scalar(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kwds)
 
     /* free dptr which contains zeros */
     if (alloc) {
-        _pya_free(dptr);
+        PyArray_free(dptr);
     }
     return ret;
 }
@@ -2716,7 +2895,7 @@ static PyObject *
 array_set_datetimeparse_function(PyObject *NPY_UNUSED(self),
         PyObject *NPY_UNUSED(args), PyObject *NPY_UNUSED(kwds))
 {
-    PyErr_SetString(PyExc_RuntimeError, "This function is to be removed");
+    PyErr_SetString(PyExc_RuntimeError, "This function has been removed");
     return NULL;
 }
 
@@ -2823,7 +3002,7 @@ array_can_cast_safely(PyObject *NPY_UNUSED(self), PyObject *args,
                                 PyArray_IsPythonNumber(from_obj)) {
         PyArrayObject *arr;
         arr = (PyArrayObject *)PyArray_FromAny(from_obj,
-                                        NULL, 0, 0, 0, NULL);
+                                        NULL, 0, 0, NPY_ARRAY_ALLOWNA, NULL);
         if (arr == NULL) {
             goto finish;
         }
@@ -2885,7 +3064,8 @@ array_min_scalar_type(PyObject *NPY_UNUSED(dummy), PyObject *args)
         return NULL;
     }
 
-    array = (PyArrayObject *)PyArray_FromAny(array_in, NULL, 0, 0, 0, NULL);
+    array = (PyArrayObject *)PyArray_FromAny(array_in,
+                                NULL, 0, 0, NPY_ARRAY_ALLOWNA, NULL);
     if (array == NULL) {
         return NULL;
     }
@@ -2930,7 +3110,7 @@ array_result_type(PyObject *NPY_UNUSED(dummy), PyObject *args)
                 goto finish;
             }
             arr[narr] = (PyArrayObject *)PyArray_FromAny(obj,
-                                            NULL, 0, 0, 0, NULL);
+                                        NULL, 0, 0, NPY_ARRAY_ALLOWNA, NULL);
             if (arr[narr] == NULL) {
                 goto finish;
             }
@@ -3027,17 +3207,17 @@ _SigSegv_Handler(int signum)
 }
 #endif
 
-#define _test_code() {                          \
-        test = *((char*)memptr);                \
-        if (!ro) {                              \
-            *((char *)memptr) = '\0';           \
-            *((char *)memptr) = test;           \
-        }                                       \
-        test = *((char*)memptr+size-1);         \
-        if (!ro) {                              \
-            *((char *)memptr+size-1) = '\0';    \
-            *((char *)memptr+size-1) = test;    \
-        }                                       \
+#define _test_code() { \
+        test = *((char*)memptr); \
+        if (!ro) { \
+            *((char *)memptr) = '\0'; \
+            *((char *)memptr) = test; \
+        } \
+        test = *((char*)memptr+size-1); \
+        if (!ro) { \
+            *((char *)memptr+size-1) = '\0'; \
+            *((char *)memptr+size-1) = test; \
+        } \
     }
 
 static PyObject *
@@ -3901,7 +4081,7 @@ PyMODINIT_FUNC initmultiarray(void) {
     if (!d) {
         goto err;
     }
-    PyArray_Type.tp_free = _pya_free;
+    PyArray_Type.tp_free = PyArray_free;
     if (PyType_Ready(&PyArray_Type) < 0) {
         return RETVAL;
     }
@@ -3911,7 +4091,7 @@ PyMODINIT_FUNC initmultiarray(void) {
     PyArrayIter_Type.tp_iter = PyObject_SelfIter;
     NpyIter_Type.tp_iter = PyObject_SelfIter;
     PyArrayMultiIter_Type.tp_iter = PyObject_SelfIter;
-    PyArrayMultiIter_Type.tp_free = _pya_free;
+    PyArrayMultiIter_Type.tp_free = PyArray_free;
     if (PyType_Ready(&PyArrayIter_Type) < 0) {
         return RETVAL;
     }
diff --git a/numpy/core/src/multiarray/na_mask.c b/numpy/core/src/multiarray/na_mask.c
index 6d7677d5e..0cb05beab 100644
--- a/numpy/core/src/multiarray/na_mask.c
+++ b/numpy/core/src/multiarray/na_mask.c
@@ -294,7 +294,8 @@ PyArray_AllocateMaskNA(PyArrayObject *arr,
         shape = fa->dimensions;
 
         /* This causes the NA mask and data memory orderings to match */
-        PyArray_CreateSortedStridePerm(fa->nd, fa->strides, strideperm);
+        PyArray_CreateSortedStridePerm(fa->nd, fa->dimensions,
+                                            fa->strides, strideperm);
         stride = maskna_dtype->elsize;
         for (i = fa->nd-1; i >= 0; --i) {
             npy_intp i_perm = strideperm[i].perm;
diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c
index 07efba65c..93c11b60d 100644
--- a/numpy/core/src/multiarray/nditer_constr.c
+++ b/numpy/core/src/multiarray/nditer_constr.c
@@ -2369,7 +2369,7 @@ npyiter_reverse_axis_ordering(NpyIter *iter)
     NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_IDENTPERM;
 }
 
-static npy_intp
+static NPY_INLINE npy_intp
 intp_abs(npy_intp x)
 {
     return (x < 0) ? -x : x;
diff --git a/numpy/core/src/multiarray/shape.c b/numpy/core/src/multiarray/shape.c
index 4f84fe347..b49b4d988 100644
--- a/numpy/core/src/multiarray/shape.c
+++ b/numpy/core/src/multiarray/shape.c
@@ -876,10 +876,7 @@ int _npy_stride_sort_item_comparator(const void *a, const void *b)
         bstride = -bstride;
     }
 
-    if (astride > bstride) {
-        return -1;
-    }
-    else if (astride == bstride) {
+    if (astride == bstride || astride == 0 || bstride == 0) {
         /*
          * Make the qsort stable by next comparing the perm order.
          * (Note that two perm entries will never be equal)
@@ -888,6 +885,9 @@ int _npy_stride_sort_item_comparator(const void *a, const void *b)
                 bperm = ((npy_stride_sort_item *)b)->perm;
         return (aperm < bperm) ? -1 : 1;
     }
+    if (astride > bstride) {
+        return -1;
+    }
     else {
         return 1;
     }
@@ -901,22 +901,123 @@ int _npy_stride_sort_item_comparator(const void *a, const void *b)
  * [(2, 12), (0, 4), (1, -2)].
  */
 NPY_NO_EXPORT void
-PyArray_CreateSortedStridePerm(int ndim, npy_intp * strides,
-                           npy_stride_sort_item *strideperm)
+PyArray_CreateSortedStridePerm(int ndim, npy_intp *shape,
+                        npy_intp *strides,
+                        npy_stride_sort_item *out_strideperm)
 {
     int i;
 
     /* Set up the strideperm values */
     for (i = 0; i < ndim; ++i) {
-        strideperm[i].perm = i;
-        strideperm[i].stride = strides[i];
+        out_strideperm[i].perm = i;
+        if (shape[i] == 1) {
+            out_strideperm[i].stride = 0;
+        }
+        else {
+            out_strideperm[i].stride = strides[i];
+        }
     }
 
     /* Sort them */
-    qsort(strideperm, ndim, sizeof(npy_stride_sort_item),
+    qsort(out_strideperm, ndim, sizeof(npy_stride_sort_item),
                                     &_npy_stride_sort_item_comparator);
 }
 
+static NPY_INLINE npy_intp
+intp_abs(npy_intp x)
+{
+    return (x < 0) ? -x : x;
+}
+
+
+
+/*
+ * Creates a sorted stride perm matching the KEEPORDER behavior
+ * of the NpyIter object. Because this operates based on multiple
+ * input strides, the 'stride' member of the npy_stride_sort_item
+ * would be useless and we simply argsort a list of indices instead.
+ *
+ * The caller should have already validated that 'ndim' matches for
+ * every array in the arrays list.
+ */
+NPY_NO_EXPORT void
+PyArray_CreateMultiSortedStridePerm(int narrays, PyArrayObject **arrays,
+                        int ndim, int *out_strideperm)
+{
+    int i0, i1, ipos, j0, j1, iarrays;
+
+    /* Initialize the strideperm values to the identity. */
+    for (i0 = 0; i0 < ndim; ++i0) {
+        out_strideperm[i0] = i0;
+    }
+
+    /*
+     * This is the same as the custom stable insertion sort in
+     * the NpyIter object, but sorting in the reverse order as
+     * in the iterator. The iterator sorts from smallest stride
+     * to biggest stride (Fortran order), whereas here we sort
+     * from biggest stride to smallest stride (C order).
+     */
+    for (i0 = 1; i0 < ndim; ++i0) {
+
+        ipos = i0;
+        j0 = out_strideperm[i0];
+
+        for (i1 = i0 - 1; i1 >= 0; --i1) {
+            int ambig = 1, shouldswap = 0;
+
+            j1 = out_strideperm[i1];
+
+            for (iarrays = 0; iarrays < narrays; ++iarrays) {
+                if (PyArray_SHAPE(arrays[iarrays])[j0] != 1 &&
+                            PyArray_SHAPE(arrays[iarrays])[j1] != 1) {
+                    if (intp_abs(PyArray_STRIDES(arrays[iarrays])[j0]) <=
+                            intp_abs(PyArray_STRIDES(arrays[iarrays])[j1])) {
+                        /*
+                         * Set swap even if it's not ambiguous already,
+                         * because in the case of conflicts between
+                         * different operands, C-order wins.
+                         */
+                        shouldswap = 0;
+                    }
+                    else {
+                        /* Only set swap if it's still ambiguous */
+                        if (ambig) {
+                            shouldswap = 1;
+                        }
+                    }
+
+                    /*
+                     * A comparison has been done, so it's
+                     * no longer ambiguous
+                     */
+                    ambig = 0;
+                }
+            }
+            /*
+             * If the comparison was unambiguous, either shift
+             * 'ipos' to 'i1' or stop looking for an insertion point
+             */
+            if (!ambig) {
+                if (shouldswap) {
+                    ipos = i1;
+                }
+                else {
+                    break;
+                }
+            }
+        }
+
+        /* Insert out_strideperm[i0] into the right place */
+        if (ipos != i0) {
+            for (i1 = i0; i1 > ipos; --i1) {
+                out_strideperm[i1] = out_strideperm[i1-1];
+            }
+            out_strideperm[ipos] = j0;
+        }
+    }
+}
+
 /*NUMPY_API
  * Ravel
  * Returns a contiguous array
@@ -953,8 +1054,8 @@ PyArray_Ravel(PyArrayObject *a, NPY_ORDER order)
         npy_intp stride;
         int i, ndim = PyArray_NDIM(a);
 
-        PyArray_CreateSortedStridePerm(PyArray_NDIM(a), PyArray_STRIDES(a),
-                                        strideperm);
+        PyArray_CreateSortedStridePerm(PyArray_NDIM(a), PyArray_SHAPE(a),
+                                PyArray_STRIDES(a), strideperm);
 
         stride = PyArray_DESCR(a)->elsize;
         for (i = ndim-1; i >= 0; --i) {
diff --git a/numpy/core/src/multiarray/shape.h b/numpy/core/src/multiarray/shape.h
index cffd847e2..a293254a7 100644
--- a/numpy/core/src/multiarray/shape.h
+++ b/numpy/core/src/multiarray/shape.h
@@ -8,4 +8,18 @@
 NPY_NO_EXPORT PyObject *
 build_shape_string(npy_intp n, npy_intp *vals);
 
+/*
+ * Creates a sorted stride perm matching the KEEPORDER behavior
+ * of the NpyIter object. Because this operates based on multiple
+ * input strides, the 'stride' member of the npy_stride_sort_item
+ * would be useless and we simply argsort a list of indices instead.
+ *
+ * The caller should have already validated that 'ndim' matches for
+ * every array in the arrays list.
+ */
+NPY_NO_EXPORT void
+PyArray_CreateMultiSortedStridePerm(int narrays, PyArrayObject **arrays,
+                        int ndim, int *out_strideperm);
+
+
 #endif
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index ee644569a..e4d5edca3 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -2582,13 +2582,12 @@ allocate_reduce_result(PyArrayObject *arr, npy_bool *axis_flags,
     int idim, ndim = PyArray_NDIM(arr);
 
     if (dtype == NULL) {
-        dtype = PyArray_DESCR(arr);
+        dtype = PyArray_DTYPE(arr);
         Py_INCREF(dtype);
     }
 
-    PyArray_CreateSortedStridePerm(PyArray_NDIM(arr),
-                                    PyArray_STRIDES(arr),
-                                    strideperm);
+    PyArray_CreateSortedStridePerm(PyArray_NDIM(arr), PyArray_SHAPE(arr),
+                                    PyArray_STRIDES(arr), strideperm);
 
     /* Build the new strides and shape */
     stride = dtype->elsize;
diff --git a/numpy/core/tests/test_maskna.py b/numpy/core/tests/test_maskna.py
index 85bcde99c..8c0f9272a 100644
--- a/numpy/core/tests/test_maskna.py
+++ b/numpy/core/tests/test_maskna.py
@@ -920,5 +920,28 @@ def test_array_maskna_diagonal():
     res = a.diagonal(3)
     assert_equal(res, [])
 
+def test_array_maskna_concatenate():
+    # np.concatenate
+    a = np.arange(6, maskna=True, dtype='i4').reshape(2,3)
+    a[1,0] = np.NA
+
+    b = np.array([[12],[13]], dtype='i4')
+    res = np.concatenate([a, b], axis=1)
+    assert_equal(np.isna(res), [[0,0,0,0], [1,0,0,0]])
+    assert_equal(res[~np.isna(res)], [0,1,2,12,4,5,13])
+    assert_equal(res.strides, (16, 4))
+
+    b = np.array([[10, np.NA, 11]], maskna=True, dtype='i4')
+    res = np.concatenate([a,b], axis=0)
+    assert_equal(np.isna(res), [[0,0,0], [1,0,0], [0,1,0]])
+    assert_equal(res[~np.isna(res)], [0,1,2,4,5,10,11])
+    assert_equal(res.strides, (12, 4))
+
+    b = np.array([[np.NA, 10]], order='F', maskna=True, dtype='i4')
+    res = np.concatenate([a.T, b], axis=0)
+    assert_equal(np.isna(res), [[0,1], [0,0], [0,0], [1,0]])
+    assert_equal(res[~np.isna(res)], [0,1,4,2,5,10])
+    assert_equal(res.strides, (4, 16))
+
 if __name__ == "__main__":
     run_module_suite()
diff --git a/numpy/core/tests/test_shape_base.py b/numpy/core/tests/test_shape_base.py
index f282dbe25..17a9cbbde 100644
--- a/numpy/core/tests/test_shape_base.py
+++ b/numpy/core/tests/test_shape_base.py
@@ -141,3 +141,5 @@ class TestVstack(TestCase):
         desired = array([[1,2],[1,2]])
         assert_array_equal(res,desired)
 
+if __name__ == "__main__":
+    run_module_suite()
author	Mark Wiebe <mwwiebe@gmail.com>	2011-08-16 16:54:16 -0700
committer	Charles Harris <charlesr.harris@gmail.com>	2011-08-27 07:26:55 -0600
commit	9194b3af704df71aa9b1ff2f53f169848d0f9dc7 (patch)
tree	1e6624032156dfa190e1adba79cc76e5700f19ce
parent	99a21efff4b1f2292dc370c7c9c7c58f10385f2a (diff)
download	numpy-9194b3af704df71aa9b1ff2f53f169848d0f9dc7.tar.gz