11 files changed, 556 insertions, 383 deletions
diff --git a/numpy/core/SConscript b/numpy/core/SConscript
index 1a0171224..2afe02477 100644
--- a/numpy/core/SConscript
+++ b/numpy/core/SConscript
@@ -468,6 +468,7 @@ if ENABLE_SEPARATE_COMPILATION:
         pjoin('src', 'multiarray', 'item_selection.c'),
         pjoin('src', 'multiarray', 'calculation.c'),
         pjoin('src', 'multiarray', 'common.c'),
+        pjoin('src', 'multiarray', 'reduction.c'),
         pjoin('src', 'multiarray', 'refcount.c'),
         pjoin('src', 'multiarray', 'conversion_utils.c'),
         pjoin('src', 'multiarray', 'usertypes.c'),
diff --git a/numpy/core/code_generators/genapi.py b/numpy/core/code_generators/genapi.py
index b3b56fa47..fdf043ddb 100644
--- a/numpy/core/code_generators/genapi.py
+++ b/numpy/core/code_generators/genapi.py
@@ -21,43 +21,44 @@ from os.path import join
 __docformat__ = 'restructuredtext'
 
 # The files under src/ that are scanned for API functions
-API_FILES = [join('multiarray', 'methods.c'),
-             join('multiarray', 'arrayobject.c'),
+API_FILES = [join('multiarray', 'array_assign_array.c'),
              join('multiarray', 'array_assign_scalar.c'),
-             join('multiarray', 'array_assign_array.c'),
-             join('multiarray', 'flagsobject.c'),
-             join('multiarray', 'descriptor.c'),
-             join('multiarray', 'iterators.c'),
-             join('multiarray', 'getset.c'),
-             join('multiarray', 'number.c'),
-             join('multiarray', 'sequence.c'),
-             join('multiarray', 'ctors.c'),
-             join('multiarray', 'convert.c'),
-             join('multiarray', 'shape.c'),
-             join('multiarray', 'item_selection.c'),
-             join('multiarray', 'convert_datatype.c'),
+             join('multiarray', 'arrayobject.c'),
              join('multiarray', 'arraytypes.c.src'),
-             join('multiarray', 'multiarraymodule.c'),
-             join('multiarray', 'scalartypes.c.src'),
-             join('multiarray', 'scalarapi.c'),
+             join('multiarray', 'buffer.c'),
              join('multiarray', 'calculation.c'),
-             join('multiarray', 'usertypes.c'),
-             join('multiarray', 'refcount.c'),
              join('multiarray', 'conversion_utils.c'),
-             join('multiarray', 'buffer.c'),
+             join('multiarray', 'convert.c'),
+             join('multiarray', 'convert_datatype.c'),
+             join('multiarray', 'ctors.c'),
              join('multiarray', 'datetime.c'),
-             join('multiarray', 'datetime_strings.c'),
              join('multiarray', 'datetime_busday.c'),
              join('multiarray', 'datetime_busdaycal.c'),
+             join('multiarray', 'datetime_strings.c'),
+             join('multiarray', 'descriptor.c'),
+             join('multiarray', 'einsum.c.src'),
+             join('multiarray', 'flagsobject.c'),
+             join('multiarray', 'getset.c'),
+             join('multiarray', 'item_selection.c'),
+             join('multiarray', 'iterators.c'),
+             join('multiarray', 'methods.c'),
+             join('multiarray', 'multiarraymodule.c'),
+             join('multiarray', 'na_mask.c'),
              join('multiarray', 'nditer_api.c'),
              join('multiarray', 'nditer_constr.c'),
              join('multiarray', 'nditer_pywrap.c'),
              join('multiarray', 'nditer_templ.c.src'),
-             join('multiarray', 'einsum.c.src'),
-             join('multiarray', 'na_mask.c'),
+             join('multiarray', 'number.c'),
+             join('multiarray', 'reduction.c'),
+             join('multiarray', 'refcount.c'),
+             join('multiarray', 'scalartypes.c.src'),
+             join('multiarray', 'scalarapi.c'),
+             join('multiarray', 'sequence.c'),
+             join('multiarray', 'shape.c'),
+             join('multiarray', 'usertypes.c'),
+             join('umath', 'loops.c.src'),
              join('umath', 'ufunc_object.c'),
              join('umath', 'ufunc_type_resolution.c'),
-             join('umath', 'loops.c.src'),
             ]
 THIS_DIR = os.path.dirname(__file__)
 API_FILES = [os.path.join(THIS_DIR, '..', 'src', a) for a in API_FILES]
diff --git a/numpy/core/code_generators/numpy_api.py b/numpy/core/code_generators/numpy_api.py
index dc73df1b3..f963fa783 100644
--- a/numpy/core/code_generators/numpy_api.py
+++ b/numpy/core/code_generators/numpy_api.py
@@ -332,6 +332,9 @@ multiarray_funcs_api = {
     'PyArray_AssignMaskNA':                 293,
     'PyArray_AssignRawScalar':              294,
     'PyArray_AssignArray':                  295,
+    'PyArray_CreateReduceResult':           296,
+    'PyArray_InitializeReduceResult':       297,
+    'PyArray_RemoveAxesInPlace':            298
 }
 
 ufunc_types_api = {
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index 8ca28bfde..f11fcfedb 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -719,6 +719,7 @@ def configuration(parent_package='',top_path=None):
             join('src', 'multiarray', 'numpymemoryview.h'),
             join('src', 'multiarray', 'number.h'),
             join('src', 'multiarray', 'numpyos.h'),
+            join('src', 'multiarray', 'reduction.h'),
             join('src', 'multiarray', 'refcount.h'),
             join('src', 'multiarray', 'scalartypes.h'),
             join('src', 'multiarray', 'sequence.h'),
@@ -786,6 +787,7 @@ def configuration(parent_package='',top_path=None):
             join('src', 'multiarray', 'number.c'),
             join('src', 'multiarray', 'numpymemoryview.c'),
             join('src', 'multiarray', 'numpyos.c'),
+            join('src', 'multiarray', 'reduction.c'),
             join('src', 'multiarray', 'refcount.c'),
             join('src', 'multiarray', 'sequence.c'),
             join('src', 'multiarray', 'shape.c'),
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index 894e3fd22..186da0b9c 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -3942,3 +3942,26 @@ _array_fill_strides(npy_intp *strides, npy_intp *dims, int nd, size_t itemsize,
     }
     return itemsize;
 }
+
+/*
+ * Calls arr_of_subclass.__array_wrap__(towrap), in order to make 'towrap'
+ * have the same ndarray subclass as 'arr_of_subclass'.
+ */
+NPY_NO_EXPORT PyArrayObject *
+PyArray_SubclassWrap(PyArrayObject *arr_of_subclass, PyArrayObject *towrap)
+{
+    PyObject *wrapped = PyObject_CallMethod((PyObject *)arr_of_subclass,
+                                        "__array_wrap__", "O", towrap);
+    if (wrapped == NULL) {
+        return NULL;
+    }
+    if (!PyArray_Check(wrapped)) {
+        PyErr_SetString(PyExc_RuntimeError,
+                "ndarray subclass __array_wrap__ method returned an "
+                "object which was not an instance of an ndarray subclass");
+        Py_DECREF(wrapped);
+        return NULL;
+    }
+
+    return (PyArrayObject *)wrapped;
+}
diff --git a/numpy/core/src/multiarray/ctors.h b/numpy/core/src/multiarray/ctors.h
index d0a1ba687..e12d153ca 100644
--- a/numpy/core/src/multiarray/ctors.h
+++ b/numpy/core/src/multiarray/ctors.h
@@ -92,4 +92,12 @@ PyArray_GetArrayParamsFromObjectEx(PyObject *op,
 NPY_NO_EXPORT int
 _arrays_overlap(PyArrayObject *arr1, PyArrayObject *arr2);
 
+/*
+ * Calls arr_of_subclass.__array_wrap__(towrap), in order to make 'towrap'
+ * have the same ndarray subclass as 'arr_of_subclass'.
+ */
+NPY_NO_EXPORT PyArrayObject *
+PyArray_SubclassWrap(PyArrayObject *arr_of_subclass, PyArrayObject *towrap);
+
+
 #endif
diff --git a/numpy/core/src/multiarray/reduction.c b/numpy/core/src/multiarray/reduction.c
new file mode 100644
index 000000000..09070cb72
--- /dev/null
+++ b/numpy/core/src/multiarray/reduction.c
@@ -0,0 +1,402 @@
+/*
+ * This file implements generic methods for computing reductions on arrays.
+ *
+ * Written by Mark Wiebe (mwwiebe@gmail.com)
+ * Copyright (c) 2011 by Enthought, Inc.
+ *
+ * See LICENSE.txt for the license.
+ */
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+
+#define NPY_NO_DEPRECATED_API
+#define _MULTIARRAYMODULE
+#include <numpy/arrayobject.h>
+
+#include "npy_config.h"
+#include "numpy/npy_3kcompat.h"
+
+#include "reduction.h"
+
+/*
+ * Allocates a result array for a reduction operation, with
+ * dimensions matching 'arr' except set to 1 with 0 stride
+ * whereever axis_flags is True. Dropping the reduction axes
+ * from the result must be done later by the caller once the
+ * computation is complete.
+ *
+ * This function never adds an NA mask to the allocated
+ * result, that is the responsibility of the caller. It also
+ * always allocates a base class ndarray.
+ *
+ * If 'dtype' isn't NULL, this function steals its reference.
+ */
+static PyArrayObject *
+allocate_reduce_result(PyArrayObject *arr, npy_bool *axis_flags,
+                        PyArray_Descr *dtype)
+{
+    npy_intp strides[NPY_MAXDIMS], stride;
+    npy_intp shape[NPY_MAXDIMS], *arr_shape = PyArray_DIMS(arr);
+    npy_stride_sort_item strideperm[NPY_MAXDIMS];
+    int idim, ndim = PyArray_NDIM(arr);
+
+    if (dtype == NULL) {
+        dtype = PyArray_DTYPE(arr);
+        Py_INCREF(dtype);
+    }
+
+    PyArray_CreateSortedStridePerm(PyArray_NDIM(arr), PyArray_SHAPE(arr),
+                                    PyArray_STRIDES(arr), strideperm);
+
+    /* Build the new strides and shape */
+    stride = dtype->elsize;
+    memcpy(shape, arr_shape, ndim * sizeof(shape[0]));
+    for (idim = ndim-1; idim >= 0; --idim) {
+        npy_intp i_perm = strideperm[idim].perm;
+        if (axis_flags[i_perm]) {
+            strides[i_perm] = 0;
+            shape[i_perm] = 1;
+        }
+        else {
+            strides[i_perm] = stride;
+            stride *= shape[i_perm];
+        }
+    }
+
+    /* Finally, allocate the array */
+    return (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, dtype,
+                                    ndim, shape, strides,
+                                    NULL, 0, NULL);
+}
+
+/*
+ * Conforms an output parameter 'out' to have 'ndim' dimensions
+ * with dimensions of size one added in the appropriate places
+ * indicated by 'axis_flags'.
+ *
+ * The return value is a view into 'out'.
+ */
+static PyArrayObject *
+conform_reduce_result(int ndim, npy_bool *axis_flags,
+                        PyArrayObject *out, const char *funcname)
+{
+    npy_intp strides[NPY_MAXDIMS], shape[NPY_MAXDIMS];
+    npy_intp *strides_out = PyArray_STRIDES(out);
+    npy_intp *shape_out = PyArray_DIMS(out);
+    int idim, idim_out, ndim_out = PyArray_NDIM(out);
+    PyArray_Descr *dtype;
+    PyArrayObject_fieldaccess *ret;
+
+    /* Construct the strides and shape */
+    idim_out = 0;
+    for (idim = 0; idim < ndim; ++idim) {
+        if (axis_flags[idim]) {
+            strides[idim] = 0;
+            shape[idim] = 1;
+        }
+        else {
+            if (idim_out >= ndim_out) {
+                PyErr_Format(PyExc_ValueError,
+                        "output parameter for reduction operation %s "
+                        "does not have enough dimensions", funcname);
+                return NULL;
+            }
+            strides[idim] = strides_out[idim_out];
+            shape[idim] = shape_out[idim_out];
+            ++idim_out;
+        }
+    }
+
+    if (idim_out != ndim_out) {
+        PyErr_Format(PyExc_ValueError,
+                "output parameter for reduction operation %s "
+                "has too many dimensions", funcname);
+        return NULL;
+    }
+
+    /* Allocate the view */
+    dtype = PyArray_DESCR(out);
+    Py_INCREF(dtype);
+    ret = (PyArrayObject_fieldaccess *)PyArray_NewFromDescr(&PyArray_Type,
+                               dtype,
+                               ndim, shape,
+                               strides,
+                               PyArray_DATA(out),
+               PyArray_FLAGS(out) & ~(NPY_ARRAY_MASKNA|NPY_ARRAY_OWNMASKNA),
+                               NULL);
+    if (ret == NULL) {
+        return NULL;
+    }
+    Py_INCREF(out);
+    if (PyArray_SetBaseObject((PyArrayObject *)ret, (PyObject *)out) < 0) {
+        Py_DECREF(ret);
+        return NULL;
+    }
+
+    /* Take a view of the mask if it exists */
+    if (PyArray_HASMASKNA(out)) {
+        npy_intp *strides_ret = ret->maskna_strides;
+        strides_out = PyArray_MASKNA_STRIDES(out);
+        idim_out = 0;
+        for (idim = 0; idim < ndim; ++idim) {
+            if (axis_flags[idim]) {
+                strides_ret[idim] = 0;
+            }
+            else {
+                strides_ret[idim] = strides_out[idim_out];
+                ++idim_out;
+            }
+        }
+
+        ret->maskna_dtype = PyArray_MASKNA_DTYPE(out);
+        Py_INCREF(ret->maskna_dtype);
+        ret->maskna_data = PyArray_MASKNA_DATA(out);
+        ret->flags |= NPY_ARRAY_MASKNA;
+    }
+
+    return (PyArrayObject *)ret;
+}
+
+/*NUMPY_API
+ *
+ * Creates a result for reducing 'operand' along the axes specified
+ * in 'axis_flags'. If 'dtype' isn't NULL, this function steals a
+ * reference to 'dtype'.
+ *
+ * If 'out' isn't NULL, this function creates a view conforming
+ * to the number of dimensions of 'operand', adding a singleton dimension
+ * for each reduction axis specified. In this case, 'dtype' is ignored
+ * (but its reference is still stolen), and the caller must handle any
+ * type conversion/validity check for 'out'. When 'need_namask' is true,
+ * raises an exception if 'out' doesn't have an NA mask.
+ *
+ * If 'out' is NULL, it allocates a new array whose shape matches
+ * that of 'operand', except for at the reduction axes. An NA mask
+ * is added if 'need_namask' is true.  If 'dtype' is NULL, the dtype
+ * of 'operand' is used for the result.
+ */
+NPY_NO_EXPORT PyArrayObject *
+PyArray_CreateReduceResult(PyArrayObject *operand, PyArrayObject *out,
+                    PyArray_Descr *dtype, npy_bool *axis_flags,
+                    int need_namask, const char *funcname)
+{
+    PyArrayObject *result;
+
+    if (out == NULL) {
+        /* This function steals the reference to 'dtype' */
+        result = allocate_reduce_result(operand, axis_flags, dtype);
+
+        /* Allocate an NA mask if necessary */
+        if (need_namask && result != NULL) {
+            if (PyArray_AllocateMaskNA(result, 1, 0, 1) < 0) {
+                Py_DECREF(result);
+                return NULL;
+            }
+        }
+    }
+    else {
+        /* Steal the dtype reference */
+        Py_XDECREF(dtype);
+
+        if (need_namask && !PyArray_HASMASKNA(out)) {
+            PyErr_Format(PyExc_ValueError,
+                    "output parameter for reduction operation %s "
+                    "needs an NA mask, but the array provided does "
+                    "not have one", funcname);
+            return NULL;
+        }
+
+        result = conform_reduce_result(PyArray_NDIM(operand), axis_flags,
+                                        out, funcname);
+    }
+
+    return result;
+}
+
+/*NUMPY_API
+ *
+ * This function initializes a result array for a reduction operation
+ * which has no identity. This means it needs to copy the first element
+ * it sees along the reduction axes to result, then return a view of
+ * the operand which excludes that element.
+ *
+ * If a reduction has an identity, such as 0 or 1, the result should
+ * be initialized by calling PyArray_AssignZero(result, NULL, !skipna, NULL)
+ * or PyArray_AssignOne(result, NULL, !skipna, NULL), because this
+ * function raises an exception when there are no elements to reduce.
+ *
+ * For regular reduction, this means it copies the subarray indexed
+ * at zero along each reduction axis into 'result', then returns a view
+ * into 'operand' excluding those copied elements. If 'operand' has
+ * an NA mask in this case, the caller should have already done
+ * the reduction on the mask. This function copies the subarray with
+ * 'replacena' set to True, so that the already accumulated NA mask
+ * in result doesn't get overwritten.
+ *
+ * For 'skipna' reduction, this is more complicated. In the one dimensional
+ * case, it searches for the first non-NA element, copies that element
+ * to 'result', then returns a view into the rest of 'operand'. For
+ * multi-dimensional reductions, the initial elements may be scattered
+ * throughout the array.
+ *
+ * To deal with this, a view of 'operand' is taken, and given its own
+ * copy of the NA mask. Additionally, an array of flags is created,
+ * matching the shape of 'result', and initialized to all False.
+ * Then, the elements of the 'operand' view are gone through, and any time
+ * an exposed element is encounted which isn't already flagged in the
+ * auxiliary array, it is copied into 'result' and flagged as copied.
+ * The element is masked as an NA in the view of 'operand', so that the
+ * later reduction step will skip it during processing.
+ *
+ * result  : The array into which the result is computed. This must have
+ *           the same number of dimensions as 'operand', but for each
+ *           axis i where 'axis_flags[i]' is True, it has a single element.
+ * operand : The array being reduced.
+ * axis_flags : An array of boolean flags, one for each axis of 'operand'.
+ *              When a flag is True, it indicates to reduce along that axis.
+ * skipna  : If True, indicates that the reduction is being calculated
+ *           as if the NA values are being dropped from the computation
+ *           instead of accumulating into an NA result.
+ * funcname : The name of the reduction operation, for the purpose of
+ *            better quality error messages. For example, "numpy.max"
+ *            would be a good name for NumPy's max function.
+ *
+ * Returns a view which contains the remaining elements on which to do
+ * the reduction.
+ */
+NPY_NO_EXPORT PyArrayObject *
+PyArray_InitializeReduceResult(
+                    PyArrayObject *result, PyArrayObject *operand,
+                    npy_bool *axis_flags, int skipna, const char *funcname)
+{
+    npy_intp *strides, *shape, shape_orig[NPY_MAXDIMS], shape0;
+    PyArrayObject *op_view = NULL;
+    int idim, ndim;
+
+    ndim = PyArray_NDIM(operand);
+
+    /*
+     * If 'skipna' is False, or 'operand' has no NA mask in which
+     * case the 'skipna' flag does nothing.
+     */
+    if (!skipna || !PyArray_HASMASKNA(operand)) {
+        if (PyArray_SIZE(operand) == 0) {
+            PyErr_Format(PyExc_ValueError,
+                    "zero-size array to reduction operation %s "
+                    "which has no identity",
+                    funcname);
+            return NULL;
+        }
+
+        /* Take a view into 'operand' which we can modify. */
+        op_view = (PyArrayObject *)PyArray_View(operand, NULL, &PyArray_Type);
+        if (op_view == NULL) {
+            return NULL;
+        }
+    }
+    /*
+     * Otherwise 'skipna' is True and 'operand' has an NA mask. Deal
+     * with the simple one-dimensional case first
+     */
+    else if (ndim == 1) {
+        char *data, *maskna_data;
+        npy_intp *maskna_strides;
+
+        ndim = PyArray_NDIM(operand);
+
+        op_view = (PyArrayObject *)PyArray_View(operand, NULL, &PyArray_Type);
+        if (op_view == NULL) {
+            return NULL;
+        }
+
+        shape = PyArray_DIMS(op_view);
+        shape0 = shape[0];
+        data = PyArray_DATA(op_view);
+        strides = PyArray_STRIDES(op_view);
+        maskna_data = PyArray_MASKNA_DATA(op_view);
+        maskna_strides = PyArray_MASKNA_STRIDES(op_view);
+
+        /* Shrink the array from the start until we find an exposed element */
+        while (shape0 > 0 &&
+                    !NpyMaskValue_IsExposed((npy_mask)*maskna_data)) {
+            --shape0;
+            data += strides[0];
+            maskna_data += maskna_strides[0];
+        }
+
+        if (shape0 == 0) {
+            Py_DECREF(op_view);
+            PyErr_Format(PyExc_ValueError,
+                    "fully NA array with skipna=True to reduction operation "
+                    "%s which has no identity", funcname);
+            return NULL;
+        }
+
+        /*
+         * With the first element exposed, fall through to the code
+         * which copies the element and adjusts the view just as in the
+         * non-skipna case.
+         */
+        shape[0] = shape0;
+        ((PyArrayObject_fieldaccess *)op_view)->data = data;
+        ((PyArrayObject_fieldaccess *)op_view)->maskna_data = maskna_data;
+    }
+    /*
+     * Here 'skipna' is True and 'operand' has an NA mask, but
+     * 'operand' has more than one dimension, so it's the complicated
+     * case
+     */
+    else {
+        PyErr_SetString(PyExc_ValueError,
+                    "skipna=True with a non-identity reduction "
+                    "and an array with ndim > 1 isn't implemented yet");
+            return NULL;
+    }
+
+    /*
+     * Now copy the subarray of the first element along each reduction axis,
+     * then return a view to the rest.
+     *
+     * Adjust the shape to only look at the first element along
+     * any of the reduction axes.
+     */
+    shape = PyArray_SHAPE(op_view);
+    memcpy(shape_orig, shape, ndim * sizeof(npy_intp));
+    for (idim = 0; idim < ndim; ++idim) {
+        if (axis_flags[idim]) {
+            shape[idim] = 1;
+        }
+    }
+
+    /*
+     * Copy the elements into the result to start, with
+     * 'preservena' set to True so that we don't overwrite
+     * what we already calculated in ReduceNAMask.
+     */
+    if (PyArray_AssignArray(result, op_view, NULL, NPY_UNSAFE_CASTING,
+                                                            1, NULL) < 0) {
+        Py_DECREF(op_view);
+        return NULL;
+    }
+
+    /* Adjust the shape to only look at the remaining elements */
+    strides = PyArray_STRIDES(op_view);
+    for (idim = 0; idim < ndim; ++idim) {
+        if (axis_flags[idim]) {
+            shape[idim] = shape_orig[idim] - 1;
+            ((PyArrayObject_fieldaccess *)op_view)->data += strides[idim];
+        }
+    }
+    if (PyArray_HASMASKNA(op_view)) {
+        strides = PyArray_MASKNA_STRIDES(op_view);
+        for (idim = 0; idim < ndim; ++idim) {
+            if (axis_flags[idim]) {
+                ((PyArrayObject_fieldaccess *)op_view)->maskna_data +=
+                                                            strides[idim];
+            }
+        }
+    }
+
+    return op_view;
+}
diff --git a/numpy/core/src/multiarray/reduction.h b/numpy/core/src/multiarray/reduction.h
new file mode 100644
index 000000000..f19909dbc
--- /dev/null
+++ b/numpy/core/src/multiarray/reduction.h
@@ -0,0 +1,4 @@
+#ifndef _NPY_PRIVATE__REDUCTION_H_
+#define _NPY_PRIVATE__REDUCTION_H_
+
+#endif
diff --git a/numpy/core/src/multiarray/shape.c b/numpy/core/src/multiarray/shape.c
index b49b4d988..496d32955 100644
--- a/numpy/core/src/multiarray/shape.c
+++ b/numpy/core/src/multiarray/shape.c
@@ -663,45 +663,48 @@ _fix_unknown_dimension(PyArray_Dims *newshape, npy_intp s_original)
 NPY_NO_EXPORT PyObject *
 PyArray_Squeeze(PyArrayObject *self)
 {
-    int nd = PyArray_NDIM(self);
-    int newnd = nd;
-    npy_intp dimensions[NPY_MAXDIMS];
-    npy_intp strides[NPY_MAXDIMS];
-    int i, j;
     PyArrayObject *ret;
-    PyArray_Descr *dtype;
-
-    if (nd == 0) {
-        Py_INCREF(self);
-        return (PyObject *)self;
-    }
-    for (j = 0, i = 0; i < nd; i++) {
-        if (PyArray_DIMS(self)[i] == 1) {
-            newnd -= 1;
+    npy_bool unit_dims[NPY_MAXDIMS];
+    int idim, ndim, any_ones;
+    npy_intp *shape;
+
+    ndim = PyArray_NDIM(self);
+    shape = PyArray_SHAPE(self);
+
+    any_ones = 0;
+    for (idim = 0; idim < ndim; ++idim) {
+        if (shape[idim] == 1) {
+            unit_dims[idim] = 1;
+            any_ones = 1;
         }
         else {
-            dimensions[j] = PyArray_DIMS(self)[i];
-            strides[j++] = PyArray_STRIDES(self)[i];
+            unit_dims[idim] = 0;
         }
     }
 
-    dtype = PyArray_DESCR(self);
-    Py_INCREF(dtype);
-    ret = (PyArrayObject *)PyArray_NewFromDescr(Py_TYPE(self),
-                               dtype,
-                               newnd, dimensions,
-                               strides, PyArray_DATA(self),
-                               PyArray_FLAGS(self),
-                               (PyObject *)self);
+    /* If there were no ones to squeeze out, return the same array */
+    if (!any_ones) {
+        Py_INCREF(self);
+        return (PyObject *)self;
+    }
+
+    ret = (PyArrayObject *)PyArray_View(self, NULL, &PyArray_Type);
     if (ret == NULL) {
         return NULL;
     }
-    PyArray_CLEARFLAGS(ret, NPY_ARRAY_OWNDATA);
-    Py_INCREF(self);
-    if (PyArray_SetBaseObject(ret, (PyObject *)self) < 0) {
+
+    PyArray_RemoveAxesInPlace(ret, unit_dims);
+
+    /*
+     * If self isn't not a base class ndarray, call its
+     * __array_wrap__ method
+     */
+    if (Py_TYPE(self) != &PyArray_Type) {
+        PyArrayObject *tmp = PyArray_SubclassWrap(self, ret);
         Py_DECREF(ret);
-        return NULL;
+        ret = tmp;
     }
+
     return (PyObject *)ret;
 }
 
@@ -1185,3 +1188,45 @@ build_shape_string(npy_intp n, npy_intp *vals)
     PyUString_ConcatAndDel(&ret, tmp);
     return ret;
 }
+
+/*NUMPY_API
+ *
+ * Removes the axes flagged as True from the array,
+ * modifying it in place. If an axis flagged for removal
+ * has a shape entry bigger than one, this effectively selects
+ * index zero for that axis.
+ *
+ * For example, this can be used to remove the reduction axes
+ * from a reduction result once its computation is complete.
+ */
+NPY_NO_EXPORT void
+PyArray_RemoveAxesInPlace(PyArrayObject *arr, npy_bool *flags)
+{
+    PyArrayObject_fieldaccess *fa = (PyArrayObject_fieldaccess *)arr;
+    npy_intp *shape = fa->dimensions, *strides = fa->strides;
+    int idim, ndim = fa->nd, idim_out = 0;
+
+    /* Compress the dimensions and strides */
+    for (idim = 0; idim < ndim; ++idim) {
+        if (!flags[idim]) {
+            shape[idim_out] = shape[idim];
+            strides[idim_out] = strides[idim];
+            ++idim_out;
+        }
+    }
+
+    /* Compress the mask strides if the result has an NA mask */
+    if (PyArray_HASMASKNA(arr)) {
+        strides = fa->maskna_strides;
+        idim_out = 0;
+        for (idim = 0; idim < ndim; ++idim) {
+            if (!flags[idim]) {
+                strides[idim_out] = strides[idim];
+                ++idim_out;
+            }
+        }
+    }
+
+    /* The final number of dimensions */
+    fa->nd = idim_out;
+}
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index e4d5edca3..a4f9bb105 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -2560,171 +2560,6 @@ get_masked_binary_op_function(PyUFuncObject *self, PyArrayObject *arr,
 }
 
 /*
- * Allocates a result array for a reduction operation, with
- * dimensions matching 'arr' except set to 1 with 0 stride
- * whereever axis_flags is True. Dropping the reduction axes
- * from the result must be done later by the caller once the
- * computation is complete.
- *
- * This function never adds an NA mask to the allocated
- * result, that is the responsibility of the caller. It also
- * always allocates a base class ndarray.
- *
- * If 'dtype' isn't NULL, this function steals its reference.
- */
-static PyArrayObject *
-allocate_reduce_result(PyArrayObject *arr, npy_bool *axis_flags,
-                        PyArray_Descr *dtype)
-{
-    npy_intp strides[NPY_MAXDIMS], stride;
-    npy_intp shape[NPY_MAXDIMS], *arr_shape = PyArray_DIMS(arr);
-    npy_stride_sort_item strideperm[NPY_MAXDIMS];
-    int idim, ndim = PyArray_NDIM(arr);
-
-    if (dtype == NULL) {
-        dtype = PyArray_DTYPE(arr);
-        Py_INCREF(dtype);
-    }
-
-    PyArray_CreateSortedStridePerm(PyArray_NDIM(arr), PyArray_SHAPE(arr),
-                                    PyArray_STRIDES(arr), strideperm);
-
-    /* Build the new strides and shape */
-    stride = dtype->elsize;
-    memcpy(shape, arr_shape, ndim * sizeof(shape[0]));
-    for (idim = ndim-1; idim >= 0; --idim) {
-        npy_intp i_perm = strideperm[idim].perm;
-        if (axis_flags[i_perm]) {
-            strides[i_perm] = 0;
-            shape[i_perm] = 1;
-        }
-        else {
-            strides[i_perm] = stride;
-            stride *= shape[i_perm];
-        }
-    }
-
-    /* Finally, allocate the array */
-    return (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, dtype,
-                                    ndim, shape, strides,
-                                    NULL, 0, NULL);
-}
-
-/*
- * Conforms an output parameter 'out' to have 'ndim' dimensions
- * with dimensions of size one added in the appropriate places
- * indicated by 'axis_flags'.
- *
- * The return value is a view into 'out'.
- */
-static PyArrayObject *
-conform_reduce_result(int ndim, npy_bool *axis_flags, PyArrayObject *out)
-{
-    npy_intp strides[NPY_MAXDIMS], shape[NPY_MAXDIMS];
-    npy_intp *strides_out = PyArray_STRIDES(out);
-    npy_intp *shape_out = PyArray_DIMS(out);
-    int idim, idim_out, ndim_out = PyArray_NDIM(out);
-    PyArray_Descr *dtype;
-    PyArrayObject_fieldaccess *ret;
-
-    /* Construct the strides and shape */
-    idim_out = 0;
-    for (idim = 0; idim < ndim; ++idim) {
-        if (axis_flags[idim]) {
-            strides[idim] = 0;
-            shape[idim] = 1;
-        }
-        else {
-            if (idim_out >= ndim_out) {
-                PyErr_SetString(PyExc_ValueError,
-                        "output parameter for reduce does not have "
-                        "enough dimensions");
-                return NULL;
-            }
-            strides[idim] = strides_out[idim_out];
-            shape[idim] = shape_out[idim_out];
-            ++idim_out;
-        }
-    }
-
-    if (idim_out != ndim_out) {
-        PyErr_SetString(PyExc_ValueError,
-                "output parameter for reduce has too many "
-                "dimensions");
-        return NULL;
-    }
-
-    /* Allocate the view */
-    dtype = PyArray_DESCR(out);
-    Py_INCREF(dtype);
-    ret = (PyArrayObject_fieldaccess *)PyArray_NewFromDescr(&PyArray_Type,
-                               dtype,
-                               ndim, shape,
-                               strides,
-                               PyArray_DATA(out),
-               PyArray_FLAGS(out) & ~(NPY_ARRAY_MASKNA|NPY_ARRAY_OWNMASKNA),
-                               NULL);
-    if (ret == NULL) {
-        return NULL;
-    }
-    Py_INCREF(out);
-    if (PyArray_SetBaseObject((PyArrayObject *)ret, (PyObject *)out) < 0) {
-        Py_DECREF(ret);
-        return NULL;
-    }
-
-    /* Take a view of the mask if it exists */
-    if (PyArray_HASMASKNA(out)) {
-        npy_intp *strides_ret = ret->maskna_strides;
-        strides_out = PyArray_MASKNA_STRIDES(out);
-        idim_out = 0;
-        for (idim = 0; idim < ndim; ++idim) {
-            if (axis_flags[idim]) {
-                strides_ret[idim] = 0;
-            }
-            else {
-                strides_ret[idim] = strides_out[idim_out];
-                ++idim_out;
-            }
-        }
-
-        ret->maskna_dtype = PyArray_MASKNA_DTYPE(out);
-        Py_INCREF(ret->maskna_dtype);
-        ret->maskna_data = PyArray_MASKNA_DATA(out);
-        ret->flags |= NPY_ARRAY_MASKNA;
-    }
-
-    return (PyArrayObject *)ret;
-}
-
-/* Allocate 'result' or conform 'out' to 'self' (in 'result') */
-static PyArrayObject *
-allocate_or_conform_reduce_result(PyArrayObject *arr, PyArrayObject *out,
-                        npy_bool *axis_flags, PyArray_Descr *otype_dtype,
-                        int addmask)
-{
-    PyArrayObject *result;
-
-    if (out == NULL) {
-        Py_XINCREF(otype_dtype);
-        result = allocate_reduce_result(arr, axis_flags, otype_dtype);
-
-        /* Allocate an NA mask if necessary */
-        if (addmask && result != NULL) {
-            if (PyArray_AllocateMaskNA(result, 1, 0, 1) < 0) {
-                Py_DECREF(result);
-                return NULL;
-            }
-        }
-    }
-    else {
-        result = conform_reduce_result(PyArray_NDIM(arr), axis_flags, out);
-    }
-
-    return result;
-}
-
-/*
  * Either:
  *   1) Fills 'result' with the identity, and returns a reference to 'arr'.
  *   2) Copies the first values along each reduction axis into 'result',
@@ -2735,10 +2570,6 @@ initialize_reduce_result(int identity, PyArrayObject *result,
                         npy_bool *axis_flags, PyArrayObject *arr,
                         int skipna, char *ufunc_name)
 {
-    npy_intp *strides, *shape, shape_orig[NPY_MAXDIMS], shape0;
-    PyArrayObject *arr_view;
-    int idim, ndim;
-
     if (identity == PyUFunc_One) {
         if (PyArray_AssignOne(result, NULL, !skipna, NULL) < 0) {
             return NULL;
@@ -2753,165 +2584,10 @@ initialize_reduce_result(int identity, PyArrayObject *result,
         Py_INCREF(arr);
         return arr;
     }
-    /*
-     * With skipna=True and where 'arr' has an NA mask,
-     * need to do some additional fiddling if there's no unit.
-     */
-    else if (skipna && PyArray_HASMASKNA(arr)) {
-        char *data, *maskna_data;
-        npy_intp *maskna_strides;
-
-        ndim = PyArray_NDIM(arr);
-
-        /*
-         * Currently only supporting one dimension in this case.
-         */
-        if (ndim != 1) {
-            PyErr_SetString(PyExc_ValueError,
-                    "skipna=True with a non-identity reduction "
-                    "and an array with ndim > 1 isn't implemented yet");
-            return NULL;
-        }
-
-        arr_view = (PyArrayObject *)PyArray_View(arr, NULL, &PyArray_Type);
-        if (arr_view == NULL) {
-            return NULL;
-        }
-
-        shape = PyArray_DIMS(arr_view);
-        shape0 = shape[0];
-        data = PyArray_DATA(arr_view);
-        strides = PyArray_STRIDES(arr_view);
-        maskna_data = PyArray_MASKNA_DATA(arr_view);
-        maskna_strides = PyArray_MASKNA_STRIDES(arr_view);
-
-        /* Shrink the array from the start until we find an exposed element */
-        while (shape0 > 0 &&
-                    !NpyMaskValue_IsExposed((npy_mask)*maskna_data)) {
-            --shape0;
-            data += strides[0];
-            maskna_data += maskna_strides[0];
-        }
-
-        if (shape0 == 0) {
-            Py_DECREF(arr_view);
-            PyErr_Format(PyExc_ValueError,
-                    "fully NA array with skipna=True to "
-                    "%s.reduce which has no identity", ufunc_name);
-            return NULL;
-        }
-
-        /* With the first element exposed, fall through to the other code */
-        shape[0] = shape0;
-        ((PyArrayObject_fieldaccess *)arr_view)->data = data;
-        ((PyArrayObject_fieldaccess *)arr_view)->maskna_data = maskna_data;
-    }
-    /*
-     * If there is no identity, copy the first element along the
-     * reduction dimensions.
-     */
     else {
-        ndim = PyArray_NDIM(arr);
-
-        if (PyArray_SIZE(arr) == 0) {
-            PyErr_Format(PyExc_ValueError,
-                    "zero-size array to %s.reduce which has no identity",
-                    ufunc_name);
-            return NULL;
-        }
-
-        /*
-         * TODO: Should the ufunc tell us whether it's commutative
-         *       and/or associative, and this operation can be reordered?
-         *       When reducing more than one axis at a time, this is
-         *       important.
-         *
-         * Take a view into 'arr' which we can modify.
-         */
-        arr_view = (PyArrayObject *)PyArray_View(arr, NULL, &PyArray_Type);
-        if (arr_view == NULL) {
-            return NULL;
-        }
-    }
-
-    /*
-     * Adjust the shape to only look at the first element along
-     * any of the reduction axes.
-     */
-    shape = PyArray_DIMS(arr_view);
-    memcpy(shape_orig, shape, ndim * sizeof(npy_intp));
-    for (idim = 0; idim < ndim; ++idim) {
-        if (axis_flags[idim]) {
-            shape[idim] = 1;
-        }
-    }
-
-    /*
-     * Copy the elements into the result to start, with
-     * 'preservena' set to True so that we don't overwrite
-     * what we already calculated in ReduceNAMask.
-     */
-    if (PyArray_AssignArray(result, arr_view, NULL, NPY_UNSAFE_CASTING,
-                            1, NULL) < 0) {
-        Py_DECREF(arr_view);
-        return NULL;
+        return PyArray_InitializeReduceResult(
+                        result, arr, axis_flags, skipna, ufunc_name);
     }
-
-    /* Adjust the shape to only look at the remaining elements */
-    strides = PyArray_STRIDES(arr_view);
-    for (idim = 0; idim < ndim; ++idim) {
-        if (axis_flags[idim]) {
-            shape[idim] = shape_orig[idim] - 1;
-            ((PyArrayObject_fieldaccess *)arr_view)->data += strides[idim];
-        }
-    }
-    if (PyArray_HASMASKNA(arr_view)) {
-        strides = PyArray_MASKNA_STRIDES(arr_view);
-        for (idim = 0; idim < ndim; ++idim) {
-            if (axis_flags[idim]) {
-                ((PyArrayObject_fieldaccess *)arr_view)->maskna_data +=
-                                                            strides[idim];
-            }
-        }
-    }
-
-    return arr_view;
-}
-
-/*
- * Removes the dimensions flagged as True from the array,
- * modifying it in place.
- */
-static void
-strip_flagged_dimensions(PyArrayObject *arr, npy_bool *flags)
-{
-    PyArrayObject_fieldaccess *fa = (PyArrayObject_fieldaccess *)arr;
-    npy_intp *shape = fa->dimensions, *strides = fa->strides;
-    int idim, ndim = fa->nd, idim_out = 0;
-
-    /* Compress the dimensions and strides */
-    for (idim = 0; idim < ndim; ++idim) {
-        if (!flags[idim]) {
-            shape[idim_out] = shape[idim];
-            strides[idim_out] = strides[idim];
-            ++idim_out;
-        }
-    }
-
-    /* Compress the mask strides if the result has an NA mask */
-    if (PyArray_HASMASKNA(arr)) {
-        strides = fa->maskna_strides;
-        idim_out = 0;
-        for (idim = 0; idim < ndim; ++idim) {
-            if (!flags[idim]) {
-                strides[idim_out] = strides[idim];
-                ++idim_out;
-            }
-        }
-    }
-
-    /* The final number of dimensions */
-    fa->nd = idim_out;
 }
 
 /*
@@ -3045,8 +2721,10 @@ PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
     }
 
     /* Allocate an output or conform 'out' to 'self' */
-    result = allocate_or_conform_reduce_result(arr, out,
-                            axis_flags, otype_dtype, !skipna && use_maskna);
+    Py_XINCREF(otype_dtype);
+    result = PyArray_CreateReduceResult(arr, out,
+                            otype_dtype, axis_flags, !skipna && use_maskna,
+                            ufunc_name);
     if (result == NULL) {
         return NULL;
     }
@@ -3256,7 +2934,7 @@ finish:
 
     /* Strip out the extra 'one' dimensions in the result */
     if (out == NULL) {
-        strip_flagged_dimensions(result, axis_flags);
+        PyArray_RemoveAxesInPlace(result, axis_flags);
     }
     else {
         Py_DECREF(result);
diff --git a/numpy/core/tests/test_maskna.py b/numpy/core/tests/test_maskna.py
index ff1db7603..b839b661e 100644
--- a/numpy/core/tests/test_maskna.py
+++ b/numpy/core/tests/test_maskna.py
@@ -149,8 +149,6 @@ def test_array_maskna_item():
     a.itemset((1,1), np.NA)
     assert_(np.isna(a[1,1]))
 
-
-
 def test_array_maskna_payload():
     # Single numbered index
     a = np.zeros((2,), maskna=True)
@@ -962,6 +960,7 @@ def test_array_maskna_concatenate():
     assert_equal(res.strides, (4, 16))
 
 def test_array_maskna_column_stack():
+    # np.column_stack
     a = np.array((1,2,3), maskna=True)
     b = np.array((2,3,4), maskna=True)
     b[2] = np.NA
@@ -978,6 +977,13 @@ def test_array_maskna_compress():
     res = a.compress(mask)
     assert_equal(res, [1,2,3,4])
 
+def test_array_maskna_squeeze():
+    # np.squeeze
+    a = np.zeros((1,3,1,1,4,2,1), maskna=True)
+    a[0,1,0,0,3,0,0] = np.NA
+    res = np.squeeze(a)
+    assert_equal(res.shape, (3,4,2))
+    assert_(np.isna(res[1,3,0]))
 
 if __name__ == "__main__":
     run_module_suite()