diff options
author | Mark Wiebe <mwwiebe@gmail.com> | 2011-08-17 10:25:40 -0700 |
---|---|---|
committer | Charles Harris <charlesr.harris@gmail.com> | 2011-08-27 07:26:55 -0600 |
commit | a1faa1b6883c47333508a0476c1304b0a8a3f64e (patch) | |
tree | 24556096a9cf47bd97088b2c4ad452cff57e0c6f | |
parent | f597374edc298810083799e8539c99fc0a93b319 (diff) | |
download | numpy-a1faa1b6883c47333508a0476c1304b0a8a3f64e.tar.gz |
ENH: missingdata: Move some of the refactored reduction code into the API
Doing this so that it can be used both in multiarray and umath, to make
writing new reduction operations generally easier.
Also made PyArray_Squeeze work with NA-masked arrays.
-rw-r--r-- | doc/release/2.0.0-notes.rst | 20 | ||||
-rw-r--r-- | numpy/core/SConscript | 1 | ||||
-rw-r--r-- | numpy/core/code_generators/genapi.py | 49 | ||||
-rw-r--r-- | numpy/core/code_generators/numpy_api.py | 3 | ||||
-rw-r--r-- | numpy/core/setup.py | 2 | ||||
-rw-r--r-- | numpy/core/src/multiarray/ctors.c | 23 | ||||
-rw-r--r-- | numpy/core/src/multiarray/ctors.h | 8 | ||||
-rw-r--r-- | numpy/core/src/multiarray/reduction.c | 402 | ||||
-rw-r--r-- | numpy/core/src/multiarray/reduction.h | 4 | ||||
-rw-r--r-- | numpy/core/src/multiarray/shape.c | 101 | ||||
-rw-r--r-- | numpy/core/src/umath/ufunc_object.c | 336 | ||||
-rw-r--r-- | numpy/core/tests/test_maskna.py | 10 |
12 files changed, 575 insertions, 384 deletions
diff --git a/doc/release/2.0.0-notes.rst b/doc/release/2.0.0-notes.rst index d50797cb3..ca9bc4147 100644 --- a/doc/release/2.0.0-notes.rst +++ b/doc/release/2.0.0-notes.rst @@ -31,7 +31,7 @@ What works with NA: + ndarray.clip, ndarray.min, ndarray.max, ndarray.sum, ndarray.prod, ndarray.conjugate, ndarray.diagonal, ndarray.flatten + numpy.concatenate, numpy.column_stack, numpy.hstack, - numpy.vstack, numpy.dstack + numpy.vstack, numpy.dstack, numpy.squeeze What doesn't work with NA: * Fancy indexing, such as with lists and partial boolean masks. @@ -50,6 +50,19 @@ What doesn't work with NA: numpy.append, numpy.insert (relies on fancy indexing), numpy.where, +Differences with R: + * R's parameter rm.na=T is spelled skipna=True in NumPy. + * np.isna(nan) is False, but R's is.na(nan) is TRUE. This is because + NumPy's NA is treated independently of the underlying data type. + * Boolean indexing, where the result is compressed to just + the elements with true in the mask, raises if the booelan mask + has an NA value in it. This is because that value could be either + True or False, meaning the count of the output array is actually + NA. R treats this case in a manner inconsistent with the NA model, + returning NA values in the spots where the boolean index has NA. + This may have a practical advantage in spite of violating the + NA theoretical model, so NumPy could adopt the behavior if necessary + Custom formatter for printing arrays @@ -76,6 +89,11 @@ and depended in an undesirable way on the particular axis chosen for concatenation. A bug was also fixed which silently allowed out of bounds axis arguments. +The ufuncs logical_or, logical_and, and logical_not now follow Python's +behavior with object arrays, instead of trying to call methods on the +objects. For example the expression (3 and 'test') produces the string +'test', and now np.logical_and(np.array(3, 'O'), np.array('test', 'O')) +produces 'test' as well. Deprecations ============ diff --git a/numpy/core/SConscript b/numpy/core/SConscript index 1a0171224..2afe02477 100644 --- a/numpy/core/SConscript +++ b/numpy/core/SConscript @@ -468,6 +468,7 @@ if ENABLE_SEPARATE_COMPILATION: pjoin('src', 'multiarray', 'item_selection.c'), pjoin('src', 'multiarray', 'calculation.c'), pjoin('src', 'multiarray', 'common.c'), + pjoin('src', 'multiarray', 'reduction.c'), pjoin('src', 'multiarray', 'refcount.c'), pjoin('src', 'multiarray', 'conversion_utils.c'), pjoin('src', 'multiarray', 'usertypes.c'), diff --git a/numpy/core/code_generators/genapi.py b/numpy/core/code_generators/genapi.py index b3b56fa47..fdf043ddb 100644 --- a/numpy/core/code_generators/genapi.py +++ b/numpy/core/code_generators/genapi.py @@ -21,43 +21,44 @@ from os.path import join __docformat__ = 'restructuredtext' # The files under src/ that are scanned for API functions -API_FILES = [join('multiarray', 'methods.c'), - join('multiarray', 'arrayobject.c'), +API_FILES = [join('multiarray', 'array_assign_array.c'), join('multiarray', 'array_assign_scalar.c'), - join('multiarray', 'array_assign_array.c'), - join('multiarray', 'flagsobject.c'), - join('multiarray', 'descriptor.c'), - join('multiarray', 'iterators.c'), - join('multiarray', 'getset.c'), - join('multiarray', 'number.c'), - join('multiarray', 'sequence.c'), - join('multiarray', 'ctors.c'), - join('multiarray', 'convert.c'), - join('multiarray', 'shape.c'), - join('multiarray', 'item_selection.c'), - join('multiarray', 'convert_datatype.c'), + join('multiarray', 'arrayobject.c'), join('multiarray', 'arraytypes.c.src'), - join('multiarray', 'multiarraymodule.c'), - join('multiarray', 'scalartypes.c.src'), - join('multiarray', 'scalarapi.c'), + join('multiarray', 'buffer.c'), join('multiarray', 'calculation.c'), - join('multiarray', 'usertypes.c'), - join('multiarray', 'refcount.c'), join('multiarray', 'conversion_utils.c'), - join('multiarray', 'buffer.c'), + join('multiarray', 'convert.c'), + join('multiarray', 'convert_datatype.c'), + join('multiarray', 'ctors.c'), join('multiarray', 'datetime.c'), - join('multiarray', 'datetime_strings.c'), join('multiarray', 'datetime_busday.c'), join('multiarray', 'datetime_busdaycal.c'), + join('multiarray', 'datetime_strings.c'), + join('multiarray', 'descriptor.c'), + join('multiarray', 'einsum.c.src'), + join('multiarray', 'flagsobject.c'), + join('multiarray', 'getset.c'), + join('multiarray', 'item_selection.c'), + join('multiarray', 'iterators.c'), + join('multiarray', 'methods.c'), + join('multiarray', 'multiarraymodule.c'), + join('multiarray', 'na_mask.c'), join('multiarray', 'nditer_api.c'), join('multiarray', 'nditer_constr.c'), join('multiarray', 'nditer_pywrap.c'), join('multiarray', 'nditer_templ.c.src'), - join('multiarray', 'einsum.c.src'), - join('multiarray', 'na_mask.c'), + join('multiarray', 'number.c'), + join('multiarray', 'reduction.c'), + join('multiarray', 'refcount.c'), + join('multiarray', 'scalartypes.c.src'), + join('multiarray', 'scalarapi.c'), + join('multiarray', 'sequence.c'), + join('multiarray', 'shape.c'), + join('multiarray', 'usertypes.c'), + join('umath', 'loops.c.src'), join('umath', 'ufunc_object.c'), join('umath', 'ufunc_type_resolution.c'), - join('umath', 'loops.c.src'), ] THIS_DIR = os.path.dirname(__file__) API_FILES = [os.path.join(THIS_DIR, '..', 'src', a) for a in API_FILES] diff --git a/numpy/core/code_generators/numpy_api.py b/numpy/core/code_generators/numpy_api.py index dc73df1b3..f963fa783 100644 --- a/numpy/core/code_generators/numpy_api.py +++ b/numpy/core/code_generators/numpy_api.py @@ -332,6 +332,9 @@ multiarray_funcs_api = { 'PyArray_AssignMaskNA': 293, 'PyArray_AssignRawScalar': 294, 'PyArray_AssignArray': 295, + 'PyArray_CreateReduceResult': 296, + 'PyArray_InitializeReduceResult': 297, + 'PyArray_RemoveAxesInPlace': 298 } ufunc_types_api = { diff --git a/numpy/core/setup.py b/numpy/core/setup.py index 8ca28bfde..f11fcfedb 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -719,6 +719,7 @@ def configuration(parent_package='',top_path=None): join('src', 'multiarray', 'numpymemoryview.h'), join('src', 'multiarray', 'number.h'), join('src', 'multiarray', 'numpyos.h'), + join('src', 'multiarray', 'reduction.h'), join('src', 'multiarray', 'refcount.h'), join('src', 'multiarray', 'scalartypes.h'), join('src', 'multiarray', 'sequence.h'), @@ -786,6 +787,7 @@ def configuration(parent_package='',top_path=None): join('src', 'multiarray', 'number.c'), join('src', 'multiarray', 'numpymemoryview.c'), join('src', 'multiarray', 'numpyos.c'), + join('src', 'multiarray', 'reduction.c'), join('src', 'multiarray', 'refcount.c'), join('src', 'multiarray', 'sequence.c'), join('src', 'multiarray', 'shape.c'), diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index 894e3fd22..186da0b9c 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -3942,3 +3942,26 @@ _array_fill_strides(npy_intp *strides, npy_intp *dims, int nd, size_t itemsize, } return itemsize; } + +/* + * Calls arr_of_subclass.__array_wrap__(towrap), in order to make 'towrap' + * have the same ndarray subclass as 'arr_of_subclass'. + */ +NPY_NO_EXPORT PyArrayObject * +PyArray_SubclassWrap(PyArrayObject *arr_of_subclass, PyArrayObject *towrap) +{ + PyObject *wrapped = PyObject_CallMethod((PyObject *)arr_of_subclass, + "__array_wrap__", "O", towrap); + if (wrapped == NULL) { + return NULL; + } + if (!PyArray_Check(wrapped)) { + PyErr_SetString(PyExc_RuntimeError, + "ndarray subclass __array_wrap__ method returned an " + "object which was not an instance of an ndarray subclass"); + Py_DECREF(wrapped); + return NULL; + } + + return (PyArrayObject *)wrapped; +} diff --git a/numpy/core/src/multiarray/ctors.h b/numpy/core/src/multiarray/ctors.h index d0a1ba687..e12d153ca 100644 --- a/numpy/core/src/multiarray/ctors.h +++ b/numpy/core/src/multiarray/ctors.h @@ -92,4 +92,12 @@ PyArray_GetArrayParamsFromObjectEx(PyObject *op, NPY_NO_EXPORT int _arrays_overlap(PyArrayObject *arr1, PyArrayObject *arr2); +/* + * Calls arr_of_subclass.__array_wrap__(towrap), in order to make 'towrap' + * have the same ndarray subclass as 'arr_of_subclass'. + */ +NPY_NO_EXPORT PyArrayObject * +PyArray_SubclassWrap(PyArrayObject *arr_of_subclass, PyArrayObject *towrap); + + #endif diff --git a/numpy/core/src/multiarray/reduction.c b/numpy/core/src/multiarray/reduction.c new file mode 100644 index 000000000..09070cb72 --- /dev/null +++ b/numpy/core/src/multiarray/reduction.c @@ -0,0 +1,402 @@ +/* + * This file implements generic methods for computing reductions on arrays. + * + * Written by Mark Wiebe (mwwiebe@gmail.com) + * Copyright (c) 2011 by Enthought, Inc. + * + * See LICENSE.txt for the license. + */ + +#define PY_SSIZE_T_CLEAN +#include <Python.h> + +#define NPY_NO_DEPRECATED_API +#define _MULTIARRAYMODULE +#include <numpy/arrayobject.h> + +#include "npy_config.h" +#include "numpy/npy_3kcompat.h" + +#include "reduction.h" + +/* + * Allocates a result array for a reduction operation, with + * dimensions matching 'arr' except set to 1 with 0 stride + * whereever axis_flags is True. Dropping the reduction axes + * from the result must be done later by the caller once the + * computation is complete. + * + * This function never adds an NA mask to the allocated + * result, that is the responsibility of the caller. It also + * always allocates a base class ndarray. + * + * If 'dtype' isn't NULL, this function steals its reference. + */ +static PyArrayObject * +allocate_reduce_result(PyArrayObject *arr, npy_bool *axis_flags, + PyArray_Descr *dtype) +{ + npy_intp strides[NPY_MAXDIMS], stride; + npy_intp shape[NPY_MAXDIMS], *arr_shape = PyArray_DIMS(arr); + npy_stride_sort_item strideperm[NPY_MAXDIMS]; + int idim, ndim = PyArray_NDIM(arr); + + if (dtype == NULL) { + dtype = PyArray_DTYPE(arr); + Py_INCREF(dtype); + } + + PyArray_CreateSortedStridePerm(PyArray_NDIM(arr), PyArray_SHAPE(arr), + PyArray_STRIDES(arr), strideperm); + + /* Build the new strides and shape */ + stride = dtype->elsize; + memcpy(shape, arr_shape, ndim * sizeof(shape[0])); + for (idim = ndim-1; idim >= 0; --idim) { + npy_intp i_perm = strideperm[idim].perm; + if (axis_flags[i_perm]) { + strides[i_perm] = 0; + shape[i_perm] = 1; + } + else { + strides[i_perm] = stride; + stride *= shape[i_perm]; + } + } + + /* Finally, allocate the array */ + return (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, dtype, + ndim, shape, strides, + NULL, 0, NULL); +} + +/* + * Conforms an output parameter 'out' to have 'ndim' dimensions + * with dimensions of size one added in the appropriate places + * indicated by 'axis_flags'. + * + * The return value is a view into 'out'. + */ +static PyArrayObject * +conform_reduce_result(int ndim, npy_bool *axis_flags, + PyArrayObject *out, const char *funcname) +{ + npy_intp strides[NPY_MAXDIMS], shape[NPY_MAXDIMS]; + npy_intp *strides_out = PyArray_STRIDES(out); + npy_intp *shape_out = PyArray_DIMS(out); + int idim, idim_out, ndim_out = PyArray_NDIM(out); + PyArray_Descr *dtype; + PyArrayObject_fieldaccess *ret; + + /* Construct the strides and shape */ + idim_out = 0; + for (idim = 0; idim < ndim; ++idim) { + if (axis_flags[idim]) { + strides[idim] = 0; + shape[idim] = 1; + } + else { + if (idim_out >= ndim_out) { + PyErr_Format(PyExc_ValueError, + "output parameter for reduction operation %s " + "does not have enough dimensions", funcname); + return NULL; + } + strides[idim] = strides_out[idim_out]; + shape[idim] = shape_out[idim_out]; + ++idim_out; + } + } + + if (idim_out != ndim_out) { + PyErr_Format(PyExc_ValueError, + "output parameter for reduction operation %s " + "has too many dimensions", funcname); + return NULL; + } + + /* Allocate the view */ + dtype = PyArray_DESCR(out); + Py_INCREF(dtype); + ret = (PyArrayObject_fieldaccess *)PyArray_NewFromDescr(&PyArray_Type, + dtype, + ndim, shape, + strides, + PyArray_DATA(out), + PyArray_FLAGS(out) & ~(NPY_ARRAY_MASKNA|NPY_ARRAY_OWNMASKNA), + NULL); + if (ret == NULL) { + return NULL; + } + Py_INCREF(out); + if (PyArray_SetBaseObject((PyArrayObject *)ret, (PyObject *)out) < 0) { + Py_DECREF(ret); + return NULL; + } + + /* Take a view of the mask if it exists */ + if (PyArray_HASMASKNA(out)) { + npy_intp *strides_ret = ret->maskna_strides; + strides_out = PyArray_MASKNA_STRIDES(out); + idim_out = 0; + for (idim = 0; idim < ndim; ++idim) { + if (axis_flags[idim]) { + strides_ret[idim] = 0; + } + else { + strides_ret[idim] = strides_out[idim_out]; + ++idim_out; + } + } + + ret->maskna_dtype = PyArray_MASKNA_DTYPE(out); + Py_INCREF(ret->maskna_dtype); + ret->maskna_data = PyArray_MASKNA_DATA(out); + ret->flags |= NPY_ARRAY_MASKNA; + } + + return (PyArrayObject *)ret; +} + +/*NUMPY_API + * + * Creates a result for reducing 'operand' along the axes specified + * in 'axis_flags'. If 'dtype' isn't NULL, this function steals a + * reference to 'dtype'. + * + * If 'out' isn't NULL, this function creates a view conforming + * to the number of dimensions of 'operand', adding a singleton dimension + * for each reduction axis specified. In this case, 'dtype' is ignored + * (but its reference is still stolen), and the caller must handle any + * type conversion/validity check for 'out'. When 'need_namask' is true, + * raises an exception if 'out' doesn't have an NA mask. + * + * If 'out' is NULL, it allocates a new array whose shape matches + * that of 'operand', except for at the reduction axes. An NA mask + * is added if 'need_namask' is true. If 'dtype' is NULL, the dtype + * of 'operand' is used for the result. + */ +NPY_NO_EXPORT PyArrayObject * +PyArray_CreateReduceResult(PyArrayObject *operand, PyArrayObject *out, + PyArray_Descr *dtype, npy_bool *axis_flags, + int need_namask, const char *funcname) +{ + PyArrayObject *result; + + if (out == NULL) { + /* This function steals the reference to 'dtype' */ + result = allocate_reduce_result(operand, axis_flags, dtype); + + /* Allocate an NA mask if necessary */ + if (need_namask && result != NULL) { + if (PyArray_AllocateMaskNA(result, 1, 0, 1) < 0) { + Py_DECREF(result); + return NULL; + } + } + } + else { + /* Steal the dtype reference */ + Py_XDECREF(dtype); + + if (need_namask && !PyArray_HASMASKNA(out)) { + PyErr_Format(PyExc_ValueError, + "output parameter for reduction operation %s " + "needs an NA mask, but the array provided does " + "not have one", funcname); + return NULL; + } + + result = conform_reduce_result(PyArray_NDIM(operand), axis_flags, + out, funcname); + } + + return result; +} + +/*NUMPY_API + * + * This function initializes a result array for a reduction operation + * which has no identity. This means it needs to copy the first element + * it sees along the reduction axes to result, then return a view of + * the operand which excludes that element. + * + * If a reduction has an identity, such as 0 or 1, the result should + * be initialized by calling PyArray_AssignZero(result, NULL, !skipna, NULL) + * or PyArray_AssignOne(result, NULL, !skipna, NULL), because this + * function raises an exception when there are no elements to reduce. + * + * For regular reduction, this means it copies the subarray indexed + * at zero along each reduction axis into 'result', then returns a view + * into 'operand' excluding those copied elements. If 'operand' has + * an NA mask in this case, the caller should have already done + * the reduction on the mask. This function copies the subarray with + * 'replacena' set to True, so that the already accumulated NA mask + * in result doesn't get overwritten. + * + * For 'skipna' reduction, this is more complicated. In the one dimensional + * case, it searches for the first non-NA element, copies that element + * to 'result', then returns a view into the rest of 'operand'. For + * multi-dimensional reductions, the initial elements may be scattered + * throughout the array. + * + * To deal with this, a view of 'operand' is taken, and given its own + * copy of the NA mask. Additionally, an array of flags is created, + * matching the shape of 'result', and initialized to all False. + * Then, the elements of the 'operand' view are gone through, and any time + * an exposed element is encounted which isn't already flagged in the + * auxiliary array, it is copied into 'result' and flagged as copied. + * The element is masked as an NA in the view of 'operand', so that the + * later reduction step will skip it during processing. + * + * result : The array into which the result is computed. This must have + * the same number of dimensions as 'operand', but for each + * axis i where 'axis_flags[i]' is True, it has a single element. + * operand : The array being reduced. + * axis_flags : An array of boolean flags, one for each axis of 'operand'. + * When a flag is True, it indicates to reduce along that axis. + * skipna : If True, indicates that the reduction is being calculated + * as if the NA values are being dropped from the computation + * instead of accumulating into an NA result. + * funcname : The name of the reduction operation, for the purpose of + * better quality error messages. For example, "numpy.max" + * would be a good name for NumPy's max function. + * + * Returns a view which contains the remaining elements on which to do + * the reduction. + */ +NPY_NO_EXPORT PyArrayObject * +PyArray_InitializeReduceResult( + PyArrayObject *result, PyArrayObject *operand, + npy_bool *axis_flags, int skipna, const char *funcname) +{ + npy_intp *strides, *shape, shape_orig[NPY_MAXDIMS], shape0; + PyArrayObject *op_view = NULL; + int idim, ndim; + + ndim = PyArray_NDIM(operand); + + /* + * If 'skipna' is False, or 'operand' has no NA mask in which + * case the 'skipna' flag does nothing. + */ + if (!skipna || !PyArray_HASMASKNA(operand)) { + if (PyArray_SIZE(operand) == 0) { + PyErr_Format(PyExc_ValueError, + "zero-size array to reduction operation %s " + "which has no identity", + funcname); + return NULL; + } + + /* Take a view into 'operand' which we can modify. */ + op_view = (PyArrayObject *)PyArray_View(operand, NULL, &PyArray_Type); + if (op_view == NULL) { + return NULL; + } + } + /* + * Otherwise 'skipna' is True and 'operand' has an NA mask. Deal + * with the simple one-dimensional case first + */ + else if (ndim == 1) { + char *data, *maskna_data; + npy_intp *maskna_strides; + + ndim = PyArray_NDIM(operand); + + op_view = (PyArrayObject *)PyArray_View(operand, NULL, &PyArray_Type); + if (op_view == NULL) { + return NULL; + } + + shape = PyArray_DIMS(op_view); + shape0 = shape[0]; + data = PyArray_DATA(op_view); + strides = PyArray_STRIDES(op_view); + maskna_data = PyArray_MASKNA_DATA(op_view); + maskna_strides = PyArray_MASKNA_STRIDES(op_view); + + /* Shrink the array from the start until we find an exposed element */ + while (shape0 > 0 && + !NpyMaskValue_IsExposed((npy_mask)*maskna_data)) { + --shape0; + data += strides[0]; + maskna_data += maskna_strides[0]; + } + + if (shape0 == 0) { + Py_DECREF(op_view); + PyErr_Format(PyExc_ValueError, + "fully NA array with skipna=True to reduction operation " + "%s which has no identity", funcname); + return NULL; + } + + /* + * With the first element exposed, fall through to the code + * which copies the element and adjusts the view just as in the + * non-skipna case. + */ + shape[0] = shape0; + ((PyArrayObject_fieldaccess *)op_view)->data = data; + ((PyArrayObject_fieldaccess *)op_view)->maskna_data = maskna_data; + } + /* + * Here 'skipna' is True and 'operand' has an NA mask, but + * 'operand' has more than one dimension, so it's the complicated + * case + */ + else { + PyErr_SetString(PyExc_ValueError, + "skipna=True with a non-identity reduction " + "and an array with ndim > 1 isn't implemented yet"); + return NULL; + } + + /* + * Now copy the subarray of the first element along each reduction axis, + * then return a view to the rest. + * + * Adjust the shape to only look at the first element along + * any of the reduction axes. + */ + shape = PyArray_SHAPE(op_view); + memcpy(shape_orig, shape, ndim * sizeof(npy_intp)); + for (idim = 0; idim < ndim; ++idim) { + if (axis_flags[idim]) { + shape[idim] = 1; + } + } + + /* + * Copy the elements into the result to start, with + * 'preservena' set to True so that we don't overwrite + * what we already calculated in ReduceNAMask. + */ + if (PyArray_AssignArray(result, op_view, NULL, NPY_UNSAFE_CASTING, + 1, NULL) < 0) { + Py_DECREF(op_view); + return NULL; + } + + /* Adjust the shape to only look at the remaining elements */ + strides = PyArray_STRIDES(op_view); + for (idim = 0; idim < ndim; ++idim) { + if (axis_flags[idim]) { + shape[idim] = shape_orig[idim] - 1; + ((PyArrayObject_fieldaccess *)op_view)->data += strides[idim]; + } + } + if (PyArray_HASMASKNA(op_view)) { + strides = PyArray_MASKNA_STRIDES(op_view); + for (idim = 0; idim < ndim; ++idim) { + if (axis_flags[idim]) { + ((PyArrayObject_fieldaccess *)op_view)->maskna_data += + strides[idim]; + } + } + } + + return op_view; +} diff --git a/numpy/core/src/multiarray/reduction.h b/numpy/core/src/multiarray/reduction.h new file mode 100644 index 000000000..f19909dbc --- /dev/null +++ b/numpy/core/src/multiarray/reduction.h @@ -0,0 +1,4 @@ +#ifndef _NPY_PRIVATE__REDUCTION_H_ +#define _NPY_PRIVATE__REDUCTION_H_ + +#endif diff --git a/numpy/core/src/multiarray/shape.c b/numpy/core/src/multiarray/shape.c index b49b4d988..496d32955 100644 --- a/numpy/core/src/multiarray/shape.c +++ b/numpy/core/src/multiarray/shape.c @@ -663,45 +663,48 @@ _fix_unknown_dimension(PyArray_Dims *newshape, npy_intp s_original) NPY_NO_EXPORT PyObject * PyArray_Squeeze(PyArrayObject *self) { - int nd = PyArray_NDIM(self); - int newnd = nd; - npy_intp dimensions[NPY_MAXDIMS]; - npy_intp strides[NPY_MAXDIMS]; - int i, j; PyArrayObject *ret; - PyArray_Descr *dtype; - - if (nd == 0) { - Py_INCREF(self); - return (PyObject *)self; - } - for (j = 0, i = 0; i < nd; i++) { - if (PyArray_DIMS(self)[i] == 1) { - newnd -= 1; + npy_bool unit_dims[NPY_MAXDIMS]; + int idim, ndim, any_ones; + npy_intp *shape; + + ndim = PyArray_NDIM(self); + shape = PyArray_SHAPE(self); + + any_ones = 0; + for (idim = 0; idim < ndim; ++idim) { + if (shape[idim] == 1) { + unit_dims[idim] = 1; + any_ones = 1; } else { - dimensions[j] = PyArray_DIMS(self)[i]; - strides[j++] = PyArray_STRIDES(self)[i]; + unit_dims[idim] = 0; } } - dtype = PyArray_DESCR(self); - Py_INCREF(dtype); - ret = (PyArrayObject *)PyArray_NewFromDescr(Py_TYPE(self), - dtype, - newnd, dimensions, - strides, PyArray_DATA(self), - PyArray_FLAGS(self), - (PyObject *)self); + /* If there were no ones to squeeze out, return the same array */ + if (!any_ones) { + Py_INCREF(self); + return (PyObject *)self; + } + + ret = (PyArrayObject *)PyArray_View(self, NULL, &PyArray_Type); if (ret == NULL) { return NULL; } - PyArray_CLEARFLAGS(ret, NPY_ARRAY_OWNDATA); - Py_INCREF(self); - if (PyArray_SetBaseObject(ret, (PyObject *)self) < 0) { + + PyArray_RemoveAxesInPlace(ret, unit_dims); + + /* + * If self isn't not a base class ndarray, call its + * __array_wrap__ method + */ + if (Py_TYPE(self) != &PyArray_Type) { + PyArrayObject *tmp = PyArray_SubclassWrap(self, ret); Py_DECREF(ret); - return NULL; + ret = tmp; } + return (PyObject *)ret; } @@ -1185,3 +1188,45 @@ build_shape_string(npy_intp n, npy_intp *vals) PyUString_ConcatAndDel(&ret, tmp); return ret; } + +/*NUMPY_API + * + * Removes the axes flagged as True from the array, + * modifying it in place. If an axis flagged for removal + * has a shape entry bigger than one, this effectively selects + * index zero for that axis. + * + * For example, this can be used to remove the reduction axes + * from a reduction result once its computation is complete. + */ +NPY_NO_EXPORT void +PyArray_RemoveAxesInPlace(PyArrayObject *arr, npy_bool *flags) +{ + PyArrayObject_fieldaccess *fa = (PyArrayObject_fieldaccess *)arr; + npy_intp *shape = fa->dimensions, *strides = fa->strides; + int idim, ndim = fa->nd, idim_out = 0; + + /* Compress the dimensions and strides */ + for (idim = 0; idim < ndim; ++idim) { + if (!flags[idim]) { + shape[idim_out] = shape[idim]; + strides[idim_out] = strides[idim]; + ++idim_out; + } + } + + /* Compress the mask strides if the result has an NA mask */ + if (PyArray_HASMASKNA(arr)) { + strides = fa->maskna_strides; + idim_out = 0; + for (idim = 0; idim < ndim; ++idim) { + if (!flags[idim]) { + strides[idim_out] = strides[idim]; + ++idim_out; + } + } + } + + /* The final number of dimensions */ + fa->nd = idim_out; +} diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index e4d5edca3..a4f9bb105 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -2560,171 +2560,6 @@ get_masked_binary_op_function(PyUFuncObject *self, PyArrayObject *arr, } /* - * Allocates a result array for a reduction operation, with - * dimensions matching 'arr' except set to 1 with 0 stride - * whereever axis_flags is True. Dropping the reduction axes - * from the result must be done later by the caller once the - * computation is complete. - * - * This function never adds an NA mask to the allocated - * result, that is the responsibility of the caller. It also - * always allocates a base class ndarray. - * - * If 'dtype' isn't NULL, this function steals its reference. - */ -static PyArrayObject * -allocate_reduce_result(PyArrayObject *arr, npy_bool *axis_flags, - PyArray_Descr *dtype) -{ - npy_intp strides[NPY_MAXDIMS], stride; - npy_intp shape[NPY_MAXDIMS], *arr_shape = PyArray_DIMS(arr); - npy_stride_sort_item strideperm[NPY_MAXDIMS]; - int idim, ndim = PyArray_NDIM(arr); - - if (dtype == NULL) { - dtype = PyArray_DTYPE(arr); - Py_INCREF(dtype); - } - - PyArray_CreateSortedStridePerm(PyArray_NDIM(arr), PyArray_SHAPE(arr), - PyArray_STRIDES(arr), strideperm); - - /* Build the new strides and shape */ - stride = dtype->elsize; - memcpy(shape, arr_shape, ndim * sizeof(shape[0])); - for (idim = ndim-1; idim >= 0; --idim) { - npy_intp i_perm = strideperm[idim].perm; - if (axis_flags[i_perm]) { - strides[i_perm] = 0; - shape[i_perm] = 1; - } - else { - strides[i_perm] = stride; - stride *= shape[i_perm]; - } - } - - /* Finally, allocate the array */ - return (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, dtype, - ndim, shape, strides, - NULL, 0, NULL); -} - -/* - * Conforms an output parameter 'out' to have 'ndim' dimensions - * with dimensions of size one added in the appropriate places - * indicated by 'axis_flags'. - * - * The return value is a view into 'out'. - */ -static PyArrayObject * -conform_reduce_result(int ndim, npy_bool *axis_flags, PyArrayObject *out) -{ - npy_intp strides[NPY_MAXDIMS], shape[NPY_MAXDIMS]; - npy_intp *strides_out = PyArray_STRIDES(out); - npy_intp *shape_out = PyArray_DIMS(out); - int idim, idim_out, ndim_out = PyArray_NDIM(out); - PyArray_Descr *dtype; - PyArrayObject_fieldaccess *ret; - - /* Construct the strides and shape */ - idim_out = 0; - for (idim = 0; idim < ndim; ++idim) { - if (axis_flags[idim]) { - strides[idim] = 0; - shape[idim] = 1; - } - else { - if (idim_out >= ndim_out) { - PyErr_SetString(PyExc_ValueError, - "output parameter for reduce does not have " - "enough dimensions"); - return NULL; - } - strides[idim] = strides_out[idim_out]; - shape[idim] = shape_out[idim_out]; - ++idim_out; - } - } - - if (idim_out != ndim_out) { - PyErr_SetString(PyExc_ValueError, - "output parameter for reduce has too many " - "dimensions"); - return NULL; - } - - /* Allocate the view */ - dtype = PyArray_DESCR(out); - Py_INCREF(dtype); - ret = (PyArrayObject_fieldaccess *)PyArray_NewFromDescr(&PyArray_Type, - dtype, - ndim, shape, - strides, - PyArray_DATA(out), - PyArray_FLAGS(out) & ~(NPY_ARRAY_MASKNA|NPY_ARRAY_OWNMASKNA), - NULL); - if (ret == NULL) { - return NULL; - } - Py_INCREF(out); - if (PyArray_SetBaseObject((PyArrayObject *)ret, (PyObject *)out) < 0) { - Py_DECREF(ret); - return NULL; - } - - /* Take a view of the mask if it exists */ - if (PyArray_HASMASKNA(out)) { - npy_intp *strides_ret = ret->maskna_strides; - strides_out = PyArray_MASKNA_STRIDES(out); - idim_out = 0; - for (idim = 0; idim < ndim; ++idim) { - if (axis_flags[idim]) { - strides_ret[idim] = 0; - } - else { - strides_ret[idim] = strides_out[idim_out]; - ++idim_out; - } - } - - ret->maskna_dtype = PyArray_MASKNA_DTYPE(out); - Py_INCREF(ret->maskna_dtype); - ret->maskna_data = PyArray_MASKNA_DATA(out); - ret->flags |= NPY_ARRAY_MASKNA; - } - - return (PyArrayObject *)ret; -} - -/* Allocate 'result' or conform 'out' to 'self' (in 'result') */ -static PyArrayObject * -allocate_or_conform_reduce_result(PyArrayObject *arr, PyArrayObject *out, - npy_bool *axis_flags, PyArray_Descr *otype_dtype, - int addmask) -{ - PyArrayObject *result; - - if (out == NULL) { - Py_XINCREF(otype_dtype); - result = allocate_reduce_result(arr, axis_flags, otype_dtype); - - /* Allocate an NA mask if necessary */ - if (addmask && result != NULL) { - if (PyArray_AllocateMaskNA(result, 1, 0, 1) < 0) { - Py_DECREF(result); - return NULL; - } - } - } - else { - result = conform_reduce_result(PyArray_NDIM(arr), axis_flags, out); - } - - return result; -} - -/* * Either: * 1) Fills 'result' with the identity, and returns a reference to 'arr'. * 2) Copies the first values along each reduction axis into 'result', @@ -2735,10 +2570,6 @@ initialize_reduce_result(int identity, PyArrayObject *result, npy_bool *axis_flags, PyArrayObject *arr, int skipna, char *ufunc_name) { - npy_intp *strides, *shape, shape_orig[NPY_MAXDIMS], shape0; - PyArrayObject *arr_view; - int idim, ndim; - if (identity == PyUFunc_One) { if (PyArray_AssignOne(result, NULL, !skipna, NULL) < 0) { return NULL; @@ -2753,165 +2584,10 @@ initialize_reduce_result(int identity, PyArrayObject *result, Py_INCREF(arr); return arr; } - /* - * With skipna=True and where 'arr' has an NA mask, - * need to do some additional fiddling if there's no unit. - */ - else if (skipna && PyArray_HASMASKNA(arr)) { - char *data, *maskna_data; - npy_intp *maskna_strides; - - ndim = PyArray_NDIM(arr); - - /* - * Currently only supporting one dimension in this case. - */ - if (ndim != 1) { - PyErr_SetString(PyExc_ValueError, - "skipna=True with a non-identity reduction " - "and an array with ndim > 1 isn't implemented yet"); - return NULL; - } - - arr_view = (PyArrayObject *)PyArray_View(arr, NULL, &PyArray_Type); - if (arr_view == NULL) { - return NULL; - } - - shape = PyArray_DIMS(arr_view); - shape0 = shape[0]; - data = PyArray_DATA(arr_view); - strides = PyArray_STRIDES(arr_view); - maskna_data = PyArray_MASKNA_DATA(arr_view); - maskna_strides = PyArray_MASKNA_STRIDES(arr_view); - - /* Shrink the array from the start until we find an exposed element */ - while (shape0 > 0 && - !NpyMaskValue_IsExposed((npy_mask)*maskna_data)) { - --shape0; - data += strides[0]; - maskna_data += maskna_strides[0]; - } - - if (shape0 == 0) { - Py_DECREF(arr_view); - PyErr_Format(PyExc_ValueError, - "fully NA array with skipna=True to " - "%s.reduce which has no identity", ufunc_name); - return NULL; - } - - /* With the first element exposed, fall through to the other code */ - shape[0] = shape0; - ((PyArrayObject_fieldaccess *)arr_view)->data = data; - ((PyArrayObject_fieldaccess *)arr_view)->maskna_data = maskna_data; - } - /* - * If there is no identity, copy the first element along the - * reduction dimensions. - */ else { - ndim = PyArray_NDIM(arr); - - if (PyArray_SIZE(arr) == 0) { - PyErr_Format(PyExc_ValueError, - "zero-size array to %s.reduce which has no identity", - ufunc_name); - return NULL; - } - - /* - * TODO: Should the ufunc tell us whether it's commutative - * and/or associative, and this operation can be reordered? - * When reducing more than one axis at a time, this is - * important. - * - * Take a view into 'arr' which we can modify. - */ - arr_view = (PyArrayObject *)PyArray_View(arr, NULL, &PyArray_Type); - if (arr_view == NULL) { - return NULL; - } - } - - /* - * Adjust the shape to only look at the first element along - * any of the reduction axes. - */ - shape = PyArray_DIMS(arr_view); - memcpy(shape_orig, shape, ndim * sizeof(npy_intp)); - for (idim = 0; idim < ndim; ++idim) { - if (axis_flags[idim]) { - shape[idim] = 1; - } - } - - /* - * Copy the elements into the result to start, with - * 'preservena' set to True so that we don't overwrite - * what we already calculated in ReduceNAMask. - */ - if (PyArray_AssignArray(result, arr_view, NULL, NPY_UNSAFE_CASTING, - 1, NULL) < 0) { - Py_DECREF(arr_view); - return NULL; + return PyArray_InitializeReduceResult( + result, arr, axis_flags, skipna, ufunc_name); } - - /* Adjust the shape to only look at the remaining elements */ - strides = PyArray_STRIDES(arr_view); - for (idim = 0; idim < ndim; ++idim) { - if (axis_flags[idim]) { - shape[idim] = shape_orig[idim] - 1; - ((PyArrayObject_fieldaccess *)arr_view)->data += strides[idim]; - } - } - if (PyArray_HASMASKNA(arr_view)) { - strides = PyArray_MASKNA_STRIDES(arr_view); - for (idim = 0; idim < ndim; ++idim) { - if (axis_flags[idim]) { - ((PyArrayObject_fieldaccess *)arr_view)->maskna_data += - strides[idim]; - } - } - } - - return arr_view; -} - -/* - * Removes the dimensions flagged as True from the array, - * modifying it in place. - */ -static void -strip_flagged_dimensions(PyArrayObject *arr, npy_bool *flags) -{ - PyArrayObject_fieldaccess *fa = (PyArrayObject_fieldaccess *)arr; - npy_intp *shape = fa->dimensions, *strides = fa->strides; - int idim, ndim = fa->nd, idim_out = 0; - - /* Compress the dimensions and strides */ - for (idim = 0; idim < ndim; ++idim) { - if (!flags[idim]) { - shape[idim_out] = shape[idim]; - strides[idim_out] = strides[idim]; - ++idim_out; - } - } - - /* Compress the mask strides if the result has an NA mask */ - if (PyArray_HASMASKNA(arr)) { - strides = fa->maskna_strides; - idim_out = 0; - for (idim = 0; idim < ndim; ++idim) { - if (!flags[idim]) { - strides[idim_out] = strides[idim]; - ++idim_out; - } - } - } - - /* The final number of dimensions */ - fa->nd = idim_out; } /* @@ -3045,8 +2721,10 @@ PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out, } /* Allocate an output or conform 'out' to 'self' */ - result = allocate_or_conform_reduce_result(arr, out, - axis_flags, otype_dtype, !skipna && use_maskna); + Py_XINCREF(otype_dtype); + result = PyArray_CreateReduceResult(arr, out, + otype_dtype, axis_flags, !skipna && use_maskna, + ufunc_name); if (result == NULL) { return NULL; } @@ -3256,7 +2934,7 @@ finish: /* Strip out the extra 'one' dimensions in the result */ if (out == NULL) { - strip_flagged_dimensions(result, axis_flags); + PyArray_RemoveAxesInPlace(result, axis_flags); } else { Py_DECREF(result); diff --git a/numpy/core/tests/test_maskna.py b/numpy/core/tests/test_maskna.py index ff1db7603..b839b661e 100644 --- a/numpy/core/tests/test_maskna.py +++ b/numpy/core/tests/test_maskna.py @@ -149,8 +149,6 @@ def test_array_maskna_item(): a.itemset((1,1), np.NA) assert_(np.isna(a[1,1])) - - def test_array_maskna_payload(): # Single numbered index a = np.zeros((2,), maskna=True) @@ -962,6 +960,7 @@ def test_array_maskna_concatenate(): assert_equal(res.strides, (4, 16)) def test_array_maskna_column_stack(): + # np.column_stack a = np.array((1,2,3), maskna=True) b = np.array((2,3,4), maskna=True) b[2] = np.NA @@ -978,6 +977,13 @@ def test_array_maskna_compress(): res = a.compress(mask) assert_equal(res, [1,2,3,4]) +def test_array_maskna_squeeze(): + # np.squeeze + a = np.zeros((1,3,1,1,4,2,1), maskna=True) + a[0,1,0,0,3,0,0] = np.NA + res = np.squeeze(a) + assert_equal(res.shape, (3,4,2)) + assert_(np.isna(res[1,3,0])) if __name__ == "__main__": run_module_suite() |