summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorErik M. Bray <embray@stsci.edu>2015-10-08 17:33:02 -0400
committerErik M. Bray <erik.bray@lri.fr>2016-05-31 16:30:48 +0200
commit0bf907ac5ed0d3c3917cd55d0511633ea8feb8f3 (patch)
treeab094fcb6738f818bb3f5bfdd573d13f02a59f35
parente6593fbe72d28929345370696ea292a394795089 (diff)
downloadnumpy-0bf907ac5ed0d3c3917cd55d0511633ea8feb8f3.tar.gz
BUG: Fix numerous bugs related to zero-width string arrays (#473, #1901, #2196, #2585, #4955)
PyArray_NewFromDescr normally does not allow zero-width string dtypes (or rather, it automatically converts them to 1-width strings). This affects any code that uses PyArray_NewFromDescr, which is a lot. So we extend PyArray_NewFromDescr_int to allow disabling this functionality in a couple of narrow cases where it's appropriate--one is when extracting a field from a structured array that has a zero-width string dtype (which, intentionally or not, has been allowed). The other, which is related, is returning a view of an array that has a zero-width string dtype. This shouldn't otherwise break or change any existing behavior. Remove roadblocks to creating structured dtypes with zero-width fields using dict-based constructors (this was possible by other means such as the list-based constructor--with the previous fix in particular it should not be necessary to block this anymore). Adds tests based on the test cases given in the issues this is fixing. Fix a bug with array to zero-width array assignment revealed by the tests. I am slightly concerned that the blunt-force check for this in raw_array_assign_array may be masking a bug somewhere else though.
-rw-r--r--numpy/core/_internal.py2
-rw-r--r--numpy/core/records.py2
-rw-r--r--numpy/core/src/multiarray/convert.c5
-rw-r--r--numpy/core/src/multiarray/ctors.c33
-rw-r--r--numpy/core/src/multiarray/ctors.h6
-rw-r--r--numpy/core/src/multiarray/descriptor.c2
-rw-r--r--numpy/core/src/multiarray/dtype_transfer.c13
-rw-r--r--numpy/core/src/multiarray/mapping.c9
-rw-r--r--numpy/core/src/multiarray/methods.c14
-rw-r--r--numpy/core/tests/test_dtype.py10
-rw-r--r--numpy/core/tests/test_multiarray.py24
-rw-r--r--numpy/core/tests/test_records.py14
12 files changed, 96 insertions, 38 deletions
diff --git a/numpy/core/_internal.py b/numpy/core/_internal.py
index 47c933411..5ad440fa4 100644
--- a/numpy/core/_internal.py
+++ b/numpy/core/_internal.py
@@ -33,8 +33,6 @@ def _makenames_list(adict, align):
if (num < 0):
raise ValueError("invalid offset.")
format = dtype(obj[0], align=align)
- if (format.itemsize == 0):
- raise ValueError("all itemsizes must be fixed.")
if (n > 2):
title = obj[2]
else:
diff --git a/numpy/core/records.py b/numpy/core/records.py
index 9f5dcc811..3bee394cd 100644
--- a/numpy/core/records.py
+++ b/numpy/core/records.py
@@ -424,7 +424,7 @@ class recarray(ndarray):
return self
def __array_finalize__(self, obj):
- if self.dtype.type is not record:
+ if self.dtype.type is not record and self.dtype.fields:
# if self.dtype is not np.record, invoke __setattr__ which will
# convert it to a record if it is a void dtype.
self.dtype = self.dtype
diff --git a/numpy/core/src/multiarray/convert.c b/numpy/core/src/multiarray/convert.c
index 5499160be..8d51a6dd2 100644
--- a/numpy/core/src/multiarray/convert.c
+++ b/numpy/core/src/multiarray/convert.c
@@ -14,6 +14,7 @@
#include "npy_pycompat.h"
#include "arrayobject.h"
+#include "ctors.h"
#include "mapping.h"
#include "lowlevel_strided_loops.h"
#include "scalartypes.h"
@@ -600,13 +601,13 @@ PyArray_View(PyArrayObject *self, PyArray_Descr *type, PyTypeObject *pytype)
dtype = PyArray_DESCR(self);
Py_INCREF(dtype);
- ret = (PyArrayObject *)PyArray_NewFromDescr(subtype,
+ ret = (PyArrayObject *)PyArray_NewFromDescr_int(subtype,
dtype,
PyArray_NDIM(self), PyArray_DIMS(self),
PyArray_STRIDES(self),
PyArray_DATA(self),
flags,
- (PyObject *)self);
+ (PyObject *)self, 0, 1);
if (ret == NULL) {
Py_XDECREF(type);
return NULL;
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index 0017de0ad..777121bb8 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -894,10 +894,11 @@ discover_dimensions(PyObject *obj, int *maxndim, npy_intp *d, int check_it,
*
* steals a reference to descr (even on failure)
*/
-static PyObject *
+NPY_NO_EXPORT PyObject *
PyArray_NewFromDescr_int(PyTypeObject *subtype, PyArray_Descr *descr, int nd,
npy_intp *dims, npy_intp *strides, void *data,
- int flags, PyObject *obj, int zeroed)
+ int flags, PyObject *obj, int zeroed,
+ int allow_emptystring)
{
PyArrayObject_fields *fa;
int i;
@@ -916,7 +917,8 @@ PyArray_NewFromDescr_int(PyTypeObject *subtype, PyArray_Descr *descr, int nd,
newstrides, nd);
ret = PyArray_NewFromDescr_int(subtype, descr, nd, newdims,
newstrides,
- data, flags, obj, zeroed);
+ data, flags, obj, zeroed,
+ allow_emptystring);
return ret;
}
@@ -935,16 +937,17 @@ PyArray_NewFromDescr_int(PyTypeObject *subtype, PyArray_Descr *descr, int nd,
PyErr_SetString(PyExc_TypeError, "Empty data-type");
Py_DECREF(descr);
return NULL;
- }
- PyArray_DESCR_REPLACE(descr);
- if (descr == NULL) {
- return NULL;
- }
- if (descr->type_num == NPY_STRING) {
- nbytes = descr->elsize = 1;
- }
- else {
- nbytes = descr->elsize = sizeof(npy_ucs4);
+ } else if (!allow_emptystring) {
+ PyArray_DESCR_REPLACE(descr);
+ if (descr == NULL) {
+ return NULL;
+ }
+ if (descr->type_num == NPY_STRING) {
+ nbytes = descr->elsize = 1;
+ }
+ else {
+ nbytes = descr->elsize = sizeof(npy_ucs4);
+ }
}
}
@@ -1134,7 +1137,7 @@ PyArray_NewFromDescr(PyTypeObject *subtype, PyArray_Descr *descr, int nd,
{
return PyArray_NewFromDescr_int(subtype, descr, nd,
dims, strides, data,
- flags, obj, 0);
+ flags, obj, 0, 0);
}
/*NUMPY_API
@@ -2855,7 +2858,7 @@ PyArray_Zeros(int nd, npy_intp *dims, PyArray_Descr *type, int is_f_order)
type,
nd, dims,
NULL, NULL,
- is_f_order, NULL, 1);
+ is_f_order, NULL, 1, 0);
if (ret == NULL) {
return NULL;
diff --git a/numpy/core/src/multiarray/ctors.h b/numpy/core/src/multiarray/ctors.h
index 783818def..e889910cb 100644
--- a/numpy/core/src/multiarray/ctors.h
+++ b/numpy/core/src/multiarray/ctors.h
@@ -6,6 +6,12 @@ PyArray_NewFromDescr(PyTypeObject *subtype, PyArray_Descr *descr, int nd,
npy_intp *dims, npy_intp *strides, void *data,
int flags, PyObject *obj);
+NPY_NO_EXPORT PyObject *
+PyArray_NewFromDescr_int(PyTypeObject *subtype, PyArray_Descr *descr, int nd,
+ npy_intp *dims, npy_intp *strides, void *data,
+ int flags, PyObject *obj, int zeroed,
+ int allow_emptystring);
+
NPY_NO_EXPORT PyObject *PyArray_New(PyTypeObject *, int nd, npy_intp *,
int, npy_intp *, void *, int, int, PyObject *);
diff --git a/numpy/core/src/multiarray/descriptor.c b/numpy/core/src/multiarray/descriptor.c
index 7886179f1..7f8be5356 100644
--- a/numpy/core/src/multiarray/descriptor.c
+++ b/numpy/core/src/multiarray/descriptor.c
@@ -1138,7 +1138,7 @@ _convert_from_dict(PyObject *obj, int align)
}
}
Py_DECREF(tup);
- if ((ret == NPY_FAIL) || (newdescr->elsize == 0)) {
+ if (ret == NPY_FAIL) {
goto fail;
}
dtypeflags |= (newdescr->flags & NPY_FROM_FIELDS);
diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c
index fd371a1f6..d7759b6cc 100644
--- a/numpy/core/src/multiarray/dtype_transfer.c
+++ b/numpy/core/src/multiarray/dtype_transfer.c
@@ -22,6 +22,7 @@
#include "npy_pycompat.h"
#include "convert_datatype.h"
+#include "ctors.h"
#include "_datetime.h"
#include "datetime_strings.h"
@@ -549,8 +550,8 @@ wrap_copy_swap_function(int aligned,
* The copyswap functions shouldn't need that.
*/
Py_INCREF(dtype);
- data->arr = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, dtype,
- 1, &shape, NULL, NULL, 0, NULL);
+ data->arr = (PyArrayObject *)PyArray_NewFromDescr_int(&PyArray_Type, dtype,
+ 1, &shape, NULL, NULL, 0, NULL, 0, 1);
if (data->arr == NULL) {
PyArray_free(data);
return NPY_FAIL;
@@ -1405,8 +1406,8 @@ get_nbo_cast_transfer_function(int aligned,
return NPY_FAIL;
}
}
- data->aip = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, tmp_dtype,
- 1, &shape, NULL, NULL, 0, NULL);
+ data->aip = (PyArrayObject *)PyArray_NewFromDescr_int(&PyArray_Type,
+ tmp_dtype, 1, &shape, NULL, NULL, 0, NULL, 0, 1);
if (data->aip == NULL) {
PyArray_free(data);
return NPY_FAIL;
@@ -1429,8 +1430,8 @@ get_nbo_cast_transfer_function(int aligned,
return NPY_FAIL;
}
}
- data->aop = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, tmp_dtype,
- 1, &shape, NULL, NULL, 0, NULL);
+ data->aop = (PyArrayObject *)PyArray_NewFromDescr_int(&PyArray_Type,
+ tmp_dtype, 1, &shape, NULL, NULL, 0, NULL, 0, 1);
if (data->aop == NULL) {
Py_DECREF(data->aip);
PyArray_free(data);
diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index 5da76a39b..3d33f8a85 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -14,6 +14,7 @@
#include "npy_import.h"
#include "common.h"
+#include "ctors.h"
#include "iterators.h"
#include "mapping.h"
#include "lowlevel_strided_loops.h"
@@ -1291,7 +1292,7 @@ _get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view)
/* view the array at the new offset+dtype */
Py_INCREF(fieldtype);
- *view = (PyArrayObject*)PyArray_NewFromDescr(
+ *view = (PyArrayObject*)PyArray_NewFromDescr_int(
Py_TYPE(arr),
fieldtype,
PyArray_NDIM(arr),
@@ -1299,7 +1300,7 @@ _get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view)
PyArray_STRIDES(arr),
PyArray_BYTES(arr) + offset,
PyArray_FLAGS(arr),
- (PyObject *)arr);
+ (PyObject *)arr, 0, 1);
if (*view == NULL) {
return 0;
}
@@ -1397,7 +1398,7 @@ _get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view)
view_dtype->fields = fields;
view_dtype->flags = PyArray_DESCR(arr)->flags;
- *view = (PyArrayObject*)PyArray_NewFromDescr(
+ *view = (PyArrayObject*)PyArray_NewFromDescr_int(
Py_TYPE(arr),
view_dtype,
PyArray_NDIM(arr),
@@ -1405,7 +1406,7 @@ _get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view)
PyArray_STRIDES(arr),
PyArray_DATA(arr),
PyArray_FLAGS(arr),
- (PyObject *)arr);
+ (PyObject *)arr, 0, 1);
if (*view == NULL) {
return 0;
}
diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c
index b8e066b79..634690648 100644
--- a/numpy/core/src/multiarray/methods.c
+++ b/numpy/core/src/multiarray/methods.c
@@ -379,13 +379,13 @@ PyArray_GetField(PyArrayObject *self, PyArray_Descr *typed, int offset)
Py_DECREF(safe);
}
- ret = PyArray_NewFromDescr(Py_TYPE(self),
- typed,
- PyArray_NDIM(self), PyArray_DIMS(self),
- PyArray_STRIDES(self),
- PyArray_BYTES(self) + offset,
- PyArray_FLAGS(self)&(~NPY_ARRAY_F_CONTIGUOUS),
- (PyObject *)self);
+ ret = PyArray_NewFromDescr_int(Py_TYPE(self),
+ typed,
+ PyArray_NDIM(self), PyArray_DIMS(self),
+ PyArray_STRIDES(self),
+ PyArray_BYTES(self) + offset,
+ PyArray_FLAGS(self)&(~NPY_ARRAY_F_CONTIGUOUS),
+ (PyObject *)self, 0, 1);
if (ret == NULL) {
return NULL;
}
diff --git a/numpy/core/tests/test_dtype.py b/numpy/core/tests/test_dtype.py
index a6cb66b7d..f0721d7a3 100644
--- a/numpy/core/tests/test_dtype.py
+++ b/numpy/core/tests/test_dtype.py
@@ -256,6 +256,16 @@ class TestRecord(TestCase):
dt2 = np.dtype((np.void, dt.fields))
assert_equal(dt2.fields, dt.fields)
+ def test_from_dict_with_zero_width_field(self):
+ # Regression test for #6430 / #2196
+ dt = np.dtype([('val1', np.float32, (0,)), ('val2', int)])
+ dt2 = np.dtype({'names': ['val1', 'val2'],
+ 'formats': [(np.float32, (0,)), int]})
+
+ assert_dtype_equal(dt, dt2)
+ assert_equal(dt.fields['val1'][0].itemsize, 0)
+ assert_equal(dt.itemsize, dt.fields['val2'][0].itemsize)
+
def test_bool_commastring(self):
d = np.dtype('?,?,?') # raises?
assert_equal(len(d.names), 3)
diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py
index ec2b9fdb3..faca404ae 100644
--- a/numpy/core/tests/test_multiarray.py
+++ b/numpy/core/tests/test_multiarray.py
@@ -923,6 +923,30 @@ class TestStructured(TestCase):
assert_raises(ValueError, testassign)
+ def test_zero_width_string(self):
+ # Test for PR #6430 / issues #473, #4955, #2585
+
+ dt = np.dtype([('I', int), ('S', 'S0')])
+
+ x = np.zeros(3, dtype=dt)
+
+ assert_equal(x['S'], [b'', b'', b''])
+ assert_equal(x['S'].itemsize, 0)
+
+ x['S'] = ['a', 'b', 'c']
+ assert_equal(x['S'], [b'', b'', b''])
+ assert_equal(x['I'], [0, 0, 0])
+
+ # Variation on test case from #4955
+ x['S'][x['I'] == 0] = 'hello'
+ assert_equal(x['S'], [b'', b'', b''])
+ assert_equal(x['I'], [0, 0, 0])
+
+ # Variation on test case from #2585
+ x['S'] = 'A'
+ assert_equal(x['S'], [b'', b'', b''])
+ assert_equal(x['I'], [0, 0, 0])
+
class TestBool(TestCase):
def test_test_interning(self):
diff --git a/numpy/core/tests/test_records.py b/numpy/core/tests/test_records.py
index 2c85546a7..6ef6badda 100644
--- a/numpy/core/tests/test_records.py
+++ b/numpy/core/tests/test_records.py
@@ -254,6 +254,20 @@ class TestFromrecords(TestCase):
assert_equal(a[0]['qux'].D, asbytes('fgehi'))
assert_equal(a[0]['qux']['D'], asbytes('fgehi'))
+ def test_zero_width_strings(self):
+ # Test for #6430, based on the test case from #1901
+
+ cols = [['test'] * 3, [''] * 3]
+ rec = np.rec.fromarrays(cols)
+ assert_equal(rec['f0'], ['test', 'test', 'test'])
+ assert_equal(rec['f1'], ['', '', ''])
+
+ dt = np.dtype([('f0', '|S4'), ('f1', '|S')])
+ rec = np.rec.fromarrays(cols, dtype=dt)
+ assert_equal(rec.itemsize, 4)
+ assert_equal(rec['f0'], [b'test', b'test', b'test'])
+ assert_equal(rec['f1'], [b'', b'', b''])
+
class TestRecord(TestCase):
def setUp(self):