diff options
author | Jay Bourque <jay.bourque@continuum.io> | 2013-04-24 21:42:15 -0500 |
---|---|---|
committer | Jay Bourque <jay.bourque@continuum.io> | 2013-04-24 21:42:15 -0500 |
commit | f231558bab5be194f3a7e57ba4c9738056d3a04d (patch) | |
tree | 685bd451992330e7d6b9ba8acf8094be0a87c2cf | |
parent | d0d8d1c1deb28fb2b43c7180cd0e293608b6964e (diff) | |
download | numpy-f231558bab5be194f3a7e57ba4c9738056d3a04d.tar.gz |
Fix for astype('S') string truncate issue
Calling astype('S') on an array of string objects results in a string array with dtype='S64', even if the original string objects are longer than 64 characters. Add a call to PyArray_GetArrayParamsFromObject() to determine the maximum string object length, and use that as the string dtype size.
-rw-r--r-- | numpy/core/src/multiarray/convert_datatype.c | 26 | ||||
-rw-r--r-- | numpy/core/tests/test_api.py | 7 |
2 files changed, 33 insertions, 0 deletions
diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c index e1483f4e1..457844618 100644 --- a/numpy/core/src/multiarray/convert_datatype.c +++ b/numpy/core/src/multiarray/convert_datatype.c @@ -141,6 +141,12 @@ PyArray_AdaptFlexibleDType(PyObject *data_obj, PyArray_Descr *data_dtype, { PyArray_DatetimeMetaData *meta; int flex_type_num; + PyArrayObject *arr = NULL, *ret; + PyArray_Descr *dtype = NULL; + int ndim = 0; + npy_intp dims[NPY_MAXDIMS]; + PyObject *list = NULL; + int result; if (*flex_dtype == NULL) { if (!PyErr_Occurred()) { @@ -220,6 +226,26 @@ PyArray_AdaptFlexibleDType(PyObject *data_obj, PyArray_Descr *data_dtype, break; case NPY_OBJECT: size = 64; + /* If we're adapting a string dtype for an array of string + objects, call GetArrayParamsFromObject to figure out + maximum string size, and use that as new dtype size. */ + if (flex_type_num == NPY_STRING && data_obj != NULL) { + /* Convert data array to list of objects since + GetArrayParamsFromObject won't iterator through + items in an array. */ + list = PyArray_ToList(data_obj); + if (list != NULL) { + result = PyArray_GetArrayParamsFromObject( + list, + flex_dtype, + 0, &dtype, + &ndim, dims, &arr, NULL); + if (result == 0 && dtype != NULL) { + size = dtype->elsize; + } + Py_DECREF(list); + } + } break; case NPY_STRING: case NPY_VOID: diff --git a/numpy/core/tests/test_api.py b/numpy/core/tests/test_api.py index 1d4b93b0f..388c23d3a 100644 --- a/numpy/core/tests/test_api.py +++ b/numpy/core/tests/test_api.py @@ -89,6 +89,13 @@ def test_array_astype(): assert_(not (a is b)) assert_(type(b) != np.matrix) + # Make sure converting from string object to fixed length string + # does not truncate. + a = np.array(['a'*100], dtype='O') + b = a.astype('S') + assert_equal(a, b) + assert_equal(b.dtype, np.dtype('S100')) + def test_copyto_fromscalar(): a = np.arange(6, dtype='f4').reshape(2,3) |