summaryrefslogtreecommitdiff
path: root/numpy
diff options
context:
space:
mode:
Diffstat (limited to 'numpy')
-rw-r--r--numpy/compat/py3k.py25
-rw-r--r--numpy/core/src/multiarray/common.c141
-rw-r--r--numpy/core/src/multiarray/common.h4
-rw-r--r--numpy/core/tests/test_regression.py16
4 files changed, 173 insertions, 13 deletions
diff --git a/numpy/compat/py3k.py b/numpy/compat/py3k.py
index 001455de5..0a03929be 100644
--- a/numpy/compat/py3k.py
+++ b/numpy/compat/py3k.py
@@ -13,32 +13,45 @@ if sys.version_info[0] >= 3:
import io
bytes = bytes
unicode = str
- asunicode = str
+
+ def asunicode(s):
+ if isinstance(s, bytes):
+ return s.decode('latin1')
+ return str(s)
+
def asbytes(s):
if isinstance(s, bytes):
return s
- return s.encode('latin1')
+ return str(s).encode('latin1')
+
def asstr(s):
- if isinstance(s, str):
- return s
- return s.decode('latin1')
+ if isinstance(s, bytes):
+ return s.decode('latin1')
+ return str(s)
+
def isfileobj(f):
return isinstance(f, (io.FileIO, io.BufferedReader))
+
def open_latin1(filename, mode='r'):
return open(filename, mode=mode, encoding='iso-8859-1')
+
strchar = 'U'
+
else:
bytes = str
unicode = unicode
asbytes = str
asstr = str
strchar = 'S'
+
def isfileobj(f):
return isinstance(f, file)
+
def asunicode(s):
if isinstance(s, unicode):
return s
- return s.decode('ascii')
+ return str(s).decode('ascii')
+
def open_latin1(filename, mode='r'):
return open(filename, mode=mode)
diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c
index 5015a575f..74551176f 100644
--- a/numpy/core/src/multiarray/common.c
+++ b/numpy/core/src/multiarray/common.c
@@ -68,6 +68,15 @@ _use_default_type(PyObject *op)
#endif
/*
+ * These constants are used to signal that the recursive dtype determination in
+ * PyArray_DTypeFromObject encountered a string type, and that the recursive
+ * search must be restarted so that string representation lengths can be
+ * computed for all scalar types.
+ */
+#define RETRY_WITH_STRING 1
+#define RETRY_WITH_UNICODE 2
+
+/*
* Recursively examines the object to determine an appropriate dtype
* to use for converting to an ndarray.
*
@@ -88,10 +97,33 @@ _use_default_type(PyObject *op)
*
* Returns 0 on success, -1 on failure.
*/
-NPY_NO_EXPORT int
+ NPY_NO_EXPORT int
PyArray_DTypeFromObject(PyObject *obj, int maxdims, int *out_contains_na,
PyArray_Descr **out_dtype)
{
+ int res;
+
+ res = PyArray_DTypeFromObjectHelper(obj, maxdims, out_contains_na,
+ out_dtype, 0);
+ if (res == RETRY_WITH_STRING) {
+ res = PyArray_DTypeFromObjectHelper(obj, maxdims, out_contains_na,
+ out_dtype, NPY_STRING);
+ if (res == RETRY_WITH_UNICODE) {
+ res = PyArray_DTypeFromObjectHelper(obj, maxdims,
+ out_contains_na, out_dtype, NPY_UNICODE);
+ }
+ }
+ else if (res == RETRY_WITH_UNICODE) {
+ res = PyArray_DTypeFromObjectHelper(obj, maxdims, out_contains_na,
+ out_dtype, NPY_UNICODE);
+ }
+ return res;
+}
+
+NPY_NO_EXPORT int
+PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims, int *out_contains_na,
+ PyArray_Descr **out_dtype, int string_type)
+{
int i, size;
PyArray_Descr *dtype = NULL;
PyObject *ip;
@@ -103,6 +135,7 @@ PyArray_DTypeFromObject(PyObject *obj, int maxdims, int *out_contains_na,
if (PyArray_Check(obj)) {
/* Check for any NAs in the array */
int containsna = PyArray_ContainsNA((PyArrayObject *)obj, NULL, NULL);
+
if (containsna == -1) {
goto fail;
}
@@ -116,9 +149,43 @@ PyArray_DTypeFromObject(PyObject *obj, int maxdims, int *out_contains_na,
/* Check if it's a NumPy scalar */
if (PyArray_IsScalar(obj, Generic)) {
- dtype = PyArray_DescrFromScalar(obj);
- if (dtype == NULL) {
- goto fail;
+ int itemsize;
+ PyObject *temp;
+
+ if (!string_type) {
+ dtype = PyArray_DescrFromScalar(obj);
+ if (dtype == NULL) {
+ goto fail;
+ }
+ }
+ else {
+ if (string_type == NPY_STRING) {
+ if ((temp = PyObject_Str(obj)) == NULL) {
+ return -1;
+ }
+ itemsize = PyString_GET_SIZE(temp);
+ }
+ else if (string_type == NPY_UNICODE) {
+#if defined(NPY_PY3K)
+ if ((temp = PyObject_Str(obj)) == NULL) {
+#else
+ if ((temp = PyObject_Unicode(obj)) == NULL) {
+#endif
+ return -1;
+ }
+ itemsize = PyUnicode_GET_DATA_SIZE(temp);
+ }
+ Py_DECREF(temp);
+ if (*out_dtype != NULL &&
+ (*out_dtype)->type_num == string_type &&
+ (*out_dtype)->elsize >= itemsize) {
+ return 0;
+ }
+ dtype = PyArray_DescrNewFromType(string_type);
+ if (dtype == NULL) {
+ goto fail;
+ }
+ dtype->elsize = itemsize;
}
goto promote_types;
}
@@ -126,6 +193,41 @@ PyArray_DTypeFromObject(PyObject *obj, int maxdims, int *out_contains_na,
/* Check if it's a Python scalar */
dtype = _array_find_python_scalar_type(obj);
if (dtype != NULL) {
+ int itemsize;
+ PyObject *temp;
+
+ if (string_type) {
+ if (string_type == NPY_STRING) {
+ if ((temp = PyObject_Str(obj)) == NULL) {
+ return -1;
+ }
+ itemsize = PyString_GET_SIZE(temp);
+ }
+ else if (string_type == NPY_UNICODE) {
+#if defined(NPY_PY3K)
+ if ((temp = PyObject_Str(obj)) == NULL) {
+#else
+ if ((temp = PyObject_Unicode(obj)) == NULL) {
+#endif
+ return -1;
+ }
+ itemsize = PyUnicode_GET_DATA_SIZE(temp);
+#ifndef Py_UNICODE_WIDE
+ itemsize <<= 1;
+#endif
+ }
+ Py_DECREF(temp);
+ if (*out_dtype != NULL &&
+ (*out_dtype)->type_num == string_type &&
+ (*out_dtype)->elsize >= itemsize) {
+ return 0;
+ }
+ dtype = PyArray_DescrNewFromType(string_type);
+ if (dtype == NULL) {
+ goto fail;
+ }
+ dtype->elsize = itemsize;
+ }
goto promote_types;
}
@@ -313,15 +415,21 @@ PyArray_DTypeFromObject(PyObject *obj, int maxdims, int *out_contains_na,
}
/* Recursive call for each sequence item */
for (i = 0; i < size; ++i) {
+ int res;
ip = PySequence_GetItem(obj, i);
- if (ip==NULL) {
+ if (ip == NULL) {
goto fail;
}
- if (PyArray_DTypeFromObject(ip, maxdims - 1,
- out_contains_na, out_dtype) < 0) {
+ res = PyArray_DTypeFromObjectHelper(ip, maxdims - 1,
+ out_contains_na, out_dtype, string_type);
+ if (res < 0) {
Py_DECREF(ip);
goto fail;
}
+ else if (res > 0) {
+ Py_DECREF(ip);
+ return res;
+ }
Py_DECREF(ip);
}
@@ -331,6 +439,12 @@ PyArray_DTypeFromObject(PyObject *obj, int maxdims, int *out_contains_na,
promote_types:
/* Set 'out_dtype' if it's NULL */
if (*out_dtype == NULL) {
+ if (!string_type && dtype->type_num == NPY_STRING) {
+ return RETRY_WITH_STRING;
+ }
+ if (!string_type && dtype->type_num == NPY_UNICODE) {
+ return RETRY_WITH_UNICODE;
+ }
*out_dtype = dtype;
return 0;
}
@@ -342,6 +456,16 @@ promote_types:
return -1;
}
Py_DECREF(*out_dtype);
+ if (!string_type &&
+ res_dtype->type_num == NPY_UNICODE &&
+ (*out_dtype)->type_num != NPY_UNICODE) {
+ return RETRY_WITH_UNICODE;
+ }
+ if (!string_type &&
+ res_dtype->type_num == NPY_STRING &&
+ (*out_dtype)->type_num != NPY_STRING) {
+ return RETRY_WITH_STRING;
+ }
*out_dtype = res_dtype;
return 0;
}
@@ -352,6 +476,9 @@ fail:
return -1;
}
+#undef RETRY_WITH_STRING
+#undef RETRY_WITH_UNICODE
+
/* new reference */
NPY_NO_EXPORT PyArray_Descr *
_array_typedescr_fromstr(char *str)
diff --git a/numpy/core/src/multiarray/common.h b/numpy/core/src/multiarray/common.h
index 248d752f6..750f52fa5 100644
--- a/numpy/core/src/multiarray/common.h
+++ b/numpy/core/src/multiarray/common.h
@@ -27,6 +27,10 @@ NPY_NO_EXPORT int
PyArray_DTypeFromObject(PyObject *obj, int maxdims, int *out_contains_na,
PyArray_Descr **out_dtype);
+NPY_NO_EXPORT int
+PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims, int *out_contains_na,
+ PyArray_Descr **out_dtype, int string_status);
+
/*
* Returns NULL without setting an exception if no scalar is matched, a
* new dtype reference otherwise.
diff --git a/numpy/core/tests/test_regression.py b/numpy/core/tests/test_regression.py
index c03c5b02f..5f9067759 100644
--- a/numpy/core/tests/test_regression.py
+++ b/numpy/core/tests/test_regression.py
@@ -1643,5 +1643,21 @@ class TestRegression(TestCase):
x = np.arange(0, 4, dtype='datetime64[D]')
assert_raises(TypeError, x.searchsorted, 1)
+ def test_string_truncation(self):
+ # Ticket #1990 - Data can be truncated in creation of an array from a
+ # mixed sequence of numeric values and strings
+ for val in [True, 1234, 123.4, complex(1, 234)]:
+ for tostr in [asunicode, asbytes]:
+ b = np.array([val, tostr('xx')])
+ assert_equal(tostr(b[0]), tostr(val))
+ b = np.array([tostr('xx'), val])
+ assert_equal(tostr(b[1]), tostr(val))
+
+ # test also with longer strings
+ b = np.array([val, tostr('xxxxxxxxxx')])
+ assert_equal(tostr(b[0]), tostr(val))
+ b = np.array([tostr('xxxxxxxxxx'), val])
+ assert_equal(tostr(b[1]), tostr(val))
+
if __name__ == "__main__":
run_module_suite()