diff options
author | Charles Harris <charlesr.harris@gmail.com> | 2017-10-22 12:01:43 -0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-10-22 12:01:43 -0600 |
commit | 5d54ba0d52e66368c948c0e5fb83cac83e71eb50 (patch) | |
tree | 9d4a79e3b267749c8c7680807330a2a0ca6aa0bf | |
parent | b3f72f810ab1ef7142fc65dfe81c29e9bbba6328 (diff) | |
parent | 256397341fbd8c65b28558b177be5e3b43889980 (diff) | |
download | numpy-5d54ba0d52e66368c948c0e5fb83cac83e71eb50.tar.gz |
Merge pull request #9487 from eric-wieser/fromstring-bytes
DEP: Letting fromstring pretend to be frombuffer is a bad idea
-rw-r--r-- | doc/release/1.14.0-notes.rst | 6 | ||||
-rw-r--r-- | numpy/add_newdocs.py | 23 | ||||
-rw-r--r-- | numpy/core/src/multiarray/multiarraymodule.c | 11 | ||||
-rw-r--r-- | numpy/core/tests/test_multiarray.py | 9 | ||||
-rw-r--r-- | numpy/core/tests/test_regression.py | 16 |
5 files changed, 44 insertions, 21 deletions
diff --git a/doc/release/1.14.0-notes.rst b/doc/release/1.14.0-notes.rst index 4f8f94850..0aeeadd40 100644 --- a/doc/release/1.14.0-notes.rst +++ b/doc/release/1.14.0-notes.rst @@ -31,6 +31,12 @@ from ``np.array([1, 2, 3])[np.True_]``. This behavior is deprecated. empty, use ``array.size > 0``. * Calling ``np.bincount`` with ``minlength=None`` is deprecated - instead, ``minlength=0`` should be used. +``np.fromstring`` should always be passed a ``sep`` argument +------------------------------------------------------------ +Without this argument, this falls back on a broken version of `np.frombuffer` +that silently accepts and then encodes unicode strings. If reading binary data +is desired, ``frombuffer`` should be used directly. + Future Changes ============== diff --git a/numpy/add_newdocs.py b/numpy/add_newdocs.py index ce2f1c6ec..ad18f18c8 100644 --- a/numpy/add_newdocs.py +++ b/numpy/add_newdocs.py @@ -961,7 +961,7 @@ add_newdoc('numpy.core.multiarray', 'fromstring', """ fromstring(string, dtype=float, count=-1, sep='') - A new 1-D array initialized from raw binary or text data in a string. + A new 1-D array initialized from text data in a string. Parameters ---------- @@ -975,11 +975,13 @@ add_newdoc('numpy.core.multiarray', 'fromstring', negative (the default), the count will be determined from the length of the data. sep : str, optional - If not provided or, equivalently, the empty string, the data will - be interpreted as binary data; otherwise, as ASCII text with - decimal numbers. Also in this latter case, this argument is - interpreted as the string separating numbers in the data; extra - whitespace between elements is also ignored. + The string separating numbers in the data; extra whitespace between + elements is also ignored. + + .. deprecated:: 1.14 + If this argument is not provided, `fromstring` falls back on the + behaviour of `frombuffer` after encoding unicode string inputs as + either utf-8 (python 3), or the default encoding (python 2). 
Returns ------- @@ -998,14 +1000,10 @@ add_newdoc('numpy.core.multiarray', 'fromstring', Examples -------- - >>> np.fromstring('\\x01\\x02', dtype=np.uint8) - array([1, 2], dtype=uint8) >>> np.fromstring('1 2', dtype=int, sep=' ') array([1, 2]) >>> np.fromstring('1, 2', dtype=int, sep=',') array([1, 2]) - >>> np.fromstring('\\x01\\x02\\x03\\x04\\x05', dtype=np.uint8, count=3) - array([1, 2, 3], dtype=uint8) """) @@ -1154,6 +1152,11 @@ add_newdoc('numpy.core.multiarray', 'frombuffer', array(['w', 'o', 'r', 'l', 'd'], dtype='|S1') + >>> np.frombuffer(b'\\x01\\x02', dtype=np.uint8) + array([1, 2], dtype=uint8) + >>> np.frombuffer(b'\\x01\\x02\\x03\\x04\\x05', dtype=np.uint8, count=3) + array([1, 2, 3], dtype=uint8) + """) add_newdoc('numpy.core.multiarray', 'concatenate', diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index 210882ff0..499ec343c 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -2098,6 +2098,17 @@ array_fromstring(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *keywds Py_XDECREF(descr); return NULL; } + + /* binary mode, condition copied from PyArray_FromString */ + if (sep == NULL || strlen(sep) == 0) { + /* Numpy 1.14, 2017-10-19 */ + if (DEPRECATE( + "The binary mode of fromstring is deprecated, as it behaves " + "surprisingly on unicode inputs. 
Use frombuffer instead") < 0) { + Py_DECREF(descr); + return NULL; + } + } return PyArray_FromString(data, (npy_intp)s, descr, (npy_intp)nin, sep); } diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index 4808303ae..f6a5b4983 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -4183,11 +4183,11 @@ class TestIO(object): def test_roundtrip_binary_str(self): s = self.x.tobytes() - y = np.fromstring(s, dtype=self.dtype) + y = np.frombuffer(s, dtype=self.dtype) assert_array_equal(y, self.x.flat) s = self.x.tobytes('F') - y = np.fromstring(s, dtype=self.dtype) + y = np.frombuffer(s, dtype=self.dtype) assert_array_equal(y, self.x.flatten('F')) def test_roundtrip_str(self): @@ -4302,7 +4302,10 @@ class TestIO(object): assert_equal(pos, 10, err_msg=err_msg) def _check_from(self, s, value, **kw): - y = np.fromstring(s, **kw) + if 'sep' not in kw: + y = np.frombuffer(s, **kw) + else: + y = np.fromstring(s, **kw) assert_array_equal(y, value) f = open(self.filename, 'wb') diff --git a/numpy/core/tests/test_regression.py b/numpy/core/tests/test_regression.py index 34f9080fb..f791f6725 100644 --- a/numpy/core/tests/test_regression.py +++ b/numpy/core/tests/test_regression.py @@ -289,7 +289,7 @@ class TestRegression(object): # Fix in r2836 # Create non-contiguous Fortran ordered array x = np.array(np.random.rand(3, 3), order='F')[:, :2] - assert_array_almost_equal(x.ravel(), np.fromstring(x.tobytes())) + assert_array_almost_equal(x.ravel(), np.frombuffer(x.tobytes())) def test_flat_assignment(self): # Correct behaviour of ticket #194 @@ -833,14 +833,14 @@ class TestRegression(object): def test_string_argsort_with_zeros(self): # Check argsort for strings containing zeros. 
- x = np.fromstring("\x00\x02\x00\x01", dtype="|S2") + x = np.frombuffer(b"\x00\x02\x00\x01", dtype="|S2") assert_array_equal(x.argsort(kind='m'), np.array([1, 0])) assert_array_equal(x.argsort(kind='q'), np.array([1, 0])) def test_string_sort_with_zeros(self): # Check sort for strings containing zeros. - x = np.fromstring("\x00\x02\x00\x01", dtype="|S2") - y = np.fromstring("\x00\x01\x00\x02", dtype="|S2") + x = np.frombuffer(b"\x00\x02\x00\x01", dtype="|S2") + y = np.frombuffer(b"\x00\x01\x00\x02", dtype="|S2") assert_array_equal(np.sort(x, kind="q"), y) def test_copy_detection_zero_dim(self): @@ -1430,10 +1430,10 @@ class TestRegression(object): y = x.byteswap() if x.dtype.byteorder == z.dtype.byteorder: # little-endian machine - assert_equal(x, np.fromstring(y.tobytes(), dtype=dtype.newbyteorder())) + assert_equal(x, np.frombuffer(y.tobytes(), dtype=dtype.newbyteorder())) else: # big-endian machine - assert_equal(x, np.fromstring(y.tobytes(), dtype=dtype)) + assert_equal(x, np.frombuffer(y.tobytes(), dtype=dtype)) # double check real and imaginary parts: assert_equal(x.real, y.real.byteswap()) assert_equal(x.imag, y.imag.byteswap()) @@ -1783,8 +1783,8 @@ class TestRegression(object): assert_equal(a1, a2) def test_fields_strides(self): - "Ticket #1760" - r = np.fromstring('abcdefghijklmnop'*4*3, dtype='i4,(2,3)u2') + "gh-2355" + r = np.frombuffer(b'abcdefghijklmnop'*4*3, dtype='i4,(2,3)u2') assert_equal(r[0:3:2]['f1'], r['f1'][0:3:2]) assert_equal(r[0:3:2]['f1'][0], r[0:3:2][0]['f1']) assert_equal(r[0:3:2]['f1'][0][()], r[0:3:2][0]['f1'][()]) |