diff options
author | Matti Picus <matti.picus@gmail.com> | 2019-09-13 09:25:21 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-09-13 09:25:21 +0300 |
commit | 79ee678fb2f1518b3f45e26066a02b45b22d456c (patch) | |
tree | e8b8f529c7cfcf9451e8b12655291a5d7279e2e5 /numpy | |
parent | e97975a2027e81d93ccdc96ab767653773b3cc04 (diff) | |
parent | bf6809a63238dd50b2a42a7d62824f1e438a222a (diff) | |
download | numpy-79ee678fb2f1518b3f45e26066a02b45b22d456c.tar.gz |
Merge pull request #13605 from seberg/fromfile-eof-deprecation
DEP: Deprecate silent ignoring of bad data in fromfile/fromstring
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/core/src/multiarray/_multiarray_tests.c.src | 22 | ||||
-rw-r--r-- | numpy/core/src/multiarray/ctors.c | 78 | ||||
-rw-r--r-- | numpy/core/tests/test_deprecations.py | 60 | ||||
-rw-r--r-- | numpy/core/tests/test_longdouble.py | 38 | ||||
-rw-r--r-- | numpy/core/tests/test_multiarray.py | 3 | ||||
-rw-r--r-- | numpy/core/tests/test_regression.py | 3 |
6 files changed, 174 insertions, 30 deletions
diff --git a/numpy/core/src/multiarray/_multiarray_tests.c.src b/numpy/core/src/multiarray/_multiarray_tests.c.src index 1365e87bb..b0985c80f 100644 --- a/numpy/core/src/multiarray/_multiarray_tests.c.src +++ b/numpy/core/src/multiarray/_multiarray_tests.c.src @@ -593,6 +593,25 @@ fail: return NULL; } +/* + * Helper to test fromstring of 0 terminated strings, as the C-API supports + * the -1 length identifier. + */ +static PyObject * +fromstring_null_term_c_api(PyObject *dummy, PyObject *byte_obj) +{ + char *string; + + string = PyBytes_AsString(byte_obj); + if (string == NULL) { + return NULL; + } + + return PyArray_FromString( + string, -1, PyArray_DescrFromType(NPY_FLOAT64), -1, " "); +} + + /* check no elison for avoided increfs */ static PyObject * incref_elide(PyObject *dummy, PyObject *args) @@ -1927,6 +1946,9 @@ static PyMethodDef Multiarray_TestsMethods[] = { {"test_inplace_increment", inplace_increment, METH_VARARGS, NULL}, + {"fromstring_null_term_c_api", + fromstring_null_term_c_api, + METH_O, NULL}, {"incref_elide", incref_elide, METH_VARARGS, NULL}, diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index 50d58ac16..188e68001 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -40,9 +40,31 @@ * regards to the handling of text representations. */ +/* + * Scanning function for next element parsing and seperator skipping. + * These functions return: + * - 0 to indicate more data to read + * - -1 when reading stopped at the end of the string/file + * - -2 when reading stopped before the end was reached. + * + * The dtype specific parsing functions may set the python error state + * (they have to get the GIL first) additionally. + */ typedef int (*next_element)(void **, void *, PyArray_Descr *, void *); typedef int (*skip_separator)(void **, const char *, void *); + +static npy_bool +string_is_fully_read(char const* start, char const* end) { + if (end == NULL) { + return *start == '\0'; /* null terminated */ + } + else { + return start >= end; /* fixed length */ + } +} + + static int fromstr_next_element(char **s, void *dptr, PyArray_Descr *dtype, const char *end) @@ -50,19 +72,23 @@ fromstr_next_element(char **s, void *dptr, PyArray_Descr *dtype, char *e = *s; int r = dtype->f->fromstr(*s, dptr, &e, dtype); /* - * fromstr always returns 0 for basic dtypes - * s points to the end of the parsed string - * if an error occurs s is not changed + * fromstr always returns 0 for basic dtypes; s points to the end of the + * parsed string. If s is not changed an error occurred or the end was + * reached. */ - if (*s == e) { - /* Nothing read */ - return -1; + if (*s == e || r < 0) { + /* Nothing read, could be end of string or an error (or both) */ + if (string_is_fully_read(*s, end)) { + return -1; + } + return -2; } *s = e; if (end != NULL && *s > end) { + /* Stop the iteration if we read far enough */ return -1; } - return r; + return 0; } static int @@ -75,9 +101,13 @@ fromfile_next_element(FILE **fp, void *dptr, PyArray_Descr *dtype, if (r == 1) { return 0; } - else { + else if (r == EOF) { return -1; } + else { + /* unable to read more, but EOF not reached indicating an error. */ + return -2; + } } /* @@ -143,9 +173,10 @@ fromstr_skip_separator(char **s, const char *sep, const char *end) { char *string = *s; int result = 0; + while (1) { char c = *string; - if (c == '\0' || (end != NULL && string >= end)) { + if (string_is_fully_read(string, end)) { result = -1; break; } @@ -3596,6 +3627,7 @@ array_from_text(PyArray_Descr *dtype, npy_intp num, char *sep, size_t *nread, npy_intp i; char *dptr, *clean_sep, *tmp; int err = 0; + int stop_reading_flag; /* -1 indicates end reached; -2 a parsing error */ npy_intp thisbuf = 0; npy_intp size; npy_intp bytes, totalbytes; @@ -3623,9 +3655,9 @@ array_from_text(PyArray_Descr *dtype, npy_intp num, char *sep, size_t *nread, NPY_BEGIN_ALLOW_THREADS; totalbytes = bytes = size * dtype->elsize; dptr = PyArray_DATA(r); - for (i= 0; num < 0 || i < num; i++) { - if (next(&stream, dptr, dtype, stream_data) < 0) { - /* EOF */ + for (i = 0; num < 0 || i < num; i++) { + stop_reading_flag = next(&stream, dptr, dtype, stream_data); + if (stop_reading_flag < 0) { break; } *nread += 1; @@ -3642,7 +3674,12 @@ array_from_text(PyArray_Descr *dtype, npy_intp num, char *sep, size_t *nread, dptr = tmp + (totalbytes - bytes); thisbuf = 0; } - if (skip_sep(&stream, clean_sep, stream_data) < 0) { + stop_reading_flag = skip_sep(&stream, clean_sep, stream_data); + if (stop_reading_flag < 0) { + if (num == i + 1) { + /* if we read as much as requested sep is optional */ + stop_reading_flag = -1; + } break; } } @@ -3661,6 +3698,21 @@ array_from_text(PyArray_Descr *dtype, npy_intp num, char *sep, size_t *nread, } } NPY_END_ALLOW_THREADS; + + if (stop_reading_flag == -2) { + if (PyErr_Occurred()) { + /* If an error is already set (unlikely), do not create new one */ + Py_DECREF(r); + return NULL; + } + /* 2019-09-12, NumPy 1.18 */ + if (DEPRECATE( + "string or file could not be read to its end due to unmatched " + "data; this will raise a ValueError in the future.") < 0) { + goto fail; + } + } + free(clean_sep); fail: diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py index e8aa0c70b..46cebdd31 100644 --- a/numpy/core/tests/test_deprecations.py +++ b/numpy/core/tests/test_deprecations.py @@ -10,12 +10,16 @@ import sys import operator import warnings import pytest +import shutil +import tempfile import numpy as np from numpy.testing import ( - assert_raises, assert_warns, assert_ + assert_raises, assert_warns, assert_, assert_array_equal ) +from numpy.core._multiarray_tests import fromstring_null_term_c_api + try: import pytz _has_pytz = True @@ -514,11 +518,65 @@ class TestPositiveOnNonNumerical(_DeprecationTestCase): def test_positive_on_non_number(self): self.assert_deprecated(operator.pos, args=(np.array('foo'),)) + class TestFromstring(_DeprecationTestCase): # 2017-10-19, 1.14 def test_fromstring(self): self.assert_deprecated(np.fromstring, args=('\x00'*80,)) + +class TestFromStringAndFileInvalidData(_DeprecationTestCase): + # 2019-06-08, 1.17.0 + # Tests should be moved to real tests when deprecation is done. + message = "string or file could not be read to its end" + + @pytest.mark.parametrize("invalid_str", [",invalid_data", "invalid_sep"]) + def test_deprecate_unparsable_data_file(self, invalid_str): + x = np.array([1.51, 2, 3.51, 4], dtype=float) + + with tempfile.TemporaryFile(mode="w") as f: + x.tofile(f, sep=',', format='%.2f') + f.write(invalid_str) + + f.seek(0) + self.assert_deprecated(lambda: np.fromfile(f, sep=",")) + f.seek(0) + self.assert_deprecated(lambda: np.fromfile(f, sep=",", count=5)) + # Should not raise: + with warnings.catch_warnings(): + warnings.simplefilter("error", DeprecationWarning) + f.seek(0) + res = np.fromfile(f, sep=",", count=4) + assert_array_equal(res, x) + + @pytest.mark.parametrize("invalid_str", [",invalid_data", "invalid_sep"]) + def test_deprecate_unparsable_string(self, invalid_str): + x = np.array([1.51, 2, 3.51, 4], dtype=float) + x_str = "1.51,2,3.51,4{}".format(invalid_str) + + self.assert_deprecated(lambda: np.fromstring(x_str, sep=",")) + self.assert_deprecated(lambda: np.fromstring(x_str, sep=",", count=5)) + + # The C-level API can use not fixed size, but 0 terminated strings, + # so test that as well: + bytestr = x_str.encode("ascii") + self.assert_deprecated(lambda: fromstring_null_term_c_api(bytestr)) + + with assert_warns(DeprecationWarning): + # this is slightly strange, in that fromstring leaves data + # potentially uninitialized (would be good to error when all is + # read, but count is larger then actual data maybe). + res = np.fromstring(x_str, sep=",", count=5) + assert_array_equal(res[:-1], x) + + with warnings.catch_warnings(): + warnings.simplefilter("error", DeprecationWarning) + + # Should not raise: + res = np.fromstring(x_str, sep=",", count=4) + assert_array_equal(res, x) + + class Test_GetSet_NumericOps(_DeprecationTestCase): # 2018-09-20, 1.16.0 def test_get_numeric_ops(self): diff --git a/numpy/core/tests/test_longdouble.py b/numpy/core/tests/test_longdouble.py index ee4197f8f..59ac5923c 100644 --- a/numpy/core/tests/test_longdouble.py +++ b/numpy/core/tests/test_longdouble.py @@ -5,7 +5,8 @@ import pytest import numpy as np from numpy.testing import ( - assert_, assert_equal, assert_raises, assert_array_equal, temppath, + assert_, assert_equal, assert_raises, assert_warns, assert_array_equal, + temppath, ) from numpy.core.tests._locales import CommaDecimalPointLocale @@ -71,18 +72,21 @@ def test_fromstring(): def test_fromstring_bogus(): - assert_equal(np.fromstring("1. 2. 3. flop 4.", dtype=float, sep=" "), - np.array([1., 2., 3.])) + with assert_warns(DeprecationWarning): + assert_equal(np.fromstring("1. 2. 3. flop 4.", dtype=float, sep=" "), + np.array([1., 2., 3.])) def test_fromstring_empty(): - assert_equal(np.fromstring("xxxxx", sep="x"), - np.array([])) + with assert_warns(DeprecationWarning): + assert_equal(np.fromstring("xxxxx", sep="x"), + np.array([])) def test_fromstring_missing(): - assert_equal(np.fromstring("1xx3x4x5x6", sep="x"), - np.array([1])) + with assert_warns(DeprecationWarning): + assert_equal(np.fromstring("1xx3x4x5x6", sep="x"), + np.array([1])) class TestFileBased(object): @@ -95,7 +99,9 @@ class TestFileBased(object): with temppath() as path: with open(path, 'wt') as f: f.write("1. 2. 3. flop 4.\n") - res = np.fromfile(path, dtype=float, sep=" ") + + with assert_warns(DeprecationWarning): + res = np.fromfile(path, dtype=float, sep=" ") assert_equal(res, np.array([1., 2., 3.])) @pytest.mark.skipif(string_to_longdouble_inaccurate, @@ -186,12 +192,14 @@ class TestCommaDecimalPointLocale(CommaDecimalPointLocale): assert_equal(a[0], f) def test_fromstring_best_effort_float(self): - assert_equal(np.fromstring("1,234", dtype=float, sep=" "), - np.array([1.])) + with assert_warns(DeprecationWarning): + assert_equal(np.fromstring("1,234", dtype=float, sep=" "), + np.array([1.])) def test_fromstring_best_effort(self): - assert_equal(np.fromstring("1,234", dtype=np.longdouble, sep=" "), - np.array([1.])) + with assert_warns(DeprecationWarning): + assert_equal(np.fromstring("1,234", dtype=np.longdouble, sep=" "), + np.array([1.])) def test_fromstring_foreign(self): s = "1.234" @@ -204,8 +212,10 @@ class TestCommaDecimalPointLocale(CommaDecimalPointLocale): assert_array_equal(a, b) def test_fromstring_foreign_value(self): - b = np.fromstring("1,234", dtype=np.longdouble, sep=" ") - assert_array_equal(b[0], 1) + with assert_warns(DeprecationWarning): + b = np.fromstring("1,234", dtype=np.longdouble, sep=" ") + assert_array_equal(b[0], 1) + @pytest.mark.parametrize("int_val", [ # cases discussed in gh-10723 diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index 05f01ad82..58572f268 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -4963,7 +4963,8 @@ class TestIO(object): self._check_from(b'1,2,3,4', [1., 2., 3., 4.], dtype=float, sep=',') def test_malformed(self): - self._check_from(b'1.234 1,234', [1.234, 1.], sep=' ') + with assert_warns(DeprecationWarning): + self._check_from(b'1.234 1,234', [1.234, 1.], sep=' ') def test_long_sep(self): self._check_from(b'1_x_3_x_4_x_5', [1, 3, 4, 5], sep='_x_') diff --git a/numpy/core/tests/test_regression.py b/numpy/core/tests/test_regression.py index cbd4ceafc..9dc231deb 100644 --- a/numpy/core/tests/test_regression.py +++ b/numpy/core/tests/test_regression.py @@ -1539,7 +1539,8 @@ class TestRegression(object): def test_fromstring_crash(self): # Ticket #1345: the following should not cause a crash - np.fromstring(b'aa, aa, 1.0', sep=',') + with assert_warns(DeprecationWarning): + np.fromstring(b'aa, aa, 1.0', sep=',') def test_ticket_1539(self): dtypes = [x for x in np.typeDict.values() |