diff options
author | Sebastian Berg <sebastian@sipsolutions.net> | 2022-01-11 15:46:21 -0600 |
---|---|---|
committer | Sebastian Berg <sebastian@sipsolutions.net> | 2022-01-14 20:07:07 -0600 |
commit | e1f7ad16518f95b6c5b560a03375b4329c8136ff (patch) | |
tree | b3b7319c474a99997f82a7a7eb175f7b906ff106 | |
parent | 3ca9f5a2a252e020a44a355f4fc8114d91ea3423 (diff) | |
download | numpy-e1f7ad16518f95b6c5b560a03375b4329c8136ff.tar.gz |
BUG: Fix complex parser and add tests for whitespace and failure paths
-rw-r--r-- | numpy/core/src/multiarray/textreading/conversions.c | 52 | ||||
-rw-r--r-- | numpy/lib/tests/test_io.py | 27 |
2 files changed, 60 insertions, 19 deletions
diff --git a/numpy/core/src/multiarray/textreading/conversions.c b/numpy/core/src/multiarray/textreading/conversions.c index 04060baa1..8c685ea64 100644 --- a/numpy/core/src/multiarray/textreading/conversions.c +++ b/numpy/core/src/multiarray/textreading/conversions.c @@ -46,11 +46,13 @@ to_bool(PyArray_Descr *NPY_UNUSED(descr), static NPY_INLINE int double_from_ucs4( const Py_UCS4 *str, const Py_UCS4 *end, - bool skip_trailing_whitespace, double *result, const Py_UCS4 **p_end) + bool strip_whitespace, double *result, const Py_UCS4 **p_end) { /* skip leading whitespace */ - while (Py_UNICODE_ISSPACE(*str)) { - str++; + if (strip_whitespace) { + while (Py_UNICODE_ISSPACE(*str)) { + str++; + } } if (str == end) { return -1; /* empty or only whitespace: not a floating point number */ @@ -69,7 +71,9 @@ double_from_ucs4( char *c = ascii; for (; str < end; str++, c++) { if (NPY_UNLIKELY(*str >= 128)) { - break; /* the following cannot be a number anymore */ + /* Character cannot be used, ignore for end calculation and stop */ + end = str; + break; } *c = (char)(*str); } @@ -86,7 +90,7 @@ double_from_ucs4( return -1; } - if (skip_trailing_whitespace) { + if (strip_whitespace) { /* and then skip any remainig whitespace: */ while (Py_UNICODE_ISSPACE(*end)) { end++; @@ -158,6 +162,10 @@ to_complex_int( if (allow_parens && (*item == '(')) { unmatched_opening_paren = true; ++item; + /* Allow whitespace within the parentheses: "( 1j)" */ + while (Py_UNICODE_ISSPACE(*item)) { + ++item; + } } if (double_from_ucs4(item, token_end, false, p_real, &p_end) < 0) { return false; @@ -168,23 +176,15 @@ to_complex_int( return !unmatched_opening_paren; } if (*p_end == imaginary_unit) { - // Pure imaginary part only (e.g "1.5j") + /* Only an imaginary part (e.g "1.5j") */ *p_imag = *p_real; *p_real = 0.0; ++p_end; - if (unmatched_opening_paren && (*p_end == ')')) { - ++p_end; - unmatched_opening_paren = false; - } - } - else if (unmatched_opening_paren && (*p_end == ')')) { - *p_imag = 0.0; - ++p_end; - unmatched_opening_paren = false; } - else { + else if (*p_end == '+' || *p_end == '-') { + /* Imaginary part still to parse */ if (*p_end == '+') { - ++p_end; + ++p_end; /* Advance to support +- (and ++) */ } if (double_from_ucs4(p_end, token_end, false, p_imag, &p_end) < 0) { return false; @@ -193,11 +193,25 @@ to_complex_int( return false; } ++p_end; - if (unmatched_opening_paren && (*p_end == ')')) { + } + else { + *p_imag = 0; + } + + if (unmatched_opening_paren) { + /* Allow whitespace inside brackets as in "(1+2j )" or "( 1j )" */ + while (Py_UNICODE_ISSPACE(*p_end)) { + ++p_end; + } + if (*p_end == ')') { ++p_end; - unmatched_opening_paren = false; + } + else { + /* parentheses was not closed */ + return false; } } + while (Py_UNICODE_ISSPACE(*p_end)) { ++p_end; } diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py index b4ca5b74b..5ba852e3d 100644 --- a/numpy/lib/tests/test_io.py +++ b/numpy/lib/tests/test_io.py @@ -3263,3 +3263,30 @@ def test_loadtxt_warn_on_skipped_data(skiprows): txt = TextIO(data) with pytest.warns(UserWarning, match="input contained no data"): np.loadtxt(txt, skiprows=skiprows) + + +@pytest.mark.parametrize("dtype", + np.typecodes["AllInteger"] + "efdFD" + "?") +def test_loadtxt_unicode_whitespace_stripping(dtype): + # Test that all numeric types (and bool) strip whitespace correctly + # \u202F is a narrow no-break space, `\n` is just a whitespace if quoted. + # Currently, skip float128 as it did not always support this and has no + # "custom" parsing: + txt = StringIO(' 3 ,"\u202F2\n"') + res = np.loadtxt(txt, dtype=dtype, delimiter=",", quotechar='"') + assert_array_equal(res, np.array([3, 2]).astype(dtype)) + +@pytest.mark.parametrize("dtype", "FD") +def test_loadtxt_unicode_whitespace_stripping_complex(dtype): + # Complex has a few extra cases since it has two components and parentheses + line = " 1 , 2+3j , ( 4+5j ), ( 6+-7j ) , 8j , ( 9j ) \n" + data = [line, line.replace(" ", "\u202F")] + res = np.loadtxt(data, dtype=dtype, delimiter=',') + assert_array_equal(res, np.array([[1, 2+3j, 4+5j, 6-7j, 8j, 9j]] * 2)) + +@pytest.mark.parametrize("dtype", "FD") +@pytest.mark.parametrize("field", + ["1 +2j", "1+ 2j", "1+2 j", "1+-+3", "(1j", "(1", "(1+2j", "1+2j)"]) +def test_loadtxt_bad_complex(dtype, field): + with pytest.raises(ValueError): + np.loadtxt([field + "\n"], dtype=dtype, delimiter=",") |