diff options
| author | Sebastian Berg <sebastianb@nvidia.com> | 2023-01-11 23:25:05 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-01-11 14:25:05 -0800 |
| commit | 77253908b0b9e4c296b9ff3c7733566eaa760e36 (patch) | |
| tree | 98bd7c2c8f137268595df08e93359b0063264f22 | |
| parent | 55e47e36ec9dbdd47d58900a8b504d9e159f1beb (diff) | |
| download | numpy-77253908b0b9e4c296b9ff3c7733566eaa760e36.tar.gz | |
ENH: Improve loadtxt error with dtype and non-matching column number (#22996)
Saying the number of columns "changed" is misleading when the user fixed the
column count via the dtype, so give a different message in that case.
| -rw-r--r-- | numpy/core/src/multiarray/textreading/rows.c | 11 | ||||
| -rw-r--r-- | numpy/lib/tests/test_loadtxt.py | 11 |
2 files changed, 20 insertions, 2 deletions
diff --git a/numpy/core/src/multiarray/textreading/rows.c b/numpy/core/src/multiarray/textreading/rows.c index 9c490b3f7..b8a6943dc 100644 --- a/numpy/core/src/multiarray/textreading/rows.c +++ b/numpy/core/src/multiarray/textreading/rows.c @@ -318,10 +318,19 @@ read_rows(stream *s, } if (!usecols && (actual_num_fields != current_num_fields)) { - PyErr_Format(PyExc_ValueError, + if (homogeneous) { + PyErr_Format(PyExc_ValueError, "the number of columns changed from %zd to %zd at row %zd; " "use `usecols` to select a subset and avoid this error", actual_num_fields, current_num_fields, row_count+1); + } + else { + PyErr_Format(PyExc_ValueError, + "the dtype passed requires %zd columns but %zd were found " + "at row %zd; " + "use `usecols` to select a subset and avoid this error", + actual_num_fields, current_num_fields, row_count+1); + } goto error; } diff --git a/numpy/lib/tests/test_loadtxt.py b/numpy/lib/tests/test_loadtxt.py index 8a5b044b8..2d805e434 100644 --- a/numpy/lib/tests/test_loadtxt.py +++ b/numpy/lib/tests/test_loadtxt.py @@ -244,6 +244,14 @@ def test_converters_negative_indices_with_usecols(): usecols=[0, -1], converters={-1: (lambda x: -1)}) assert_array_equal(res, [[0, -1], [0, -1]]) + +def test_ragged_error(): + rows = ["1,2,3", "1,2,3", "4,3,2,1"] + with pytest.raises(ValueError, + match="the number of columns changed from 3 to 4 at row 3"): + np.loadtxt(rows, delimiter=",") + + def test_ragged_usecols(): # usecols, and negative ones, work even with varying number of columns. 
txt = StringIO("0,0,XXX\n0,XXX,0,XXX\n0,XXX,XXX,0,XXX\n") @@ -553,7 +561,8 @@ def test_quote_support_default(): txt = StringIO('"lat,long", 45, 30\n') dtype = np.dtype([('f0', 'U24'), ('f1', np.float64), ('f2', np.float64)]) - with pytest.raises(ValueError, match="the number of columns changed"): + with pytest.raises(ValueError, + match="the dtype passed requires 3 columns but 4 were"): np.loadtxt(txt, dtype=dtype, delimiter=",") # Enable quoting support with non-None value for quotechar param |
