diff options
author | Sebastian Berg <sebastian@sipsolutions.net> | 2022-01-14 19:04:16 -0600 |
---|---|---|
committer | Sebastian Berg <sebastian@sipsolutions.net> | 2022-01-14 20:07:07 -0600 |
commit | 90c71f0a8a84d9f17243e28e01527b5fd1ecdbb9 (patch) | |
tree | 311c223111667426615e083d08618f2bf91985e2 /numpy/lib/tests | |
parent | 0cb6bdcf2a28e8a3a74a302d0807cd054a15925f (diff) | |
download | numpy-90c71f0a8a84d9f17243e28e01527b5fd1ecdbb9.tar.gz |
TST: Move most new loadtxt tests to its own file
This also adds two basic new tests around files/strings containing
the \0 character (proving that we handle that gracefully).
Also adds tests for:
* the `_` thousands delimiter (should fail, but doesn't for float128
right now)
* Failure modes when the number of rows changes (negative specifically)
Many of these tests came originally from Warren Weckesser and others
were added by Ross Barnowski:
Co-authored-by: Warren Weckesser <warren.weckesser@gmail.com>
Co-authored-by: Ross Barnowski <rossbar@berkeley.edu>
Diffstat (limited to 'numpy/lib/tests')
-rw-r--r-- | numpy/lib/tests/test_io.py | 767 | ||||
-rw-r--r-- | numpy/lib/tests/test_loadtxt.py | 836 |
2 files changed, 836 insertions, 767 deletions
diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py index 5f66e0b6a..f142972b2 100644 --- a/numpy/lib/tests/test_io.py +++ b/numpy/lib/tests/test_io.py @@ -2722,770 +2722,3 @@ def test_load_refcount(): with assert_no_gc_cycles(): x = np.loadtxt(TextIO("0 1 2 3"), dtype=dt) assert_equal(x, np.array([((0, 1), (2, 3))], dtype=dt)) - - -def test_loadtxt_scientific_notation(): - """Test that both 'e' and 'E' are parsed correctly.""" - data = TextIO( - ( - "1.0e-1,2.0E1,3.0\n" - "4.0e-2,5.0E-1,6.0\n" - "7.0e-3,8.0E1,9.0\n" - "0.0e-4,1.0E-1,2.0" - ) - ) - expected = np.array( - [[0.1, 20., 3.0], [0.04, 0.5, 6], [0.007, 80., 9], [0, 0.1, 2]] - ) - assert_array_equal(np.loadtxt(data, delimiter=","), expected) - - -@pytest.mark.parametrize("comment", ["..", "//", "@-", "this is a comment:"]) -def test_loadtxt_comment_multiple_chars(comment): - content = "# IGNORE\n1.5, 2.5# ABC\n3.0,4.0# XXX\n5.5,6.0\n" - txt = TextIO(content.replace("#", comment)) - a = np.loadtxt(txt, delimiter=",", comments=comment) - assert_equal(a, [[1.5, 2.5], [3.0, 4.0], [5.5, 6.0]]) - - -@pytest.fixture -def mixed_types_structured(): - """ - Fixture providing hetergeneous input data with a structured dtype, along - with the associated structured array. 
- """ - data = TextIO( - ( - "1000;2.4;alpha;-34\n" - "2000;3.1;beta;29\n" - "3500;9.9;gamma;120\n" - "4090;8.1;delta;0\n" - "5001;4.4;epsilon;-99\n" - "6543;7.8;omega;-1\n" - ) - ) - dtype = np.dtype( - [('f0', np.uint16), ('f1', np.float64), ('f2', 'S7'), ('f3', np.int8)] - ) - expected = np.array( - [ - (1000, 2.4, "alpha", -34), - (2000, 3.1, "beta", 29), - (3500, 9.9, "gamma", 120), - (4090, 8.1, "delta", 0), - (5001, 4.4, "epsilon", -99), - (6543, 7.8, "omega", -1) - ], - dtype=dtype - ) - return data, dtype, expected - - -@pytest.mark.parametrize('skiprows', [0, 1, 2, 3]) -def test_loadtxt_structured_dtype_and_skiprows_no_empty_lines( - skiprows, mixed_types_structured - ): - data, dtype, expected = mixed_types_structured - a = np.loadtxt(data, dtype=dtype, delimiter=";", skiprows=skiprows) - assert_array_equal(a, expected[skiprows:]) - - -def test_loadtxt_unpack_structured(mixed_types_structured): - data, dtype, expected = mixed_types_structured - - a, b, c, d = np.loadtxt(data, dtype=dtype, delimiter=";", unpack=True) - assert_array_equal(a, expected["f0"]) - assert_array_equal(b, expected["f1"]) - assert_array_equal(c, expected["f2"]) - assert_array_equal(d, expected["f3"]) - - -def test_loadtxt_structured_dtype_with_shape(): - dtype = np.dtype([("a", "u1", 2), ("b", "u1", 2)]) - data = TextIO("0,1,2,3\n6,7,8,9\n") - expected = np.array([((0, 1), (2, 3)), ((6, 7), (8, 9))], dtype=dtype) - assert_array_equal(np.loadtxt(data, delimiter=",", dtype=dtype), expected) - - -def test_loadtxt_structured_dtype_with_multi_shape(): - dtype = np.dtype([("a", "u1", (2, 2))]) - data = TextIO("0 1 2 3\n") - expected = np.array([(((0, 1), (2, 3)),)], dtype=dtype) - assert_array_equal(np.loadtxt(data, dtype=dtype), expected) - - -def test_loadtxt_nested_structured_subarray(): - # Test from gh-16678 - point = np.dtype([('x', float), ('y', float)]) - dt = np.dtype([('code', int), ('points', point, (2,))]) - data = TextIO("100,1,2,3,4\n200,5,6,7,8\n") - expected = np.array( - 
[ - (100, [(1., 2.), (3., 4.)]), - (200, [(5., 6.), (7., 8.)]), - ], - dtype=dt - ) - assert_array_equal(np.loadtxt(data, dtype=dt, delimiter=","), expected) - - -def test_loadtxt_structured_dtype_offsets(): - # An aligned structured dtype will have additional padding - dt = np.dtype("i1, i4, i1, i4, i1, i4", align=True) - data = TextIO("1,2,3,4,5,6\n7,8,9,10,11,12\n") - expected = np.array([(1, 2, 3, 4, 5, 6), (7, 8, 9, 10, 11, 12)], dtype=dt) - assert_array_equal(np.loadtxt(data, delimiter=",", dtype=dt), expected) - - -@pytest.mark.parametrize("param", ("skiprows", "max_rows")) -def test_loadtxt_exception_negative_row_limits(param): - """skiprows and max_rows should raise for negative parameters.""" - with pytest.raises(ValueError, match="argument must be nonnegative"): - np.loadtxt("foo.bar", **{param: -3}) - - -@pytest.mark.parametrize("param", ("skiprows", "max_rows")) -def test_loadtxt_exception_noninteger_row_limits(param): - with pytest.raises(TypeError, match="argument must be an integer"): - np.loadtxt("foo.bar", **{param: 1.0}) - - -@pytest.mark.parametrize( - "data, shape", - [ - ("1 2 3 4 5\n", (1, 5)), # Single row - ("1\n2\n3\n4\n5\n", (5, 1)), # Single column - ] -) -def test_loadtxt_ndmin_single_row_or_col(data, shape): - arr = np.array([1, 2, 3, 4, 5]) - arr2d = arr.reshape(shape) - - assert_array_equal(np.loadtxt(TextIO(data), dtype=int), arr) - assert_array_equal(np.loadtxt(TextIO(data), dtype=int, ndmin=0), arr) - assert_array_equal(np.loadtxt(TextIO(data), dtype=int, ndmin=1), arr) - assert_array_equal(np.loadtxt(TextIO(data), dtype=int, ndmin=2), arr2d) - - -@pytest.mark.parametrize("badval", [-1, 3, None, "plate of shrimp"]) -def test_loadtxt_bad_ndmin(badval): - with pytest.raises(ValueError, match="Illegal value of ndmin keyword"): - np.loadtxt("foo.bar", ndmin=badval) - - -@pytest.mark.parametrize( - "ws", - ( - "\t", # tab - "\u2003", # em - "\u00A0", # non-break - "\u3000", # ideographic space - ) -) -def 
test_loadtxt_blank_lines_spaces_delimit(ws): - txt = StringIO( - f"1 2{ws}30\n\n4 5 60\n {ws} \n7 8 {ws} 90\n # comment\n3 2 1" - ) - # NOTE: It is unclear that the ` # comment` should succeed. Except - # for delimiter=None, which should use any whitespace (and maybe - # should just be implemented closer to Python - expected = np.array([[1, 2, 30], [4, 5, 60], [7, 8, 90], [3, 2, 1]]) - assert_equal( - np.loadtxt(txt, dtype=int, delimiter=None, comments="#"), expected - ) - - -def test_loadtxt_blank_lines_normal_delimiter(): - txt = StringIO('1,2,30\n\n4,5,60\n\n7,8,90\n# comment\n3,2,1') - expected = np.array([[1, 2, 30], [4, 5, 60], [7, 8, 90], [3, 2, 1]]) - assert_equal( - np.loadtxt(txt, dtype=int, delimiter=',', comments="#"), expected - ) - - -@pytest.mark.parametrize("dtype", (float, object)) -def test_loadtxt_maxrows_no_blank_lines(dtype): - txt = TextIO("1.5,2.5\n3.0,4.0\n5.5,6.0") - res = np.loadtxt(txt, dtype=dtype, delimiter=",", max_rows=2) - assert_equal(res.dtype, dtype) - assert_equal(res, np.array([["1.5", "2.5"], ["3.0", "4.0"]], dtype=dtype)) - - -@pytest.mark.parametrize("dtype", (np.dtype("f8"), np.dtype("i2"))) -def test_loadtxt_exception_message_bad_values(dtype): - txt = TextIO("1,2\n3,XXX\n5,6") - msg = f"could not convert string 'XXX' to {dtype} at row 1, column 2" - with pytest.raises(ValueError, match=msg): - np.loadtxt(txt, dtype=dtype, delimiter=",") - - -def test_loadtxt_converters_negative_indices(): - txt = TextIO('1.5,2.5\n3.0,XXX\n5.5,6.0') - conv = {-1: lambda s: np.nan if s == 'XXX' else float(s)} - expected = np.array([[1.5, 2.5], [3.0, np.nan], [5.5, 6.0]]) - res = np.loadtxt( - txt, dtype=np.float64, delimiter=",", converters=conv, encoding=None - ) - assert_equal(res, expected) - - -def test_loadtxt_converters_negative_indices_with_usecols(): - txt = TextIO('1.5,2.5,3.5\n3.0,4.0,XXX\n5.5,6.0,7.5\n') - conv = {-1: lambda s: np.nan if s == 'XXX' else float(s)} - expected = np.array([[1.5, 3.5], [3.0, np.nan], [5.5, 7.5]]) - res 
= np.loadtxt( - txt, - dtype=np.float64, - delimiter=",", - converters=conv, - usecols=[0, -1], - encoding=None, - ) - assert_equal(res, expected) - - -def test_loadtxt_ragged_usecols(): - # usecols, and negative ones, work even with varying number of columns. - txt = TextIO("0,0,XXX\n0,XXX,0,XXX\n0,XXX,XXX,0,XXX\n") - expected = np.array([[0, 0], [0, 0], [0, 0]]) - res = np.loadtxt(txt, dtype=float, delimiter=",", usecols=[0, -2]) - assert_equal(res, expected) - - -def test_loadtxt_empty_usecols(): - txt = TextIO("0,0,XXX\n0,XXX,0,XXX\n0,XXX,XXX,0,XXX\n") - res = np.loadtxt(txt, dtype=np.dtype([]), delimiter=",", usecols=[]) - assert res.shape == (3,) - assert res.dtype == np.dtype([]) - - -@pytest.mark.parametrize("c1", ["a", "の", "🫕"]) -@pytest.mark.parametrize("c2", ["a", "の", "🫕"]) -def test_loadtxt_large_unicode_characters(c1, c2): - # c1 and c2 span ascii, 16bit and 32bit range. - txt = StringIO(f"a,{c1},c,1.0\ne,{c2},2.0,g") - res = np.loadtxt(txt, dtype=np.dtype('U12'), delimiter=",") - expected = np.array( - [f"a,{c1},c,1.0".split(","), f"e,{c2},2.0,g".split(",")], - dtype=np.dtype('U12') - ) - assert_equal(res, expected) - - -def test_loadtxt_unicode_with_converter(): - txt = StringIO("cat,dog\nαβγ,δεζ\nabc,def\n") - conv = {0: lambda s: s.upper()} - res = np.loadtxt( - txt, - dtype=np.dtype("U12"), - converters=conv, - delimiter=",", - encoding=None - ) - expected = np.array([['CAT', 'dog'], ['ΑΒΓ', 'δεζ'], ['ABC', 'def']]) - assert_equal(res, expected) - - -def test_loadtxt_converter_with_structured_dtype(): - txt = TextIO('1.5,2.5,Abc\n3.0,4.0,dEf\n5.5,6.0,ghI\n') - dt = np.dtype([('m', np.int32), ('r', np.float32), ('code', 'U8')]) - conv = {0: lambda s: int(10*float(s)), -1: lambda s: s.upper()} - res = np.loadtxt(txt, dtype=dt, delimiter=",", converters=conv) - expected = np.array( - [(15, 2.5, 'ABC'), (30, 4.0, 'DEF'), (55, 6.0, 'GHI')], dtype=dt - ) - assert_equal(res, expected) - - -def test_loadtxt_converter_with_unicode_dtype(): - """ - With 
the default 'bytes' encoding, tokens are encoded prior to being passed - to the converter. This means that the output of the converter may be bytes - instead of unicode as expected by `read_rows`. - - This test checks that outputs from the above scenario are properly decoded - prior to parsing by `read_rows`. - """ - txt = StringIO('abc,def\nrst,xyz') - conv = bytes.upper - res = np.loadtxt(txt, dtype=np.dtype("U3"), converters=conv, delimiter=",") - expected = np.array([['ABC', 'DEF'], ['RST', 'XYZ']]) - assert_equal(res, expected) - - -def test_loadtxt_read_huge_row(): - row = "1.5, 2.5," * 50000 - row = row[:-1] + "\n" - txt = TextIO(row * 2) - res = np.loadtxt(txt, delimiter=",", dtype=float) - assert_equal(res, np.tile([1.5, 2.5], (2, 50000))) - - -@pytest.mark.parametrize("dtype", "edfgFDG") -def test_loadtxt_huge_float(dtype): - # Covers a non-optimized path that is rarely taken: - field = "0" * 1000 + ".123456789" - dtype = np.dtype(dtype) - value = np.loadtxt([field], dtype=dtype)[()] - assert value == dtype.type("0.123456789") - - -@pytest.mark.parametrize( - ("given_dtype", "expected_dtype"), - [ - ("S", np.dtype("S5")), - ("U", np.dtype("U5")), - ], -) -def test_loadtxt_string_no_length_given(given_dtype, expected_dtype): - """ - The given dtype is just 'S' or 'U' with no length. In these cases, the - length of the resulting dtype is determined by the longest string found - in the file. - """ - txt = TextIO("AAA,5-1\nBBBBB,0-3\nC,4-9\n") - res = np.loadtxt(txt, dtype=given_dtype, delimiter=",") - expected = np.array( - [['AAA', '5-1'], ['BBBBB', '0-3'], ['C', '4-9']], dtype=expected_dtype - ) - assert_equal(res, expected) - assert_equal(res.dtype, expected_dtype) - - -def test_loadtxt_float_conversion(): - """ - Some tests that the conversion to float64 works as accurately as the Python - built-in `float` function. In a naive version of the float parser, these - strings resulted in values that were off by an ULP or two. 
- """ - strings = [ - '0.9999999999999999', - '9876543210.123456', - '5.43215432154321e+300', - '0.901', - '0.333', - ] - txt = TextIO('\n'.join(strings)) - res = np.loadtxt(txt) - expected = np.array([float(s) for s in strings]) - assert_equal(res, expected) - - -def test_loadtxt_bool(): - # Simple test for bool via integer - txt = TextIO("1, 0\n10, -1") - res = np.loadtxt(txt, dtype=bool, delimiter=",") - assert res.dtype == bool - assert_array_equal(res, [[True, False], [True, True]]) - # Make sure we use only 1 and 0 on the byte level: - assert_array_equal(res.view(np.uint8), [[1, 0], [1, 1]]) - - -@pytest.mark.parametrize("dtype", np.typecodes["AllInteger"]) -def test_loadtxt_integer_signs(dtype): - dtype = np.dtype(dtype) - assert np.loadtxt(["+2"], dtype=dtype) == 2 - if dtype.kind == "u": - with pytest.raises(ValueError): - np.loadtxt(["-1\n"], dtype=dtype) - else: - assert np.loadtxt(["-2\n"], dtype=dtype) == -2 - - for sign in ["++", "+-", "--", "-+"]: - with pytest.raises(ValueError): - np.loadtxt([f"{sign}2\n"], dtype=dtype) - - -@pytest.mark.parametrize("dtype", np.typecodes["AllInteger"]) -def test_loadtxt_implicit_cast_float_to_int_fails(dtype): - txt = TextIO("1.0, 2.1, 3.7\n4, 5, 6") - with pytest.raises(ValueError): - np.loadtxt(txt, dtype=dtype, delimiter=",") - -@pytest.mark.parametrize("dtype", (np.complex64, np.complex128)) -@pytest.mark.parametrize("with_parens", (False, True)) -def test_loadtxt_complex_parsing(dtype, with_parens): - s = "(1.0-2.5j),3.75,(7+-5.0j)\n(4),(-19e2j),(0)" - if not with_parens: - s = s.replace("(", "").replace(")", "") - - res = np.loadtxt(TextIO(s), dtype=dtype, delimiter=",") - expected = np.array( - [[1.0-2.5j, 3.75, 7-5j], [4.0, -1900j, 0]], dtype=dtype - ) - assert_equal(res, expected) - - -def test_loadtxt_read_from_generator(): - def gen(): - for i in range(4): - yield f"{i},{2*i},{i**2}" - - res = np.loadtxt(gen(), dtype=int, delimiter=",") - expected = np.array([[0, 0, 0], [1, 2, 1], [2, 4, 4], [3, 6, 9]]) 
- assert_equal(res, expected) - - -def test_loadtxt_read_from_generator_multitype(): - def gen(): - for i in range(3): - yield f"{i} {i / 4}" - - res = np.loadtxt(gen(), dtype="i, d", delimiter=" ") - expected = np.array([(0, 0.0), (1, 0.25), (2, 0.5)], dtype="i, d") - assert_equal(res, expected) - - -def test_loadtxt_read_from_bad_generator(): - def gen(): - for entry in ["1,2", b"3, 5", 12738]: - yield entry - - with pytest.raises( - TypeError, match=r"non-string returned while reading data" - ): - np.loadtxt(gen(), dtype="i, i", delimiter=",") - - -@pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") -def test_loadtxt_object_cleanup_on_read_error(): - sentinel = object() - - already_read = 0 - def conv(x): - nonlocal already_read - if already_read > 4999: - raise ValueError("failed half-way through!") - already_read += 1 - return sentinel - - txt = TextIO("x\n" * 10000) - - with pytest.raises(ValueError, match="at row 5000, column 1"): - np.loadtxt(txt, dtype=object, converters={0: conv}) - - assert sys.getrefcount(sentinel) == 2 - - -def test_loadtxt_character_not_bytes_compatible(): - """Test exception when a character cannot be encoded as 'S'.""" - data = StringIO("–") # == \u2013 - with pytest.raises(ValueError): - np.loadtxt(data, dtype="S5") - - -@pytest.mark.parametrize("conv", (0, [float], "")) -def test_loadtxt_invalid_converter(conv): - msg = ( - "converters must be a dictionary mapping columns to converter " - "functions or a single callable." 
- ) - with pytest.raises(TypeError, match=msg): - np.loadtxt(TextIO("1 2\n3 4"), converters=conv) - - -def test_loadtxt_converters_dict_raises_non_integer_key(): - with pytest.raises(TypeError, match="keys of the converters dict"): - np.loadtxt(TextIO("1 2\n3 4"), converters={"a": int}) - with pytest.raises(TypeError, match="keys of the converters dict"): - np.loadtxt(TextIO("1 2\n3 4"), converters={"a": int}, usecols=0) - - -@pytest.mark.parametrize("bad_col_ind", (3, -3)) -def test_loadtxt_converters_dict_raises_non_col_key(bad_col_ind): - data = TextIO("1 2\n3 4") - with pytest.raises(ValueError, match="converter specified for column"): - np.loadtxt(data, converters={bad_col_ind: int}) - - -def test_loadtxt_converters_dict_raises_val_not_callable(): - with pytest.raises( - TypeError, match="values of the converters dictionary must be callable" - ): - np.loadtxt(StringIO("1 2\n3 4"), converters={0: 1}) - - -@pytest.mark.parametrize("q", ('"', "'", "`")) -def test_loadtxt_quoted_field(q): - txt = TextIO( - f"{q}alpha, x{q}, 2.5\n{q}beta, y{q}, 4.5\n{q}gamma, z{q}, 5.0\n" - ) - dtype = np.dtype([('f0', 'U8'), ('f1', np.float64)]) - expected = np.array( - [("alpha, x", 2.5), ("beta, y", 4.5), ("gamma, z", 5.0)], dtype=dtype - ) - - res = np.loadtxt(txt, dtype=dtype, delimiter=",", quotechar=q) - assert_array_equal(res, expected) - - -def test_loadtxt_quote_support_default(): - """Support for quoted fields is disabled by default.""" - txt = TextIO('"lat,long", 45, 30\n') - dtype = np.dtype([('f0', 'U24'), ('f1', np.float64), ('f2', np.float64)]) - - with pytest.raises(ValueError, match="the number of columns changed"): - np.loadtxt(txt, dtype=dtype, delimiter=",") - - # Enable quoting support with non-None value for quotechar param - txt.seek(0) - expected = np.array([("lat,long", 45., 30.)], dtype=dtype) - - res = np.loadtxt(txt, dtype=dtype, delimiter=",", quotechar='"') - assert_array_equal(res, expected) - - -def test_loadtxt_quotechar_multichar_error(): - txt = 
StringIO("1,2\n3,4") - msg = r".*must be a single unicode character or None" - with pytest.raises(TypeError, match=msg): - np.loadtxt(txt, delimiter=",", quotechar="''") - - -def test_loadtxt_comment_multichar_error_with_quote(): - txt = StringIO("1,2\n3,4") - msg = ( - "when multiple comments or a multi-character comment is given, " - "quotes are not supported." - ) - with pytest.raises(ValueError, match=msg): - np.loadtxt(txt, delimiter=",", comments="123", quotechar='"') - with pytest.raises(ValueError, match=msg): - np.loadtxt(txt, delimiter=",", comments=["#", "%"], quotechar='"') - - # A single character string in a tuple is unpacked though: - res = np.loadtxt(txt, delimiter=",", comments=("#",), quotechar="'") - assert_equal(res, [[1, 2], [3, 4]]) - - -def test_loadtxt_structured_dtype_with_quotes(): - data = TextIO( - ( - "1000;2.4;'alpha';-34\n" - "2000;3.1;'beta';29\n" - "3500;9.9;'gamma';120\n" - "4090;8.1;'delta';0\n" - "5001;4.4;'epsilon';-99\n" - "6543;7.8;'omega';-1\n" - ) - ) - dtype = np.dtype( - [('f0', np.uint16), ('f1', np.float64), ('f2', 'S7'), ('f3', np.int8)] - ) - expected = np.array( - [ - (1000, 2.4, "alpha", -34), - (2000, 3.1, "beta", 29), - (3500, 9.9, "gamma", 120), - (4090, 8.1, "delta", 0), - (5001, 4.4, "epsilon", -99), - (6543, 7.8, "omega", -1) - ], - dtype=dtype - ) - res = np.loadtxt(data, dtype=dtype, delimiter=";", quotechar="'") - assert_array_equal(res, expected) - - -def test_loadtxt_quoted_field_is_not_empty(): - txt = StringIO('1\n\n"4"\n""') - expected = np.array(["1", "4", ""], dtype="U1") - res = np.loadtxt(txt, delimiter=",", dtype="U1", quotechar='"') - assert_equal(res, expected) - - -def test_loadtxt_consecutive_quotechar_escaped(): - txt = TextIO('"Hello, my name is ""Monty""!"') - expected = np.array('Hello, my name is "Monty"!', dtype="U40") - res = np.loadtxt(txt, dtype="U40", delimiter=",", quotechar='"') - assert_equal(res, expected) - - -@pytest.mark.parametrize("data", ("", "\n\n\n", "# 1 2 3\n# 4 5 6\n")) 
-@pytest.mark.parametrize("ndmin", (0, 1, 2)) -@pytest.mark.parametrize("usecols", [None, (1, 2, 3)]) -def test_loadtxt_warn_on_no_data(data, ndmin, usecols): - """Check that a UserWarning is emitted when no data is read from input.""" - if usecols is not None: - expected_shape = (0, 3) - elif ndmin == 2: - expected_shape = (0, 1) # guess a single column?! - else: - expected_shape = (0,) - - txt = TextIO(data) - with pytest.warns(UserWarning, match="input contained no data"): - res = np.loadtxt(txt, ndmin=ndmin, usecols=usecols) - assert res.shape == expected_shape - - with NamedTemporaryFile(mode="w") as fh: - fh.write(data) - fh.seek(0) - with pytest.warns(UserWarning, match="input contained no data"): - res = np.loadtxt(txt, ndmin=ndmin, usecols=usecols) - assert res.shape == expected_shape - -@pytest.mark.parametrize("skiprows", (2, 3)) -def test_loadtxt_warn_on_skipped_data(skiprows): - data = "1 2 3\n4 5 6" - txt = TextIO(data) - with pytest.warns(UserWarning, match="input contained no data"): - np.loadtxt(txt, skiprows=skiprows) - -@pytest.mark.parametrize("dtype", - list(np.typecodes["AllInteger"] + np.typecodes["AllFloat"]) + ["U2"]) -@pytest.mark.parametrize("swap", [True, False]) -def test_loadtxt_byteswapping_and_unaligned(dtype, swap): - data = ["x,1\n"] # no need for complicated data - dtype = np.dtype(dtype) - if swap: - dtype = dtype.newbyteorder() - full_dt = np.dtype([("a", "S1"), ("b", dtype)], align=False) - # The above ensures that the interesting "b" field is unaligned: - assert full_dt.fields["b"][1] == 1 - res = np.loadtxt(data, dtype=full_dt, delimiter=",") - assert res["b"] == dtype.type(1) - -@pytest.mark.parametrize("dtype", - np.typecodes["AllInteger"] + "efdFD" + "?") -def test_loadtxt_unicode_whitespace_stripping(dtype): - # Test that all numeric types (and bool) strip whitespace correctly - # \u202F is a narrow no-break space, `\n` is just a whitespace if quoted. 
- # Currently, skip float128 as it did not always support this and has no - # "custom" parsing: - txt = StringIO(' 3 ,"\u202F2\n"') - res = np.loadtxt(txt, dtype=dtype, delimiter=",", quotechar='"') - assert_array_equal(res, np.array([3, 2]).astype(dtype)) - -@pytest.mark.parametrize("dtype", "FD") -def test_loadtxt_unicode_whitespace_stripping_complex(dtype): - # Complex has a few extra cases since it has two components and parentheses - line = " 1 , 2+3j , ( 4+5j ), ( 6+-7j ) , 8j , ( 9j ) \n" - data = [line, line.replace(" ", "\u202F")] - res = np.loadtxt(data, dtype=dtype, delimiter=',') - assert_array_equal(res, np.array([[1, 2+3j, 4+5j, 6-7j, 8j, 9j]] * 2)) - -@pytest.mark.parametrize("dtype", "FD") -@pytest.mark.parametrize("field", - ["1 +2j", "1+ 2j", "1+2 j", "1+-+3", "(1j", "(1", "(1+2j", "1+2j)"]) -def test_loadtxt_bad_complex(dtype, field): - with pytest.raises(ValueError): - np.loadtxt([field + "\n"], dtype=dtype, delimiter=",") - - -@pytest.mark.parametrize("data", [ - ["1,2\n", "2\n,3\n"], - ["1,2\n", "2\r,3\n"]]) -def test_loadtxt_bad_newline_in_iterator(data): - # In NumPy <=1.22 this was accepted, because newlines were completely - # ignored when the input was an iterable. This could be changed, but right - # now, we raise an error. - with pytest.raises(ValueError, - match="Found an unquoted embedded newline within a single line"): - np.loadtxt(data, delimiter=",") - -@pytest.mark.parametrize("data", [ - ["1,2\n", "2,3\r\n"], # a universal newline - ["1,2\n", "'2\n',3\n"], # a quoted newline - ["1,2\n", "'2\r',3\n"], - ["1,2\n", "'2\r\n',3\n"], -]) -def test_loadtxt_good_newline_in_iterator(data): - # The quoted newlines will be untransformed here, but are just whitespace. 
- res = np.loadtxt(data, delimiter=",", quotechar="'") - assert_array_equal(res, [[1., 2.], [2., 3.]]) - - -@pytest.mark.parametrize("newline", ["\n", "\r", "\r\n"]) -def test_unviersal_newlines_quoted(newline): - # Check that universal newline support within the tokenizer is not applied - # to quoted fields. (note that lines must end in newline or quoted - # fields will not include a newline at all) - data = ['1,"2\n"\n', '3,"4\n', '1"\n'] - data = [row.replace("\n", newline) for row in data] - res = np.loadtxt(data, dtype=object, delimiter=",", quotechar='"') - assert_array_equal(res, [['1', f'2{newline}'], ['3', f'4{newline}1']]) - - -def test_loadtxt_iterator_fails_getting_next_line(): - class BadSequence: - def __len__(self): - return 100 - - def __getitem__(self, item): - if item == 50: - raise RuntimeError("Bad things happened!") - return f"{item}, {item+1}" - - with pytest.raises(RuntimeError, match="Bad things happened!"): - np.loadtxt(BadSequence(), dtype=int, delimiter=",") - - -class TestCReaderUnitTests: - # These are internal tests for path that should not be possible to hit - # unless things go very very wrong somewhere. - def test_not_an_filelike(self): - with pytest.raises(AttributeError, match=".*read"): - np.core._multiarray_umath._load_from_filelike( - object(), dtype=np.dtype("i"), filelike=True) - - def test_filelike_read_fails(self): - # Can only be reached if loadtxt opens the file, so it is hard to do - # via the public interface (although maybe not impossible considering - # the current "DataClass" backing). 
- class BadFileLike: - counter = 0 - def read(self, size): - self.counter += 1 - if self.counter > 20: - raise RuntimeError("Bad bad bad!") - return "1,2,3\n" - - with pytest.raises(RuntimeError, match="Bad bad bad!"): - np.core._multiarray_umath._load_from_filelike( - BadFileLike(), dtype=np.dtype("i"), filelike=True) - - def test_filelike_bad_read(self): - # Can only be reached if loadtxt opens the file, so it is hard to do - # via the public interface (although maybe not impossible considering - # the current "DataClass" backing). - class BadFileLike: - counter = 0 - def read(self, size): - return 1234 # not a string! - - with pytest.raises(TypeError, - match="non-string returned while reading data"): - np.core._multiarray_umath._load_from_filelike( - BadFileLike(), dtype=np.dtype("i"), filelike=True) - - def test_not_an_iter(self): - with pytest.raises(TypeError, - match="error reading from object, expected an iterable"): - np.core._multiarray_umath._load_from_filelike( - object(), dtype=np.dtype("i"), filelike=False) - - def test_bad_type(self): - with pytest.raises(TypeError, match="internal error: dtype must"): - np.core._multiarray_umath._load_from_filelike( - object(), dtype="i", filelike=False) - - def test_bad_encoding(self): - with pytest.raises(TypeError, match="encoding must be a unicode"): - np.core._multiarray_umath._load_from_filelike( - object(), dtype=np.dtype("i"), filelike=False, encoding=123) - - @pytest.mark.parametrize("newline", ["\r", "\n", "\r\n"]) - def test_manual_universal_newlines(self, newline): - # This is currently not available to users, because we should always - # open files with universal newlines enabled `newlines=None`. - # (And reading from an iterator uses slightly different code paths.) - # We have no real support for `newline="\r"` or `newline="\n" as the - # user cannot specify those options. 
- data = StringIO('0\n1\n"2\n"\n3\n4 #\n'.replace("\n", newline), - newline="") - - res = np.core._multiarray_umath._load_from_filelike( - data, dtype=np.dtype("U10"), filelike=True, - quote='"', comment="#", skiplines=1) - assert_array_equal(res[:, 0], ["1", f"2{newline}", "3", "4 "]) diff --git a/numpy/lib/tests/test_loadtxt.py b/numpy/lib/tests/test_loadtxt.py new file mode 100644 index 000000000..b8fd9a796 --- /dev/null +++ b/numpy/lib/tests/test_loadtxt.py @@ -0,0 +1,836 @@ +""" +Tests specific to `np.loadtxt` added during the move of loadtxt to be backed +by C code. +These tests complement those found in `test_io.py`. +""" + +import sys +import pytest +from tempfile import NamedTemporaryFile +from io import StringIO + +import numpy as np +from numpy.ma.testutils import assert_equal +from numpy.testing import assert_array_equal, HAS_REFCOUNT + + +def test_scientific_notation(): + """Test that both 'e' and 'E' are parsed correctly.""" + data = StringIO( + ( + "1.0e-1,2.0E1,3.0\n" + "4.0e-2,5.0E-1,6.0\n" + "7.0e-3,8.0E1,9.0\n" + "0.0e-4,1.0E-1,2.0" + ) + ) + expected = np.array( + [[0.1, 20., 3.0], [0.04, 0.5, 6], [0.007, 80., 9], [0, 0.1, 2]] + ) + assert_array_equal(np.loadtxt(data, delimiter=","), expected) + + +@pytest.mark.parametrize("comment", ["..", "//", "@-", "this is a comment:"]) +def test_comment_multiple_chars(comment): + content = "# IGNORE\n1.5, 2.5# ABC\n3.0,4.0# XXX\n5.5,6.0\n" + txt = StringIO(content.replace("#", comment)) + a = np.loadtxt(txt, delimiter=",", comments=comment) + assert_equal(a, [[1.5, 2.5], [3.0, 4.0], [5.5, 6.0]]) + + +@pytest.fixture +def mixed_types_structured(): + """ + Fixture providing hetergeneous input data with a structured dtype, along + with the associated structured array. 
+ """ + data = StringIO( + ( + "1000;2.4;alpha;-34\n" + "2000;3.1;beta;29\n" + "3500;9.9;gamma;120\n" + "4090;8.1;delta;0\n" + "5001;4.4;epsilon;-99\n" + "6543;7.8;omega;-1\n" + ) + ) + dtype = np.dtype( + [('f0', np.uint16), ('f1', np.float64), ('f2', 'S7'), ('f3', np.int8)] + ) + expected = np.array( + [ + (1000, 2.4, "alpha", -34), + (2000, 3.1, "beta", 29), + (3500, 9.9, "gamma", 120), + (4090, 8.1, "delta", 0), + (5001, 4.4, "epsilon", -99), + (6543, 7.8, "omega", -1) + ], + dtype=dtype + ) + return data, dtype, expected + + +@pytest.mark.parametrize('skiprows', [0, 1, 2, 3]) +def test_structured_dtype_and_skiprows_no_empty_lines( + skiprows, mixed_types_structured): + data, dtype, expected = mixed_types_structured + a = np.loadtxt(data, dtype=dtype, delimiter=";", skiprows=skiprows) + assert_array_equal(a, expected[skiprows:]) + + +def test_unpack_structured(mixed_types_structured): + data, dtype, expected = mixed_types_structured + + a, b, c, d = np.loadtxt(data, dtype=dtype, delimiter=";", unpack=True) + assert_array_equal(a, expected["f0"]) + assert_array_equal(b, expected["f1"]) + assert_array_equal(c, expected["f2"]) + assert_array_equal(d, expected["f3"]) + + +def test_structured_dtype_with_shape(): + dtype = np.dtype([("a", "u1", 2), ("b", "u1", 2)]) + data = StringIO("0,1,2,3\n6,7,8,9\n") + expected = np.array([((0, 1), (2, 3)), ((6, 7), (8, 9))], dtype=dtype) + assert_array_equal(np.loadtxt(data, delimiter=",", dtype=dtype), expected) + + +def test_structured_dtype_with_multi_shape(): + dtype = np.dtype([("a", "u1", (2, 2))]) + data = StringIO("0 1 2 3\n") + expected = np.array([(((0, 1), (2, 3)),)], dtype=dtype) + assert_array_equal(np.loadtxt(data, dtype=dtype), expected) + + +def test_nested_structured_subarray(): + # Test from gh-16678 + point = np.dtype([('x', float), ('y', float)]) + dt = np.dtype([('code', int), ('points', point, (2,))]) + data = StringIO("100,1,2,3,4\n200,5,6,7,8\n") + expected = np.array( + [ + (100, [(1., 2.), (3., 4.)]), + 
(200, [(5., 6.), (7., 8.)]), + ], + dtype=dt + ) + assert_array_equal(np.loadtxt(data, dtype=dt, delimiter=","), expected) + + +def test_structured_dtype_offsets(): + # An aligned structured dtype will have additional padding + dt = np.dtype("i1, i4, i1, i4, i1, i4", align=True) + data = StringIO("1,2,3,4,5,6\n7,8,9,10,11,12\n") + expected = np.array([(1, 2, 3, 4, 5, 6), (7, 8, 9, 10, 11, 12)], dtype=dt) + assert_array_equal(np.loadtxt(data, delimiter=",", dtype=dt), expected) + + +@pytest.mark.parametrize("param", ("skiprows", "max_rows")) +def test_exception_negative_row_limits(param): + """skiprows and max_rows should raise for negative parameters.""" + with pytest.raises(ValueError, match="argument must be nonnegative"): + np.loadtxt("foo.bar", **{param: -3}) + + +@pytest.mark.parametrize("param", ("skiprows", "max_rows")) +def test_exception_noninteger_row_limits(param): + with pytest.raises(TypeError, match="argument must be an integer"): + np.loadtxt("foo.bar", **{param: 1.0}) + + +@pytest.mark.parametrize( + "data, shape", + [ + ("1 2 3 4 5\n", (1, 5)), # Single row + ("1\n2\n3\n4\n5\n", (5, 1)), # Single column + ] +) +def test_ndmin_single_row_or_col(data, shape): + arr = np.array([1, 2, 3, 4, 5]) + arr2d = arr.reshape(shape) + + assert_array_equal(np.loadtxt(StringIO(data), dtype=int), arr) + assert_array_equal(np.loadtxt(StringIO(data), dtype=int, ndmin=0), arr) + assert_array_equal(np.loadtxt(StringIO(data), dtype=int, ndmin=1), arr) + assert_array_equal(np.loadtxt(StringIO(data), dtype=int, ndmin=2), arr2d) + + +@pytest.mark.parametrize("badval", [-1, 3, None, "plate of shrimp"]) +def test_bad_ndmin(badval): + with pytest.raises(ValueError, match="Illegal value of ndmin keyword"): + np.loadtxt("foo.bar", ndmin=badval) + + +@pytest.mark.parametrize( + "ws", + ( + "\t", # tab + "\u2003", # em + "\u00A0", # non-break + "\u3000", # ideographic space + ) +) +def test_blank_lines_spaces_delimit(ws): + txt = StringIO( + f"1 2{ws}30\n\n4 5 60\n {ws} \n7 8 
{ws} 90\n # comment\n3 2 1" + ) + # NOTE: It is unclear that the ` # comment` should succeed. Except + # for delimiter=None, which should use any whitespace (and maybe + # should just be implemented closer to Python + expected = np.array([[1, 2, 30], [4, 5, 60], [7, 8, 90], [3, 2, 1]]) + assert_equal( + np.loadtxt(txt, dtype=int, delimiter=None, comments="#"), expected + ) + + +def test_blank_lines_normal_delimiter(): + txt = StringIO('1,2,30\n\n4,5,60\n\n7,8,90\n# comment\n3,2,1') + expected = np.array([[1, 2, 30], [4, 5, 60], [7, 8, 90], [3, 2, 1]]) + assert_equal( + np.loadtxt(txt, dtype=int, delimiter=',', comments="#"), expected + ) + + +@pytest.mark.parametrize("dtype", (float, object)) +def test_maxrows_no_blank_lines(dtype): + txt = StringIO("1.5,2.5\n3.0,4.0\n5.5,6.0") + res = np.loadtxt(txt, dtype=dtype, delimiter=",", max_rows=2) + assert_equal(res.dtype, dtype) + assert_equal(res, np.array([["1.5", "2.5"], ["3.0", "4.0"]], dtype=dtype)) + + +@pytest.mark.parametrize("dtype", (np.dtype("f8"), np.dtype("i2"))) +def test_exception_message_bad_values(dtype): + txt = StringIO("1,2\n3,XXX\n5,6") + msg = f"could not convert string 'XXX' to {dtype} at row 1, column 2" + with pytest.raises(ValueError, match=msg): + np.loadtxt(txt, dtype=dtype, delimiter=",") + + +def test_converters_negative_indices(): + txt = StringIO('1.5,2.5\n3.0,XXX\n5.5,6.0') + conv = {-1: lambda s: np.nan if s == 'XXX' else float(s)} + expected = np.array([[1.5, 2.5], [3.0, np.nan], [5.5, 6.0]]) + res = np.loadtxt( + txt, dtype=np.float64, delimiter=",", converters=conv, encoding=None + ) + assert_equal(res, expected) + + +def test_converters_negative_indices_with_usecols(): + txt = StringIO('1.5,2.5,3.5\n3.0,4.0,XXX\n5.5,6.0,7.5\n') + conv = {-1: lambda s: np.nan if s == 'XXX' else float(s)} + expected = np.array([[1.5, 3.5], [3.0, np.nan], [5.5, 7.5]]) + res = np.loadtxt( + txt, + dtype=np.float64, + delimiter=",", + converters=conv, + usecols=[0, -1], + encoding=None, + ) + 
assert_equal(res, expected) + + +def test_ragged_usecols(): + # usecols, and negative ones, work even with varying number of columns. + txt = StringIO("0,0,XXX\n0,XXX,0,XXX\n0,XXX,XXX,0,XXX\n") + expected = np.array([[0, 0], [0, 0], [0, 0]]) + res = np.loadtxt(txt, dtype=float, delimiter=",", usecols=[0, -2]) + assert_equal(res, expected) + + txt = StringIO("0,0,XXX\n0\n0,XXX,XXX,0,XXX\n") + with pytest.raises(ValueError, + match="invalid column index -2 at row 1 with 2 columns"): + # There is no -2 column in the second row: + np.loadtxt(txt, dtype=float, delimiter=",", usecols=[0, -2]) + + +def test_empty_usecols(): + txt = StringIO("0,0,XXX\n0,XXX,0,XXX\n0,XXX,XXX,0,XXX\n") + res = np.loadtxt(txt, dtype=np.dtype([]), delimiter=",", usecols=[]) + assert res.shape == (3,) + assert res.dtype == np.dtype([]) + + +@pytest.mark.parametrize("c1", ["a", "の", "🫕"]) +@pytest.mark.parametrize("c2", ["a", "の", "🫕"]) +def test_large_unicode_characters(c1, c2): + # c1 and c2 span ascii, 16bit and 32bit range. 
+ txt = StringIO(f"a,{c1},c,1.0\ne,{c2},2.0,g") + res = np.loadtxt(txt, dtype=np.dtype('U12'), delimiter=",") + expected = np.array( + [f"a,{c1},c,1.0".split(","), f"e,{c2},2.0,g".split(",")], + dtype=np.dtype('U12') + ) + assert_equal(res, expected) + + +def test_unicode_with_converter(): + txt = StringIO("cat,dog\nαβγ,δεζ\nabc,def\n") + conv = {0: lambda s: s.upper()} + res = np.loadtxt( + txt, + dtype=np.dtype("U12"), + converters=conv, + delimiter=",", + encoding=None + ) + expected = np.array([['CAT', 'dog'], ['ΑΒΓ', 'δεζ'], ['ABC', 'def']]) + assert_equal(res, expected) + + +def test_converter_with_structured_dtype(): + txt = StringIO('1.5,2.5,Abc\n3.0,4.0,dEf\n5.5,6.0,ghI\n') + dt = np.dtype([('m', np.int32), ('r', np.float32), ('code', 'U8')]) + conv = {0: lambda s: int(10*float(s)), -1: lambda s: s.upper()} + res = np.loadtxt(txt, dtype=dt, delimiter=",", converters=conv) + expected = np.array( + [(15, 2.5, 'ABC'), (30, 4.0, 'DEF'), (55, 6.0, 'GHI')], dtype=dt + ) + assert_equal(res, expected) + + +def test_converter_with_unicode_dtype(): + """ + With the default 'bytes' encoding, tokens are encoded prior to being passed + to the converter. This means that the output of the converter may be bytes + instead of unicode as expected by `read_rows`. + + This test checks that outputs from the above scenario are properly decoded + prior to parsing by `read_rows`. 
+ """ + txt = StringIO('abc,def\nrst,xyz') + conv = bytes.upper + res = np.loadtxt(txt, dtype=np.dtype("U3"), converters=conv, delimiter=",") + expected = np.array([['ABC', 'DEF'], ['RST', 'XYZ']]) + assert_equal(res, expected) + + +def test_read_huge_row(): + row = "1.5, 2.5," * 50000 + row = row[:-1] + "\n" + txt = StringIO(row * 2) + res = np.loadtxt(txt, delimiter=",", dtype=float) + assert_equal(res, np.tile([1.5, 2.5], (2, 50000))) + + +@pytest.mark.parametrize("dtype", "edfgFDG") +def test_huge_float(dtype): + # Covers a non-optimized path that is rarely taken: + field = "0" * 1000 + ".123456789" + dtype = np.dtype(dtype) + value = np.loadtxt([field], dtype=dtype)[()] + assert value == dtype.type("0.123456789") + + +@pytest.mark.parametrize( + ("given_dtype", "expected_dtype"), + [ + ("S", np.dtype("S5")), + ("U", np.dtype("U5")), + ], +) +def test_string_no_length_given(given_dtype, expected_dtype): + """ + The given dtype is just 'S' or 'U' with no length. In these cases, the + length of the resulting dtype is determined by the longest string found + in the file. + """ + txt = StringIO("AAA,5-1\nBBBBB,0-3\nC,4-9\n") + res = np.loadtxt(txt, dtype=given_dtype, delimiter=",") + expected = np.array( + [['AAA', '5-1'], ['BBBBB', '0-3'], ['C', '4-9']], dtype=expected_dtype + ) + assert_equal(res, expected) + assert_equal(res.dtype, expected_dtype) + + +def test_float_conversion(): + """ + Some tests that the conversion to float64 works as accurately as the Python + built-in `float` function. In a naive version of the float parser, these + strings resulted in values that were off by an ULP or two. 
+ """ + strings = [ + '0.9999999999999999', + '9876543210.123456', + '5.43215432154321e+300', + '0.901', + '0.333', + ] + txt = StringIO('\n'.join(strings)) + res = np.loadtxt(txt) + expected = np.array([float(s) for s in strings]) + assert_equal(res, expected) + + +def test_bool(): + # Simple test for bool via integer + txt = StringIO("1, 0\n10, -1") + res = np.loadtxt(txt, dtype=bool, delimiter=",") + assert res.dtype == bool + assert_array_equal(res, [[True, False], [True, True]]) + # Make sure we use only 1 and 0 on the byte level: + assert_array_equal(res.view(np.uint8), [[1, 0], [1, 1]]) + + +@pytest.mark.parametrize("dtype", np.typecodes["AllInteger"]) +def test_integer_signs(dtype): + dtype = np.dtype(dtype) + assert np.loadtxt(["+2"], dtype=dtype) == 2 + if dtype.kind == "u": + with pytest.raises(ValueError): + np.loadtxt(["-1\n"], dtype=dtype) + else: + assert np.loadtxt(["-2\n"], dtype=dtype) == -2 + + for sign in ["++", "+-", "--", "-+"]: + with pytest.raises(ValueError): + np.loadtxt([f"{sign}2\n"], dtype=dtype) + + +@pytest.mark.parametrize("dtype", np.typecodes["AllInteger"]) +def test_implicit_cast_float_to_int_fails(dtype): + txt = StringIO("1.0, 2.1, 3.7\n4, 5, 6") + with pytest.raises(ValueError): + np.loadtxt(txt, dtype=dtype, delimiter=",") + +@pytest.mark.parametrize("dtype", (np.complex64, np.complex128)) +@pytest.mark.parametrize("with_parens", (False, True)) +def test_complex_parsing(dtype, with_parens): + s = "(1.0-2.5j),3.75,(7+-5.0j)\n(4),(-19e2j),(0)" + if not with_parens: + s = s.replace("(", "").replace(")", "") + + res = np.loadtxt(StringIO(s), dtype=dtype, delimiter=",") + expected = np.array( + [[1.0-2.5j, 3.75, 7-5j], [4.0, -1900j, 0]], dtype=dtype + ) + assert_equal(res, expected) + + +def test_read_from_generator(): + def gen(): + for i in range(4): + yield f"{i},{2*i},{i**2}" + + res = np.loadtxt(gen(), dtype=int, delimiter=",") + expected = np.array([[0, 0, 0], [1, 2, 1], [2, 4, 4], [3, 6, 9]]) + assert_equal(res, expected) + 
def test_read_from_generator_multitype():
    # Generator input also works with a structured (multi-field) dtype.
    def gen():
        for i in range(3):
            yield f"{i} {i / 4}"

    res = np.loadtxt(gen(), dtype="i, d", delimiter=" ")
    expected = np.array([(0, 0.0), (1, 0.25), (2, 0.5)], dtype="i, d")
    assert_equal(res, expected)


def test_read_from_bad_generator():
    # Non-string items yielded by the iterable are rejected with a TypeError.
    def gen():
        for entry in ["1,2", b"3, 5", 12738]:
            yield entry

    with pytest.raises(
            TypeError, match=r"non-string returned while reading data"):
        np.loadtxt(gen(), dtype="i, i", delimiter=",")


@pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts")
def test_object_cleanup_on_read_error():
    # When a converter raises half-way through, already-created objects must
    # be released (checked via the sentinel's refcount).
    sentinel = object()

    already_read = 0
    def conv(x):
        nonlocal already_read
        if already_read > 4999:
            raise ValueError("failed half-way through!")
        already_read += 1
        return sentinel

    txt = StringIO("x\n" * 10000)

    with pytest.raises(ValueError, match="at row 5000, column 1"):
        np.loadtxt(txt, dtype=object, converters={0: conv})

    # Only `sentinel` itself and the getrefcount argument remain:
    assert sys.getrefcount(sentinel) == 2


def test_character_not_bytes_compatible():
    """Test exception when a character cannot be encoded as 'S'."""
    data = StringIO("–")  # == \u2013
    with pytest.raises(ValueError):
        np.loadtxt(data, dtype="S5")


@pytest.mark.parametrize("conv", (0, [float], ""))
def test_invalid_converter(conv):
    # converters must be a dict or a single callable; anything else raises.
    msg = (
        "converters must be a dictionary mapping columns to converter "
        "functions or a single callable."
    )
    with pytest.raises(TypeError, match=msg):
        np.loadtxt(StringIO("1 2\n3 4"), converters=conv)


def test_converters_dict_raises_non_integer_key():
    # Converter dict keys must be integers (with or without usecols).
    with pytest.raises(TypeError, match="keys of the converters dict"):
        np.loadtxt(StringIO("1 2\n3 4"), converters={"a": int})
    with pytest.raises(TypeError, match="keys of the converters dict"):
        np.loadtxt(StringIO("1 2\n3 4"), converters={"a": int}, usecols=0)


@pytest.mark.parametrize("bad_col_ind", (3, -3))
def test_converters_dict_raises_non_col_key(bad_col_ind):
    # Converter keys outside the column range (either sign) are rejected.
    data = StringIO("1 2\n3 4")
    with pytest.raises(ValueError, match="converter specified for column"):
        np.loadtxt(data, converters={bad_col_ind: int})


def test_converters_dict_raises_val_not_callable():
    # Converter dict values must be callable.
    with pytest.raises(
            TypeError,
            match="values of the converters dictionary must be callable"):
        np.loadtxt(StringIO("1 2\n3 4"), converters={0: 1})


@pytest.mark.parametrize("q", ('"', "'", "`"))
def test_quoted_field(q):
    # Delimiters inside a quoted field do not split the field.
    txt = StringIO(
        f"{q}alpha, x{q}, 2.5\n{q}beta, y{q}, 4.5\n{q}gamma, z{q}, 5.0\n"
    )
    dtype = np.dtype([('f0', 'U8'), ('f1', np.float64)])
    expected = np.array(
        [("alpha, x", 2.5), ("beta, y", 4.5), ("gamma, z", 5.0)], dtype=dtype
    )

    res = np.loadtxt(txt, dtype=dtype, delimiter=",", quotechar=q)
    assert_array_equal(res, expected)


def test_quote_support_default():
    """Support for quoted fields is disabled by default."""
    txt = StringIO('"lat,long", 45, 30\n')
    dtype = np.dtype([('f0', 'U24'), ('f1', np.float64), ('f2', np.float64)])

    with pytest.raises(ValueError, match="the number of columns changed"):
        np.loadtxt(txt, dtype=dtype, delimiter=",")

    # Enable quoting support with non-None value for quotechar param
    txt.seek(0)
    expected = np.array([("lat,long", 45., 30.)], dtype=dtype)

    res = np.loadtxt(txt, dtype=dtype, delimiter=",", quotechar='"')
    assert_array_equal(res, expected)


def test_quotechar_multichar_error():
    # quotechar must be a single character (or None).
    txt = StringIO("1,2\n3,4")
    msg = r".*must be a single unicode character or None"
    with pytest.raises(TypeError, match=msg):
        np.loadtxt(txt, delimiter=",", quotechar="''")


def test_comment_multichar_error_with_quote():
    # Multi-character (or multiple) comments are incompatible with quoting.
    txt = StringIO("1,2\n3,4")
    msg = (
        "when multiple comments or a multi-character comment is given, "
        "quotes are not supported."
    )
    with pytest.raises(ValueError, match=msg):
        np.loadtxt(txt, delimiter=",", comments="123", quotechar='"')
    with pytest.raises(ValueError, match=msg):
        np.loadtxt(txt, delimiter=",", comments=["#", "%"], quotechar='"')

    # A single character string in a tuple is unpacked though:
    res = np.loadtxt(txt, delimiter=",", comments=("#",), quotechar="'")
    assert_equal(res, [[1, 2], [3, 4]])


def test_structured_dtype_with_quotes():
    # Quoted string fields combined with a structured dtype.
    data = StringIO(
        (
            "1000;2.4;'alpha';-34\n"
            "2000;3.1;'beta';29\n"
            "3500;9.9;'gamma';120\n"
            "4090;8.1;'delta';0\n"
            "5001;4.4;'epsilon';-99\n"
            "6543;7.8;'omega';-1\n"
        )
    )
    dtype = np.dtype(
        [('f0', np.uint16), ('f1', np.float64), ('f2', 'S7'), ('f3', np.int8)]
    )
    expected = np.array(
        [
            (1000, 2.4, "alpha", -34),
            (2000, 3.1, "beta", 29),
            (3500, 9.9, "gamma", 120),
            (4090, 8.1, "delta", 0),
            (5001, 4.4, "epsilon", -99),
            (6543, 7.8, "omega", -1)
        ],
        dtype=dtype
    )
    res = np.loadtxt(data, dtype=dtype, delimiter=";", quotechar="'")
    assert_array_equal(res, expected)


def test_quoted_field_is_not_empty():
    # An empty quoted field ("") still counts as a (non-skipped) row.
    txt = StringIO('1\n\n"4"\n""')
    expected = np.array(["1", "4", ""], dtype="U1")
    res = np.loadtxt(txt, delimiter=",", dtype="U1", quotechar='"')
    assert_equal(res, expected)


def test_quoted_field_is_not_empty_nonstrict():
    # Same as test_quoted_field_is_not_empty but check that we are not strict
    # about missing closing quote (this is the `csv.reader` default also)
    txt = StringIO('1\n\n"4"\n"')
    expected = np.array(["1", "4", ""], dtype="U1")
    res = np.loadtxt(txt, delimiter=",", dtype="U1", quotechar='"')
    assert_equal(res, expected)


def test_consecutive_quotechar_escaped():
    # A doubled quotechar inside a quoted field is an escaped literal quote.
    txt = StringIO('"Hello, my name is ""Monty""!"')
    expected = np.array('Hello, my name is "Monty"!', dtype="U40")
    res = np.loadtxt(txt, dtype="U40", delimiter=",", quotechar='"')
    assert_equal(res, expected)


@pytest.mark.parametrize("data", ("", "\n\n\n", "# 1 2 3\n# 4 5 6\n"))
@pytest.mark.parametrize("ndmin", (0, 1, 2))
@pytest.mark.parametrize("usecols", [None, (1, 2, 3)])
def test_warn_on_no_data(data, ndmin, usecols):
    """Check that a UserWarning is emitted when no data is read from input."""
    if usecols is not None:
        expected_shape = (0, 3)
    elif ndmin == 2:
        expected_shape = (0, 1)  # guess a single column?!
    else:
        expected_shape = (0,)

    txt = StringIO(data)
    with pytest.warns(UserWarning, match="input contained no data"):
        res = np.loadtxt(txt, ndmin=ndmin, usecols=usecols)
    assert res.shape == expected_shape

    with NamedTemporaryFile(mode="w") as fh:
        fh.write(data)
        fh.seek(0)  # also flushes the buffer so the file is readable below
        with pytest.warns(UserWarning, match="input contained no data"):
            # BUG FIX: read from the temporary file just written, not from
            # the already-exhausted StringIO above (which only warned because
            # an exhausted stream is itself empty).
            res = np.loadtxt(fh.name, ndmin=ndmin, usecols=usecols)
        assert res.shape == expected_shape


@pytest.mark.parametrize("skiprows", (2, 3))
def test_warn_on_skipped_data(skiprows):
    # Skipping all available rows should also warn about missing data.
    data = "1 2 3\n4 5 6"
    txt = StringIO(data)
    with pytest.warns(UserWarning, match="input contained no data"):
        np.loadtxt(txt, skiprows=skiprows)


@pytest.mark.parametrize("dtype",
        list(np.typecodes["AllInteger"] + np.typecodes["AllFloat"]) + ["U2"])
@pytest.mark.parametrize("swap", [True, False])
def test_byteswapping_and_unaligned(dtype, swap):
    data = ["x,1\n"]  # no need for complicated data
    dtype = np.dtype(dtype)
    if swap:
        dtype = dtype.newbyteorder()
    full_dt = np.dtype([("a", "S1"), ("b", dtype)], align=False)
    # The above ensures that the interesting "b" field is unaligned:
    assert full_dt.fields["b"][1] == 1
    res = np.loadtxt(data, dtype=full_dt, delimiter=",")
    assert res["b"] == dtype.type(1)


@pytest.mark.parametrize("dtype",
        np.typecodes["AllInteger"] + "efdFD" + "?")
def test_unicode_whitespace_stripping(dtype):
    # Test that all numeric types (and bool) strip whitespace correctly
    # \u202F is a narrow no-break space, `\n` is just a whitespace if quoted.
    # Currently, skip float128 as it did not always support this and has no
    # "custom" parsing:
    txt = StringIO(' 3 ,"\u202F2\n"')
    res = np.loadtxt(txt, dtype=dtype, delimiter=",", quotechar='"')
    assert_array_equal(res, np.array([3, 2]).astype(dtype))


@pytest.mark.parametrize("dtype", "FD")
def test_unicode_whitespace_stripping_complex(dtype):
    # Complex has a few extra cases since it has two components and parentheses
    line = " 1 , 2+3j , ( 4+5j ), ( 6+-7j )  , 8j , ( 9j ) \n"
    data = [line, line.replace(" ", "\u202F")]
    res = np.loadtxt(data, dtype=dtype, delimiter=',')
    assert_array_equal(res, np.array([[1, 2+3j, 4+5j, 6-7j, 8j, 9j]] * 2))


@pytest.mark.parametrize("dtype", "FD")
@pytest.mark.parametrize("field",
        ["1 +2j", "1+ 2j", "1+2 j", "1+-+3", "(1j", "(1", "(1+2j", "1+2j)"])
def test_bad_complex(dtype, field):
    # Malformed complex tokens must be rejected.
    with pytest.raises(ValueError):
        np.loadtxt([field + "\n"], dtype=dtype, delimiter=",")


@pytest.mark.parametrize("dtype",
        np.typecodes["AllInteger"] + "efgdFDG" + "?")
def test_nul_character_error(dtype):
    # Test that a \0 character is correctly recognized as an error even if
    # what comes before is valid (not everything gets parsed internally).
    if dtype.lower() == "g":
        pytest.xfail("longdouble/clongdouble assignment may misbehave.")
    with pytest.raises(ValueError):
        np.loadtxt(["1\000"], dtype=dtype, delimiter=",", quotechar='"')


@pytest.mark.parametrize("dtype",
        np.typecodes["AllInteger"] + "efgdFDG" + "?")
def test_no_thousands_support(dtype):
    # Mainly to document behaviour, Python supports thousands like 1_1.
    # (e and G may end up using different conversion and support it, this is
    # a bug but happens...)
    if dtype == "e":
        pytest.skip("half assignment currently uses Python float converter")
    if dtype in "eG":
        pytest.xfail("clongdouble assignment is buggy (uses `complex` always).")

    assert int("1_1") == float("1_1") == complex("1_1") == 11
    with pytest.raises(ValueError):
        np.loadtxt(["1_1\n"], dtype=dtype)


@pytest.mark.parametrize("data", [
    ["1,2\n", "2\n,3\n"],
    ["1,2\n", "2\r,3\n"]])
def test_bad_newline_in_iterator(data):
    # In NumPy <=1.22 this was accepted, because newlines were completely
    # ignored when the input was an iterable.  This could be changed, but right
    # now, we raise an error.
    with pytest.raises(ValueError,
            match="Found an unquoted embedded newline within a single line"):
        np.loadtxt(data, delimiter=",")


@pytest.mark.parametrize("data", [
    ["1,2\n", "2,3\r\n"],  # a universal newline
    ["1,2\n", "'2\n',3\n"],  # a quoted newline
    ["1,2\n", "'2\r',3\n"],
    ["1,2\n", "'2\r\n',3\n"],
])
def test_good_newline_in_iterator(data):
    # The quoted newlines will be untransformed here, but are just whitespace.
    res = np.loadtxt(data, delimiter=",", quotechar="'")
    assert_array_equal(res, [[1., 2.], [2., 3.]])


@pytest.mark.parametrize("newline", ["\n", "\r", "\r\n"])
def test_universal_newlines_quoted(newline):
    # Check that universal newline support within the tokenizer is not applied
    # to quoted fields.  (note that lines must end in newline or quoted
    # fields will not include a newline at all)
    data = ['1,"2\n"\n', '3,"4\n', '1"\n']
    data = [row.replace("\n", newline) for row in data]
    res = np.loadtxt(data, dtype=object, delimiter=",", quotechar='"')
    assert_array_equal(res, [['1', f'2{newline}'], ['3', f'4{newline}1']])


def test_null_character():
    # Basic tests to check that the NUL character is not special:
    res = np.loadtxt(["1\0002\0003\n", "4\0005\0006"], delimiter="\000")
    assert_array_equal(res, [[1, 2, 3], [4, 5, 6]])

    # Also not as part of a field (avoid unicode/arrays as unicode strips \0)
    res = np.loadtxt(["1\000,2\000,3\n", "4\000,5\000,6"],
                     delimiter=",", dtype=object)
    assert res.tolist() == [["1\000", "2\000", "3"], ["4\000", "5\000", "6"]]


def test_iterator_fails_getting_next_line():
    # An exception raised by the input sequence must propagate unchanged.
    class BadSequence:
        def __len__(self):
            return 100

        def __getitem__(self, item):
            if item == 50:
                raise RuntimeError("Bad things happened!")
            return f"{item}, {item+1}"

    with pytest.raises(RuntimeError, match="Bad things happened!"):
        np.loadtxt(BadSequence(), dtype=int, delimiter=",")


class TestCReaderUnitTests:
    # These are internal tests for path that should not be possible to hit
    # unless things go very very wrong somewhere.
    def test_not_an_filelike(self):
        with pytest.raises(AttributeError, match=".*read"):
            np.core._multiarray_umath._load_from_filelike(
                object(), dtype=np.dtype("i"), filelike=True)

    def test_filelike_read_fails(self):
        # Can only be reached if loadtxt opens the file, so it is hard to do
        # via the public interface (although maybe not impossible considering
        # the current "DataClass" backing).
        class BadFileLike:
            counter = 0

            def read(self, size):
                self.counter += 1
                if self.counter > 20:
                    raise RuntimeError("Bad bad bad!")
                return "1,2,3\n"

        with pytest.raises(RuntimeError, match="Bad bad bad!"):
            np.core._multiarray_umath._load_from_filelike(
                BadFileLike(), dtype=np.dtype("i"), filelike=True)

    def test_filelike_bad_read(self):
        # Can only be reached if loadtxt opens the file, so it is hard to do
        # via the public interface (although maybe not impossible considering
        # the current "DataClass" backing).
        class BadFileLike:
            counter = 0

            def read(self, size):
                return 1234  # not a string!

        with pytest.raises(TypeError,
                match="non-string returned while reading data"):
            np.core._multiarray_umath._load_from_filelike(
                BadFileLike(), dtype=np.dtype("i"), filelike=True)

    def test_not_an_iter(self):
        with pytest.raises(TypeError,
                match="error reading from object, expected an iterable"):
            np.core._multiarray_umath._load_from_filelike(
                object(), dtype=np.dtype("i"), filelike=False)

    def test_bad_type(self):
        with pytest.raises(TypeError, match="internal error: dtype must"):
            np.core._multiarray_umath._load_from_filelike(
                object(), dtype="i", filelike=False)

    def test_bad_encoding(self):
        with pytest.raises(TypeError, match="encoding must be a unicode"):
            np.core._multiarray_umath._load_from_filelike(
                object(), dtype=np.dtype("i"), filelike=False, encoding=123)

    @pytest.mark.parametrize("newline", ["\r", "\n", "\r\n"])
    def test_manual_universal_newlines(self, newline):
        # This is currently not available to users, because we should always
        # open files with universal newlines enabled `newlines=None`.
        # (And reading from an iterator uses slightly different code paths.)
        # We have no real support for `newline="\r"` or `newline="\n" as the
        # user cannot specify those options.
        data = StringIO('0\n1\n"2\n"\n3\n4 #\n'.replace("\n", newline),
                        newline="")

        res = np.core._multiarray_umath._load_from_filelike(
            data, dtype=np.dtype("U10"), filelike=True,
            quote='"', comment="#", skiplines=1)
        assert_array_equal(res[:, 0], ["1", f"2{newline}", "3", "4 "])