Move loadtxt bytes/str detection much earlier.

author: Antony Lee <anntzer.lee@gmail.com> 2021-08-06 20:15:37 +0200
committer: Antony Lee <anntzer.lee@gmail.com> 2021-08-06 20:15:37 +0200
commit: 3affc1738f656445493b8976a489f562a97cfb0c (patch)
tree: 9988922f82b0901fc629cbbf684dd055e6854d0a /numpy/lib/npyio.py
parent: 6b681d938df505c9a669a1892186b378085b7ee8 (diff)
download: numpy-3affc1738f656445493b8976a489f562a97cfb0c.tar.gz
1 files changed, 21 insertions, 14 deletions
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index 98b4eafe2..032e829b0 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -1073,11 +1073,24 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
         if _is_string_like(fname):
             fh = np.lib._datasource.open(fname, 'rt', encoding=encoding)
             fencoding = getattr(fh, 'encoding', 'latin1')
-            fh = iter(fh)
+            line_iter = iter(fh)
             fown = True
         else:
-            fh = iter(fname)
+            line_iter = iter(fname)
             fencoding = getattr(fname, 'encoding', 'latin1')
+            try:
+                first_line = next(line_iter)
+            except StopIteration:
+                pass  # Nothing matters if line_iter is empty.
+            else:
+                # Put first_line back.
+                line_iter = itertools.chain([first_line], line_iter)
+                if isinstance(first_line, bytes):
+                    # Using latin1 matches _decode_line's behavior.
+                    decoder = methodcaller(
+                        "decode",
+                        encoding if encoding is not None else "latin1")
+                    line_iter = map(decoder, line_iter)
     except TypeError as e:
         raise ValueError(
             f"fname must be a string, filehandle, list of strings,\n"
@@ -1096,28 +1109,22 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
     try:
         # Skip the first `skiprows` lines
         for i in range(skiprows):
-            next(fh)
+            next(line_iter)
 
         # Read until we find a line with some values, and use it to determine
         # the need for decoding and estimate the number of columns.
-        for first_line in fh:
-            ncols = len(usecols
-                        or split_line(_decode_line(first_line, encoding)))
+        for first_line in line_iter:
+            ncols = len(usecols or split_line(first_line))
             if ncols:
+                # Put first_line back.
+                line_iter = itertools.chain([first_line], line_iter)
                 break
         else:  # End of lines reached
-            first_line = ''
             ncols = len(usecols or [])
             warnings.warn('loadtxt: Empty input file: "%s"' % fname,
                           stacklevel=2)
 
-        line_iter = itertools.islice(
-            itertools.chain([first_line], fh), max_rows)
-        if isinstance(first_line, bytes):
-            decoder = methodcaller(  # latin1 matches _decode_line's behavior.
-                "decode", encoding if encoding is not None else "latin1")
-            line_iter = map(decoder, line_iter)
-
+        line_iter = itertools.islice(line_iter, max_rows)
         lineno_words_iter = filter(
             itemgetter(1),  # item[1] is words; filter skips empty lines.
             enumerate(map(split_line, line_iter), 1 + skiprows))
author	Antony Lee <anntzer.lee@gmail.com>	2021-08-06 20:15:37 +0200
committer	Antony Lee <anntzer.lee@gmail.com>	2021-08-06 20:15:37 +0200
commit	3affc1738f656445493b8976a489f562a97cfb0c (patch)
tree	9988922f82b0901fc629cbbf684dd055e6854d0a /numpy/lib/npyio.py
parent	6b681d938df505c9a669a1892186b378085b7ee8 (diff)
download	numpy-3affc1738f656445493b8976a489f562a97cfb0c.tar.gz