| field | value | date |
|---|---|---|
| author | Matti Picus <matti.picus@gmail.com> | 2020-06-18 17:06:03 +0300 |
| committer | GitHub <noreply@github.com> | 2020-06-18 17:06:03 +0300 |
| commit | b3f7132e0bf378eaf21af9194f447036b66c70b9 (patch) | |
| tree | 660da8be5e87f2308f1c594c03b056ed0a95e8aa | |
| parent | cdf83879fd0d050cade0fe7852550fcbcc177648 (diff) | |
| parent | 431f0ef3800a5be5df24c3adeb84251ff144faf4 (diff) | |
Merge pull request #16633 from WarrenWeckesser/maint-loadtxt
MAINT: lib: Some code clean up in loadtxt
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | numpy/lib/npyio.py | 158 |
1 file changed, 87 insertions(+), 71 deletions(-)
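Before the diff itself, a brief orientation: the cleanup mainly moves loadtxt's argument validation and setup (the `ndmin` check, the int-or-sequence handling of `usecols`, the dtype/converter preparation) ahead of the file handling. The sketch below is not part of the patch; it only illustrates the user-visible behaviour with made-up data and a hypothetical path, assuming the reordered `ndmin` check runs before any I/O, as the diff indicates.

```python
import numpy as np
from io import StringIO

# usecols accepts a single int or a sequence of ints; the patch only moves
# this validation earlier, the behaviour itself is unchanged.
data = StringIO("1 2\n3 4\n")
second_col = np.loadtxt(data, usecols=1)        # array([2., 4.])

data.seek(0)
both_cols = np.loadtxt(data, usecols=(0, 1))    # array([[1., 2.], [3., 4.]])

# With the reordering, the ndmin check runs before the input is opened, so
# an illegal value fails fast ('missing.txt' is a hypothetical path that
# never needs to exist for this error to surface):
try:
    np.loadtxt("missing.txt", ndmin=3)
except ValueError as exc:
    print(exc)    # Illegal value of ndmin keyword: 3
```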
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index f5a548433..6d6222d3e 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -784,6 +784,7 @@ def _getconv(dtype):
     else:
         return asstr
 
+# amount of lines loadtxt reads in one chunk, can be overridden for testing
 _loadtxt_chunksize = 50000
 
 
@@ -914,68 +915,10 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
            [ 19.22,  64.31],
            [-17.57,  63.94]])
     """
-    # Type conversions for Py3 convenience
-    if comments is not None:
-        if isinstance(comments, (str, bytes)):
-            comments = [comments]
-        comments = [_decode_line(x) for x in comments]
-        # Compile regex for comments beforehand
-        comments = (re.escape(comment) for comment in comments)
-        regex_comments = re.compile('|'.join(comments))
-
-    if delimiter is not None:
-        delimiter = _decode_line(delimiter)
-
-    user_converters = converters
-
-    if encoding == 'bytes':
-        encoding = None
-        byte_converters = True
-    else:
-        byte_converters = False
-
-    if usecols is not None:
-        # Allow usecols to be a single int or a sequence of ints
-        try:
-            usecols_as_list = list(usecols)
-        except TypeError:
-            usecols_as_list = [usecols]
-        for col_idx in usecols_as_list:
-            try:
-                opindex(col_idx)
-            except TypeError as e:
-                e.args = (
-                    "usecols must be an int or a sequence of ints but "
-                    "it contains at least one element of type %s" %
-                    type(col_idx),
-                    )
-                raise
-        # Fall back to existing code
-        usecols = usecols_as_list
-
-    fown = False
-    try:
-        if isinstance(fname, os_PathLike):
-            fname = os_fspath(fname)
-        if _is_string_like(fname):
-            fh = np.lib._datasource.open(fname, 'rt', encoding=encoding)
-            fencoding = getattr(fh, 'encoding', 'latin1')
-            fh = iter(fh)
-            fown = True
-        else:
-            fh = iter(fname)
-            fencoding = getattr(fname, 'encoding', 'latin1')
-    except TypeError:
-        raise ValueError('fname must be a string, file handle, or generator')
-
-    # input may be a python2 io stream
-    if encoding is not None:
-        fencoding = encoding
-    # we must assume local encoding
-    # TODO emit portability warning?
-    elif fencoding is None:
-        import locale
-        fencoding = locale.getpreferredencoding()
+
+    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+    # Nested functions used by loadtxt.
+    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 
     # not to be confused with the flatten_dtype we import...
     @recursive
@@ -1075,11 +1018,84 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
         if X:
             yield X
 
+    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+    # Main body of loadtxt.
+    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+    # Check correctness of the values of `ndmin`
+    if ndmin not in [0, 1, 2]:
+        raise ValueError('Illegal value of ndmin keyword: %s' % ndmin)
+
+    # Type conversions for Py3 convenience
+    if comments is not None:
+        if isinstance(comments, (str, bytes)):
+            comments = [comments]
+        comments = [_decode_line(x) for x in comments]
+        # Compile regex for comments beforehand
+        comments = (re.escape(comment) for comment in comments)
+        regex_comments = re.compile('|'.join(comments))
+
+    if delimiter is not None:
+        delimiter = _decode_line(delimiter)
+
+    user_converters = converters
+
+    if encoding == 'bytes':
+        encoding = None
+        byte_converters = True
+    else:
+        byte_converters = False
+
+    if usecols is not None:
+        # Allow usecols to be a single int or a sequence of ints
+        try:
+            usecols_as_list = list(usecols)
+        except TypeError:
+            usecols_as_list = [usecols]
+        for col_idx in usecols_as_list:
+            try:
+                opindex(col_idx)
+            except TypeError as e:
+                e.args = (
+                    "usecols must be an int or a sequence of ints but "
+                    "it contains at least one element of type %s" %
+                    type(col_idx),
+                    )
+                raise
+        # Fall back to existing code
+        usecols = usecols_as_list
+
+    # Make sure we're dealing with a proper dtype
+    dtype = np.dtype(dtype)
+    defconv = _getconv(dtype)
+
+    dtype_types, packing = flatten_dtype_internal(dtype)
+
+    fown = False
     try:
-        # Make sure we're dealing with a proper dtype
-        dtype = np.dtype(dtype)
-        defconv = _getconv(dtype)
-
+        if isinstance(fname, os_PathLike):
+            fname = os_fspath(fname)
+        if _is_string_like(fname):
+            fh = np.lib._datasource.open(fname, 'rt', encoding=encoding)
+            fencoding = getattr(fh, 'encoding', 'latin1')
+            fh = iter(fh)
+            fown = True
+        else:
+            fh = iter(fname)
+            fencoding = getattr(fname, 'encoding', 'latin1')
+    except TypeError:
+        raise ValueError('fname must be a string, file handle, or generator')
+
+    # input may be a python2 io stream
+    if encoding is not None:
+        fencoding = encoding
+    # we must assume local encoding
+    # TODO emit portability warning?
+    elif fencoding is None:
+        import locale
+        fencoding = locale.getpreferredencoding()
+
+    try:
         # Skip the first `skiprows` lines
         for i in range(skiprows):
             next(fh)
@@ -1095,10 +1111,12 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
             # End of lines reached
             first_line = ''
             first_vals = []
-            warnings.warn('loadtxt: Empty input file: "%s"' % fname, stacklevel=2)
+            warnings.warn('loadtxt: Empty input file: "%s"' % fname,
+                          stacklevel=2)
         N = len(usecols or first_vals)
 
-        dtype_types, packing = flatten_dtype_internal(dtype)
+        # Now that we know N, create the default converters list, and
+        # set packing, if necessary.
         if len(dtype_types) > 1:
             # We're dealing with a structured array, each field of
             # the dtype matches a column
@@ -1118,8 +1136,9 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
                 # Unused converter specified
                 continue
             if byte_converters:
-                # converters may use decode to workaround numpy's old behaviour,
-                # so encode the string again before passing to the user converter
+                # converters may use decode to workaround numpy's old
+                # behaviour, so encode the string again before passing to
+                # the user converter
                 def tobytes_first(x, conv):
                     if type(x) is bytes:
                         return conv(x)
@@ -1158,9 +1177,6 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
         X.shape = (1, -1)
 
     # Verify that the array has at least dimensions `ndmin`.
-    # Check correctness of the values of `ndmin`
-    if ndmin not in [0, 1, 2]:
-        raise ValueError('Illegal value of ndmin keyword: %s' % ndmin)
    # Tweak the size and shape of the arrays - remove extraneous dimensions
     if X.ndim > ndmin:
         X = np.squeeze(X)
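The comment added in the first hunk notes that `_loadtxt_chunksize`, the number of lines loadtxt reads per chunk, can be overridden for testing. A minimal test sketch along those lines, assuming the constant is looked up from `numpy.lib.npyio` at call time (the 10-row input and chunk size of 2 are arbitrary choices, not taken from the patch):

```python
import numpy as np
from io import StringIO

# Shrink the chunk size so even a tiny input exercises the multi-chunk
# read_data() path, then restore the original value.
saved = np.lib.npyio._loadtxt_chunksize
np.lib.npyio._loadtxt_chunksize = 2
try:
    text = StringIO("\n".join("%d %d" % (i, 2 * i) for i in range(10)))
    out = np.loadtxt(text)
    assert out.shape == (10, 2)
finally:
    np.lib.npyio._loadtxt_chunksize = saved
```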
