1 files changed, 34 insertions, 25 deletions
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index 02d68bc9e..959574594 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -797,22 +797,23 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
         The string used to separate values. For backwards compatibility, byte
         strings will be decoded as 'latin1'. The default is whitespace.
     converters : dict, optional
-        A dictionary mapping column number to a function that will convert
-        that column to a float.  E.g., if column 0 is a date string:
-        ``converters = {0: datestr2num}``.  Converters can also be used to
-        provide a default value for missing data (but see also `genfromtxt`):
-        ``converters = {3: lambda s: float(s.strip() or 0)}``.  Default: None.
+        A dictionary mapping column number to a function that will parse the
+        column string into the desired value.  E.g., if column 0 is a date
+        string: ``converters = {0: datestr2num}``.  Converters can also be
+        used to provide a default value for missing data (but see also
+        `genfromtxt`): ``converters = {3: lambda s: float(s.strip() or 0)}``.
+        Default: None.
     skiprows : int, optional
         Skip the first `skiprows` lines; default: 0.
     usecols : int or sequence, optional
         Which columns to read, with 0 being the first. For example,
-        usecols = (1,4,5) will extract the 2nd, 5th and 6th columns.
+        ``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns.
         The default, None, results in all columns being read.
 
         .. versionchanged:: 1.11.0
             When a single column has to be read it is possible to use
             an integer instead of a tuple. E.g ``usecols = 3`` reads the
-            fourth column the same way as `usecols = (3,)`` would.
+            fourth column the same way as ``usecols = (3,)`` would.
     unpack : bool, optional
         If True, the returned array is transposed, so that arguments may be
         unpacked using ``x, y, z = loadtxt(...)``.  When used with a structured
@@ -827,7 +828,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
         Encoding used to decode the inputfile. Does not apply to input streams.
         The special value 'bytes' enables backward compatibility workarounds
         that ensures you receive byte arrays as results if possible and passes
-        latin1 encoded strings to converters. Override this value to receive
+        'latin1' encoded strings to converters. Override this value to receive
         unicode arrays and pass strings as input to converters.  If set to None
         the system default is used. The default value is 'bytes'.
 
@@ -1108,6 +1109,11 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
     finally:
         if fown:
             fh.close()
+        # recursive closures have a cyclic reference to themselves, which
+        # requires gc to collect (gh-10620). To avoid this problem, for
+        # performance and PyPy friendliness, we break the cycle:
+        flatten_dtype_internal = None
+        pack_items = None
 
     if X is None:
         X = np.array([], dtype)
@@ -2049,7 +2055,6 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
         strcolidx = [i for (i, v) in enumerate(column_types)
                      if v == np.unicode_]
 
-        type_str = np.unicode_
         if byte_converters and strcolidx:
             # convert strings back to bytes for backward compatibility
             warnings.warn(
@@ -2065,33 +2070,37 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
 
             try:
                 data = [encode_unicode_cols(r) for r in data]
-                type_str = np.bytes_
             except UnicodeEncodeError:
                 pass
+            else:
+                for i in strcolidx:
+                    column_types[i] = np.bytes_
 
+        # Update string types to be the right length
+        sized_column_types = column_types[:]
+        for i, col_type in enumerate(column_types):
+            if np.issubdtype(col_type, np.character):
+                n_chars = max(len(row[i]) for row in data)
+                sized_column_types[i] = (col_type, n_chars)
 
-        # ... and take the largest number of chars.
-        for i in strcolidx:
-            max_line_length = max(len(row[i]) for row in data)
-            column_types[i] = np.dtype((type_str, max_line_length))
-        #
         if names is None:
-            # If the dtype is uniform, don't define names, else use ''
-            base = set([c.type for c in converters if c._checked])
+            # If the dtype is uniform (before sizing strings)
+            base = set([
+                c_type
+                for c, c_type in zip(converters, column_types)
+                if c._checked])
             if len(base) == 1:
-                if strcolidx:
-                    (ddtype, mdtype) = (type_str, bool)
-                else:
-                    (ddtype, mdtype) = (list(base)[0], bool)
+                uniform_type, = base
+                (ddtype, mdtype) = (uniform_type, bool)
             else:
                 ddtype = [(defaultfmt % i, dt)
-                          for (i, dt) in enumerate(column_types)]
+                          for (i, dt) in enumerate(sized_column_types)]
                 if usemask:
                     mdtype = [(defaultfmt % i, bool)
-                              for (i, dt) in enumerate(column_types)]
+                              for (i, dt) in enumerate(sized_column_types)]
         else:
-            ddtype = list(zip(names, column_types))
-            mdtype = list(zip(names, [bool] * len(column_types)))
+            ddtype = list(zip(names, sized_column_types))
+            mdtype = list(zip(names, [bool] * len(sized_column_types)))
         output = np.array(data, dtype=ddtype)
         if usemask:
             outputmask = np.array(masks, dtype=mdtype)