| field | value | date |
|---|---|---|
| author | Matti Picus <matti.picus@gmail.com> | 2020-06-18 17:06:03 +0300 |
| committer | GitHub <noreply@github.com> | 2020-06-18 17:06:03 +0300 |
| commit | b3f7132e0bf378eaf21af9194f447036b66c70b9 (patch) | |
| tree | 660da8be5e87f2308f1c594c03b056ed0a95e8aa | |
| parent | cdf83879fd0d050cade0fe7852550fcbcc177648 (diff) | |
| parent | 431f0ef3800a5be5df24c3adeb84251ff144faf4 (diff) | |
Merge pull request #16633 from WarrenWeckesser/maint-loadtxt
MAINT: lib: Some code clean up in loadtxt
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | numpy/lib/npyio.py | 158 |
1 file changed, 87 insertions(+), 71 deletions(-)
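Before the diff itself, a brief orientation: the cleanup mainly moves loadtxt's argument validation and setup (the `ndmin` check, the int-or-sequence handling of `usecols`, the dtype/converter preparation) ahead of the file handling. The sketch below is not part of the patch; it only illustrates the user-visible behaviour with made-up data and a hypothetical path, assuming the reordered `ndmin` check runs before any I/O, as the diff indicates.

```python
import numpy as np
from io import StringIO

# usecols accepts a single int or a sequence of ints; the patch only moves
# this validation earlier, the behaviour itself is unchanged.
data = StringIO("1 2\n3 4\n")
second_col = np.loadtxt(data, usecols=1)        # array([2., 4.])

data.seek(0)
both_cols = np.loadtxt(data, usecols=(0, 1))    # array([[1., 2.], [3., 4.]])

# With the reordering, the ndmin check runs before the input is opened, so
# an illegal value fails fast ('missing.txt' is a hypothetical path that
# never needs to exist for this error to surface):
try:
    np.loadtxt("missing.txt", ndmin=3)
except ValueError as exc:
    print(exc)    # Illegal value of ndmin keyword: 3
```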
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index f5a548433..6d6222d3e 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -784,6 +784,7 @@ def _getconv(dtype):
     else:
         return asstr
 
+# amount of lines loadtxt reads in one chunk, can be overridden for testing
 _loadtxt_chunksize = 50000
 
 
@@ -914,68 +915,10 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
            [ 19.22,  64.31],
            [-17.57,  63.94]])
     """
-    # Type conversions for Py3 convenience
-    if comments is not None:
-        if isinstance(comments, (str, bytes)):
-            comments = [comments]
-        comments = [_decode_line(x) for x in comments]
-        # Compile regex for comments beforehand
-        comments = (re.escape(comment) for comment in comments)
-        regex_comments = re.compile('|'.join(comments))
-
-    if delimiter is not None:
-        delimiter = _decode_line(delimiter)
-
-    user_converters = converters
-
-    if encoding == 'bytes':
-        encoding = None
-        byte_converters = True
-    else:
-        byte_converters = False
-
-    if usecols is not None:
-        # Allow usecols to be a single int or a sequence of ints
-        try:
-            usecols_as_list = list(usecols)
-        except TypeError:
-            usecols_as_list = [usecols]
-        for col_idx in usecols_as_list:
-            try:
-                opindex(col_idx)
-            except TypeError as e:
-                e.args = (
-                    "usecols must be an int or a sequence of ints but "
-                    "it contains at least one element of type %s" %
-                    type(col_idx),
-                    )
-                raise
-        # Fall back to existing code
-        usecols = usecols_as_list
-
-    fown = False
-    try:
-        if isinstance(fname, os_PathLike):
-            fname = os_fspath(fname)
-        if _is_string_like(fname):
-            fh = np.lib._datasource.open(fname, 'rt', encoding=encoding)
-            fencoding = getattr(fh, 'encoding', 'latin1')
-            fh = iter(fh)
-            fown = True
-        else:
-            fh = iter(fname)
-            fencoding = getattr(fname, 'encoding', 'latin1')
-    except TypeError:
-        raise ValueError('fname must be a string, file handle, or generator')
-
-    # input may be a python2 io stream
-    if encoding is not None:
-        fencoding = encoding
-    # we must assume local encoding
-    # TODO emit portability warning?
-    elif fencoding is None:
-        import locale
-        fencoding = locale.getpreferredencoding()
+
+    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+    # Nested functions used by loadtxt.
+    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 
     # not to be confused with the flatten_dtype we import...
     @recursive
@@ -1075,11 +1018,84 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
         if X:
             yield X
 
+    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+    # Main body of loadtxt.
+    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+    # Check correctness of the values of `ndmin`
+    if ndmin not in [0, 1, 2]:
+        raise ValueError('Illegal value of ndmin keyword: %s' % ndmin)
+
+    # Type conversions for Py3 convenience
+    if comments is not None:
+        if isinstance(comments, (str, bytes)):
+            comments = [comments]
+        comments = [_decode_line(x) for x in comments]
+        # Compile regex for comments beforehand
+        comments = (re.escape(comment) for comment in comments)
+        regex_comments = re.compile('|'.join(comments))
+
+    if delimiter is not None:
+        delimiter = _decode_line(delimiter)
+
+    user_converters = converters
+
+    if encoding == 'bytes':
+        encoding = None
+        byte_converters = True
+    else:
+        byte_converters = False
+
+    if usecols is not None:
+        # Allow usecols to be a single int or a sequence of ints
+        try:
+            usecols_as_list = list(usecols)
+        except TypeError:
+            usecols_as_list = [usecols]
+        for col_idx in usecols_as_list:
+            try:
+                opindex(col_idx)
+            except TypeError as e:
+                e.args = (
+                    "usecols must be an int or a sequence of ints but "
+                    "it contains at least one element of type %s" %
+                    type(col_idx),
+                    )
+                raise
+        # Fall back to existing code
+        usecols = usecols_as_list
+
+    # Make sure we're dealing with a proper dtype
+    dtype = np.dtype(dtype)
+    defconv = _getconv(dtype)
+
+    dtype_types, packing = flatten_dtype_internal(dtype)
+
+    fown = False
     try:
-        # Make sure we're dealing with a proper dtype
-        dtype = np.dtype(dtype)
-        defconv = _getconv(dtype)
-
+        if isinstance(fname, os_PathLike):
+            fname = os_fspath(fname)
+        if _is_string_like(fname):
+            fh = np.lib._datasource.open(fname, 'rt', encoding=encoding)
+            fencoding = getattr(fh, 'encoding', 'latin1')
+            fh = iter(fh)
+            fown = True
+        else:
+            fh = iter(fname)
+            fencoding = getattr(fname, 'encoding', 'latin1')
+    except TypeError:
+        raise ValueError('fname must be a string, file handle, or generator')
+
+    # input may be a python2 io stream
+    if encoding is not None:
+        fencoding = encoding
+    # we must assume local encoding
+    # TODO emit portability warning?
+    elif fencoding is None:
+        import locale
+        fencoding = locale.getpreferredencoding()
+
+    try:
         # Skip the first `skiprows` lines
         for i in range(skiprows):
             next(fh)
@@ -1095,10 +1111,12 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
             # End of lines reached
             first_line = ''
             first_vals = []
-            warnings.warn('loadtxt: Empty input file: "%s"' % fname, stacklevel=2)
+            warnings.warn('loadtxt: Empty input file: "%s"' % fname,
+                          stacklevel=2)
         N = len(usecols or first_vals)
 
-        dtype_types, packing = flatten_dtype_internal(dtype)
+        # Now that we know N, create the default converters list, and
+        # set packing, if necessary.
         if len(dtype_types) > 1:
             # We're dealing with a structured array, each field of
             # the dtype matches a column
@@ -1118,8 +1136,9 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
                 # Unused converter specified
                 continue
             if byte_converters:
-                # converters may use decode to workaround numpy's old behaviour,
-                # so encode the string again before passing to the user converter
+                # converters may use decode to workaround numpy's old
+                # behaviour, so encode the string again before passing to
+                # the user converter
                 def tobytes_first(x, conv):
                     if type(x) is bytes:
                         return conv(x)
@@ -1158,9 +1177,6 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
         X.shape = (1, -1)
 
     # Verify that the array has at least dimensions `ndmin`.
-    # Check correctness of the values of `ndmin`
-    if ndmin not in [0, 1, 2]:
-        raise ValueError('Illegal value of ndmin keyword: %s' % ndmin)
    # Tweak the size and shape of the arrays - remove extraneous dimensions
     if X.ndim > ndmin:
         X = np.squeeze(X)
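The comment added in the first hunk notes that `_loadtxt_chunksize`, the number of lines loadtxt reads per chunk, can be overridden for testing. A minimal test sketch along those lines, assuming the constant is looked up from `numpy.lib.npyio` at call time (the 10-row input and chunk size of 2 are arbitrary choices, not taken from the patch):

```python
import numpy as np
from io import StringIO

# Shrink the chunk size so even a tiny input exercises the multi-chunk
# read_data() path, then restore the original value.
saved = np.lib.npyio._loadtxt_chunksize
np.lib.npyio._loadtxt_chunksize = 2
try:
    text = StringIO("\n".join("%d %d" % (i, 2 * i) for i in range(10)))
    out = np.loadtxt(text)
    assert out.shape == (10, 2)
finally:
    np.lib.npyio._loadtxt_chunksize = saved
```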
