From 06c7142f45a27d3e2e4e9ad4a01715e3da11f67b Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Wed, 22 Mar 2017 16:24:28 +0000 Subject: MAINT: Rename function to prevent name-shadowing This file had two functions called flatten_dtype, which did similar but different things. --- numpy/lib/npyio.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 0dee6b333..54a37fbad 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -902,7 +902,8 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, raise ValueError('fname must be a string, file handle, or generator') X = [] - def flatten_dtype(dt): + # not to be confused with the flatten_dtype we import... + def flatten_dtype_internal(dt): """Unpack a structured data-type, and produce re-packing info.""" if dt.names is None: # If the dtype is flattened, return. @@ -922,7 +923,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, packing = [] for field in dt.names: tp, bytes = dt.fields[field] - flat_dt, flat_packing = flatten_dtype(tp) + flat_dt, flat_packing = flatten_dtype_internal(tp) types.extend(flat_dt) # Avoid extra nesting for subarrays if tp.ndim > 0: @@ -986,7 +987,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, warnings.warn('loadtxt: Empty input file: "%s"' % fname, stacklevel=2) N = len(usecols or first_vals) - dtype_types, packing = flatten_dtype(dtype) + dtype_types, packing = flatten_dtype_internal(dtype) if len(dtype_types) > 1: # We're dealing with a structured array, each field of # the dtype matches a column -- cgit v1.2.1 From b87fca27261f79be20ab06a222ed2330d60d9f2c Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sat, 25 Mar 2017 10:11:43 +0000 Subject: MAINT: Remove asbytes where a b prefix would suffice Since we only need to support python 2, we can remove any case where we just pass a single string literal and use the b prefix instead. 
What we can't do is transform asbytes("tests %d" % num), because %-formatting fails on bytes in python 3.x < 3.5. --- numpy/lib/npyio.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 54a37fbad..dc1c951e7 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -397,7 +397,7 @@ def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True, try: # Code to distinguish from NumPy binary files and pickles. - _ZIP_PREFIX = asbytes('PK\x03\x04') + _ZIP_PREFIX = b'PK\x03\x04' N = len(format.MAGIC_PREFIX) magic = fid.read(N) # If the file size is less than N, we need to make sure not @@ -856,7 +856,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, # Compile regex for comments beforehand comments = (re.escape(comment) for comment in comments) - regex_comments = re.compile(asbytes('|').join(comments)) + regex_comments = re.compile(b'|'.join(comments)) user_converters = converters if delimiter is not None: delimiter = asbytes(delimiter) @@ -958,7 +958,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, line = asbytes(line) if comments is not None: line = regex_comments.split(asbytes(line), maxsplit=1)[0] - line = line.strip(asbytes('\r\n')) + line = line.strip(b'\r\n') if line: return line.split(delimiter) else: @@ -1576,11 +1576,11 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, if names is True: if comments in first_line: first_line = ( - asbytes('').join(first_line.split(comments)[1:])) + b''.join(first_line.split(comments)[1:])) first_values = split_line(first_line) except StopIteration: # return an empty array if the datafile is empty - first_line = asbytes('') + first_line = b'' first_values = [] warnings.warn('genfromtxt: Empty input file: "%s"' % fname, stacklevel=2) @@ -1605,7 +1605,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, if names is True: names = 
validate_names([_bytes_to_name(_.strip()) for _ in first_values]) - first_line = asbytes('') + first_line = b'' elif _is_string_like(names): names = validate_names([_.strip() for _ in names.split(',')]) elif names: @@ -1644,7 +1644,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, user_missing_values = missing_values or () # Define the list of missing_values (one column: one list) - missing_values = [list([asbytes('')]) for _ in range(nbcols)] + missing_values = [list([b'']) for _ in range(nbcols)] # We have a dictionary: process it field by field if isinstance(user_missing_values, dict): @@ -1684,7 +1684,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, entry.append(value) # We have a string : apply it to all entries elif isinstance(user_missing_values, bytes): - user_value = user_missing_values.split(asbytes(",")) + user_value = user_missing_values.split(b",") for entry in missing_values: entry.extend(user_value) # We have something else: apply it to all entries @@ -1977,7 +1977,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, if usemask and names: for (name, conv) in zip(names or (), converters): missing_values = [conv(_) for _ in conv.missing_values - if _ != asbytes('')] + if _ != b''] for mval in missing_values: outputmask[name] |= (output[name] == mval) # Construct the final array -- cgit v1.2.1 From 1608e53072b035bd40de7a202e75354f0e802120 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sat, 3 Jun 2017 13:41:26 +0100 Subject: BUG: KeyboardInterrupt is swallowed all over the place Bare except is very rarely the right thing --- numpy/lib/npyio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index dc1c951e7..cb3b7534d 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -424,7 +424,7 @@ def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True, "non-pickled data") try: return 
pickle.load(fid, **pickle_kwargs) - except: + except Exception: raise IOError( "Failed to interpret file %s as a pickle" % repr(file)) finally: -- cgit v1.2.1 From c6533b6c386dc0f4009e5f3c5c545dde4d1b48a4 Mon Sep 17 00:00:00 2001 From: Jean Helie Date: Mon, 26 Jun 2017 14:01:15 +0100 Subject: MAINT: Fix alerts from http://lgtm.com (#9292) * make exception raising 2/3 compatible * remove unnecessary else statement after while loop without break clause * ensure file is always enclosed even in the event of an exception * ensure list comprehension variable does not override enclosing loop variable --- numpy/lib/npyio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index cb3b7534d..187a6722a 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -1014,7 +1014,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, if len(vals) == 0: continue if usecols: - vals = [vals[i] for i in usecols] + vals = [vals[j] for j in usecols] if len(vals) != N: line_num = i + skiprows + 1 raise ValueError("Wrong number of columns at line %d" -- cgit v1.2.1 From 2b781f8967488dc007f8f0a1e6a7f49208788d12 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Tue, 1 Aug 2017 20:29:36 +0000 Subject: MAINT/DOC: Use builtin when np.{x} is builtins.{x}. This is the case for x in {int, bool, str, float, complex, object}. Using the np.{x} version is deceptive as it suggests that there is a difference. This change doesn't affect any external behaviour. 
The `long` type is missing in python 3, so np.long is still useful --- numpy/lib/npyio.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 187a6722a..17b585ee5 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -737,7 +737,7 @@ def _getconv(dtype): return np.longdouble elif issubclass(typ, np.floating): return floatconv - elif issubclass(typ, np.complex): + elif issubclass(typ, complex): return lambda x: complex(asstr(x)) elif issubclass(typ, np.bytes_): return asbytes @@ -1902,16 +1902,16 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, # If the dtype is uniform, don't define names, else use '' base = set([c.type for c in converters if c._checked]) if len(base) == 1: - (ddtype, mdtype) = (list(base)[0], np.bool) + (ddtype, mdtype) = (list(base)[0], bool) else: ddtype = [(defaultfmt % i, dt) for (i, dt) in enumerate(column_types)] if usemask: - mdtype = [(defaultfmt % i, np.bool) + mdtype = [(defaultfmt % i, bool) for (i, dt) in enumerate(column_types)] else: ddtype = list(zip(names, column_types)) - mdtype = list(zip(names, [np.bool] * len(column_types))) + mdtype = list(zip(names, [bool] * len(column_types))) output = np.array(data, dtype=ddtype) if usemask: outputmask = np.array(masks, dtype=mdtype) @@ -1937,7 +1937,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, # Now, process the rowmasks the same way if usemask: rowmasks = np.array( - masks, dtype=np.dtype([('', np.bool) for t in dtype_flat])) + masks, dtype=np.dtype([('', bool) for t in dtype_flat])) # Construct the new dtype mdtype = make_mask_descr(dtype) outputmask = rowmasks.view(mdtype) @@ -1968,9 +1968,9 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, output = np.array(data, dtype) if usemask: if dtype.names: - mdtype = [(_, np.bool) for _ in dtype.names] + mdtype = [(_, bool) for _ in dtype.names] else: - mdtype = np.bool 
+ mdtype = bool outputmask = np.array(masks, dtype=mdtype) # Try to take care of the missing data we missed names = output.dtype.names -- cgit v1.2.1 From d8bf05c235e55f08324f1b7e156ef9277f25634c Mon Sep 17 00:00:00 2001 From: James Bourbeau Date: Wed, 30 Aug 2017 15:39:50 -0500 Subject: Updates order of parameters in save docstring to match function parameter order --- numpy/lib/npyio.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 17b585ee5..e7303fc65 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -443,6 +443,8 @@ def save(file, arr, allow_pickle=True, fix_imports=True): then the filename is unchanged. If file is a string or Path, a ``.npy`` extension will be appended to the file name if it does not already have one. + arr : array_like + Array data to be saved. allow_pickle : bool, optional Allow saving object arrays using Python pickles. Reasons for disallowing pickles include security (loading pickled data can execute arbitrary @@ -456,8 +458,6 @@ def save(file, arr, allow_pickle=True, fix_imports=True): pickled in a Python 2 compatible way. If `fix_imports` is True, pickle will try to map the new Python 3 names to the old module names used in Python 2, so that the pickle data stream is readable with Python 2. - arr : array_like - Array data to be saved. See Also -------- -- cgit v1.2.1 From 2f4a965019722c3c56f43433bfa4a99c4c083138 Mon Sep 17 00:00:00 2001 From: Nelle Varoquaux Date: Wed, 20 Sep 2017 15:17:39 -0700 Subject: ENH Better error message for savetxt when X.ndim > 2 savetxt does not support saving arrays of dimension 0 or higher than 2. This pull request improves the message of the error raised. 
--- numpy/lib/npyio.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index e7303fc65..7598b2c6b 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -1071,7 +1071,7 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', If the filename ends in ``.gz``, the file is automatically saved in compressed gzip format. `loadtxt` understands gzipped files transparently. - X : array_like + X : 1D or 2D array_like Data to be saved to a text file. fmt : str or sequence of strs, optional A single format (%10.5f), a sequence of formats, or a @@ -1201,7 +1201,10 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', X = np.asarray(X) # Handle 1-dimensional arrays - if X.ndim == 1: + if X.ndim == 0 or X.ndim > 2: + raise ValueError( + "Expected 1D or 2D array, got %dD array instead" % X.ndim) + elif X.ndim == 1: # Common case -- 1d array of numbers if X.dtype.names is None: X = np.atleast_2d(X).T -- cgit v1.2.1 From 52c1ef6ff7cfc697930f9bf4f1eebc59ee7f538e Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 15 Oct 2017 19:37:43 +0300 Subject: ENH: Save to ZIP files without using temporary files. Since Python 3.6 it is possible to write directly to a ZIP file, without creating temporary files. --- numpy/lib/npyio.py | 55 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 22 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 7598b2c6b..96355ebc8 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -661,8 +661,6 @@ def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None): # Import is postponed to here since zipfile depends on gzip, an optional # component of the so-called standard library. 
import zipfile - # Import deferred for startup time improvement - import tempfile if isinstance(file, basestring): if not file.endswith('.npz'): @@ -686,31 +684,44 @@ def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None): zipf = zipfile_factory(file, mode="w", compression=compression) - # Stage arrays in a temporary file on disk, before writing to zip. - - # Since target file might be big enough to exceed capacity of a global - # temporary directory, create temp file side-by-side with the target file. - file_dir, file_prefix = os.path.split(file) if _is_string_like(file) else (None, 'tmp') - fd, tmpfile = tempfile.mkstemp(prefix=file_prefix, dir=file_dir, suffix='-numpy.npy') - os.close(fd) - try: + if sys.version_info >= (3, 6): + # Since Python 3.6 it is possible to write directly to a ZIP file. for key, val in namedict.items(): fname = key + '.npy' - fid = open(tmpfile, 'wb') - try: - format.write_array(fid, np.asanyarray(val), + val = np.asanyarray(val) + force_zip64 = val.nbytes >= 2**30 + with zipf.open(fname, 'w', force_zip64=force_zip64) as fid: + format.write_array(fid, val, allow_pickle=allow_pickle, pickle_kwargs=pickle_kwargs) - fid.close() - fid = None - zipf.write(tmpfile, arcname=fname) - except IOError as exc: - raise IOError("Failed to write to %s: %s" % (tmpfile, exc)) - finally: - if fid: + else: + # Stage arrays in a temporary file on disk, before writing to zip. + + # Import deferred for startup time improvement + import tempfile + # Since target file might be big enough to exceed capacity of a global + # temporary directory, create temp file side-by-side with the target file. 
+ file_dir, file_prefix = os.path.split(file) if _is_string_like(file) else (None, 'tmp') + fd, tmpfile = tempfile.mkstemp(prefix=file_prefix, dir=file_dir, suffix='-numpy.npy') + os.close(fd) + try: + for key, val in namedict.items(): + fname = key + '.npy' + fid = open(tmpfile, 'wb') + try: + format.write_array(fid, np.asanyarray(val), + allow_pickle=allow_pickle, + pickle_kwargs=pickle_kwargs) fid.close() - finally: - os.remove(tmpfile) + fid = None + zipf.write(tmpfile, arcname=fname) + except IOError as exc: + raise IOError("Failed to write to %s: %s" % (tmpfile, exc)) + finally: + if fid: + fid.close() + finally: + os.remove(tmpfile) zipf.close() -- cgit v1.2.1 From 97d29b0dcb527eb756d7e62f91158db3f51f61a3 Mon Sep 17 00:00:00 2001 From: Andras Deak Date: Sat, 21 Oct 2017 01:58:46 +0200 Subject: DOC: Unindent enumeration in savetxt docstring The rendered markdown in the online documentation was broken due to the one-character indentation added in the multiline enumerations of the docstring of savetxt. --- numpy/lib/npyio.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 96355ebc8..d2b8fb4ab 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -1090,12 +1090,12 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', case `delimiter` is ignored. For complex `X`, the legal options for `fmt` are: a) a single specifier, `fmt='%.4e'`, resulting in numbers formatted - like `' (%s+%sj)' % (fmt, fmt)` + like `' (%s+%sj)' % (fmt, fmt)` b) a full string specifying every real and imaginary part, e.g. - `' %.4e %+.4ej %.4e %+.4ej %.4e %+.4ej'` for 3 columns + `' %.4e %+.4ej %.4e %+.4ej %.4e %+.4ej'` for 3 columns c) a list of specifiers, one per column - in this case, the real - and imaginary part must have separate specifiers, - e.g. 
`['%.3e + %.3ej', '(%.15e%+.15ej)']` for 2 columns + and imaginary part must have separate specifiers, + e.g. `['%.3e + %.3ej', '(%.15e%+.15ej)']` for 2 columns delimiter : str, optional String or character separating columns. newline : str, optional -- cgit v1.2.1 From 0efac01e2b9d9ce40e508f14a1a5d53fef62fbc8 Mon Sep 17 00:00:00 2001 From: David Freese Date: Tue, 24 Oct 2017 06:05:05 -0700 Subject: DOC: Clarify behavior of genfromtxt names field The documentation on the name parameter for npyio.genfromtxt uses the phrase "valid line" which doesn't completely describe its behavior. This updates the documentation on the names field to indicate the first line, with or without a comment delimiter, will be taken for the names field. fixes #9878 --- numpy/lib/npyio.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index d2b8fb4ab..6de5940d7 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -1418,11 +1418,12 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, Which columns to read, with 0 being the first. For example, ``usecols = (1, 4, 5)`` will extract the 2nd, 5th and 6th columns. names : {None, True, str, sequence}, optional - If `names` is True, the field names are read from the first valid line - after the first `skip_header` lines. - If `names` is a sequence or a single-string of comma-separated names, - the names will be used to define the field names in a structured dtype. - If `names` is None, the names of the dtype fields will be used, if any. + If `names` is True, the field names are read from the first line after + the first `skip_header` lines. This line can optionally be proceeded + by a comment delimeter. If `names` is a sequence or a single-string of + comma-separated names, the names will be used to define the field names + in a structured dtype. If `names` is None, the names of the dtype + fields will be used, if any. 
excludelist : sequence, optional A list of names to exclude. This list is appended to the default list ['return','file','print']. Excluded names are appended an underscore: -- cgit v1.2.1 From d8edc62e8c9e69280fb8a171c7678b2fea929696 Mon Sep 17 00:00:00 2001 From: Julian Taylor Date: Mon, 3 Apr 2017 14:20:36 +0200 Subject: ENH: Add encoding option to numpy text IO. This modifies loadtxt and genfromtxt in several ways intended to add unicode support for text files by adding an `encoding` keyword to np.loadtxt, np.genfromtxt, np.savetxt, and np.fromregex. The original treatment of the relevant files was to open them as byte files, whereas they are now opened as text files with an encoding. When read, they are decoded to unicode strings for Python3 compatibility, and when written, they are encoded as specified. For backward compatibility, the default encoding in both cases is latin1. --- numpy/lib/npyio.py | 346 +++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 246 insertions(+), 100 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 6de5940d7..fe2aa436b 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -1,5 +1,6 @@ from __future__ import division, absolute_import, print_function +import io import sys import os import re @@ -15,11 +16,12 @@ from numpy.core.multiarray import packbits, unpackbits from ._iotools import ( LineSplitter, NameValidator, StringConverter, ConverterError, ConverterLockError, ConversionWarning, _is_string_like, - has_nested_fields, flatten_dtype, easy_dtype, _bytes_to_name + has_nested_fields, flatten_dtype, easy_dtype, _decode_line ) from numpy.compat import ( - asbytes, asstr, asbytes_nested, bytes, basestring, unicode, is_pathlib_path + asbytes, asstr, asunicode, asbytes_nested, bytes, basestring, unicode, + is_pathlib_path ) if sys.version_info[0] >= 3: @@ -731,7 +733,7 @@ def _getconv(dtype): def floatconv(x): x.lower() - if b'0x' in x: + if '0x' in x: return 
float.fromhex(asstr(x)) return float(x) @@ -752,13 +754,17 @@ def _getconv(dtype): return lambda x: complex(asstr(x)) elif issubclass(typ, np.bytes_): return asbytes + elif issubclass(typ, np.unicode_): + return asunicode else: return asstr +# amount of lines loadtxt reads in one chunk, can be overriden for testing +_loadtxt_chunksize = 50000 def loadtxt(fname, dtype=float, comments='#', delimiter=None, converters=None, skiprows=0, usecols=None, unpack=False, - ndmin=0): + ndmin=0, encoding='bytes'): """ Load data from a text file. @@ -813,6 +819,15 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, Legal values: 0 (default), 1 or 2. .. versionadded:: 1.6.0 + encoding: string, optional + Encoding used to decode the inputfile. Does not apply to input streams. + The special value 'bytes' enables backward compatibility workarounds + that ensures you receive byte arrays as results if possible and passes + latin1 encoded strings to converters. Override this value to receive + unicode arrays and pass strings as input to converters. + If set to None the system default is used. + + .. 
versionadded:: 1.14.0 Returns ------- @@ -861,16 +876,20 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, # Type conversions for Py3 convenience if comments is not None: if isinstance(comments, (basestring, bytes)): - comments = [asbytes(comments)] - else: - comments = [asbytes(comment) for comment in comments] + comments = [comments] + + comments = [_decode_line(x) for x in comments] # Compile regex for comments beforehand comments = (re.escape(comment) for comment in comments) - regex_comments = re.compile(b'|'.join(comments)) + regex_comments = re.compile('|'.join(comments)) user_converters = converters - if delimiter is not None: - delimiter = asbytes(delimiter) + + if encoding == 'bytes': + encoding = None + byte_converters = True + else: + byte_converters = False if usecols is not None: # Allow usecols to be a single int or a sequence of ints @@ -896,22 +915,24 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, if is_pathlib_path(fname): fname = str(fname) if _is_string_like(fname): + fh = np.lib._datasource.open(fname, 'rt', encoding=encoding) + fencoding = getattr(fh, 'encoding', 'latin1') + fh = iter(fh) fown = True - if fname.endswith('.gz'): - import gzip - fh = iter(gzip.GzipFile(fname)) - elif fname.endswith('.bz2'): - import bz2 - fh = iter(bz2.BZ2File(fname)) - elif sys.version_info[0] == 2: - fh = iter(open(fname, 'U')) - else: - fh = iter(open(fname)) else: fh = iter(fname) + fencoding = getattr(fname, 'encoding', 'latin1') except TypeError: raise ValueError('fname must be a string, file handle, or generator') - X = [] + + # input may be a python2 io stream + if encoding is not None: + fencoding = encoding + # we must assume local encoding + # TOOD emit portability warning? + elif fencoding is None: + import locale + fencoding = locale.getpreferredencoding() # not to be confused with the flatten_dtype we import... 
def flatten_dtype_internal(dt): @@ -960,21 +981,43 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, return tuple(ret) def split_line(line): - """Chop off comments, strip, and split at delimiter. - - Note that although the file is opened as text, this function - returns bytes. + """Chop off comments, strip, and split at delimiter. """ + line = _decode_line(line, encoding=encoding) - """ - line = asbytes(line) if comments is not None: - line = regex_comments.split(asbytes(line), maxsplit=1)[0] - line = line.strip(b'\r\n') + line = regex_comments.split(line, maxsplit=1)[0] + line = line.strip('\r\n') if line: return line.split(delimiter) else: return [] + def read_data(chunk_size): + # Parse each line, including the first + X = [] + for i, line in enumerate(itertools.chain([first_line], fh)): + vals = split_line(line) + if len(vals) == 0: + continue + if usecols: + vals = [vals[j] for j in usecols] + if len(vals) != N: + line_num = i + skiprows + 1 + raise ValueError("Wrong number of columns at line %d" + % line_num) + + # Convert each value according to its column and store + items = [conv(val) for (conv, val) in zip(converters, vals)] + + # Then pack it according to the dtype's nesting + items = pack_items(items, packing) + X.append(items) + if len(X) > chunk_size: + yield X + X = [] + if X: + yield X + try: # Make sure we're dealing with a proper dtype dtype = np.dtype(dtype) @@ -1017,30 +1060,42 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, except ValueError: # Unused converter specified continue - converters[i] = conv - - # Parse each line, including the first - for i, line in enumerate(itertools.chain([first_line], fh)): - vals = split_line(line) - if len(vals) == 0: - continue - if usecols: - vals = [vals[j] for j in usecols] - if len(vals) != N: - line_num = i + skiprows + 1 - raise ValueError("Wrong number of columns at line %d" - % line_num) - - # Convert each value according to its column and store - items = [conv(val) for 
(conv, val) in zip(converters, vals)] - # Then pack it according to the dtype's nesting - items = pack_items(items, packing) - X.append(items) + if byte_converters: + # converters may use decode to workaround numpy's oldd behaviour, + # so encode the string again before passing to the user converter + def tobytes_first(x, conv): + if type(x) is bytes: + return conv(x) + return conv(x.encode("latin1")) + import functools + converters[i] = functools.partial(tobytes_first, conv=conv) + else: + converters[i] = conv + + converters = [conv if conv is not bytes else + lambda x: x.encode(fencoding) for conv in converters] + + # read data in chunks and fill it into an array via resize + # over-allocating and shrinking the array later may be faster but is + # probably not relevant compared to the cost of actually reading and + # converting the data + X = None + for x in read_data(_loadtxt_chunksize): + if X is None: + X = np.array(x, dtype) + else: + nshape = list(X.shape) + pos = nshape[0] + nshape[0] += len(x) + X.resize(nshape) + X[pos:, ...] = x finally: if fown: fh.close() - X = np.array(X, dtype) + if X is None: + X = np.array([], dtype) + # Multicolumn data are returned with shape (1, N, M), i.e. # (1, 1, M) for a single row - remove the singleton dimension there if X.ndim == 3 and X.shape[:2] == (1, 1): @@ -1072,7 +1127,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', - footer='', comments='# '): + footer='', comments='# ', encoding=None): """ Save an array to a text file. @@ -1116,6 +1171,11 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', ``numpy.loadtxt``. .. versionadded:: 1.7.0 + encoding: string, optional + Encoding used to encode the outputfile. Does not apply to output + streams. + + .. 
versionadded:: 1.14.0 See Also @@ -1190,21 +1250,51 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', fmt = asstr(fmt) delimiter = asstr(delimiter) + class WriteWrap(object): + """ convert to unicode in py2 or to bytes on bytestream inputs """ + def __init__(self, fh, encoding): + self.fh = fh + self.encoding = encoding + self.do_write = self.first_write + + def close(self): + self.fh.close() + + def write(self, v): + self.do_write(v) + + def write_bytes(self, v): + if isinstance(v, bytes): + self.fh.write(v) + else: + self.fh.write(v.encode(self.encoding)) + + def write_normal(self, v): + self.fh.write(asunicode(v)) + + def first_write(self, v): + try: + self.write_normal(v) + self.write = self.write_normal + except TypeError: + # input is probably a bytestream + self.write_bytes(v) + self.write = self.write_bytes + own_fh = False if is_pathlib_path(fname): fname = str(fname) if _is_string_like(fname): + # datasource doesn't support creating a new file ... + open(fname, 'wt').close() + fh = np.lib._datasource.open(fname, 'wt', encoding=encoding) own_fh = True - if fname.endswith('.gz'): - import gzip - fh = gzip.open(fname, 'wb') - else: - if sys.version_info[0] >= 3: - fh = open(fname, 'wb') - else: - fh = open(fname, 'w') + # need to convert str to unicode for text io output + if sys.version_info[0] == 2: + fh = WriteWrap(fh, encoding or 'latin1') elif hasattr(fname, 'write'): - fh = fname + # wrap to handle byte output streams + fh = WriteWrap(fname, encoding or 'latin1') else: raise ValueError('fname must be a string or file handle') @@ -1254,31 +1344,33 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', if len(header) > 0: header = header.replace('\n', '\n' + comments) - fh.write(asbytes(comments + header + newline)) + fh.write(comments + header + newline) if iscomplex_X: for row in X: row2 = [] for number in row: row2.append(number.real) row2.append(number.imag) - fh.write(asbytes(format % tuple(row2) + 
newline)) + fh.write(format % tuple(row2) + newline) else: for row in X: try: - fh.write(asbytes(format % tuple(row) + newline)) + v = format % tuple(row) + newline except TypeError: raise TypeError("Mismatch between array dtype ('%s') and " "format specifier ('%s')" % (str(X.dtype), format)) + fh.write(v) + if len(footer) > 0: footer = footer.replace('\n', '\n' + comments) - fh.write(asbytes(comments + footer + newline)) + fh.write(comments + footer + newline) finally: if own_fh: fh.close() -def fromregex(file, regexp, dtype): +def fromregex(file, regexp, dtype, encoding=None): """ Construct an array from a text file, using regular expression parsing. @@ -1295,6 +1387,10 @@ def fromregex(file, regexp, dtype): Groups in the regular expression correspond to fields in the dtype. dtype : dtype or list of dtypes Dtype for the structured array. + encoding: string, optional + Encoding used to decode the inputfile. Does not apply to input streams. + + .. versionadded:: 1.14.0 Returns ------- @@ -1335,16 +1431,22 @@ def fromregex(file, regexp, dtype): """ own_fh = False if not hasattr(file, "read"): - file = open(file, 'rb') + file = np.lib._datasource.open(file, 'rt', encoding=encoding) own_fh = True try: - if not hasattr(regexp, 'match'): - regexp = re.compile(asbytes(regexp)) if not isinstance(dtype, np.dtype): dtype = np.dtype(dtype) - seq = regexp.findall(file.read()) + content = file.read() + if isinstance(content, bytes) and not isinstance(regexp, bytes): + regexp = asbytes(regexp) + elif not isinstance(content, bytes) and isinstance(regexp, bytes): + regexp = asstr(regexp) + + if not hasattr(regexp, 'match'): + regexp = re.compile(regexp) + seq = regexp.findall(content) if seq and not isinstance(seq[0], tuple): # Only one group is in the regexp. 
# Create the new array as a single data-type and then @@ -1372,7 +1474,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, names=None, excludelist=None, deletechars=None, replace_space='_', autostrip=False, case_sensitive=True, defaultfmt="f%i", unpack=None, usemask=False, loose=True, - invalid_raise=True, max_rows=None): + invalid_raise=True, max_rows=None, encoding='bytes'): """ Load data from a text file, with missing values handled as specified. @@ -1460,6 +1562,15 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, to read the entire file. .. versionadded:: 1.10.0 + encoding: string, optional + Encoding used to decode the inputfile. Does not apply to input streams. + The special value 'bytes' enables backward compatibility workarounds + that ensures you receive byte arrays as results if possible and passes + latin1 encoded strings to converters. Override this value to receive + unicode arrays and pass strings as input to converters. + If set to None the system default is used. + + .. 
versionadded:: 1.14.0 Returns ------- @@ -1536,15 +1647,6 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, if max_rows < 1: raise ValueError("'max_rows' must be at least 1.") - # Py3 data conversions to bytes, for convenience - if comments is not None: - comments = asbytes(comments) - if isinstance(delimiter, unicode): - delimiter = asbytes(delimiter) - if isinstance(missing_values, (unicode, list, tuple)): - missing_values = asbytes_nested(missing_values) - - # if usemask: from numpy.ma import MaskedArray, make_mask_descr # Check the input dictionary of converters @@ -1554,16 +1656,19 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, "The input argument 'converter' should be a valid dictionary " "(got '%s' instead)" % type(user_converters)) + if encoding == 'bytes': + encoding = None + byte_converters = True + else: + byte_converters = False + # Initialize the filehandle, the LineSplitter and the NameValidator own_fhd = False try: if is_pathlib_path(fname): fname = str(fname) if isinstance(fname, basestring): - if sys.version_info[0] == 2: - fhd = iter(np.lib._datasource.open(fname, 'rbU')) - else: - fhd = iter(np.lib._datasource.open(fname, 'rb')) + fhd = iter(np.lib._datasource.open(fname, 'rt', encoding=encoding)) own_fhd = True else: fhd = iter(fname) @@ -1573,7 +1678,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, "or generator. Got %s instead." 
% type(fname)) split_line = LineSplitter(delimiter=delimiter, comments=comments, - autostrip=autostrip)._handyman + autostrip=autostrip, encoding=encoding) validate_names = NameValidator(excludelist=excludelist, deletechars=deletechars, case_sensitive=case_sensitive, @@ -1587,15 +1692,15 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, first_values = None try: while not first_values: - first_line = next(fhd) + first_line = _decode_line(next(fhd), encoding) if names is True: if comments in first_line: first_line = ( - b''.join(first_line.split(comments)[1:])) + ''.join(first_line.split(comments)[1:])) first_values = split_line(first_line) except StopIteration: # return an empty array if the datafile is empty - first_line = b'' + first_line = '' first_values = [] warnings.warn('genfromtxt: Empty input file: "%s"' % fname, stacklevel=2) @@ -1618,9 +1723,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, # Check the names and overwrite the dtype.names if needed if names is True: - names = validate_names([_bytes_to_name(_.strip()) - for _ in first_values]) - first_line = b'' + names = validate_names([str(_.strip()) for _ in first_values]) + first_line = '' elif _is_string_like(names): names = validate_names([_.strip() for _ in names.split(',')]) elif names: @@ -1657,9 +1761,11 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, # Process the missing values ............................... 
# Rename missing_values for convenience user_missing_values = missing_values or () + if isinstance(user_missing_values, bytes): + user_missing_values = user_missing_values.decode('latin1') # Define the list of missing_values (one column: one list) - missing_values = [list([b'']) for _ in range(nbcols)] + missing_values = [list(['']) for _ in range(nbcols)] # We have a dictionary: process it field by field if isinstance(user_missing_values, dict): @@ -1698,8 +1804,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, if value not in entry: entry.append(value) # We have a string : apply it to all entries - elif isinstance(user_missing_values, bytes): - user_value = user_missing_values.split(b",") + elif isinstance(user_missing_values, basestring): + user_value = user_missing_values.split(",") for entry in missing_values: entry.extend(user_value) # We have something else: apply it to all entries @@ -1787,11 +1893,24 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, testing_value = first_values[j] else: testing_value = None - converters[i].update(conv, locked=True, + if conv is bytes: + user_conv = asbytes + elif byte_converters: + # converters may use decode to workaround numpy's oldd behaviour, + # so encode the string again before passing to the user converter + def tobytes_first(x, conv): + if type(x) is bytes: + return conv(x) + return conv(x.encode("latin1")) + import functools + user_conv = functools.partial(tobytes_first, conv=conv) + else: + user_conv = conv + converters[i].update(user_conv, locked=True, testing_value=testing_value, default=filling_values[i], missing_values=missing_values[i],) - uc_update.append((i, conv)) + uc_update.append((i, user_conv)) # Make sure we have the corrected keys in user_converters... user_converters.update(uc_update) @@ -1908,16 +2027,43 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, column_types = [conv.type for conv in converters] # Find the columns with strings... 
strcolidx = [i for (i, v) in enumerate(column_types) - if v in (type('S'), np.string_)] + if v == np.unicode_] + + typestr = 'U' + if byte_converters and strcolidx: + # convert strings back to bytes for backward compatibility + warnings.warn( + "Reading strings without specifying the encoding argument is " + "deprecated. Set encoding, use None for the system default.", + np.VisibleDeprecationWarning, stacklevel=2) + try: + for j in range(len(data)): + row = list(data[j]) + for i in strcolidx: + row[i] = row[i].encode('latin1') + data[j] = tuple(row) + typestr = 'S' + except UnicodeEncodeError: + # we must use unicode, revert encoding + for k in range(0, j + 1): + row = list(data[k]) + for i in strcolidx: + if isinstance(row[i], bytes): + row[i] = row[i].decode('latin1') + data[k] = tuple(row) + # ... and take the largest number of chars. for i in strcolidx: - column_types[i] = "|S%i" % max(len(row[i]) for row in data) + column_types[i] = "|%s%i" % (typestr, max(len(row[i]) for row in data)) # if names is None: # If the dtype is uniform, don't define names, else use '' base = set([c.type for c in converters if c._checked]) if len(base) == 1: - (ddtype, mdtype) = (list(base)[0], bool) + if strcolidx: + (ddtype, mdtype) = (typestr, bool) + else: + (ddtype, mdtype) = (list(base)[0], bool) else: ddtype = [(defaultfmt % i, dt) for (i, dt) in enumerate(column_types)] @@ -1966,8 +2112,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, # Keep the dtype of the current converter if i in user_converters: ishomogeneous &= (ttype == dtype.type) - if ttype == np.string_: - ttype = "|S%i" % max(len(row[i]) for row in data) + if np.issubdtype(ttype, np.character): + ttype = (ttype, max(len(row[i]) for row in data)) descr.append(('', ttype)) else: descr.append(('', dtype)) @@ -1992,7 +2138,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, if usemask and names: for (name, conv) in zip(names or (), converters): missing_values = [conv(_) for _ in 
conv.missing_values - if _ != b''] + if _ != ''] for mval in missing_values: outputmask[name] |= (output[name] == mval) # Construct the final array -- cgit v1.2.1 From 55273d236945aa5f4b6e01682dfef82384a7fd65 Mon Sep 17 00:00:00 2001 From: Charles Harris Date: Sun, 19 Nov 2017 11:51:55 -0700 Subject: DOC: Add some docstrings and edit others. Add docstrings for some of the support functions in _datasource and npyio in order to aid future maintainers. [ci skip] --- numpy/lib/npyio.py | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index fe2aa436b..6b65834ed 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -296,7 +296,7 @@ def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True, used in Python 3. encoding : str, optional What encoding to use when reading Python 2 strings. Only useful when - loading Python 2 generated pickled files on Python 3, which includes + loading Python 2 generated pickled files in Python 3, which includes npy/npz files containing object arrays. Values other than 'latin1', 'ASCII', and 'bytes' are not allowed, as they can corrupt numerical data. Default: 'ASCII' @@ -819,13 +819,13 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, Legal values: 0 (default), 1 or 2. .. versionadded:: 1.6.0 - encoding: string, optional + encoding : str, optional Encoding used to decode the inputfile. Does not apply to input streams. The special value 'bytes' enables backward compatibility workarounds that ensures you receive byte arrays as results if possible and passes latin1 encoded strings to converters. Override this value to receive - unicode arrays and pass strings as input to converters. - If set to None the system default is used. + unicode arrays and pass strings as input to converters. If set to None + the system default is used. The default value is 'bytes'. .. 
versionadded:: 1.14.0 @@ -993,7 +993,17 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, return [] def read_data(chunk_size): - # Parse each line, including the first + """Parse each line, including the first. + + The file read, `fh`, is a global defined above. + + Parameters + ---------- + chunk_size : int + At most `chunk_size` lines are read at a time, with iteration + until all lines are read. + + """ X = [] for i, line in enumerate(itertools.chain([first_line], fh)): vals = split_line(line) @@ -1171,7 +1181,7 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', ``numpy.loadtxt``. .. versionadded:: 1.7.0 - encoding: string, optional + encoding : str, optional Encoding used to encode the outputfile. Does not apply to output streams. @@ -1251,7 +1261,9 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', delimiter = asstr(delimiter) class WriteWrap(object): - """ convert to unicode in py2 or to bytes on bytestream inputs """ + """Convert to unicode in py2 or to bytes on bytestream inputs. + + """ def __init__(self, fh, encoding): self.fh = fh self.encoding = encoding @@ -1387,7 +1399,7 @@ def fromregex(file, regexp, dtype, encoding=None): Groups in the regular expression correspond to fields in the dtype. dtype : dtype or list of dtypes Dtype for the structured array. - encoding: string, optional + encoding : str, optional Encoding used to decode the inputfile. Does not apply to input streams. .. versionadded:: 1.14.0 @@ -1562,13 +1574,13 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, to read the entire file. .. versionadded:: 1.10.0 - encoding: string, optional - Encoding used to decode the inputfile. Does not apply to input streams. - The special value 'bytes' enables backward compatibility workarounds - that ensures you receive byte arrays as results if possible and passes - latin1 encoded strings to converters. 
Override this value to receive - unicode arrays and pass strings as input to converters. - If set to None the system default is used. + encoding : str, optional + Encoding used to decode the inputfile. Does not apply when `fname` is + a file object. The special value 'bytes' enables backward compatibility + workarounds that ensure that you receive byte arrays when possible + and passes latin1 encoded strings to converters. Override this value to + receive unicode arrays and pass strings as input to converters. If set + to None the system default is used. The default value is 'bytes'. .. versionadded:: 1.14.0 -- cgit v1.2.1 From d9ca11117f37d48d07818a3aae3641c023454269 Mon Sep 17 00:00:00 2001 From: Charles Harris Date: Sun, 19 Nov 2017 13:43:32 -0700 Subject: MAINT: Refactor some code in npyio.py. --- numpy/lib/npyio.py | 78 +++++++++++++++++++++++++++--------------------------- 1 file changed, 39 insertions(+), 39 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 6b65834ed..e4d827334 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -734,7 +734,7 @@ def _getconv(dtype): def floatconv(x): x.lower() if '0x' in x: - return float.fromhex(asstr(x)) + return float.fromhex(x) return float(x) typ = dtype.type @@ -782,13 +782,13 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, each row will be interpreted as an element of the array. In this case, the number of columns used must match the number of fields in the data-type. - comments : str or sequence, optional + comments : str or sequence of str, optional The characters or list of characters used to indicate the start of a - comment; - default: '#'. + comment. For backwards compatibility, byte strings will be decoded as + 'latin1'. The default is '#'. delimiter : str, optional - The string used to separate values. By default, this is any - whitespace. + The string used to separate values. 
For backwards compatibility, byte + strings will be decoded as 'latin1'. The default is whitespace. converters : dict, optional A dictionary mapping column number to a function that will convert that column to a float. E.g., if column 0 is a date string: @@ -797,18 +797,15 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, ``converters = {3: lambda s: float(s.strip() or 0)}``. Default: None. skiprows : int, optional Skip the first `skiprows` lines; default: 0. - usecols : int or sequence, optional Which columns to read, with 0 being the first. For example, usecols = (1,4,5) will extract the 2nd, 5th and 6th columns. The default, None, results in all columns being read. - .. versionadded:: 1.11.0 - - Also when a single column has to be read it is possible to use - an integer instead of a tuple. E.g ``usecols = 3`` reads the - fourth column the same way as `usecols = (3,)`` would. - + .. versionchanged:: 1.11.0 + When a single column has to be read it is possible to use + an integer instead of a tuple. E.g ``usecols = 3`` reads the + fourth column the same way as `usecols = (3,)`` would. unpack : bool, optional If True, the returned array is transposed, so that arguments may be unpacked using ``x, y, z = loadtxt(...)``. 
When used with a structured @@ -877,12 +874,14 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, if comments is not None: if isinstance(comments, (basestring, bytes)): comments = [comments] - comments = [_decode_line(x) for x in comments] - # Compile regex for comments beforehand comments = (re.escape(comment) for comment in comments) regex_comments = re.compile('|'.join(comments)) + + if delimiter is not None: + delimiter = _decode_line(delimiter) + user_converters = converters if encoding == 'bytes': @@ -1071,7 +1070,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, # Unused converter specified continue if byte_converters: - # converters may use decode to workaround numpy's oldd behaviour, + # converters may use decode to workaround numpy's old behaviour, # so encode the string again before passing to the user converter def tobytes_first(x, conv): if type(x) is bytes: @@ -1181,9 +1180,11 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', ``numpy.loadtxt``. .. versionadded:: 1.7.0 - encoding : str, optional + encoding : {None, str}, optional Encoding used to encode the outputfile. Does not apply to output - streams. + streams. If the encoding is something other than 'bytes' or 'latin1' + you will not be able to load the file in NumPy versions < 1.14. Default + is 'latin1'. .. versionadded:: 1.14.0 @@ -1908,7 +1909,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, if conv is bytes: user_conv = asbytes elif byte_converters: - # converters may use decode to workaround numpy's oldd behaviour, + # converters may use decode to workaround numpy's old behaviour, # so encode the string again before passing to the user converter def tobytes_first(x, conv): if type(x) is bytes: @@ -1927,7 +1928,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, user_converters.update(uc_update) # Fixme: possible error as following variable never used. 
- #miss_chars = [_.missing_values for _ in converters] + # miss_chars = [_.missing_values for _ in converters] # Initialize the output lists ... # ... rows @@ -2041,39 +2042,38 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, strcolidx = [i for (i, v) in enumerate(column_types) if v == np.unicode_] - typestr = 'U' + type_str = np.unicode_ if byte_converters and strcolidx: # convert strings back to bytes for backward compatibility warnings.warn( - "Reading strings without specifying the encoding argument is " - "deprecated. Set encoding, use None for the system default.", + "Reading unicode strings without specifying the encoding " + "argument is deprecated. Set the encoding, use None for the " + "system default.", np.VisibleDeprecationWarning, stacklevel=2) + def encode_unicode_cols(row_tup): + row = list(row_tup) + for i in strcolidx: + row[i] = row[i].encode('latin1') + return tuple(row) + try: - for j in range(len(data)): - row = list(data[j]) - for i in strcolidx: - row[i] = row[i].encode('latin1') - data[j] = tuple(row) - typestr = 'S' + data = [encode_unicode_cols(r) for r in data] + type_str = np.bytes_ except UnicodeEncodeError: - # we must use unicode, revert encoding - for k in range(0, j + 1): - row = list(data[k]) - for i in strcolidx: - if isinstance(row[i], bytes): - row[i] = row[i].decode('latin1') - data[k] = tuple(row) + pass + # ... and take the largest number of chars. 
for i in strcolidx: - column_types[i] = "|%s%i" % (typestr, max(len(row[i]) for row in data)) + max_line_length = max(len(row[i]) for row in data) + column_types[i] = np.dtype((type_str, max_line_length)) # if names is None: # If the dtype is uniform, don't define names, else use '' base = set([c.type for c in converters if c._checked]) if len(base) == 1: if strcolidx: - (ddtype, mdtype) = (typestr, bool) + (ddtype, mdtype) = (type_str, bool) else: (ddtype, mdtype) = (list(base)[0], bool) else: @@ -2148,7 +2148,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, # Try to take care of the missing data we missed names = output.dtype.names if usemask and names: - for (name, conv) in zip(names or (), converters): + for (name, conv) in zip(names, converters): missing_values = [conv(_) for _ in conv.missing_values if _ != ''] for mval in missing_values: -- cgit v1.2.1 From b023d734eeec42a2a1064eaed4de12fd676f1de0 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sun, 10 Dec 2017 10:08:44 -0800 Subject: DEP: Deprecate the pickle aliases * The np.ma functions are misleading, as they do not actually do anything special for ma.array * The np.loads functions doesn't even have numpy-specific documentation, and does not behave consistently with `np.load` * The string overloads of np.ma.load and np.ma.dump do not work well on python 3, as they make assumptions about whether a binary or text pickle file is used (gh-5491) --- numpy/lib/npyio.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index e4d827334..7b51cb9c7 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -30,7 +30,14 @@ else: import cPickle as pickle from future_builtins import map -loads = pickle.loads + +def loads(*args, **kwargs): + # NumPy 1.15.0, 2017-12-10 + warnings.warn( + "np.loads is deprecated, use pickle.loads instead", + DeprecationWarning, stacklevel=2) + return 
pickle.loads(*args, **kwargs) + __all__ = [ 'savetxt', 'loadtxt', 'genfromtxt', 'ndfromtxt', 'mafromtxt', -- cgit v1.2.1 From de100beb4b53833f817d4ba9c6d940e4cff96d43 Mon Sep 17 00:00:00 2001 From: Unknown Date: Tue, 12 Dec 2017 19:08:43 -0500 Subject: DOC: fix minor typos --- numpy/lib/npyio.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 7b51cb9c7..66dc68538 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -766,7 +766,7 @@ def _getconv(dtype): else: return asstr -# amount of lines loadtxt reads in one chunk, can be overriden for testing +# amount of lines loadtxt reads in one chunk, can be overridden for testing _loadtxt_chunksize = 50000 def loadtxt(fname, dtype=float, comments='#', delimiter=None, @@ -1542,7 +1542,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, names : {None, True, str, sequence}, optional If `names` is True, the field names are read from the first line after the first `skip_header` lines. This line can optionally be proceeded - by a comment delimeter. If `names` is a sequence or a single-string of + by a comment delimiter. If `names` is a sequence or a single-string of comma-separated names, the names will be used to define the field names in a structured dtype. If `names` is None, the names of the dtype fields will be used, if any. 
-- cgit v1.2.1 From 976554a3eb4e66ebfeef2e9aace2cf4eb1e83e67 Mon Sep 17 00:00:00 2001 From: Jarrod Millman Date: Tue, 12 Dec 2017 14:02:48 -0800 Subject: DOC: Prepare to host NEPs on GH pages --- numpy/lib/npyio.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 66dc68538..9ee0aaaae 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -477,7 +477,7 @@ def save(file, arr, allow_pickle=True, fix_imports=True): ----- For a description of the ``.npy`` format, see the module docstring of `numpy.lib.format` or the NumPy Enhancement Proposal - http://docs.scipy.org/doc/numpy/neps/npy-format.html + http://numpy.github.io/neps/npy-format.html Examples -------- @@ -563,7 +563,7 @@ def savez(file, *args, **kwds): in the archive contains one variable in ``.npy`` format. For a description of the ``.npy`` format, see `numpy.lib.format` or the NumPy Enhancement Proposal - http://docs.scipy.org/doc/numpy/neps/npy-format.html + http://numpy.github.io/neps/npy-format.html When opening the saved ``.npz`` file with `load` a `NpzFile` object is returned. This is a dictionary-like object which can be queried for @@ -644,7 +644,7 @@ def savez_compressed(file, *args, **kwds): ``zipfile.ZIP_DEFLATED`` and each file in the archive contains one variable in ``.npy`` format. For a description of the ``.npy`` format, see `numpy.lib.format` or the NumPy Enhancement Proposal - http://docs.scipy.org/doc/numpy/neps/npy-format.html + http://numpy.github.io/neps/npy-format.html When opening the saved ``.npz`` file with `load` a `NpzFile` object is returned. 
This is a dictionary-like object which can be queried for -- cgit v1.2.1 From 0a8786163133c4227bfa7dbc3c9a6800172b65f7 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sat, 13 Jan 2018 20:15:32 -0800 Subject: BUG: Resize bytes_ columns in genfromtxt Fixes gh-10394, due to regression in gh-10054 --- numpy/lib/npyio.py | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index e4d827334..9e979bbe6 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -2042,7 +2042,6 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, strcolidx = [i for (i, v) in enumerate(column_types) if v == np.unicode_] - type_str = np.unicode_ if byte_converters and strcolidx: # convert strings back to bytes for backward compatibility warnings.warn( @@ -2058,33 +2057,37 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, try: data = [encode_unicode_cols(r) for r in data] - type_str = np.bytes_ except UnicodeEncodeError: pass + else: + for i in strcolidx: + column_types[i] = np.bytes_ + # Update string types to be the right length + sized_column_types = column_types[:] + for i, col_type in enumerate(column_types): + if np.issubdtype(col_type, np.character): + n_chars = max(len(row[i]) for row in data) + sized_column_types[i] = (col_type, n_chars) - # ... and take the largest number of chars. 
- for i in strcolidx: - max_line_length = max(len(row[i]) for row in data) - column_types[i] = np.dtype((type_str, max_line_length)) - # if names is None: - # If the dtype is uniform, don't define names, else use '' - base = set([c.type for c in converters if c._checked]) + # If the dtype is uniform (before sizing strings) + base = set([ + c_type + for c, c_type in zip(converters, column_types) + if c._checked]) if len(base) == 1: - if strcolidx: - (ddtype, mdtype) = (type_str, bool) - else: - (ddtype, mdtype) = (list(base)[0], bool) + uniform_type, = base + (ddtype, mdtype) = (uniform_type, bool) else: ddtype = [(defaultfmt % i, dt) - for (i, dt) in enumerate(column_types)] + for (i, dt) in enumerate(sized_column_types)] if usemask: mdtype = [(defaultfmt % i, bool) - for (i, dt) in enumerate(column_types)] + for (i, dt) in enumerate(sized_column_types)] else: - ddtype = list(zip(names, column_types)) - mdtype = list(zip(names, [bool] * len(column_types))) + ddtype = list(zip(names, sized_column_types)) + mdtype = list(zip(names, [bool] * len(sized_column_types))) output = np.array(data, dtype=ddtype) if usemask: outputmask = np.array(masks, dtype=mdtype) -- cgit v1.2.1 From ab51f997645c8b1f6a432442c8a999911a8eff25 Mon Sep 17 00:00:00 2001 From: Derrick Williams Date: Sun, 28 Jan 2018 12:35:55 -0800 Subject: DOC: See #10098 and minor punctuation cleanup (#10478) * DOC: See #10098 and minor punctuation cleanup * DOC: Correcting per PR comments --- numpy/lib/npyio.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 9ee0aaaae..588e26b4c 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -797,22 +797,23 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, The string used to separate values. For backwards compatibility, byte strings will be decoded as 'latin1'. The default is whitespace. 
converters : dict, optional - A dictionary mapping column number to a function that will convert - that column to a float. E.g., if column 0 is a date string: - ``converters = {0: datestr2num}``. Converters can also be used to - provide a default value for missing data (but see also `genfromtxt`): - ``converters = {3: lambda s: float(s.strip() or 0)}``. Default: None. + A dictionary mapping column number to a function that will parse the + column string into the desired value. E.g., if column 0 is a date + string: ``converters = {0: datestr2num}``. Converters can also be + used to provide a default value for missing data (but see also + `genfromtxt`): ``converters = {3: lambda s: float(s.strip() or 0)}``. + Default: None. skiprows : int, optional Skip the first `skiprows` lines; default: 0. usecols : int or sequence, optional Which columns to read, with 0 being the first. For example, - usecols = (1,4,5) will extract the 2nd, 5th and 6th columns. + ``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns. The default, None, results in all columns being read. .. versionchanged:: 1.11.0 When a single column has to be read it is possible to use an integer instead of a tuple. E.g ``usecols = 3`` reads the - fourth column the same way as `usecols = (3,)`` would. + fourth column the same way as ``usecols = (3,)`` would. unpack : bool, optional If True, the returned array is transposed, so that arguments may be unpacked using ``x, y, z = loadtxt(...)``. When used with a structured @@ -827,7 +828,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, Encoding used to decode the inputfile. Does not apply to input streams. The special value 'bytes' enables backward compatibility workarounds that ensures you receive byte arrays as results if possible and passes - latin1 encoded strings to converters. Override this value to receive + 'latin1' encoded strings to converters. Override this value to receive unicode arrays and pass strings as input to converters. 
If set to None the system default is used. The default value is 'bytes'. -- cgit v1.2.1 From c45e445e9fe6bd264aba5a1736f0145ca7bdacc9 Mon Sep 17 00:00:00 2001 From: Mathieu Sornay Date: Wed, 31 Jan 2018 16:58:56 +0100 Subject: BUG: fromregex: asbytes called on regexp objects When calling fromregex() with a binary stream and a regular expression object, asbytes() was called on the regexp object, resulting in an incorrect regular expression being compiled and used. --- numpy/lib/npyio.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 9ee0aaaae..02d68bc9e 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -1459,9 +1459,9 @@ def fromregex(file, regexp, dtype, encoding=None): dtype = np.dtype(dtype) content = file.read() - if isinstance(content, bytes) and not isinstance(regexp, bytes): + if isinstance(content, bytes) and isinstance(regexp, np.unicode): regexp = asbytes(regexp) - elif not isinstance(content, bytes) and isinstance(regexp, bytes): + elif isinstance(content, np.unicode) and isinstance(regexp, bytes): regexp = asstr(regexp) if not hasattr(regexp, 'match'): -- cgit v1.2.1 From 50fde71f1ac0528f40ee216136b33fde41205ef2 Mon Sep 17 00:00:00 2001 From: Allan Haldane Date: Sat, 17 Feb 2018 14:36:49 -0500 Subject: BUG: break cyclic refs in recursive closures Fixes #10620 --- numpy/lib/npyio.py | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 096f1a3a4..73613d2a4 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -1109,6 +1109,11 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, finally: if fown: fh.close() + # recursive closures have a cyclic reference to themselves, which + # requires gc to collect (gh-10620). 
To avoid this problem, for + # performance and PyPy friendliness, we break the cycle: + flatten_dtype_internal = None + pack_items = None if X is None: X = np.array([], dtype) -- cgit v1.2.1 From 14e64281cfe374a9cad476599cbe9b4fa850efb7 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Thu, 8 Feb 2018 20:46:53 -0800 Subject: ENH: Make NpzFile conform to the Mapping protocol This is potentially a breaking change for python 3, because the Mapping protocol changed between python 2 and python 3 - `items()` and `keys()` now return views, not lists. In practice, any user running `2to3` should have found this fixed itself automatically. Also fixes dir(np.lib.npyio.BagObj(dict(a=1))) on python 3. Fixes gh-1723 --- numpy/lib/npyio.py | 59 +++++++++++++++++++++++++++++------------------------- 1 file changed, 32 insertions(+), 27 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 096f1a3a4..ff5e24bef 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -26,9 +26,11 @@ from numpy.compat import ( if sys.version_info[0] >= 3: import pickle + from collections.abc import Mapping else: import cPickle as pickle from future_builtins import map + from collections import Mapping def loads(*args, **kwargs): @@ -92,7 +94,7 @@ class BagObj(object): This also enables tab-completion in an interpreter or IPython. 
""" - return object.__getattribute__(self, '_obj').keys() + return list(object.__getattribute__(self, '_obj').keys()) def zipfile_factory(file, *args, **kwargs): @@ -110,7 +112,7 @@ def zipfile_factory(file, *args, **kwargs): return zipfile.ZipFile(file, *args, **kwargs) -class NpzFile(object): +class NpzFile(Mapping): """ NpzFile(fid) @@ -216,6 +218,13 @@ class NpzFile(object): def __del__(self): self.close() + # Implement the Mapping ABC + def __iter__(self): + return iter(self.files) + + def __len__(self): + return len(self.files) + def __getitem__(self, key): # FIXME: This seems like it will copy strings around # more than is strictly necessary. The zipfile @@ -225,11 +234,11 @@ class NpzFile(object): # It would be better if the zipfile could read # (or at least uncompress) the data # directly into the array memory. - member = 0 + member = False if key in self._files: - member = 1 + member = True elif key in self.files: - member = 1 + member = True key += '.npy' if member: bytes = self.zip.open(key) @@ -245,31 +254,27 @@ class NpzFile(object): else: raise KeyError("%s is not a file in the archive" % key) - def __iter__(self): - return iter(self.files) - def items(self): - """ - Return a list of tuples, with each tuple (filename, array in file). + if sys.version_info.major == 3: + # deprecate the python 2 dict apis that we supported by accident in + # python 3. We forgot to implement itervalues() at all in earlier + # versions of numpy, so no need to deprecated it here. 
- """ - return [(f, self[f]) for f in self.files] - - def iteritems(self): - """Generator that returns tuples (filename, array in file).""" - for f in self.files: - yield (f, self[f]) - - def keys(self): - """Return files in the archive with a ``.npy`` extension.""" - return self.files - - def iterkeys(self): - """Return an iterator over the files in the archive.""" - return self.__iter__() + def iteritems(self): + # Numpy 1.15, 2018-02-20 + warnings.warn( + "NpzFile.iteritems is deprecated in python 3, to match the " + "removal of dict.itertems. Use .items() instead.", + DeprecationWarning, stacklevel=2) + return self.items() - def __contains__(self, key): - return self.files.__contains__(key) + def iterkeys(self): + # Numpy 1.15, 2018-02-20 + warnings.warn( + "NpzFile.iterkeys is deprecated in python 3, to match the " + "removal of dict.iterkeys. Use .keys() instead.", + DeprecationWarning, stacklevel=2) + return self.keys() def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True, -- cgit v1.2.1 From e97de95d4cae6805ed6c258655e7492a5f2ce863 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Mon, 12 Mar 2018 22:47:12 +0000 Subject: Fix low-hanging Pypy compatibility issues (#10737) * TST: skip refcount-requiring tests if sys.refcount is missing * ENH: io: add refcheck=False to a safe .resize() call The array is allocated immediately above, and the resize always succeeds so it is not necessary to check it. Fixes Pypy compatibility. 
* TST: remove unused code * TST: factor skipif(not HAS_REFCOUNT) into a separate decorator --- numpy/lib/npyio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 959574594..0f338d781 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -1104,7 +1104,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, nshape = list(X.shape) pos = nshape[0] nshape[0] += len(x) - X.resize(nshape) + X.resize(nshape, refcheck=False) X[pos:, ...] = x finally: if fown: -- cgit v1.2.1 From 171eeafa26ce71533ac9f7f6d3585e9ec967442d Mon Sep 17 00:00:00 2001 From: mattip Date: Tue, 10 Apr 2018 23:21:20 +0300 Subject: BUG: fix savetxt, loadtxt for '+-' in complex --- numpy/lib/npyio.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 0f338d781..197562818 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -758,7 +758,7 @@ def _getconv(dtype): elif issubclass(typ, np.floating): return floatconv elif issubclass(typ, complex): - return lambda x: complex(asstr(x)) + return lambda x: complex(asstr(x).replace('+-', '-')) elif issubclass(typ, np.bytes_): return asbytes elif issubclass(typ, np.unicode_): @@ -1377,7 +1377,8 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', for number in row: row2.append(number.real) row2.append(number.imag) - fh.write(format % tuple(row2) + newline) + s = format % tuple(row2) + newline + fh.write(s.replace('+-', '-')) else: for row in X: try: -- cgit v1.2.1 From 8323be1bc44c2811fc36f5b99c1a30ebcee8edbd Mon Sep 17 00:00:00 2001 From: Raunak Shah <32986603+raunaks13@users.noreply.github.com> Date: Tue, 17 Apr 2018 05:12:01 +0000 Subject: BUG: fix crash in numpy.genfromtxt(..., names=True, comments=None) (#10822) Fixes gh-10780 --- numpy/lib/npyio.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited 
to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 197562818..29688f73d 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -1720,7 +1720,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, try: while not first_values: first_line = _decode_line(next(fhd), encoding) - if names is True: + if (names is True) and (comments is not None): if comments in first_line: first_line = ( ''.join(first_line.split(comments)[1:])) @@ -1734,8 +1734,9 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, # Should we take the first values as names ? if names is True: fval = first_values[0].strip() - if fval in comments: - del first_values[0] + if comments is not None: + if fval in comments: + del first_values[0] # Check the columns to use: make sure `usecols` is a list if usecols is not None: -- cgit v1.2.1 From df8e83538461c29bc12c44198574bde8ffefcad7 Mon Sep 17 00:00:00 2001 From: mattip Date: Tue, 17 Apr 2018 13:46:36 +0300 Subject: DOC: clear up warnings, fix matplotlib plot --- numpy/lib/npyio.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 29688f73d..59379bdda 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -1166,13 +1166,14 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', multi-format string, e.g. 'Iteration %d -- %10.5f', in which case `delimiter` is ignored. For complex `X`, the legal options for `fmt` are: - a) a single specifier, `fmt='%.4e'`, resulting in numbers formatted - like `' (%s+%sj)' % (fmt, fmt)` - b) a full string specifying every real and imaginary part, e.g. - `' %.4e %+.4ej %.4e %+.4ej %.4e %+.4ej'` for 3 columns - c) a list of specifiers, one per column - in this case, the real - and imaginary part must have separate specifiers, - e.g. 
`['%.3e + %.3ej', '(%.15e%+.15ej)']` for 2 columns + + * a single specifier, `fmt='%.4e'`, resulting in numbers formatted + like `' (%s+%sj)' % (fmt, fmt)` + * a full string specifying every real and imaginary part, e.g. + `' %.4e %+.4ej %.4e %+.4ej %.4e %+.4ej'` for 3 columns + * a list of specifiers, one per column - in this case, the real + and imaginary part must have separate specifiers, + e.g. `['%.3e + %.3ej', '(%.15e%+.15ej)']` for 2 columns delimiter : str, optional String or character separating columns. newline : str, optional -- cgit v1.2.1 From a043c3ed2a08fc42fd2eff6669a862c5cb045bfc Mon Sep 17 00:00:00 2001 From: "Nicholas Nadeau, P.Eng., AVS" Date: Mon, 23 Apr 2018 16:53:18 -0400 Subject: MAINT, DOC: Fix typos (#10958) * fixed doc typo * fixed lib typos * fixed lapack_lite typos * Revert "fixed lapack_lite typos" This reverts commit e7dada860cb73af190234402508ab79965ecd079. --- numpy/lib/npyio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 59379bdda..67585443b 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -936,7 +936,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, if encoding is not None: fencoding = encoding # we must assume local encoding - # TOOD emit portability warning? + # TODO emit portability warning? 
elif fencoding is None: import locale fencoding = locale.getpreferredencoding() -- cgit v1.2.1 From 92f85239dad607540a1fa3124e41c7b357caf7fe Mon Sep 17 00:00:00 2001 From: Andras Deak Date: Fri, 27 Apr 2018 14:05:07 +0200 Subject: DOC: Make doc examples using StringIO python2-3 compatible --- numpy/lib/npyio.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 67585443b..97f50b5d8 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -859,18 +859,18 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, Examples -------- >>> from io import StringIO # StringIO behaves like a file object - >>> c = StringIO("0 1\\n2 3") + >>> c = StringIO(u"0 1\\n2 3") >>> np.loadtxt(c) array([[ 0., 1.], [ 2., 3.]]) - >>> d = StringIO("M 21 72\\nF 35 58") + >>> d = StringIO(u"M 21 72\\nF 35 58") >>> np.loadtxt(d, dtype={'names': ('gender', 'age', 'weight'), ... 'formats': ('S1', 'i4', 'f4')}) array([('M', 21, 72.0), ('F', 35, 58.0)], dtype=[('gender', '|S1'), ('age', '>> c = StringIO("1,0,2\\n3,0,4") + >>> c = StringIO(u"1,0,2\\n3,0,4") >>> x, y = np.loadtxt(c, delimiter=',', usecols=(0, 2), unpack=True) >>> x array([ 1., 3.]) @@ -1632,7 +1632,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, Comma delimited file with mixed dtype - >>> s = StringIO("1,1.3,abcde") + >>> s = StringIO(u"1,1.3,abcde") >>> data = np.genfromtxt(s, dtype=[('myint','i8'),('myfloat','f8'), ... ('mystring','S5')], delimiter=",") >>> data @@ -1659,7 +1659,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, An example with fixed-width columns - >>> s = StringIO("11.3abcde") + >>> s = StringIO(u"11.3abcde") >>> data = np.genfromtxt(s, dtype=None, names=['intvar','fltvar','strvar'], ... 
delimiter=[1,3,5]) >>> data -- cgit v1.2.1 From 406ccc9c574555ebdbda3de6abfc44a833f523e7 Mon Sep 17 00:00:00 2001 From: Hong Xu Date: Sun, 6 May 2018 11:15:49 -0700 Subject: DOC: Add explanation for comments=None in loadtxt. --- numpy/lib/npyio.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 97f50b5d8..76cc07ff1 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -791,8 +791,8 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, the data-type. comments : str or sequence of str, optional The characters or list of characters used to indicate the start of a - comment. For backwards compatibility, byte strings will be decoded as - 'latin1'. The default is '#'. + comment. None implies no comments. For backwards compatibility, byte + strings will be decoded as 'latin1'. The default is '#'. delimiter : str, optional The string used to separate values. For backwards compatibility, byte strings will be decoded as 'latin1'. The default is whitespace. 
-- cgit v1.2.1 From 85282a5dac927ed731655e0a58fb67d2483f18b9 Mon Sep 17 00:00:00 2001 From: mattip Date: Sun, 13 May 2018 11:09:05 +0300 Subject: DOC: link to updated module docstring, not NEP --- numpy/lib/npyio.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 76cc07ff1..5d3c1e525 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -475,9 +475,7 @@ def save(file, arr, allow_pickle=True, fix_imports=True): Notes ----- - For a description of the ``.npy`` format, see the module docstring - of `numpy.lib.format` or the NumPy Enhancement Proposal - http://numpy.github.io/neps/npy-format.html + For a description of the ``.npy`` format, see :py:mod:`numpy.lib.format` Examples -------- @@ -561,9 +559,7 @@ def savez(file, *args, **kwds): The ``.npz`` file format is a zipped archive of files named after the variables they contain. The archive is not compressed and each file in the archive contains one variable in ``.npy`` format. For a - description of the ``.npy`` format, see `numpy.lib.format` or the - NumPy Enhancement Proposal - http://numpy.github.io/neps/npy-format.html + description of the ``.npy`` format, see :py:mod:`numpy.lib.format` When opening the saved ``.npz`` file with `load` a `NpzFile` object is returned. This is a dictionary-like object which can be queried for @@ -642,9 +638,9 @@ def savez_compressed(file, *args, **kwds): The ``.npz`` file format is a zipped archive of files named after the variables they contain. The archive is compressed with ``zipfile.ZIP_DEFLATED`` and each file in the archive contains one variable - in ``.npy`` format. For a description of the ``.npy`` format, see - `numpy.lib.format` or the NumPy Enhancement Proposal - http://numpy.github.io/neps/npy-format.html + in ``.npy`` format. 
For a description of the ``.npy`` format, see + :py:mod:`numpy.lib.format` + When opening the saved ``.npz`` file with `load` a `NpzFile` object is returned. This is a dictionary-like object which can be queried for -- cgit v1.2.1 From c759466acbcb2c8ce6cce0ae971ba4ada8055a7a Mon Sep 17 00:00:00 2001 From: mattip Date: Sun, 13 May 2018 20:28:27 +0300 Subject: DOC: create label and ref, fixes broken link --- numpy/lib/npyio.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 5d3c1e525..95804be7f 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -475,7 +475,7 @@ def save(file, arr, allow_pickle=True, fix_imports=True): Notes ----- - For a description of the ``.npy`` format, see :py:mod:`numpy.lib.format` + For a description of the ``.npy`` format, see :ref:`binary-serialization`. Examples -------- @@ -559,7 +559,7 @@ def savez(file, *args, **kwds): The ``.npz`` file format is a zipped archive of files named after the variables they contain. The archive is not compressed and each file in the archive contains one variable in ``.npy`` format. For a - description of the ``.npy`` format, see :py:mod:`numpy.lib.format` + description of the ``.npy`` format, see :ref:`binary-serialization`. When opening the saved ``.npz`` file with `load` a `NpzFile` object is returned. This is a dictionary-like object which can be queried for @@ -639,7 +639,7 @@ def savez_compressed(file, *args, **kwds): variables they contain. The archive is compressed with ``zipfile.ZIP_DEFLATED`` and each file in the archive contains one variable in ``.npy`` format. For a description of the ``.npy`` format, see - :py:mod:`numpy.lib.format` + :ref:`binary-serialization`. 
When opening the saved ``.npz`` file with `load` a `NpzFile` object is -- cgit v1.2.1 From 7a01f661cef8fe492cbbf5ed1e2474c11ce0527b Mon Sep 17 00:00:00 2001 From: mattip Date: Mon, 14 May 2018 13:46:00 +0300 Subject: DOC: add numpy.lib.format to docs and link to it --- numpy/lib/npyio.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 95804be7f..b109d65e1 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -475,7 +475,7 @@ def save(file, arr, allow_pickle=True, fix_imports=True): Notes ----- - For a description of the ``.npy`` format, see :ref:`binary-serialization`. + For a description of the ``.npy`` format, see :py:mod:`numpy.lib.format`. Examples -------- @@ -559,7 +559,7 @@ def savez(file, *args, **kwds): The ``.npz`` file format is a zipped archive of files named after the variables they contain. The archive is not compressed and each file in the archive contains one variable in ``.npy`` format. For a - description of the ``.npy`` format, see :ref:`binary-serialization`. + description of the ``.npy`` format, see :py:mod:`numpy.lib.format`. When opening the saved ``.npz`` file with `load` a `NpzFile` object is returned. This is a dictionary-like object which can be queried for @@ -639,7 +639,7 @@ def savez_compressed(file, *args, **kwds): variables they contain. The archive is compressed with ``zipfile.ZIP_DEFLATED`` and each file in the archive contains one variable in ``.npy`` format. For a description of the ``.npy`` format, see - :ref:`binary-serialization`. + :py:mod:`numpy.lib.format`. 
When opening the saved ``.npz`` file with `load` a `NpzFile` object is -- cgit v1.2.1 From 83828f52b287fefb3d8753a21bd3441997a4d687 Mon Sep 17 00:00:00 2001 From: Mike Toews Date: Sat, 16 Jun 2018 18:18:19 +1200 Subject: HTTP -> HTTPS, and other linkrot fixes --- numpy/lib/npyio.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 390927601..7788ac319 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -1259,8 +1259,8 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', References ---------- .. [1] `Format Specification Mini-Language - `_, Python Documentation. + `_, + Python Documentation. Examples -------- @@ -1624,7 +1624,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, References ---------- .. [1] NumPy User Guide, section `I/O with NumPy - `_. + `_. Examples --------- -- cgit v1.2.1 From 4c74e384a7ec961d171b7d6a0fbf20b2bc831c28 Mon Sep 17 00:00:00 2001 From: John Zwinck Date: Sat, 7 Jul 2018 10:14:46 +0800 Subject: BUG: fix np.load() of empty .npz file Fixes #9989 --- numpy/lib/npyio.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 7788ac319..d8cfbf769 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -412,12 +412,13 @@ def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True, try: # Code to distinguish from NumPy binary files and pickles. 
_ZIP_PREFIX = b'PK\x03\x04' + _ZIP_SUFFIX = b'PK\x05\x06' # empty zip files start with this N = len(format.MAGIC_PREFIX) magic = fid.read(N) # If the file size is less than N, we need to make sure not # to seek past the beginning of the file fid.seek(-min(N, len(magic)), 1) # back-up - if magic.startswith(_ZIP_PREFIX): + if magic.startswith(_ZIP_PREFIX) or magic.startswith(_ZIP_SUFFIX): # zip-file (assume .npz) # Transfer file ownership to NpzFile tmp = own_fid -- cgit v1.2.1 From 73d7871970a951edd48e5c40bdc7609385ce61e6 Mon Sep 17 00:00:00 2001 From: Matti Picus Date: Mon, 17 Sep 2018 09:08:42 +0300 Subject: MAINT: refactor design of recursive closures (#11910) --- numpy/lib/npyio.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index d8cfbf769..9a7b244ac 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -13,6 +13,7 @@ import numpy as np from . import format from ._datasource import DataSource from numpy.core.multiarray import packbits, unpackbits +from numpy.core._internal import recursive from ._iotools import ( LineSplitter, NameValidator, StringConverter, ConverterError, ConverterLockError, ConversionWarning, _is_string_like, @@ -944,7 +945,8 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, fencoding = locale.getpreferredencoding() # not to be confused with the flatten_dtype we import... - def flatten_dtype_internal(dt): + @recursive + def flatten_dtype_internal(self, dt): """Unpack a structured data-type, and produce re-packing info.""" if dt.names is None: # If the dtype is flattened, return. 
@@ -964,7 +966,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, packing = [] for field in dt.names: tp, bytes = dt.fields[field] - flat_dt, flat_packing = flatten_dtype_internal(tp) + flat_dt, flat_packing = self(tp) types.extend(flat_dt) # Avoid extra nesting for subarrays if tp.ndim > 0: @@ -973,7 +975,8 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, packing.append((len(flat_dt), flat_packing)) return (types, packing) - def pack_items(items, packing): + @recursive + def pack_items(self, items, packing): """Pack items into nested lists based on re-packing info.""" if packing is None: return items[0] @@ -985,7 +988,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, start = 0 ret = [] for length, subpacking in packing: - ret.append(pack_items(items[start:start+length], subpacking)) + ret.append(self(items[start:start+length], subpacking)) start += length return tuple(ret) @@ -1111,11 +1114,6 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, finally: if fown: fh.close() - # recursive closures have a cyclic reference to themselves, which - # requires gc to collect (gh-10620). 
To avoid this problem, for - # performance and PyPy friendliness, we break the cycle: - flatten_dtype_internal = None - pack_items = None if X is None: X = np.array([], dtype) -- cgit v1.2.1 From 195881ed6e50085ca8b195c96367748ff3563c53 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Thu, 8 Feb 2018 21:26:34 -0800 Subject: BUG: Don't leave files open and dangling if np.load has a bad encoding argument, or the file is an invalid zip --- numpy/lib/npyio.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 9a7b244ac..73cf5554a 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -380,16 +380,6 @@ def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True, memmap([4, 5, 6]) """ - own_fid = False - if isinstance(file, basestring): - fid = open(file, "rb") - own_fid = True - elif is_pathlib_path(file): - fid = file.open("rb") - own_fid = True - else: - fid = file - if encoding not in ('ASCII', 'latin1', 'bytes'): # The 'encoding' value for pickle also affects what encoding # the serialized binary data of NumPy arrays is loaded @@ -410,6 +400,17 @@ def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True, # Nothing to do on Python 2 pickle_kwargs = {} + # TODO: Use contextlib.ExitStack once we drop Python 2 + if isinstance(file, basestring): + fid = open(file, "rb") + own_fid = True + elif is_pathlib_path(file): + fid = file.open("rb") + own_fid = True + else: + fid = file + own_fid = False + try: # Code to distinguish from NumPy binary files and pickles. 
_ZIP_PREFIX = b'PK\x03\x04' @@ -422,10 +423,10 @@ def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True, if magic.startswith(_ZIP_PREFIX) or magic.startswith(_ZIP_SUFFIX): # zip-file (assume .npz) # Transfer file ownership to NpzFile - tmp = own_fid + ret = NpzFile(fid, own_fid=own_fid, allow_pickle=allow_pickle, + pickle_kwargs=pickle_kwargs) own_fid = False - return NpzFile(fid, own_fid=tmp, allow_pickle=allow_pickle, - pickle_kwargs=pickle_kwargs) + return ret elif magic == format.MAGIC_PREFIX: # .npy file if mmap_mode: -- cgit v1.2.1 From 9e86f6311a88a989241fd34da39dcccce08a19dc Mon Sep 17 00:00:00 2001 From: mfkasim91 Date: Sun, 16 Sep 2018 11:29:40 +0100 Subject: ENH: add max_rows kwarg to numpy.loadtxt like numpy.genfromtxt --- numpy/lib/npyio.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index d8cfbf769..6e1d26bb9 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -773,7 +773,7 @@ _loadtxt_chunksize = 50000 def loadtxt(fname, dtype=float, comments='#', delimiter=None, converters=None, skiprows=0, usecols=None, unpack=False, - ndmin=0, encoding='bytes'): + ndmin=0, encoding='bytes', max_rows=None): """ Load data from a text file. @@ -835,6 +835,11 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, the system default is used. The default value is 'bytes'. .. versionadded:: 1.14.0 + max_rows : int, optional + Read `max_rows` lines of content after `skiprows` lines. The default + (`None`) is to read all the lines. + + .. 
versionadded:: 1.16.0 Returns ------- @@ -1014,7 +1019,9 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, """ X = [] - for i, line in enumerate(itertools.chain([first_line], fh)): + line_iter = itertools.chain([first_line], fh) + line_iter = itertools.islice(line_iter, max_rows) + for i, line in enumerate(line_iter): vals = split_line(line) if len(vals) == 0: continue -- cgit v1.2.1 From f988dfb6b774d751277e8afcb3a329b871dfc476 Mon Sep 17 00:00:00 2001 From: mfkasim91 Date: Sun, 30 Sep 2018 16:24:26 +0100 Subject: MAINT: No tick for None in `max_rows` docstring for `np.loadtxt` --- numpy/lib/npyio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 6e1d26bb9..43e744f28 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -837,7 +837,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, .. versionadded:: 1.14.0 max_rows : int, optional Read `max_rows` lines of content after `skiprows` lines. The default - (`None`) is to read all the lines. + (None) is to read all the lines. .. versionadded:: 1.16.0 -- cgit v1.2.1 From 4577a69516bcc0406aaaa48304c8a2cbd82c58c9 Mon Sep 17 00:00:00 2001 From: mfkasim91 Date: Sun, 30 Sep 2018 16:47:40 +0100 Subject: MAINT: Omit None in max_rows for np.loadtxt docstring --- numpy/lib/npyio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 43e744f28..c4fa6b10d 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -837,7 +837,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, .. versionadded:: 1.14.0 max_rows : int, optional Read `max_rows` lines of content after `skiprows` lines. The default - (None) is to read all the lines. + is to read all the lines. .. 
versionadded:: 1.16.0 -- cgit v1.2.1 From e07b0fce7844a227fa05e4f20772ec9cc5bf9912 Mon Sep 17 00:00:00 2001 From: Emil Hessman Date: Sun, 30 Sep 2018 19:22:49 +0200 Subject: MAINT: remove unused stdlib imports --- numpy/lib/npyio.py | 1 - 1 file changed, 1 deletion(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 77e007d23..7eb203868 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -1,6 +1,5 @@ from __future__ import division, absolute_import, print_function -import io import sys import os import re -- cgit v1.2.1 From 7372f8dcc6af4446e502c0daec3199dace27e863 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 19 Sep 2018 17:07:25 +0200 Subject: MAINT, TST import pickle from numpy.core.numeric All imports of pickle from numpy modules are now done this way: >>> from numpy.core.numeric import pickle Also, some loops on protocol numbers are added over pickle tests that were not caught from #12090 --- numpy/lib/npyio.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 7eb203868..62fc9c5b3 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -23,12 +23,11 @@ from numpy.compat import ( asbytes, asstr, asunicode, asbytes_nested, bytes, basestring, unicode, is_pathlib_path ) +from numpy.core.numeric import pickle if sys.version_info[0] >= 3: - import pickle from collections.abc import Mapping else: - import cPickle as pickle from future_builtins import map from collections import Mapping -- cgit v1.2.1 From 489362c0779bd60c688ce87baf9ecd6ac9ccf938 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Fri, 12 Oct 2018 07:08:22 -0700 Subject: ENH: Add support for third-party path-like objects by backporting os.fspath --- numpy/lib/npyio.py | 47 +++++++++++++++++++---------------------------- 1 file changed, 19 insertions(+), 28 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py 
b/numpy/lib/npyio.py index 62fc9c5b3..5e4e8e47f 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -21,7 +21,7 @@ from ._iotools import ( from numpy.compat import ( asbytes, asstr, asunicode, asbytes_nested, bytes, basestring, unicode, - is_pathlib_path + os_fspath, os_PathLike ) from numpy.core.numeric import pickle @@ -104,8 +104,8 @@ def zipfile_factory(file, *args, **kwargs): pathlib.Path objects. `args` and `kwargs` are passed to the zipfile.ZipFile constructor. """ - if is_pathlib_path(file): - file = str(file) + if not hasattr(file, 'read'): + file = os_fspath(file) import zipfile kwargs['allowZip64'] = True return zipfile.ZipFile(file, *args, **kwargs) @@ -399,15 +399,12 @@ def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True, pickle_kwargs = {} # TODO: Use contextlib.ExitStack once we drop Python 2 - if isinstance(file, basestring): - fid = open(file, "rb") - own_fid = True - elif is_pathlib_path(file): - fid = file.open("rb") - own_fid = True - else: + if hasattr(file, 'read'): fid = file own_fid = False + else: + fid = open(os_fspath(file), "rb") + own_fid = True try: # Code to distinguish from NumPy binary files and pickles. @@ -497,18 +494,14 @@ def save(file, arr, allow_pickle=True, fix_imports=True): """ own_fid = False - if isinstance(file, basestring): + if hasattr(file, 'read'): + fid = file + else: + file = os_fspath(file) if not file.endswith('.npy'): file = file + '.npy' fid = open(file, "wb") own_fid = True - elif is_pathlib_path(file): - if not file.name.endswith('.npy'): - file = file.parent / (file.name + '.npy') - fid = file.open("wb") - own_fid = True - else: - fid = file if sys.version_info[0] >= 3: pickle_kwargs = dict(fix_imports=fix_imports) @@ -673,12 +666,10 @@ def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None): # component of the so-called standard library. 
import zipfile - if isinstance(file, basestring): + if not hasattr(file, 'read'): + file = os_fspath(file) if not file.endswith('.npz'): file = file + '.npz' - elif is_pathlib_path(file): - if not file.name.endswith('.npz'): - file = file.parent / (file.name + '.npz') namedict = kwds for i, val in enumerate(args): @@ -926,8 +917,8 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, fown = False try: - if is_pathlib_path(fname): - fname = str(fname) + if isinstance(fname, os_PathLike): + fname = os_fspath(fname) if _is_string_like(fname): fh = np.lib._datasource.open(fname, 'rt', encoding=encoding) fencoding = getattr(fh, 'encoding', 'latin1') @@ -1315,8 +1306,8 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', self.write = self.write_bytes own_fh = False - if is_pathlib_path(fname): - fname = str(fname) + if isinstance(fname, os_PathLike): + fname = os_fspath(fname) if _is_string_like(fname): # datasource doesn't support creating a new file ... open(fname, 'wt').close() @@ -1699,8 +1690,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, # Initialize the filehandle, the LineSplitter and the NameValidator own_fhd = False try: - if is_pathlib_path(fname): - fname = str(fname) + if isinstance(fname, os_PathLike): + fname = os_fspath(fname) if isinstance(fname, basestring): fhd = iter(np.lib._datasource.open(fname, 'rt', encoding=encoding)) own_fhd = True -- cgit v1.2.1 From 73151451437fa6ce0d8b5f033c1e005885f63cf8 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Mon, 22 Oct 2018 17:40:08 -0700 Subject: ENH: __array_function__ support for np.lib, part 2/2 (#12119) * ENH: __array_function__ support for np.lib, part 2 xref GH12028 np.lib.npyio through np.lib.ufunclike * Fix failures in numpy/core/tests/test_overrides.py * CLN: handle deprecation in dispatchers for np.lib.ufunclike * CLN: fewer dispatchers in lib.twodim_base * CLN: fewer dispatchers in lib.shape_base * CLN: more dispatcher consolidation * BUG: 
fix test failure * Use all method instead of function in assert_equal * DOC: indicate n is array_like in scimath.logn * MAINT: updates per review * MAINT: more conservative changes in assert_array_equal * MAINT: add back in comment * MAINT: casting tweaks in assert_array_equal * MAINT: fixes and tests for assert_array_equal on subclasses --- numpy/lib/npyio.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 62fc9c5b3..733795671 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -12,6 +12,7 @@ import numpy as np from . import format from ._datasource import DataSource from numpy.core.multiarray import packbits, unpackbits +from numpy.core.overrides import array_function_dispatch from numpy.core._internal import recursive from ._iotools import ( LineSplitter, NameValidator, StringConverter, ConverterError, @@ -447,6 +448,11 @@ def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True, fid.close() +def _save_dispatcher(file, arr, allow_pickle=None, fix_imports=None): + return (arr,) + + +@array_function_dispatch(_save_dispatcher) def save(file, arr, allow_pickle=True, fix_imports=True): """ Save an array to a binary file in NumPy ``.npy`` format. @@ -525,6 +531,14 @@ def save(file, arr, allow_pickle=True, fix_imports=True): fid.close() +def _savez_dispatcher(file, *args, **kwds): + for a in args: + yield a + for v in kwds.values(): + yield v + + +@array_function_dispatch(_savez_dispatcher) def savez(file, *args, **kwds): """ Save several arrays into a single file in uncompressed ``.npz`` format. 
@@ -604,6 +618,14 @@ def savez(file, *args, **kwds): _savez(file, args, kwds, False) +def _savez_compressed_dispatcher(file, *args, **kwds): + for a in args: + yield a + for v in kwds.values(): + yield v + + +@array_function_dispatch(_savez_compressed_dispatcher) def savez_compressed(file, *args, **kwds): """ Save several arrays into a single file in compressed ``.npz`` format. @@ -1154,6 +1176,13 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, return X +def _savetxt_dispatcher(fname, X, fmt=None, delimiter=None, newline=None, + header=None, footer=None, comments=None, + encoding=None): + return (X,) + + +@array_function_dispatch(_savetxt_dispatcher) def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', footer='', comments='# ', encoding=None): """ -- cgit v1.2.1 From 4d24bbda32d133d51940b0691bd9b428d4198eaa Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Tue, 13 Nov 2018 09:38:07 -0800 Subject: ENH: set correct __module__ for objects in numpy's public API Fixes GH-12271 Tests verify that everything in ``dir(numpy)`` either has ``__module__`` set to ``'numpy'``, or appears in an explicit whitelist of undocumented functions and exported bulitins. These should eventually be documented or removed. I also identified a handful of functions for which I had accidentally not setup dispatch for with ``__array_function__`` before, because they were listed under "ndarray methods" in ``_add_newdocs.py``. I guess that should be a lesson in trusting code comments :). 
--- numpy/lib/npyio.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 6fbb7e805..1da5b0a25 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -3,6 +3,7 @@ from __future__ import division, absolute_import, print_function import sys import os import re +import functools import itertools import warnings import weakref @@ -11,8 +12,9 @@ from operator import itemgetter, index as opindex import numpy as np from . import format from ._datasource import DataSource +from numpy.core import overrides from numpy.core.multiarray import packbits, unpackbits -from numpy.core.overrides import array_function_dispatch +from numpy.core.overrides import set_module from numpy.core._internal import recursive from ._iotools import ( LineSplitter, NameValidator, StringConverter, ConverterError, @@ -33,6 +35,7 @@ else: from collections import Mapping +@set_module('numpy') def loads(*args, **kwargs): # NumPy 1.15.0, 2017-12-10 warnings.warn( @@ -48,6 +51,10 @@ __all__ = [ ] +array_function_dispatch = functools.partial( + overrides.array_function_dispatch, module='numpy') + + class BagObj(object): """ BagObj(obj) @@ -277,6 +284,7 @@ class NpzFile(Mapping): return self.keys() +@set_module('numpy') def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True, encoding='ASCII'): """ @@ -784,6 +792,8 @@ def _getconv(dtype): # amount of lines loadtxt reads in one chunk, can be overridden for testing _loadtxt_chunksize = 50000 + +@set_module('numpy') def loadtxt(fname, dtype=float, comments='#', delimiter=None, converters=None, skiprows=0, usecols=None, unpack=False, ndmin=0, encoding='bytes', max_rows=None): @@ -1424,6 +1434,7 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', fh.close() +@set_module('numpy') def fromregex(file, regexp, dtype, encoding=None): """ Construct an array from a text file, using regular expression parsing. 
@@ -1522,6 +1533,7 @@ def fromregex(file, regexp, dtype, encoding=None): #####-------------------------------------------------------------------------- +@set_module('numpy') def genfromtxt(fname, dtype=float, comments='#', delimiter=None, skip_header=0, skip_footer=0, converters=None, missing_values=None, filling_values=None, usecols=None, -- cgit v1.2.1 From c4bce96a2588f889b9659d4dd88538466d937fe5 Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Mon, 19 Nov 2018 23:49:14 -0500 Subject: MNT: Reword error message about loading pickled data. This double negative is confusing to me. --- numpy/lib/npyio.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 1da5b0a25..f623c58e7 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -441,8 +441,8 @@ def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True, else: # Try a pickle if not allow_pickle: - raise ValueError("allow_pickle=False, but file does not contain " - "non-pickled data") + raise ValueError("Cannot load file containing pickled data " + "when allow_pickle=False") try: return pickle.load(fid, **pickle_kwargs) except Exception: -- cgit v1.2.1 From 09992482c93f1b9e28b7958a792e6b3b709834fa Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Sat, 24 Nov 2018 20:59:42 +0100 Subject: Use set litterals --- numpy/lib/npyio.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index f623c58e7..db6a8e5eb 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -2126,10 +2126,10 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, if names is None: # If the dtype is uniform (before sizing strings) - base = set([ + base = { c_type for c, c_type in zip(converters, column_types) - if c._checked]) + if c._checked} if len(base) == 1: uniform_type, = base (ddtype, mdtype) = (uniform_type, 
bool) -- cgit v1.2.1 From 250861059b106371cb232456eeccd6d9e97d8f00 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Wed, 14 Nov 2018 11:36:59 -0800 Subject: TST, DOC: enable refguide_check * ported the refguide_check module from SciPy for usage in NumPy docstring execution/ verification; added the refguide_check run to Azure Mac OS CI * adjusted NumPy docstrings such that refguide_check passes --- numpy/lib/npyio.py | 54 +++++++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index db6a8e5eb..71261b826 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -168,13 +168,13 @@ class NpzFile(Mapping): >>> x = np.arange(10) >>> y = np.sin(x) >>> np.savez(outfile, x=x, y=y) - >>> outfile.seek(0) + >>> _ = outfile.seek(0) >>> npz = np.load(outfile) >>> isinstance(npz, np.lib.io.NpzFile) True >>> npz.files - ['y', 'x'] + ['x', 'y'] >>> npz['x'] # getitem access array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) >>> npz.f.x # attribute lookup @@ -502,7 +502,7 @@ def save(file, arr, allow_pickle=True, fix_imports=True): >>> x = np.arange(10) >>> np.save(outfile, x) - >>> outfile.seek(0) # Only needed here to simulate closing & reopening file + >>> _ = outfile.seek(0) # Only needed here to simulate closing & reopening file >>> np.load(outfile) array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) @@ -597,10 +597,10 @@ def savez(file, *args, **kwds): Using `savez` with \\*args, the arrays are saved with default names. 
>>> np.savez(outfile, x, y) - >>> outfile.seek(0) # Only needed here to simulate closing & reopening file + >>> _ = outfile.seek(0) # Only needed here to simulate closing & reopening file >>> npzfile = np.load(outfile) >>> npzfile.files - ['arr_1', 'arr_0'] + ['arr_0', 'arr_1'] >>> npzfile['arr_0'] array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) @@ -608,10 +608,10 @@ def savez(file, *args, **kwds): >>> outfile = TemporaryFile() >>> np.savez(outfile, x=x, y=y) - >>> outfile.seek(0) + >>> _ = outfile.seek(0) >>> npzfile = np.load(outfile) >>> npzfile.files - ['y', 'x'] + ['x', 'y'] >>> npzfile['x'] array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) @@ -891,21 +891,21 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, >>> from io import StringIO # StringIO behaves like a file object >>> c = StringIO(u"0 1\\n2 3") >>> np.loadtxt(c) - array([[ 0., 1.], - [ 2., 3.]]) + array([[0., 1.], + [2., 3.]]) >>> d = StringIO(u"M 21 72\\nF 35 58") >>> np.loadtxt(d, dtype={'names': ('gender', 'age', 'weight'), ... 'formats': ('S1', 'i4', 'f4')}) - array([('M', 21, 72.0), ('F', 35, 58.0)], - dtype=[('gender', '|S1'), ('age', '>> c = StringIO(u"1,0,2\\n3,0,4") >>> x, y = np.loadtxt(c, delimiter=',', usecols=(0, 2), unpack=True) >>> x - array([ 1., 3.]) + array([1., 3.]) >>> y - array([ 2., 4.]) + array([2., 4.]) """ # Type conversions for Py3 convenience @@ -1481,17 +1481,17 @@ def fromregex(file, regexp, dtype, encoding=None): Examples -------- >>> f = open('test.dat', 'w') - >>> f.write("1312 foo\\n1534 bar\\n444 qux") + >>> _ = f.write("1312 foo\\n1534 bar\\n444 qux") >>> f.close() >>> regexp = r"(\\d+)\\s+(...)" # match [digits, whitespace, anything] >>> output = np.fromregex('test.dat', regexp, ... 
[('num', np.int64), ('key', 'S3')]) >>> output - array([(1312L, 'foo'), (1534L, 'bar'), (444L, 'qux')], - dtype=[('num', '>> output['num'] - array([1312, 1534, 444], dtype=int64) + array([1312, 1534, 444]) """ own_fh = False @@ -1674,26 +1674,26 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, >>> data = np.genfromtxt(s, dtype=[('myint','i8'),('myfloat','f8'), ... ('mystring','S5')], delimiter=",") >>> data - array((1, 1.3, 'abcde'), - dtype=[('myint', '>> s.seek(0) # needed for StringIO example only + >>> _ = s.seek(0) # needed for StringIO example only >>> data = np.genfromtxt(s, dtype=None, ... names = ['myint','myfloat','mystring'], delimiter=",") >>> data - array((1, 1.3, 'abcde'), - dtype=[('myint', '>> s.seek(0) + >>> _ = s.seek(0) >>> data = np.genfromtxt(s, dtype="i8,f8,S5", ... names=['myint','myfloat','mystring'], delimiter=",") >>> data - array((1, 1.3, 'abcde'), - dtype=[('myint', '>> data = np.genfromtxt(s, dtype=None, names=['intvar','fltvar','strvar'], ... 
delimiter=[1,3,5]) >>> data - array((1, 1.3, 'abcde'), - dtype=[('intvar', ' Date: Tue, 4 Dec 2018 12:17:59 -0800 Subject: MAINT: address several reviewer comments --- numpy/lib/npyio.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 71261b826..e98c33e29 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -173,7 +173,7 @@ class NpzFile(Mapping): >>> npz = np.load(outfile) >>> isinstance(npz, np.lib.io.NpzFile) True - >>> npz.files + >>> sorted(npz.files) ['x', 'y'] >>> npz['x'] # getitem access array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) @@ -610,7 +610,7 @@ def savez(file, *args, **kwds): >>> np.savez(outfile, x=x, y=y) >>> _ = outfile.seek(0) >>> npzfile = np.load(outfile) - >>> npzfile.files + >>> sorted(npzfile.files) ['x', 'y'] >>> npzfile['x'] array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) -- cgit v1.2.1 From eac2334d2e51a4960b82cc2ed9e47e10f5d767c6 Mon Sep 17 00:00:00 2001 From: Ilja Date: Tue, 18 Dec 2018 11:02:25 +0200 Subject: Clarify skiprows in loadtxt --- numpy/lib/npyio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index e98c33e29..704fea108 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -829,7 +829,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, `genfromtxt`): ``converters = {3: lambda s: float(s.strip() or 0)}``. Default: None. skiprows : int, optional - Skip the first `skiprows` lines; default: 0. + Skip the first `skiprows` lines, including comments; default: 0. usecols : int or sequence, optional Which columns to read, with 0 being the first. For example, ``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns. 
-- cgit v1.2.1 From 113b28acb921401e22f7c738adfb44d15be79d08 Mon Sep 17 00:00:00 2001 From: Andras Deak Date: Sat, 2 Feb 2019 19:00:40 +0100 Subject: DOC: Add warning about arbitrary code execution to numpy.load Load uses pickle under the hood for object arrays, this is made more visible in the documentation using a warning. See also gh-12759 --- numpy/lib/npyio.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 704fea108..25eadd0bb 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -290,6 +290,12 @@ def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True, """ Load arrays or pickled objects from ``.npy``, ``.npz`` or pickled files. + .. warning:: Loading files that contain object arrays uses the ``pickle`` + module, which is not secure against erroneous or maliciously + constructed data. Consider passing ``allow_pickle=False`` to + load data that is known not to contain object arrays for the + safer handling of untrusted sources. + Parameters ---------- file : file-like object, string, or pathlib.Path -- cgit v1.2.1 From b6dc039961768bd5f3a3d7f57e8c396f8fa02815 Mon Sep 17 00:00:00 2001 From: Charles Harris Date: Thu, 21 Feb 2019 12:49:33 -0700 Subject: MAINT: Move pickle import to numpy.compat The pickle module was being imported from numpy.core.numeric. It was defined there in order to use pickle5 when available in Python3 and cpickle in Python2. The numpy.compat module seems a better place for that. 
--- numpy/lib/npyio.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 25eadd0bb..1e43fdb34 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -24,9 +24,8 @@ from ._iotools import ( from numpy.compat import ( asbytes, asstr, asunicode, asbytes_nested, bytes, basestring, unicode, - os_fspath, os_PathLike + os_fspath, os_PathLike, pickle ) -from numpy.core.numeric import pickle if sys.version_info[0] >= 3: from collections.abc import Mapping -- cgit v1.2.1 From 32129453da0f9bf0f352adaaff79f884d2bf52bc Mon Sep 17 00:00:00 2001 From: mattip Date: Sat, 16 Mar 2019 21:46:59 +0200 Subject: DEV: cleanup imports and some assignments (from LGTM) --- numpy/lib/npyio.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 1e43fdb34..d702859fa 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -23,8 +23,7 @@ from ._iotools import ( ) from numpy.compat import ( - asbytes, asstr, asunicode, asbytes_nested, bytes, basestring, unicode, - os_fspath, os_PathLike, pickle + asbytes, asstr, asunicode, bytes, basestring, os_fspath, os_PathLike, pickle ) if sys.version_info[0] >= 3: @@ -1123,7 +1122,6 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, if type(x) is bytes: return conv(x) return conv(x.encode("latin1")) - import functools converters[i] = functools.partial(tobytes_first, conv=conv) else: converters[i] = conv @@ -1974,7 +1972,6 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, if type(x) is bytes: return conv(x) return conv(x.encode("latin1")) - import functools user_conv = functools.partial(tobytes_first, conv=conv) else: user_conv = conv -- cgit v1.2.1 From efdd3f50bd5a1e58b815ad8f82d896f4e72ae2b5 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Thu, 11 Apr 2019 01:47:58 -0700 Subject: BUG: Fix crash when calling savetxt on a 
padded array As a general rule, _every_ use of `.descr` is broken. Fixes #13297 --- numpy/lib/npyio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index d702859fa..beeba1334 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -1379,7 +1379,7 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', # Complex dtype -- each field indicates a separate column else: - ncol = len(X.dtype.descr) + ncol = len(X.dtype.names) else: ncol = X.shape[1] -- cgit v1.2.1 From 87c126d794ac01d6a874ce345ab7d31f08ff1964 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Thu, 11 Apr 2019 21:45:39 -0700 Subject: BUG: Close files if an error occurs in genfromtxt Mostly indentation changes Fixes gh-13200 --- numpy/lib/npyio.py | 539 +++++++++++++++++++++++++++-------------------------- 1 file changed, 270 insertions(+), 269 deletions(-) (limited to 'numpy/lib/npyio.py') diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index beeba1334..d6d2a0c6c 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -7,6 +7,7 @@ import functools import itertools import warnings import weakref +import contextlib from operator import itemgetter, index as opindex import numpy as np @@ -23,7 +24,8 @@ from ._iotools import ( ) from numpy.compat import ( - asbytes, asstr, asunicode, bytes, basestring, os_fspath, os_PathLike, pickle + asbytes, asstr, asunicode, bytes, basestring, os_fspath, os_PathLike, + pickle, contextlib_nullcontext ) if sys.version_info[0] >= 3: @@ -1732,300 +1734,299 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, byte_converters = False # Initialize the filehandle, the LineSplitter and the NameValidator - own_fhd = False try: if isinstance(fname, os_PathLike): fname = os_fspath(fname) if isinstance(fname, basestring): - fhd = iter(np.lib._datasource.open(fname, 'rt', encoding=encoding)) - own_fhd = True + fid = np.lib._datasource.open(fname, 
'rt', encoding=encoding) + fid_ctx = contextlib.closing(fid) else: - fhd = iter(fname) + fid = fname + fid_ctx = contextlib_nullcontext(fid) + fhd = iter(fid) except TypeError: raise TypeError( "fname must be a string, filehandle, list of strings, " "or generator. Got %s instead." % type(fname)) - split_line = LineSplitter(delimiter=delimiter, comments=comments, - autostrip=autostrip, encoding=encoding) - validate_names = NameValidator(excludelist=excludelist, - deletechars=deletechars, - case_sensitive=case_sensitive, - replace_space=replace_space) + with fid_ctx: + split_line = LineSplitter(delimiter=delimiter, comments=comments, + autostrip=autostrip, encoding=encoding) + validate_names = NameValidator(excludelist=excludelist, + deletechars=deletechars, + case_sensitive=case_sensitive, + replace_space=replace_space) - # Skip the first `skip_header` rows - for i in range(skip_header): - next(fhd) + # Skip the first `skip_header` rows + for i in range(skip_header): + next(fhd) - # Keep on until we find the first valid values - first_values = None - try: - while not first_values: - first_line = _decode_line(next(fhd), encoding) - if (names is True) and (comments is not None): - if comments in first_line: - first_line = ( - ''.join(first_line.split(comments)[1:])) - first_values = split_line(first_line) - except StopIteration: - # return an empty array if the datafile is empty - first_line = '' - first_values = [] - warnings.warn('genfromtxt: Empty input file: "%s"' % fname, stacklevel=2) - - # Should we take the first values as names ? 
- if names is True: - fval = first_values[0].strip() - if comments is not None: - if fval in comments: - del first_values[0] - - # Check the columns to use: make sure `usecols` is a list - if usecols is not None: + # Keep on until we find the first valid values + first_values = None try: - usecols = [_.strip() for _ in usecols.split(",")] - except AttributeError: + while not first_values: + first_line = _decode_line(next(fhd), encoding) + if (names is True) and (comments is not None): + if comments in first_line: + first_line = ( + ''.join(first_line.split(comments)[1:])) + first_values = split_line(first_line) + except StopIteration: + # return an empty array if the datafile is empty + first_line = '' + first_values = [] + warnings.warn('genfromtxt: Empty input file: "%s"' % fname, stacklevel=2) + + # Should we take the first values as names ? + if names is True: + fval = first_values[0].strip() + if comments is not None: + if fval in comments: + del first_values[0] + + # Check the columns to use: make sure `usecols` is a list + if usecols is not None: try: - usecols = list(usecols) - except TypeError: - usecols = [usecols, ] - nbcols = len(usecols or first_values) - - # Check the names and overwrite the dtype.names if needed - if names is True: - names = validate_names([str(_.strip()) for _ in first_values]) - first_line = '' - elif _is_string_like(names): - names = validate_names([_.strip() for _ in names.split(',')]) - elif names: - names = validate_names(names) - # Get the dtype - if dtype is not None: - dtype = easy_dtype(dtype, defaultfmt=defaultfmt, names=names, - excludelist=excludelist, - deletechars=deletechars, - case_sensitive=case_sensitive, - replace_space=replace_space) - # Make sure the names is a list (for 2.5) - if names is not None: - names = list(names) - - if usecols: - for (i, current) in enumerate(usecols): - # if usecols is a list of names, convert to a list of indices - if _is_string_like(current): - usecols[i] = names.index(current) - 
elif current < 0: - usecols[i] = current + len(first_values) - # If the dtype is not None, make sure we update it - if (dtype is not None) and (len(dtype) > nbcols): - descr = dtype.descr - dtype = np.dtype([descr[_] for _ in usecols]) - names = list(dtype.names) - # If `names` is not None, update the names - elif (names is not None) and (len(names) > nbcols): - names = [names[_] for _ in usecols] - elif (names is not None) and (dtype is not None): - names = list(dtype.names) - - # Process the missing values ............................... - # Rename missing_values for convenience - user_missing_values = missing_values or () - if isinstance(user_missing_values, bytes): - user_missing_values = user_missing_values.decode('latin1') - - # Define the list of missing_values (one column: one list) - missing_values = [list(['']) for _ in range(nbcols)] - - # We have a dictionary: process it field by field - if isinstance(user_missing_values, dict): - # Loop on the items - for (key, val) in user_missing_values.items(): - # Is the key a string ? 
- if _is_string_like(key): + usecols = [_.strip() for _ in usecols.split(",")] + except AttributeError: try: - # Transform it into an integer - key = names.index(key) - except ValueError: - # We couldn't find it: the name must have been dropped - continue - # Redefine the key as needed if it's a column number - if usecols: - try: - key = usecols.index(key) - except ValueError: - pass - # Transform the value as a list of string - if isinstance(val, (list, tuple)): - val = [str(_) for _ in val] + usecols = list(usecols) + except TypeError: + usecols = [usecols, ] + nbcols = len(usecols or first_values) + + # Check the names and overwrite the dtype.names if needed + if names is True: + names = validate_names([str(_.strip()) for _ in first_values]) + first_line = '' + elif _is_string_like(names): + names = validate_names([_.strip() for _ in names.split(',')]) + elif names: + names = validate_names(names) + # Get the dtype + if dtype is not None: + dtype = easy_dtype(dtype, defaultfmt=defaultfmt, names=names, + excludelist=excludelist, + deletechars=deletechars, + case_sensitive=case_sensitive, + replace_space=replace_space) + # Make sure the names is a list (for 2.5) + if names is not None: + names = list(names) + + if usecols: + for (i, current) in enumerate(usecols): + # if usecols is a list of names, convert to a list of indices + if _is_string_like(current): + usecols[i] = names.index(current) + elif current < 0: + usecols[i] = current + len(first_values) + # If the dtype is not None, make sure we update it + if (dtype is not None) and (len(dtype) > nbcols): + descr = dtype.descr + dtype = np.dtype([descr[_] for _ in usecols]) + names = list(dtype.names) + # If `names` is not None, update the names + elif (names is not None) and (len(names) > nbcols): + names = [names[_] for _ in usecols] + elif (names is not None) and (dtype is not None): + names = list(dtype.names) + + # Process the missing values ............................... 
+ # Rename missing_values for convenience + user_missing_values = missing_values or () + if isinstance(user_missing_values, bytes): + user_missing_values = user_missing_values.decode('latin1') + + # Define the list of missing_values (one column: one list) + missing_values = [list(['']) for _ in range(nbcols)] + + # We have a dictionary: process it field by field + if isinstance(user_missing_values, dict): + # Loop on the items + for (key, val) in user_missing_values.items(): + # Is the key a string ? + if _is_string_like(key): + try: + # Transform it into an integer + key = names.index(key) + except ValueError: + # We couldn't find it: the name must have been dropped + continue + # Redefine the key as needed if it's a column number + if usecols: + try: + key = usecols.index(key) + except ValueError: + pass + # Transform the value as a list of string + if isinstance(val, (list, tuple)): + val = [str(_) for _ in val] + else: + val = [str(val), ] + # Add the value(s) to the current list of missing + if key is None: + # None acts as default + for miss in missing_values: + miss.extend(val) + else: + missing_values[key].extend(val) + # We have a sequence : each item matches a column + elif isinstance(user_missing_values, (list, tuple)): + for (value, entry) in zip(user_missing_values, missing_values): + value = str(value) + if value not in entry: + entry.append(value) + # We have a string : apply it to all entries + elif isinstance(user_missing_values, basestring): + user_value = user_missing_values.split(",") + for entry in missing_values: + entry.extend(user_value) + # We have something else: apply it to all entries + else: + for entry in missing_values: + entry.extend([str(user_missing_values)]) + + # Process the filling_values ............................... 
+ # Rename the input for convenience + user_filling_values = filling_values + if user_filling_values is None: + user_filling_values = [] + # Define the default + filling_values = [None] * nbcols + # We have a dictionary : update each entry individually + if isinstance(user_filling_values, dict): + for (key, val) in user_filling_values.items(): + if _is_string_like(key): + try: + # Transform it into an integer + key = names.index(key) + except ValueError: + # We couldn't find it: the name must have been dropped, + continue + # Redefine the key if it's a column number and usecols is defined + if usecols: + try: + key = usecols.index(key) + except ValueError: + pass + # Add the value to the list + filling_values[key] = val + # We have a sequence : update on a one-to-one basis + elif isinstance(user_filling_values, (list, tuple)): + n = len(user_filling_values) + if (n <= nbcols): + filling_values[:n] = user_filling_values else: - val = [str(val), ] - # Add the value(s) to the current list of missing - if key is None: - # None acts as default - for miss in missing_values: - miss.extend(val) + filling_values = user_filling_values[:nbcols] + # We have something else : use it for all entries + else: + filling_values = [user_filling_values] * nbcols + + # Initialize the converters ................................ + if dtype is None: + # Note: we can't use a [...]*nbcols, as we would have 3 times the same + # ... converter, instead of 3 different converters. 
+ converters = [StringConverter(None, missing_values=miss, default=fill) + for (miss, fill) in zip(missing_values, filling_values)] + else: + dtype_flat = flatten_dtype(dtype, flatten_base=True) + # Initialize the converters + if len(dtype_flat) > 1: + # Flexible type : get a converter from each dtype + zipit = zip(dtype_flat, missing_values, filling_values) + converters = [StringConverter(dt, locked=True, + missing_values=miss, default=fill) + for (dt, miss, fill) in zipit] else: - missing_values[key].extend(val) - # We have a sequence : each item matches a column - elif isinstance(user_missing_values, (list, tuple)): - for (value, entry) in zip(user_missing_values, missing_values): - value = str(value) - if value not in entry: - entry.append(value) - # We have a string : apply it to all entries - elif isinstance(user_missing_values, basestring): - user_value = user_missing_values.split(",") - for entry in missing_values: - entry.extend(user_value) - # We have something else: apply it to all entries - else: - for entry in missing_values: - entry.extend([str(user_missing_values)]) - - # Process the filling_values ............................... 
- # Rename the input for convenience - user_filling_values = filling_values - if user_filling_values is None: - user_filling_values = [] - # Define the default - filling_values = [None] * nbcols - # We have a dictionary : update each entry individually - if isinstance(user_filling_values, dict): - for (key, val) in user_filling_values.items(): - if _is_string_like(key): + # Set to a default converter (but w/ different missing values) + zipit = zip(missing_values, filling_values) + converters = [StringConverter(dtype, locked=True, + missing_values=miss, default=fill) + for (miss, fill) in zipit] + # Update the converters to use the user-defined ones + uc_update = [] + for (j, conv) in user_converters.items(): + # If the converter is specified by column names, use the index instead + if _is_string_like(j): try: - # Transform it into an integer - key = names.index(key) + j = names.index(j) + i = j except ValueError: - # We couldn't find it: the name must have been dropped, continue - # Redefine the key if it's a column number and usecols is defined - if usecols: + elif usecols: try: - key = usecols.index(key) + i = usecols.index(j) except ValueError: - pass - # Add the value to the list - filling_values[key] = val - # We have a sequence : update on a one-to-one basis - elif isinstance(user_filling_values, (list, tuple)): - n = len(user_filling_values) - if (n <= nbcols): - filling_values[:n] = user_filling_values - else: - filling_values = user_filling_values[:nbcols] - # We have something else : use it for all entries - else: - filling_values = [user_filling_values] * nbcols - - # Initialize the converters ................................ - if dtype is None: - # Note: we can't use a [...]*nbcols, as we would have 3 times the same - # ... converter, instead of 3 different converters. 
- converters = [StringConverter(None, missing_values=miss, default=fill) - for (miss, fill) in zip(missing_values, filling_values)] - else: - dtype_flat = flatten_dtype(dtype, flatten_base=True) - # Initialize the converters - if len(dtype_flat) > 1: - # Flexible type : get a converter from each dtype - zipit = zip(dtype_flat, missing_values, filling_values) - converters = [StringConverter(dt, locked=True, - missing_values=miss, default=fill) - for (dt, miss, fill) in zipit] - else: - # Set to a default converter (but w/ different missing values) - zipit = zip(missing_values, filling_values) - converters = [StringConverter(dtype, locked=True, - missing_values=miss, default=fill) - for (miss, fill) in zipit] - # Update the converters to use the user-defined ones - uc_update = [] - for (j, conv) in user_converters.items(): - # If the converter is specified by column names, use the index instead - if _is_string_like(j): - try: - j = names.index(j) + # Unused converter specified + continue + else: i = j - except ValueError: - continue - elif usecols: - try: - i = usecols.index(j) - except ValueError: - # Unused converter specified + # Find the value to test - first_line is not filtered by usecols: + if len(first_line): + testing_value = first_values[j] + else: + testing_value = None + if conv is bytes: + user_conv = asbytes + elif byte_converters: + # converters may use decode to workaround numpy's old behaviour, + # so encode the string again before passing to the user converter + def tobytes_first(x, conv): + if type(x) is bytes: + return conv(x) + return conv(x.encode("latin1")) + user_conv = functools.partial(tobytes_first, conv=conv) + else: + user_conv = conv + converters[i].update(user_conv, locked=True, + testing_value=testing_value, + default=filling_values[i], + missing_values=missing_values[i],) + uc_update.append((i, user_conv)) + # Make sure we have the corrected keys in user_converters... 
+ user_converters.update(uc_update) + + # Fixme: possible error as following variable never used. + # miss_chars = [_.missing_values for _ in converters] + + # Initialize the output lists ... + # ... rows + rows = [] + append_to_rows = rows.append + # ... masks + if usemask: + masks = [] + append_to_masks = masks.append + # ... invalid + invalid = [] + append_to_invalid = invalid.append + + # Parse each line + for (i, line) in enumerate(itertools.chain([first_line, ], fhd)): + values = split_line(line) + nbvalues = len(values) + # Skip an empty line + if nbvalues == 0: continue - else: - i = j - # Find the value to test - first_line is not filtered by usecols: - if len(first_line): - testing_value = first_values[j] - else: - testing_value = None - if conv is bytes: - user_conv = asbytes - elif byte_converters: - # converters may use decode to workaround numpy's old behaviour, - # so encode the string again before passing to the user converter - def tobytes_first(x, conv): - if type(x) is bytes: - return conv(x) - return conv(x.encode("latin1")) - user_conv = functools.partial(tobytes_first, conv=conv) - else: - user_conv = conv - converters[i].update(user_conv, locked=True, - testing_value=testing_value, - default=filling_values[i], - missing_values=missing_values[i],) - uc_update.append((i, user_conv)) - # Make sure we have the corrected keys in user_converters... - user_converters.update(uc_update) - - # Fixme: possible error as following variable never used. - # miss_chars = [_.missing_values for _ in converters] - - # Initialize the output lists ... - # ... rows - rows = [] - append_to_rows = rows.append - # ... masks - if usemask: - masks = [] - append_to_masks = masks.append - # ... 
invalid - invalid = [] - append_to_invalid = invalid.append - - # Parse each line - for (i, line) in enumerate(itertools.chain([first_line, ], fhd)): - values = split_line(line) - nbvalues = len(values) - # Skip an empty line - if nbvalues == 0: - continue - if usecols: - # Select only the columns we need - try: - values = [values[_] for _ in usecols] - except IndexError: + if usecols: + # Select only the columns we need + try: + values = [values[_] for _ in usecols] + except IndexError: + append_to_invalid((i + skip_header + 1, nbvalues)) + continue + elif nbvalues != nbcols: append_to_invalid((i + skip_header + 1, nbvalues)) continue - elif nbvalues != nbcols: - append_to_invalid((i + skip_header + 1, nbvalues)) - continue - # Store the values - append_to_rows(tuple(values)) - if usemask: - append_to_masks(tuple([v.strip() in m - for (v, m) in zip(values, - missing_values)])) - if len(rows) == max_rows: - break - - if own_fhd: - fhd.close() + # Store the values + append_to_rows(tuple(values)) + if usemask: + append_to_masks(tuple([v.strip() in m + for (v, m) in zip(values, + missing_values)])) + if len(rows) == max_rows: + break # Upgrade the converters (if needed) if dtype is None: -- cgit v1.2.1