Diffstat (limited to 'numpy/lib/npyio.py')
-rw-r--r--  numpy/lib/npyio.py  58
1 file changed, 31 insertions(+), 27 deletions(-)
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index d8cfbf769..62fc9c5b3 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -1,6 +1,5 @@
from __future__ import division, absolute_import, print_function
-import io
import sys
import os
import re
@@ -13,6 +12,7 @@ import numpy as np
from . import format
from ._datasource import DataSource
from numpy.core.multiarray import packbits, unpackbits
+from numpy.core._internal import recursive
from ._iotools import (
LineSplitter, NameValidator, StringConverter, ConverterError,
ConverterLockError, ConversionWarning, _is_string_like,
@@ -23,12 +23,11 @@ from numpy.compat import (
asbytes, asstr, asunicode, asbytes_nested, bytes, basestring, unicode,
is_pathlib_path
)
+from numpy.core.numeric import pickle
if sys.version_info[0] >= 3:
- import pickle
from collections.abc import Mapping
else:
- import cPickle as pickle
from future_builtins import map
from collections import Mapping
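The pickle import that used to be selected inline here (pickle on Python 3, cPickle on Python 2) is now pulled in once from numpy.core.numeric. That module is not shown in this diff, but the shared shim is presumably equivalent to the branches being removed:

    # hypothetical sketch of the shared shim, mirroring the removed lines
    import sys
    if sys.version_info[0] >= 3:
        import pickle
    else:
        import cPickle as pickle  # C-accelerated pickle on Python 2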
@@ -379,16 +378,6 @@ def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True,
memmap([4, 5, 6])
"""
- own_fid = False
- if isinstance(file, basestring):
- fid = open(file, "rb")
- own_fid = True
- elif is_pathlib_path(file):
- fid = file.open("rb")
- own_fid = True
- else:
- fid = file
-
if encoding not in ('ASCII', 'latin1', 'bytes'):
# The 'encoding' value for pickle also affects what encoding
# the serialized binary data of NumPy arrays is loaded
@@ -409,6 +398,17 @@ def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True,
# Nothing to do on Python 2
pickle_kwargs = {}
+ # TODO: Use contextlib.ExitStack once we drop Python 2
+ if isinstance(file, basestring):
+ fid = open(file, "rb")
+ own_fid = True
+ elif is_pathlib_path(file):
+ fid = file.open("rb")
+ own_fid = True
+ else:
+ fid = file
+ own_fid = False
+
try:
# Code to distinguish from NumPy binary files and pickles.
_ZIP_PREFIX = b'PK\x03\x04'
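The TODO above marks this block for a rewrite once Python 2 support is dropped. A hedged sketch of what the contextlib.ExitStack form could look like (Python 3 only; the pathlib check is an illustrative stand-in for is_pathlib_path):

    from contextlib import ExitStack

    with ExitStack() as stack:
        if isinstance(file, str):
            fid = stack.enter_context(open(file, "rb"))
        elif hasattr(file, "open"):   # pathlib.Path-like, illustrative check
            fid = stack.enter_context(file.open("rb"))
        else:
            fid = file                # caller-owned handle: never closed here
        # ... rest of load(); the stack closes fid only if we opened it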
@@ -421,10 +421,10 @@ def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True,
if magic.startswith(_ZIP_PREFIX) or magic.startswith(_ZIP_SUFFIX):
# zip-file (assume .npz)
# Transfer file ownership to NpzFile
- tmp = own_fid
+ ret = NpzFile(fid, own_fid=own_fid, allow_pickle=allow_pickle,
+ pickle_kwargs=pickle_kwargs)
own_fid = False
- return NpzFile(fid, own_fid=tmp, allow_pickle=allow_pickle,
- pickle_kwargs=pickle_kwargs)
+ return ret
elif magic == format.MAGIC_PREFIX:
# .npy file
if mmap_mode:
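The reordering above is an error-handling fix as much as a cleanup: previously own_fid was cleared before NpzFile was constructed, so an exception inside the constructor left the file open with no owner to close it. Constructing first and clearing own_fid only on success lets the surrounding finally block close the handle on failure. The pattern in isolation (Consumer is an illustrative name):

    try:
        ret = Consumer(fid, own_fid=own_fid)  # may raise
        own_fid = False                       # transfer ownership only on success
        return ret
    finally:
        if own_fid:
            fid.close()                       # construction failed; we still own fid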
@@ -773,7 +773,7 @@ _loadtxt_chunksize = 50000
def loadtxt(fname, dtype=float, comments='#', delimiter=None,
converters=None, skiprows=0, usecols=None, unpack=False,
- ndmin=0, encoding='bytes'):
+ ndmin=0, encoding='bytes', max_rows=None):
"""
Load data from a text file.
@@ -835,6 +835,11 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
the system default is used. The default value is 'bytes'.
.. versionadded:: 1.14.0
+ max_rows : int, optional
+ Read `max_rows` lines of content after `skiprows` lines. The default
+ is to read all the lines.
+
+ .. versionadded:: 1.16.0
Returns
-------
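A short usage sketch of the new parameter (data values invented for illustration):

    from io import StringIO
    import numpy as np

    f = StringIO(u"# header\n1 2\n3 4\n5 6\n")
    a = np.loadtxt(f, skiprows=1, max_rows=2)  # skip one raw line, then parse two
    # array([[1., 2.],
    #        [3., 4.]])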
@@ -944,7 +949,8 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
fencoding = locale.getpreferredencoding()
# not to be confused with the flatten_dtype we import...
- def flatten_dtype_internal(dt):
+ @recursive
+ def flatten_dtype_internal(self, dt):
"""Unpack a structured data-type, and produce re-packing info."""
if dt.names is None:
# If the dtype is flattened, return.
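@recursive comes from the numpy.core._internal import added at the top of this diff. The idea is that the nested function recurses through an explicit self argument rather than through its own name, which would otherwise capture the function in its own closure cell and create a reference cycle (see the comment removed at the end of this diff, gh-10620). A minimal sketch of a decorator with the calling convention used here, assuming it matches the real one:

    class recursive(object):
        """Call the wrapped func with the wrapper prepended as ``self``."""
        def __init__(self, func):
            self.func = func

        def __call__(self, *args, **kwargs):
            return self.func(self, *args, **kwargs)

With this shape, flatten_dtype_internal(dt) runs func(wrapper, dt), and the body recurses via self(tp) without the closure ever referring to itself.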
@@ -964,7 +970,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
packing = []
for field in dt.names:
tp, bytes = dt.fields[field]
- flat_dt, flat_packing = flatten_dtype_internal(tp)
+ flat_dt, flat_packing = self(tp)
types.extend(flat_dt)
# Avoid extra nesting for subarrays
if tp.ndim > 0:
@@ -973,7 +979,8 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
packing.append((len(flat_dt), flat_packing))
return (types, packing)
- def pack_items(items, packing):
+ @recursive
+ def pack_items(self, items, packing):
"""Pack items into nested lists based on re-packing info."""
if packing is None:
return items[0]
@@ -985,7 +992,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
start = 0
ret = []
for length, subpacking in packing:
- ret.append(pack_items(items[start:start+length], subpacking))
+ ret.append(self(items[start:start+length], subpacking))
start += length
return tuple(ret)
@@ -1014,7 +1021,9 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
"""
X = []
- for i, line in enumerate(itertools.chain([first_line], fh)):
+ line_iter = itertools.chain([first_line], fh)
+ line_iter = itertools.islice(line_iter, max_rows)
+ for i, line in enumerate(line_iter):
vals = split_line(line)
if len(vals) == 0:
continue
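itertools.islice treats a stop value of None as "no limit", so max_rows=None preserves the old read-everything behaviour with no extra branch:

    import itertools

    list(itertools.islice([1, 2, 3], None))   # [1, 2, 3] -- unbounded
    list(itertools.islice([1, 2, 3], 2))      # [1, 2]    -- capped at two items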
@@ -1111,11 +1120,6 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
finally:
if fown:
fh.close()
- # recursive closures have a cyclic reference to themselves, which
- # requires gc to collect (gh-10620). To avoid this problem, for
- # performance and PyPy friendliness, we break the cycle:
- flatten_dtype_internal = None
- pack_items = None
if X is None:
X = np.array([], dtype)
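With both helpers now recursing through self, the manual flatten_dtype_internal = None / pack_items = None cycle-breaking above becomes unnecessary. For reference, the cycle the old code defended against is easy to exhibit in a standalone sketch (not code from this file):

    def make():
        def fact(n):
            # ``fact`` resolves through fact's own closure cell, so the
            # function object is part of a reference cycle until gc runs
            return 1 if n <= 1 else n * fact(n - 1)
        return fact

    f = make()
    assert f.__closure__[0].cell_contents is f   # the cycle, made explicit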