summaryrefslogtreecommitdiff
path: root/numpy/lib/format.py
diff options
context:
space:
mode:
author: Charles Harris <charlesr.harris@gmail.com> 2013-06-12 07:41:37 -0700
committer: Charles Harris <charlesr.harris@gmail.com> 2013-06-12 07:41:37 -0700
commit: f7ea47401ebd4cee5853c62ba3c849c9fdeaea46 (patch)
tree: 53bf3a37778166b1c3f6b4b16f62a04469e0e85f /numpy/lib/format.py
parent: d4b4ff038d536500e4bfd16f164d88a1a99f5ac3 (diff)
parent: 7c4e9e14c473060595271a856b307bbc04f1c7bb (diff)
download: numpy-f7ea47401ebd4cee5853c62ba3c849c9fdeaea46.tar.gz
Merge pull request #2942 from btel/npz_optimize
BUG: Fix loading npz files >2GB on 64bit systems
Diffstat (limited to 'numpy/lib/format.py')
-rw-r--r-- numpy/lib/format.py 20
1 files changed, 17 insertions, 3 deletions
diff --git a/numpy/lib/format.py b/numpy/lib/format.py
index 81e8cd010..ff3b95d6e 100644
--- a/numpy/lib/format.py
+++ b/numpy/lib/format.py
@@ -148,6 +148,7 @@ else:
MAGIC_PREFIX = asbytes('\x93NUMPY')
MAGIC_LEN = len(MAGIC_PREFIX) + 2
+BUFFER_SIZE = 2 ** 18 # size in bytes of the buffer used when reading npz files
def magic(major, minor):
""" Return the magic string for the given file format version.
@@ -457,9 +458,22 @@ def read_array(fp):
else:
# This is not a real file. We have to read it the memory-intensive
# way.
- # XXX: we can probably chunk this to avoid the memory hit.
- data = fp.read(int(count * dtype.itemsize))
- array = numpy.fromstring(data, dtype=dtype, count=count)
+ # crc32 module fails on reads greater than 2 ** 32 bytes, breaking
+ # large reads from gzip streams. Chunk reads to BUFFER_SIZE bytes to
+ # avoid the issue and reduce memory overhead of the read. In the
+ # non-chunked case count <= max_read_count, so only one read is
+ # performed.
+
+ max_read_count = BUFFER_SIZE // dtype.itemsize
+
+ array = numpy.empty(count, dtype=dtype)
+
+ for i in range(0, count, max_read_count):
+ read_count = min(max_read_count, count - i)
+
+ data = fp.read(int(read_count * dtype.itemsize))
+ array[i:i+read_count] = numpy.frombuffer(data, dtype=dtype,
+ count=read_count)
if fortran_order:
array.shape = shape[::-1]