adjust the optimal IO buffer size for npz files

author: Bartosz Telenczuk <muchatel@poczta.fm> 2013-01-31 15:21:14 +0100
committer: Bartosz Telenczuk <muchatel@poczta.fm> 2013-06-12 15:08:51 +0200
commit: 7c4e9e14c473060595271a856b307bbc04f1c7bb (patch)
tree: 53bf3a37778166b1c3f6b4b16f62a04469e0e85f /numpy/lib
parent: b69c48d34d6b6d9be01f37bd5117e946e2556df8 (diff)
download: numpy-7c4e9e14c473060595271a856b307bbc04f1c7bb.tar.gz
3 files changed, 14 insertions, 15 deletions
diff --git a/numpy/lib/format.py b/numpy/lib/format.py
index de84d2820..ff3b95d6e 100644
--- a/numpy/lib/format.py
+++ b/numpy/lib/format.py
@@ -148,6 +148,7 @@ else:
 
 MAGIC_PREFIX = asbytes('\x93NUMPY')
 MAGIC_LEN = len(MAGIC_PREFIX) + 2
+BUFFER_SIZE = 2 ** 18 #size of buffer for reading npz files in bytes
 
 def magic(major, minor):
     """ Return the magic string for the given file format version.
@@ -457,20 +458,22 @@ def read_array(fp):
         else:
             # This is not a real file. We have to read it the memory-intensive
             # way.
-            # crc32 module fails on reads greater than 2 ** 32 bytes, breaking large reads from gzip streams
-            # Chunk reads to 256mb to avoid issue and reduce memory overhead of the read.
-            # In non-chunked case count < max_read_count, so only one read is performed.
+            # crc32 module fails on reads greater than 2 ** 32 bytes, breaking
+            # large reads from gzip streams. Chunk reads to BUFFER_SIZE bytes to
+            # avoid the issue and reduce memory overhead of the read. In the
+            # non-chunked case count < max_read_count, so only one read is
+            # performed.
 
-            max_buffer_size = 2 ** 28
-            max_read_count = max_buffer_size / dtype.itemsize
+            max_read_count = BUFFER_SIZE // dtype.itemsize
 
             array = numpy.empty(count, dtype=dtype)
 
-            for i in xrange(0, count, max_read_count):
-                read_count = max_read_count if i + max_read_count < count else count - i
+            for i in range(0, count, max_read_count):
+                read_count = min(max_read_count, count - i)
 
                 data = fp.read(int(read_count * dtype.itemsize))
-                array[i:i+read_count] = numpy.frombuffer(data, dtype=dtype, count=read_count)
+                array[i:i+read_count] = numpy.frombuffer(data, dtype=dtype,
+                                                         count=read_count)
 
         if fortran_order:
             array.shape = shape[::-1]
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index d400b4d30..ba26fc26c 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -25,8 +25,6 @@ from numpy.compat import (
         asbytes, asstr, asbytes_nested, bytes, basestring, unicode
         )
 
-from io import BytesIO
-
 if sys.version_info[0] >= 3:
     import pickle
 else:
diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py
index aae95ed86..20fc5b665 100644
--- a/numpy/lib/tests/test_io.py
+++ b/numpy/lib/tests/test_io.py
@@ -43,9 +43,8 @@ class TextIO(BytesIO):
 
 
 MAJVER, MINVER = sys.version_info[:2]
+IS_64BIT = sys.maxsize > 2**32
 
-def is_64bit_platform():
-    return sys.maxsize> 2**32
 
 def strptime(s, fmt=None):
     """This function is available in the datetime module only
@@ -142,11 +141,10 @@ class TestSavezLoad(RoundtripTest, TestCase):
         for n, arr in enumerate(self.arr):
             assert_equal(arr, self.arr_reloaded['arr_%d' % n])
 
-
-    @np.testing.dec.skipif(not is_64bit_platform(), "Works only with 64bit systems")
+    @np.testing.dec.skipif(not IS_64BIT, "Works only with 64bit systems")
     @np.testing.dec.slow
     def test_big_arrays(self):
-        L = 2**31+1
+        L = (1 << 31) + 100000
         tmp = mktemp(suffix='.npz')
         a = np.empty(L, dtype=np.uint8)
         np.savez(tmp, a=a)
author	Bartosz Telenczuk <muchatel@poczta.fm>	2013-01-31 15:21:14 +0100
committer	Bartosz Telenczuk <muchatel@poczta.fm>	2013-06-12 15:08:51 +0200
commit	7c4e9e14c473060595271a856b307bbc04f1c7bb (patch)
tree	53bf3a37778166b1c3f6b4b16f62a04469e0e85f /numpy/lib
parent	b69c48d34d6b6d9be01f37bd5117e946e2556df8 (diff)
download	numpy-7c4e9e14c473060595271a856b307bbc04f1c7bb.tar.gz