diff options
| author | Bartosz Telenczuk <muchatel@poczta.fm> | 2013-01-31 15:21:14 +0100 |
|---|---|---|
| committer | Bartosz Telenczuk <muchatel@poczta.fm> | 2013-06-12 15:08:51 +0200 |
| commit | 7c4e9e14c473060595271a856b307bbc04f1c7bb (patch) | |
| tree | 53bf3a37778166b1c3f6b4b16f62a04469e0e85f /numpy/lib | |
| parent | b69c48d34d6b6d9be01f37bd5117e946e2556df8 (diff) | |
| download | numpy-7c4e9e14c473060595271a856b307bbc04f1c7bb.tar.gz | |
Adjust the I/O buffer size used when reading npz files (chunk size reduced from 2 ** 28 to 2 ** 18 bytes via the new BUFFER_SIZE constant)
Diffstat (limited to 'numpy/lib')
| -rw-r--r-- | numpy/lib/format.py | 19 | ||||
| -rw-r--r-- | numpy/lib/npyio.py | 2 | ||||
| -rw-r--r-- | numpy/lib/tests/test_io.py | 8 |
3 files changed, 14 insertions, 15 deletions
diff --git a/numpy/lib/format.py b/numpy/lib/format.py index de84d2820..ff3b95d6e 100644 --- a/numpy/lib/format.py +++ b/numpy/lib/format.py @@ -148,6 +148,7 @@ else: MAGIC_PREFIX = asbytes('\x93NUMPY') MAGIC_LEN = len(MAGIC_PREFIX) + 2 +BUFFER_SIZE = 2 ** 18 #size of buffer for reading npz files in bytes def magic(major, minor): """ Return the magic string for the given file format version. @@ -457,20 +458,22 @@ def read_array(fp): else: # This is not a real file. We have to read it the memory-intensive # way. - # crc32 module fails on reads greater than 2 ** 32 bytes, breaking large reads from gzip streams - # Chunk reads to 256mb to avoid issue and reduce memory overhead of the read. - # In non-chunked case count < max_read_count, so only one read is performed. + # crc32 module fails on reads greater than 2 ** 32 bytes, breaking + # large reads from gzip streams. Chunk reads to BUFFER_SIZE bytes to + # avoid issue and reduce memory overhead of the read. In + # non-chunked case count < max_read_count, so only one read is + # performed. 
- max_buffer_size = 2 ** 28 - max_read_count = max_buffer_size / dtype.itemsize + max_read_count = BUFFER_SIZE // dtype.itemsize array = numpy.empty(count, dtype=dtype) - for i in xrange(0, count, max_read_count): - read_count = max_read_count if i + max_read_count < count else count - i + for i in range(0, count, max_read_count): + read_count = min(max_read_count, count - i) data = fp.read(int(read_count * dtype.itemsize)) - array[i:i+read_count] = numpy.frombuffer(data, dtype=dtype, count=read_count) + array[i:i+read_count] = numpy.frombuffer(data, dtype=dtype, + count=read_count) if fortran_order: array.shape = shape[::-1] diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index d400b4d30..ba26fc26c 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -25,8 +25,6 @@ from numpy.compat import ( asbytes, asstr, asbytes_nested, bytes, basestring, unicode ) -from io import BytesIO - if sys.version_info[0] >= 3: import pickle else: diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py index aae95ed86..20fc5b665 100644 --- a/numpy/lib/tests/test_io.py +++ b/numpy/lib/tests/test_io.py @@ -43,9 +43,8 @@ class TextIO(BytesIO): MAJVER, MINVER = sys.version_info[:2] +IS_64BIT = sys.maxsize > 2**32 -def is_64bit_platform(): - return sys.maxsize> 2**32 def strptime(s, fmt=None): """This function is available in the datetime module only @@ -142,11 +141,10 @@ class TestSavezLoad(RoundtripTest, TestCase): for n, arr in enumerate(self.arr): assert_equal(arr, self.arr_reloaded['arr_%d' % n]) - - @np.testing.dec.skipif(not is_64bit_platform(), "Works only with 64bit systems") + @np.testing.dec.skipif(not IS_64BIT, "Works only with 64bit systems") @np.testing.dec.slow def test_big_arrays(self): - L = 2**31+1 + L = (1 << 31) + 100000 tmp = mktemp(suffix='.npz') a = np.empty(L, dtype=np.uint8) np.savez(tmp, a=a) |
