diff options
-rw-r--r-- | numpy/lib/format.py | 11 |
-rw-r--r-- | numpy/lib/tests/test_format.py | 27 |
2 files changed, 32 insertions, 6 deletions
diff --git a/numpy/lib/format.py b/numpy/lib/format.py
index 625768b62..3cebdd173 100644
--- a/numpy/lib/format.py
+++ b/numpy/lib/format.py
@@ -177,6 +177,8 @@ MAGIC_PREFIX = b'\x93NUMPY'
 MAGIC_LEN = len(MAGIC_PREFIX) + 2
 ARRAY_ALIGN = 64 # plausible values are powers of 2 between 16 and 4096
 BUFFER_SIZE = 2**18 # size of buffer for reading npz files in bytes
+# allow growth within the address space of a 64 bit machine along one axis
+GROWTH_AXIS_MAX_DIGITS = 21 # = len(str(8*2**64-1)) hypothetical int1 dtype
 
 # difference between version 1.0 and 2.0 is a 4 byte (I) header length
 # instead of 2 bytes (H) allowing storage of large structured arrays
@@ -431,6 +433,15 @@ def _write_array_header(fp, d, version=None):
         header.append("'%s': %s, " % (key, repr(value)))
     header.append("}")
     header = "".join(header)
+
+    # Add some spare space so that the array header can be modified in-place
+    # when changing the array size, e.g. when growing it by appending data at
+    # the end.
+    shape = d['shape']
+    header += " " * ((GROWTH_AXIS_MAX_DIGITS - len(repr(
+        shape[-1 if d['fortran_order'] else 0]
+    ))) if len(shape) > 0 else 0)
+
     if version is None:
         header = _wrap_header_guess_version(header)
     else:
diff --git a/numpy/lib/tests/test_format.py b/numpy/lib/tests/test_format.py
index 581d067de..b8964f231 100644
--- a/numpy/lib/tests/test_format.py
+++ b/numpy/lib/tests/test_format.py
@@ -791,11 +791,11 @@ def test_bad_magic_args():
 
 def test_large_header():
     s = BytesIO()
-    d = {'a': 1, 'b': 2}
+    d = {'shape': tuple(), 'fortran_order': False, 'descr': '<i8'}
     format.write_array_header_1_0(s, d)
 
     s = BytesIO()
-    d = {'a': 1, 'b': 2, 'c': 'x'*256*256}
+    d['descr'] = [('x'*256*256, '<i8')]
     assert_raises(ValueError, format.write_array_header_1_0, s, d)
 
 
@@ -837,10 +837,12 @@ def test_bad_header():
     assert_raises(ValueError, format.read_array_header_1_0, s)
 
     # headers without the exact keys required should fail
-    d = {"shape": (1, 2),
-         "descr": "x"}
-    s = BytesIO()
-    format.write_array_header_1_0(s, d)
+    # d = {"shape": (1, 2),
+    #      "descr": "x"}
+    s = BytesIO(
+        b"\x93NUMPY\x01\x006\x00{'descr': 'x', 'shape': (1, 2), }" +
+        b"                    \n"
+    )
     assert_raises(ValueError, format.read_array_header_1_0, s)
 
     d = {"shape": (1, 2),
@@ -934,6 +936,19 @@ def test_unicode_field_names(tmpdir):
         with assert_warns(UserWarning):
             format.write_array(f, arr, version=None)
 
+def test_header_growth_axis():
+    for is_fortran_array, dtype_space, expected_header_length in [
+        [False, 22, 128], [False, 23, 192], [True, 23, 128], [True, 24, 192]
+    ]:
+        for size in [10**i for i in range(format.GROWTH_AXIS_MAX_DIGITS)]:
+            fp = BytesIO()
+            format.write_array_header_1_0(fp, {
+                'shape': (2, size) if is_fortran_array else (size, 2),
+                'fortran_order': is_fortran_array,
+                'descr': np.dtype([(' '*dtype_space, int)])
+            })
+
+            assert len(fp.getvalue()) == expected_header_length
 
 @pytest.mark.parametrize('dt, fail', [
     (np.dtype({'names': ['a', 'b'], 'formats': [float, np.dtype('S3',