summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- numpy/lib/format.py 11
-rw-r--r-- numpy/lib/tests/test_format.py 27
2 files changed, 32 insertions, 6 deletions
diff --git a/numpy/lib/format.py b/numpy/lib/format.py
index 625768b62..3cebdd173 100644
--- a/numpy/lib/format.py
+++ b/numpy/lib/format.py
@@ -177,6 +177,8 @@ MAGIC_PREFIX = b'\x93NUMPY'
MAGIC_LEN = len(MAGIC_PREFIX) + 2
ARRAY_ALIGN = 64 # plausible values are powers of 2 between 16 and 4096
BUFFER_SIZE = 2**18 # size of buffer for reading npz files in bytes
+# allow growth within the address space of a 64 bit machine along one axis
+GROWTH_AXIS_MAX_DIGITS = 21 # = len(str(8*2**64-1)) hypothetical int1 dtype
# difference between version 1.0 and 2.0 is a 4 byte (I) header length
# instead of 2 bytes (H) allowing storage of large structured arrays
@@ -431,6 +433,15 @@ def _write_array_header(fp, d, version=None):
header.append("'%s': %s, " % (key, repr(value)))
header.append("}")
header = "".join(header)
+
+ # Add some spare space so that the array header can be modified in-place
+ # when changing the array size, e.g. when growing it by appending data at
+ # the end.
+ shape = d['shape']
+ header += " " * ((GROWTH_AXIS_MAX_DIGITS - len(repr(
+ shape[-1 if d['fortran_order'] else 0]
+ ))) if len(shape) > 0 else 0)
+
if version is None:
header = _wrap_header_guess_version(header)
else:
diff --git a/numpy/lib/tests/test_format.py b/numpy/lib/tests/test_format.py
index 581d067de..b8964f231 100644
--- a/numpy/lib/tests/test_format.py
+++ b/numpy/lib/tests/test_format.py
@@ -791,11 +791,11 @@ def test_bad_magic_args():
def test_large_header():
s = BytesIO()
- d = {'a': 1, 'b': 2}
+ d = {'shape': tuple(), 'fortran_order': False, 'descr': '<i8'}
format.write_array_header_1_0(s, d)
s = BytesIO()
- d = {'a': 1, 'b': 2, 'c': 'x'*256*256}
+ d['descr'] = [('x'*256*256, '<i8')]
assert_raises(ValueError, format.write_array_header_1_0, s, d)
@@ -837,10 +837,12 @@ def test_bad_header():
assert_raises(ValueError, format.read_array_header_1_0, s)
# headers without the exact keys required should fail
- d = {"shape": (1, 2),
- "descr": "x"}
- s = BytesIO()
- format.write_array_header_1_0(s, d)
+ # d = {"shape": (1, 2),
+ # "descr": "x"}
+ s = BytesIO(
+ b"\x93NUMPY\x01\x006\x00{'descr': 'x', 'shape': (1, 2), }" +
+ b" \n"
+ )
assert_raises(ValueError, format.read_array_header_1_0, s)
d = {"shape": (1, 2),
@@ -934,6 +936,19 @@ def test_unicode_field_names(tmpdir):
with assert_warns(UserWarning):
format.write_array(f, arr, version=None)
+def test_header_growth_axis():
+ for is_fortran_array, dtype_space, expected_header_length in [
+ [False, 22, 128], [False, 23, 192], [True, 23, 128], [True, 24, 192]
+ ]:
+ for size in [10**i for i in range(format.GROWTH_AXIS_MAX_DIGITS)]:
+ fp = BytesIO()
+ format.write_array_header_1_0(fp, {
+ 'shape': (2, size) if is_fortran_array else (size, 2),
+ 'fortran_order': is_fortran_array,
+ 'descr': np.dtype([(' '*dtype_space, int)])
+ })
+
+ assert len(fp.getvalue()) == expected_header_length
@pytest.mark.parametrize('dt, fail', [
(np.dtype({'names': ['a', 'b'], 'formats': [float, np.dtype('S3',