summaryrefslogtreecommitdiff
path: root/numpy/lib/format.py
diff options
context:
space:
mode:
authorErik M. Bray <erik.bray@lri.fr>2016-05-06 13:21:53 +0200
committerErik M. Bray <erik.bray@lri.fr>2016-05-31 16:33:30 +0200
commitee02cdda6c8d135098baa1d5afe41fd4996d587c (patch)
treecf5c999ae5517ea225831e12602c8448b6476453 /numpy/lib/format.py
parentdb7c0b74d3d85521769042ff91309baeb57aae0d (diff)
downloadnumpy-ee02cdda6c8d135098baa1d5afe41fd4996d587c.tar.gz
BUG: Fixes to reading and writing of empty arrays, and in particular arrays with empty dtypes. See #6430
Diffstat (limited to 'numpy/lib/format.py')
-rw-r--r--numpy/lib/format.py31
1 files changed, 20 insertions, 11 deletions
diff --git a/numpy/lib/format.py b/numpy/lib/format.py
index cfe0e62ac..e62677bd8 100644
--- a/numpy/lib/format.py
+++ b/numpy/lib/format.py
@@ -558,8 +558,11 @@ def write_array(fp, array, version=None, allow_pickle=True, pickle_kwargs=None):
warnings.warn("Stored array in format 2.0. It can only be"
"read by NumPy >= 1.9", UserWarning)
- # Set buffer size to 16 MiB to hide the Python loop overhead.
- buffersize = max(16 * 1024 ** 2 // array.itemsize, 1)
+ if array.itemsize == 0:
+ buffersize = 0
+ else:
+ # Set buffer size to 16 MiB to hide the Python loop overhead.
+ buffersize = max(16 * 1024 ** 2 // array.itemsize, 1)
if array.dtype.hasobject:
# We contain Python objects so we cannot write out the data
@@ -655,15 +658,21 @@ def read_array(fp, allow_pickle=True, pickle_kwargs=None):
# of the read. In non-chunked case count < max_read_count, so
# only one read is performed.
- max_read_count = BUFFER_SIZE // min(BUFFER_SIZE, dtype.itemsize)
-
- array = numpy.empty(count, dtype=dtype)
- for i in range(0, count, max_read_count):
- read_count = min(max_read_count, count - i)
- read_size = int(read_count * dtype.itemsize)
- data = _read_bytes(fp, read_size, "array data")
- array[i:i+read_count] = numpy.frombuffer(data, dtype=dtype,
- count=read_count)
+ # Use np.ndarray instead of np.empty since the latter does
+ # not correctly instantiate zero-width string dtypes; see
+ # https://github.com/numpy/numpy/pull/6430
+ array = numpy.ndarray(count, dtype=dtype)
+
+ if dtype.itemsize > 0:
+ # If dtype.itemsize == 0 then there's nothing more to read
+ max_read_count = BUFFER_SIZE // min(BUFFER_SIZE, dtype.itemsize)
+
+ for i in range(0, count, max_read_count):
+ read_count = min(max_read_count, count - i)
+ read_size = int(read_count * dtype.itemsize)
+ data = _read_bytes(fp, read_size, "array data")
+ array[i:i+read_count] = numpy.frombuffer(data, dtype=dtype,
+ count=read_count)
if fortran_order:
array.shape = shape[::-1]