From b87fca27261f79be20ab06a222ed2330d60d9f2c Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sat, 25 Mar 2017 10:11:43 +0000 Subject: MAINT: Remove asbytes where a b prefix would suffice Since we only need to support python 2, we can remove any case where we just pass a single string literal and use the b prefix instead. What we can't do is transform asbytes("tests %d" % num), because %-formatting fails on bytes in python 3.x < 3.5. --- numpy/lib/format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'numpy/lib/format.py') diff --git a/numpy/lib/format.py b/numpy/lib/format.py index 633aee675..14dec01d5 100644 --- a/numpy/lib/format.py +++ b/numpy/lib/format.py @@ -161,7 +161,7 @@ if sys.version_info[0] >= 3: else: import cPickle as pickle -MAGIC_PREFIX = asbytes('\x93NUMPY') +MAGIC_PREFIX = b'\x93NUMPY' MAGIC_LEN = len(MAGIC_PREFIX) + 2 BUFFER_SIZE = 2**18 # size of buffer for reading npz files in bytes -- cgit v1.2.1 From 03f3789efe4da2c56d2841ed027ef6735ca2f11b Mon Sep 17 00:00:00 2001 From: John Zwinck Date: Thu, 14 Sep 2017 00:16:23 +0800 Subject: ENH: Align data in np.save() at 64 bytes (#9025) Previously, saving format version 1 would align to 16 bytes, and saving version 2 would align improperly (bug #8085). Alignment is now always at least 64 bytes in either version, which supports memory mapping of the saved files on Linux, where mmap() offset must be a multiple of the page size. Why 64 bytes? Simply because we don't know of a case where more is needed. AVX alignment is 32 bytes; AVX-512 is 64. Fixes #8085, closes #8598. --- numpy/lib/format.py | 51 +++++++++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 22 deletions(-) (limited to 'numpy/lib/format.py') diff --git a/numpy/lib/format.py b/numpy/lib/format.py index 14dec01d5..84af2afc8 100644 --- a/numpy/lib/format.py +++ b/numpy/lib/format.py @@ -100,9 +100,9 @@ the header data HEADER_LEN. 
The next HEADER_LEN bytes form the header data describing the array's format. It is an ASCII string which contains a Python literal expression of a dictionary. It is terminated by a newline (``\\n``) and padded with -spaces (``\\x20``) to make the total length of -``magic string + 4 + HEADER_LEN`` be evenly divisible by 16 for alignment -purposes. +spaces (``\\x20``) to make the total of +``len(magic string) + 2 + len(length) + HEADER_LEN`` be evenly divisible +by 64 for alignment purposes. The dictionary contains three keys: @@ -163,6 +163,7 @@ else: MAGIC_PREFIX = b'\x93NUMPY' MAGIC_LEN = len(MAGIC_PREFIX) + 2 +ARRAY_ALIGN = 64 # plausible values are powers of 2 between 16 and 4096 BUFFER_SIZE = 2**18 # size of buffer for reading npz files in bytes # difference between version 1.0 and 2.0 is a 4 byte (I) header length @@ -304,27 +305,33 @@ def _write_array_header(fp, d, version=None): header.append("'%s': %s, " % (key, repr(value))) header.append("}") header = "".join(header) - # Pad the header with spaces and a final newline such that the magic - # string, the header-length short and the header are aligned on a - # 16-byte boundary. Hopefully, some system, possibly memory-mapping, - # can take advantage of our premature optimization. 
- current_header_len = MAGIC_LEN + 2 + len(header) + 1 # 1 for the newline - topad = 16 - (current_header_len % 16) - header = header + ' '*topad + '\n' header = asbytes(_filter_header(header)) - hlen = len(header) - if hlen < 256*256 and version in (None, (1, 0)): + hlen = len(header) + 1 # 1 for newline + padlen_v1 = ARRAY_ALIGN - ((MAGIC_LEN + struct.calcsize(' Date: Mon, 5 Feb 2018 00:42:33 +0100 Subject: python 2.7.5 bugfix --- numpy/lib/format.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'numpy/lib/format.py') diff --git a/numpy/lib/format.py b/numpy/lib/format.py index 84af2afc8..89a8cb42f 100644 --- a/numpy/lib/format.py +++ b/numpy/lib/format.py @@ -454,7 +454,9 @@ def _filter_header(s): tokens = [] last_token_was_number = False - for token in tokenize.generate_tokens(StringIO(asstr(s)).read): + # adding newline as python 2.7.5 workaround + s += "\n" + for token in tokenize.generate_tokens(StringIO(asstr(s)).readline): token_type = token[0] token_string = token[1] if (last_token_was_number and @@ -464,7 +466,8 @@ def _filter_header(s): else: tokens.append(token) last_token_was_number = (token_type == tokenize.NUMBER) - return tokenize.untokenize(tokens) + # removing newline (see above) as python 2.7.5 workaround + return tokenize.untokenize(tokens)[:-1] def _read_array_header(fp, version): -- cgit v1.2.1 From 8d5bdd145f622aa948026259b1a9e362d44306b2 Mon Sep 17 00:00:00 2001 From: Dennis Weyland Date: Mon, 5 Feb 2018 03:29:18 +0100 Subject: minor fix for python3 compatibility --- numpy/lib/format.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'numpy/lib/format.py') diff --git a/numpy/lib/format.py b/numpy/lib/format.py index 89a8cb42f..363bb2101 100644 --- a/numpy/lib/format.py +++ b/numpy/lib/format.py @@ -455,8 +455,8 @@ def _filter_header(s): tokens = [] last_token_was_number = False # adding newline as python 2.7.5 workaround - s += "\n" - for token in 
tokenize.generate_tokens(StringIO(asstr(s)).readline): + string = asstr(s) + "\n" + for token in tokenize.generate_tokens(StringIO(string).readline): token_type = token[0] token_string = token[1] if (last_token_was_number and -- cgit v1.2.1 From 85282a5dac927ed731655e0a58fb67d2483f18b9 Mon Sep 17 00:00:00 2001 From: mattip Date: Sun, 13 May 2018 11:09:05 +0300 Subject: DOC: link to updated module docstring, not NEP --- numpy/lib/format.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'numpy/lib/format.py') diff --git a/numpy/lib/format.py b/numpy/lib/format.py index 363bb2101..afa154cc5 100644 --- a/numpy/lib/format.py +++ b/numpy/lib/format.py @@ -1,5 +1,8 @@ """ -Define a simple format for saving numpy arrays to disk with the full +Binary Serialization +==================== + +A simple format for saving numpy arrays to disk with the full information about them. The ``.npy`` format is the standard binary file format in NumPy for @@ -143,8 +146,10 @@ data HEADER_LEN." Notes ----- -The ``.npy`` format, including reasons for creating it and a comparison of -alternatives, is described fully in the "npy-format" NEP. +The ``.npy`` format, including motivation for creating it and a comparison of +alternatives, is described in the `"npy-format" NEP +<http://www.numpy.org/neps/nep-0001-npy-format.html>`_, however details have +evolved with time and this document is more current. """ from __future__ import division, absolute_import, print_function -- cgit v1.2.1 From c759466acbcb2c8ce6cce0ae971ba4ada8055a7a Mon Sep 17 00:00:00 2001 From: mattip Date: Sun, 13 May 2018 20:28:27 +0300 Subject: DOC: create label and ref, fixes broken link --- numpy/lib/format.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'numpy/lib/format.py') diff --git a/numpy/lib/format.py b/numpy/lib/format.py index afa154cc5..a0ec55f01 100644 --- a/numpy/lib/format.py +++ b/numpy/lib/format.py @@ -1,4 +1,7 @@ """ + +.. 
_binary-serialization: + Binary Serialization ==================== -- cgit v1.2.1 From 7a01f661cef8fe492cbbf5ed1e2474c11ce0527b Mon Sep 17 00:00:00 2001 From: mattip Date: Mon, 14 May 2018 13:46:00 +0300 Subject: DOC: add numpy.lib.format to docs and link to it --- numpy/lib/format.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'numpy/lib/format.py') diff --git a/numpy/lib/format.py b/numpy/lib/format.py index a0ec55f01..23eac7e7d 100644 --- a/numpy/lib/format.py +++ b/numpy/lib/format.py @@ -1,9 +1,8 @@ """ +Binary serialization -.. _binary-serialization: - -Binary Serialization -==================== +NPY format +========== A simple format for saving numpy arrays to disk with the full information about them. -- cgit v1.2.1 From 83828f52b287fefb3d8753a21bd3441997a4d687 Mon Sep 17 00:00:00 2001 From: Mike Toews Date: Sat, 16 Jun 2018 18:18:19 +1200 Subject: HTTP -> HTTPS, and other linkrot fixes --- numpy/lib/format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'numpy/lib/format.py') diff --git a/numpy/lib/format.py b/numpy/lib/format.py index 23eac7e7d..ef5ec57e3 100644 --- a/numpy/lib/format.py +++ b/numpy/lib/format.py @@ -150,7 +150,7 @@ Notes ----- The ``.npy`` format, including motivation for creating it and a comparison of alternatives, is described in the `"npy-format" NEP -<http://www.numpy.org/neps/nep-0001-npy-format.html>`_, however details have +<https://www.numpy.org/neps/nep-0001-npy-format.html>`_, however details have evolved with time and this document is more current. 
""" -- cgit v1.2.1 From 7372f8dcc6af4446e502c0daec3199dace27e863 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 19 Sep 2018 17:07:25 +0200 Subject: MAINT, TST import pickle from numpy.core.numeric All imports of pickle from numpy modules are now done this way: >>> from numpy.core.numeric import pickle Also, some loops on protocol numbers are added over pickle tests that were not caught from #12090 --- numpy/lib/format.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'numpy/lib/format.py') diff --git a/numpy/lib/format.py b/numpy/lib/format.py index ef5ec57e3..e25868236 100644 --- a/numpy/lib/format.py +++ b/numpy/lib/format.py @@ -162,11 +162,8 @@ import io import warnings from numpy.lib.utils import safe_eval from numpy.compat import asbytes, asstr, isfileobj, long, basestring +from numpy.core.numeric import pickle -if sys.version_info[0] >= 3: - import pickle -else: - import cPickle as pickle MAGIC_PREFIX = b'\x93NUMPY' MAGIC_LEN = len(MAGIC_PREFIX) + 2 -- cgit v1.2.1 From cc761fe87b846228c38eb9b1d71d48cc423f53eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Paul=20M=C3=BCller?= Date: Thu, 1 Nov 2018 18:28:28 +0100 Subject: ENH: Improve support for pathlib.Path objects in load functions (#11348) * ENH: Improve support for pathlib.Path objects in more functions --- numpy/lib/format.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'numpy/lib/format.py') diff --git a/numpy/lib/format.py b/numpy/lib/format.py index e25868236..1ef3dca47 100644 --- a/numpy/lib/format.py +++ b/numpy/lib/format.py @@ -161,7 +161,9 @@ import sys import io import warnings from numpy.lib.utils import safe_eval -from numpy.compat import asbytes, asstr, isfileobj, long, basestring +from numpy.compat import ( + asbytes, asstr, isfileobj, long, os_fspath + ) from numpy.core.numeric import pickle @@ -706,7 +708,7 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None, Parameters ---------- - filename : str + filename : str or 
path-like The name of the file on disk. This may *not* be a file-like object. mode : str, optional @@ -747,9 +749,9 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None, memmap """ - if not isinstance(filename, basestring): - raise ValueError("Filename must be a string. Memmap cannot use" - " existing file handles.") + if isfileobj(filename): + raise ValueError("Filename must be a string or a path-like object." + " Memmap cannot use existing file handles.") if 'w' in mode: # We are creating the file, not reading it. @@ -767,7 +769,7 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None, shape=shape, ) # If we got here, then it should be safe to create the file. - fp = open(filename, mode+'b') + fp = open(os_fspath(filename), mode+'b') try: used_ver = _write_array_header(fp, d, version) # this warning can be removed when 1.9 has aged enough @@ -779,7 +781,7 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None, fp.close() else: # Read the header of the file first. 
- fp = open(filename, 'rb') + fp = open(os_fspath(filename), 'rb') try: version = read_magic(fp) _check_version(version) -- cgit v1.2.1 From 1956ada852f950468e028cf108766e089f4575cc Mon Sep 17 00:00:00 2001 From: mattip Date: Fri, 9 Nov 2018 14:28:44 -0800 Subject: BUG: test, fix loading structured dtypes with padding --- numpy/lib/format.py | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) (limited to 'numpy/lib/format.py') diff --git a/numpy/lib/format.py b/numpy/lib/format.py index 1ef3dca47..ad26cf46a 100644 --- a/numpy/lib/format.py +++ b/numpy/lib/format.py @@ -259,6 +259,33 @@ def dtype_to_descr(dtype): else: return dtype.str +def descr_to_dtype(descr): + if isinstance(descr, str): + # descr was produced by dtype.str, so this always works + return numpy.dtype(descr) + + fields = [] + offset = 0 + for field in descr: + if len(field) == 2: + name, descr_str = field + dt = descr_to_dtype(descr_str) + else: + name, descr_str, shape = field + dt = numpy.dtype((descr_to_dtype(descr_str), shape)) + + # ignore padding bytes, which will be void bytes with '' as name + # (once blank fieldnames are deprecated, only "if name == ''" needed) + is_pad = (name == '' and dt.type is numpy.void and dt.names is None) + if not is_pad: + fields.append((name, dt, offset)) + + offset += dt.itemsize + + names, formats, offsets = zip(*fields) + return numpy.dtype({'names': names, 'formats': formats, + 'offsets': offsets, 'itemsize': offset}) + def header_data_from_array_1_0(array): """ Get the dictionary of header metadata from a numpy.ndarray. 
@@ -523,7 +550,8 @@ def _read_array_header(fp, version): msg = "fortran_order is not a valid bool: %r" raise ValueError(msg % (d['fortran_order'],)) try: - dtype = numpy.dtype(d['descr']) + descr = descr_to_dtype(d['descr']) + dtype = numpy.dtype(descr) except TypeError as e: msg = "descr is not a valid dtype descriptor: %r" raise ValueError(msg % (d['descr'],)) -- cgit v1.2.1 From 62e47c34b39aebb9a7a6aa41e9af3b2e119a2d74 Mon Sep 17 00:00:00 2001 From: mattip Date: Tue, 13 Nov 2018 11:15:14 -0800 Subject: BUG: fix for titles, cleanup, fixes from review --- numpy/lib/format.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) (limited to 'numpy/lib/format.py') diff --git a/numpy/lib/format.py b/numpy/lib/format.py index ad26cf46a..e58ffa017 100644 --- a/numpy/lib/format.py +++ b/numpy/lib/format.py @@ -260,8 +260,15 @@ def dtype_to_descr(dtype): return dtype.str def descr_to_dtype(descr): - if isinstance(descr, str): - # descr was produced by dtype.str, so this always works + ''' + descr may be stored as dtype.descr, which is a list of + (name, format, [shape]) tuples. Offsets are not explicitly saved, rather + empty fields with name,format == '', '|Vn' are added as padding. + + This function reverses the process, eliminating the empty padding fields. 
+ ''' + if isinstance(descr, (str, dict)): + # No padding removal needed return numpy.dtype(descr) fields = [] @@ -274,8 +281,8 @@ def descr_to_dtype(descr): name, descr_str, shape = field dt = numpy.dtype((descr_to_dtype(descr_str), shape)) - # ignore padding bytes, which will be void bytes with '' as name - # (once blank fieldnames are deprecated, only "if name == ''" needed) + # Ignore padding bytes, which will be void bytes with '' as name + # Once support for blank names is removed, only "if name == ''" needed) is_pad = (name == '' and dt.type is numpy.void and dt.names is None) if not is_pad: fields.append((name, dt, offset)) @@ -283,7 +290,14 @@ def descr_to_dtype(descr): offset += dt.itemsize names, formats, offsets = zip(*fields) - return numpy.dtype({'names': names, 'formats': formats, + # names may be (title, names) tuples + names = list(names) + titles = [None] * len(names) + for i, n in enumerate(names): + if isinstance(n, tuple): + titles[i] = n[0] + names[i] = n[1] + return numpy.dtype({'names': names, 'formats': formats, 'titles': titles, 'offsets': offsets, 'itemsize': offset}) def header_data_from_array_1_0(array): @@ -550,8 +564,7 @@ def _read_array_header(fp, version): msg = "fortran_order is not a valid bool: %r" raise ValueError(msg % (d['fortran_order'],)) try: - descr = descr_to_dtype(d['descr']) - dtype = numpy.dtype(descr) + dtype = descr_to_dtype(d['descr']) except TypeError as e: msg = "descr is not a valid dtype descriptor: %r" raise ValueError(msg % (d['descr'],)) -- cgit v1.2.1 From a2227556885c3b68a33285ae3ddb4a65d71b4497 Mon Sep 17 00:00:00 2001 From: mattip Date: Wed, 14 Nov 2018 10:42:01 -0800 Subject: MAINT: fix from review --- numpy/lib/format.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'numpy/lib/format.py') diff --git a/numpy/lib/format.py b/numpy/lib/format.py index e58ffa017..10945e5e8 100644 --- a/numpy/lib/format.py +++ b/numpy/lib/format.py @@ -291,12 +291,8 @@ def descr_to_dtype(descr): 
names, formats, offsets = zip(*fields) # names may be (title, names) tuples - names = list(names) - titles = [None] * len(names) - for i, n in enumerate(names): - if isinstance(n, tuple): - titles[i] = n[0] - names[i] = n[1] + nametups = (n if isinstance(n, tuple) else (None, n) for n in names) + titles, names = zip(*nametups) return numpy.dtype({'names': names, 'formats': formats, 'titles': titles, 'offsets': offsets, 'itemsize': offset}) -- cgit v1.2.1 From b6dc039961768bd5f3a3d7f57e8c396f8fa02815 Mon Sep 17 00:00:00 2001 From: Charles Harris Date: Thu, 21 Feb 2019 12:49:33 -0700 Subject: MAINT: Move pickle import to numpy.compat The pickle module was being imported from numpy.core.numeric. It was defined there in order to use pickle5 when available in Python3 and cpickle in Python2. The numpy.compat module seems a better place for that. --- numpy/lib/format.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'numpy/lib/format.py') diff --git a/numpy/lib/format.py b/numpy/lib/format.py index 10945e5e8..7648be615 100644 --- a/numpy/lib/format.py +++ b/numpy/lib/format.py @@ -162,9 +162,8 @@ import io import warnings from numpy.lib.utils import safe_eval from numpy.compat import ( - asbytes, asstr, isfileobj, long, os_fspath + asbytes, asstr, isfileobj, long, os_fspath, pickle ) -from numpy.core.numeric import pickle MAGIC_PREFIX = b'\x93NUMPY' -- cgit v1.2.1 From ae423c3de5e0768df7ff7a4e2a09f17a9531698d Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sat, 2 Mar 2019 14:28:18 -0800 Subject: BUG: Fix errors in string formatting while producing an error `"Invalid version %r" % (1, 2)` would fail with `TypeError: not all arguments converted during string formatting` The `Header is not a dictionary` error had a similar problem. Fixed by changing this entire function to use `.format` in place of `%`, which does not have this gotcha. 
Found using LGTM.com --- numpy/lib/format.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'numpy/lib/format.py') diff --git a/numpy/lib/format.py b/numpy/lib/format.py index 7648be615..4da1022ca 100644 --- a/numpy/lib/format.py +++ b/numpy/lib/format.py @@ -524,7 +524,7 @@ def _read_array_header(fp, version): elif version == (2, 0): hlength_type = '