author | Charles Harris <charlesr.harris@gmail.com> | 2014-10-12 13:01:57 -0600
---|---|---
committer | Charles Harris <charlesr.harris@gmail.com> | 2014-10-12 17:40:49 -0600
commit | 8f068b7866a2959392fae2a15c1a5a19ff79bae9 (patch) |
tree | d4a5c17802b5cb8e63fe0546cb93d4ded9343edf /numpy/lib |
parent | 017969b193a135cd3bb63d5704bd11b62449f34e (diff) |
BUG: Make python2 *.npy files readable in python3.
Files generated under Python2 have long integer literals like '1L' in
their headers that break parsing in Python3. The fix is to filter out
the 'L' suffix and let safe_eval take care of the integer type when
converting the string. The fix comes from Nathaniel Smith, with a few
added fixups.
Closes #5170.
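For context, a minimal reproduction of the failure under Python 3. The header string below is illustrative rather than taken from a real file; numpy's safe_eval rejects it for the same reason ast.literal_eval does.

```python
# Minimal reproduction of the bug, assuming Python 3. Python 2's repr()
# writes long literals with an 'L' suffix, which the Python 3 parser
# rejects outright.
import ast

py2_header = "{'descr': '<i8', 'fortran_order': False, 'shape': (1L,), }"
try:
    ast.literal_eval(py2_header)
except SyntaxError as e:
    print("header rejected:", e)  # '1L' is not valid Python 3 syntax
```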
Diffstat (limited to 'numpy/lib')
-rw-r--r-- | numpy/lib/format.py | 42 |
1 file changed, 41 insertions(+), 1 deletion(-)
```diff
diff --git a/numpy/lib/format.py b/numpy/lib/format.py
index 98743b6ad..7c8dfbafa 100644
--- a/numpy/lib/format.py
+++ b/numpy/lib/format.py
@@ -141,7 +141,7 @@ import sys
 import io
 import warnings
 from numpy.lib.utils import safe_eval
-from numpy.compat import asbytes, isfileobj, long, basestring
+from numpy.compat import asbytes, asstr, isfileobj, long, basestring
 
 if sys.version_info[0] >= 3:
     import pickle
@@ -410,6 +410,45 @@ def read_array_header_2_0(fp):
 
     """
     _read_array_header(fp, version=(2, 0))
 
+
+def _filter_header(s):
+    """Clean up 'L' in npz header ints.
+
+    Cleans up the 'L' in strings representing integers. Needed to allow npz
+    headers produced in Python2 to be read in Python3.
+
+    Parameters
+    ----------
+    s : byte string
+        Npy file header.
+
+    Returns
+    -------
+    header : str
+        Cleaned up header.
+
+    """
+    import tokenize
+    if sys.version_info[0] >= 3:
+        from io import StringIO
+    else:
+        from StringIO import StringIO
+
+    tokens = []
+    last_token_was_number = False
+    for token in tokenize.generate_tokens(StringIO(asstr(s)).read):
+        token_type = token[0]
+        token_string = token[1]
+        if (last_token_was_number and
+                token_type == tokenize.NAME and
+                token_string == "L"):
+            continue
+        else:
+            tokens.append(token)
+        last_token_was_number = (token_type == tokenize.NUMBER)
+    return tokenize.untokenize(tokens)
+
+
 def _read_array_header(fp, version):
     """
     see read_array_header_1_0
@@ -434,6 +473,7 @@ def _read_array_header(fp, version):
         #   "shape" : tuple of int
         #   "fortran_order" : bool
         #   "descr" : dtype.descr
+        header = _filter_header(header)
         try:
             d = safe_eval(header)
         except SyntaxError as e:
```
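As a sketch of the patched round trip: _filter_header is a private helper, so calling it directly is for demonstration only, and the byte-string header below is made up rather than read from a real file.

```python
# Demonstration sketch, assuming the patched numpy/lib/format.py above.
# _filter_header tokenizes the header, drops any NAME token 'L' that
# immediately follows a NUMBER token, and re-assembles the string.
from numpy.lib.format import _filter_header
from numpy.lib.utils import safe_eval

# Header bytes as a Python 2 numpy would have written them.
py2_header = b"{'descr': '<i8', 'fortran_order': False, 'shape': (3L, 4L), }"

cleaned = _filter_header(py2_header)  # 'L' suffixes stripped
d = safe_eval(cleaned)                # now parses under Python 3
print(d['shape'])                     # -> (3, 4)
```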