diff options
author | Pauli Virtanen <pav@iki.fi> | 2010-02-20 18:18:18 +0000 |
---|---|---|
committer | Pauli Virtanen <pav@iki.fi> | 2010-02-20 18:18:18 +0000 |
commit | 9c77c439698e34656d21f5e13bdf12210f659735 (patch) | |
tree | 0a73fe08e4c31ddf9fc066c0b95486412915b097 /numpy | |
parent | fe8b7034708ffdf0d2efec268c9852162da56078 (diff) | |
download | numpy-9c77c439698e34656d21f5e13bdf12210f659735.tar.gz |
3K: lib: more str vs bytes issues in the lib/io loadtxt, savetxt and genfromtxt
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/compat/py3k.py | 2 | ||||
-rw-r--r-- | numpy/lib/_iotools.py | 11 | ||||
-rw-r--r-- | numpy/lib/io.py | 60 | ||||
-rw-r--r-- | numpy/lib/tests/test__iotools.py | 27 | ||||
-rw-r--r-- | numpy/lib/tests/test_io.py | 71 |
5 files changed, 104 insertions, 67 deletions
diff --git a/numpy/compat/py3k.py b/numpy/compat/py3k.py index 7af73c3d0..7357bacff 100644 --- a/numpy/compat/py3k.py +++ b/numpy/compat/py3k.py @@ -21,7 +21,7 @@ if sys.version_info[0] >= 3: def asstr(s): if isinstance(s, str): return s - return bytes.decode('latin1') + return s.decode('latin1') def isfileobj(f): return isinstance(f, io.FileIO) strchar = 'U' diff --git a/numpy/lib/_iotools.py b/numpy/lib/_iotools.py index a19852ac6..5eb4c0005 100644 --- a/numpy/lib/_iotools.py +++ b/numpy/lib/_iotools.py @@ -6,7 +6,7 @@ import numpy as np import numpy.core.numeric as nx from __builtin__ import bool, int, long, float, complex, object, unicode, str -from numpy.compat import asbytes, bytes +from numpy.compat import asbytes, bytes, asbytes_nested if sys.version_info[0] >= 3: def _bytes_to_complex(s): @@ -542,6 +542,11 @@ class StringConverter: # def __init__(self, dtype_or_func=None, default=None, missing_values=None, locked=False): + # Convert unicode (for Py3) + if isinstance(missing_values, unicode): + missing_values = asbytes(missing_values) + elif isinstance(missing_values, (list, tuple)): + missing_values = asbytes_nested(missing_values) # Defines a lock for upgrade self._locked = bool(locked) # No input dtype: minimal initialization @@ -566,7 +571,7 @@ class StringConverter: # If we don't have a default, try to guess it or set it to None if default is None: try: - default = self.func('0') + default = self.func(asbytes('0')) except ValueError: default = None ttype = self._getsubdtype(default) @@ -729,7 +734,7 @@ class StringConverter: self.type = self._getsubdtype(default) else: try: - tester = func('1') + tester = func(asbytes('1')) except (TypeError, ValueError): tester = None self.type = self._getsubdtype(tester) diff --git a/numpy/lib/io.py b/numpy/lib/io.py index f57231c56..8233fc7a0 100644 --- a/numpy/lib/io.py +++ b/numpy/lib/io.py @@ -8,7 +8,7 @@ __all__ = ['savetxt', 'loadtxt', import numpy as np import format -import cStringIO +import sys import os import sys import itertools @@ -24,7 +24,13 @@ from _iotools import LineSplitter, NameValidator, StringConverter, \ _is_string_like, has_nested_fields, flatten_dtype, \ easy_dtype, _bytes_to_name -from numpy.compat import asbytes, asstr +from numpy.compat import asbytes, asstr, asbytes_nested + +if sys.version_info[0] >= 3: + import io + BytesIO = io.BytesIO +else: + from cStringIO import StringIO as BytesIO _file = open _string_like = _is_string_like @@ -34,7 +40,7 @@ def seek_gzip_factory(f): import on gzip. """ - import gzip, new + import gzip def seek(self, offset, whence=0): # figure out new position (we can only seek forwards) @@ -58,8 +64,14 @@ def seek_gzip_factory(f): if isinstance(f, str): f = gzip.GzipFile(f) - f.seek = new.instancemethod(seek, f) - f.tell = new.instancemethod(tell, f) + if sys.version_info[0] >= 3: + import types + f.seek = types.MethodType(seek, f) + f.tell = types.MethodType(tell, f) + else: + import new + f.seek = new.instancemethod(seek, f) + f.tell = new.instancemethod(tell, f) return f @@ -180,7 +192,7 @@ class NpzFile(object): if member: bytes = self.zip.read(key) if bytes.startswith(format.MAGIC_PREFIX): - value = cStringIO.StringIO(bytes) + value = BytesIO(bytes) return format.read_array(value) else: return bytes @@ -474,12 +486,14 @@ def _getconv(dtype): return float elif issubclass(typ, np.complex): return complex + elif issubclass(typ, np.bytes_): + return bytes else: return str -def loadtxt(fname, dtype=float, comments=asbytes('#'), delimiter=None, +def loadtxt(fname, dtype=float, comments='#', delimiter=None, converters=None, skiprows=0, usecols=None, unpack=False): """ Load data from a text file. @@ -555,6 +569,11 @@ def loadtxt(fname, dtype=float, comments=asbytes('#'), delimiter=None, array([ 2., 4.]) """ + # Type conversions for Py3 convenience + comments = asbytes(comments) + if delimiter is not None: + delimiter = asbytes(delimiter) + user_converters = converters if usecols is not None: @@ -768,9 +787,9 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n'): """ # Py3 conversions first - if isinstance(format, bytes): - format = asstr(format) - delimiter = asbytes(delimiter) + if isinstance(fmt, bytes): + fmt = asstr(fmt) + delimiter = asstr(delimiter) if _is_string_like(fname): if fname.endswith('.gz'): @@ -877,9 +896,9 @@ def fromregex(file, regexp, dtype): """ if not hasattr(file, "read"): - file = open(file, 'r') + file = open(file, 'rb') if not hasattr(regexp, 'match'): - regexp = re.compile(regexp) + regexp = re.compile(asbytes(regexp)) if not isinstance(dtype, np.dtype): dtype = np.dtype(dtype) @@ -905,9 +924,9 @@ def fromregex(file, regexp, dtype): -def genfromtxt(fname, dtype=float, comments=asbytes('#'), delimiter=None, +def genfromtxt(fname, dtype=float, comments='#', delimiter=None, skiprows=0, skip_header=0, skip_footer=0, converters=None, - missing=asbytes(''), missing_values=None, filling_values=None, + missing='', missing_values=None, filling_values=None, usecols=None, names=None, excludelist=None, deletechars=None, autostrip=False, case_sensitive=True, defaultfmt="f%i", unpack=None, usemask=False, loose=True, invalid_raise=True): @@ -1042,6 +1061,15 @@ def genfromtxt(fname, dtype=float, comments=asbytes('#'), delimiter=None, dtype=[('intvar', '<i8'), ('fltvar', '<f8'), ('strvar', '|S5')]) """ + # Py3 data conversions to bytes, for convenience + comments = asbytes(comments) + if isinstance(delimiter, unicode): + delimiter = asbytes(delimiter) + if isinstance(missing, unicode): + missing = asbytes(missing) + if isinstance(missing_values, (unicode, list, tuple)): + missing_values = asbytes_nested(missing_values) + # if usemask: from numpy.ma import MaskedArray, make_mask_descr @@ -1182,7 +1210,7 @@ def genfromtxt(fname, dtype=float, comments=asbytes('#'), delimiter=None, entry.append(value) # We have a string : apply it to all entries elif isinstance(user_missing_values, basestring): - user_value = user_missing_values.split(",") + user_value = user_missing_values.split(asbytes(",")) for entry in missing_values: entry.extend(user_value) # We have something else: apply it to all entries @@ -1195,7 +1223,7 @@ def genfromtxt(fname, dtype=float, comments=asbytes('#'), delimiter=None, warnings.warn("The use of `missing` is deprecated.\n"\ "Please use `missing_values` instead.", DeprecationWarning) - values = [str(_) for _ in missing.split(",")] + values = [str(_) for _ in missing.split(asbytes(","))] for entry in missing_values: entry.extend(values) diff --git a/numpy/lib/tests/test__iotools.py b/numpy/lib/tests/test__iotools.py index d105cf835..7c45b3527 100644 --- a/numpy/lib/tests/test__iotools.py +++ b/numpy/lib/tests/test__iotools.py @@ -6,6 +6,9 @@ if sys.version_info[0] >= 3: else: from StringIO import StringIO +from datetime import date +import time + import numpy as np from numpy.lib._iotools import LineSplitter, NameValidator, StringConverter,\ has_nested_fields, easy_dtype @@ -130,6 +133,12 @@ class TestNameValidator(TestCase): #------------------------------------------------------------------------------- +def _bytes_to_date(s): + if sys.version_info[0] >= 3: + return date(*time.strptime(s.decode('latin1'), "%Y-%m-%d")[:3]) + else: + return date(*time.strptime(s, "%Y-%m-%d")[:3]) + class TestStringConverter(TestCase): "Test StringConverter" # @@ -168,27 +177,19 @@ class TestStringConverter(TestCase): # def test_upgrademapper(self): "Tests updatemapper" - from datetime import date - import time - if sys.version_info[0] >= 3: - dateparser = lambda s : date(*time.strptime(s.decode('latin1'), - "%Y-%m-%d")[:3]) - else: - dateparser = lambda s : date(*time.strptime(s, "%Y-%m-%d")[:3]) + dateparser = _bytes_to_date StringConverter.upgrade_mapper(dateparser, date(2000,1,1)) convert = StringConverter(dateparser, date(2000, 1, 1)) - test = convert('2001-01-01') + test = convert(asbytes('2001-01-01')) assert_equal(test, date(2001, 01, 01)) - test = convert('2009-01-01') + test = convert(asbytes('2009-01-01')) assert_equal(test, date(2009, 01, 01)) - test = convert('') + test = convert(asbytes('')) assert_equal(test, date(2000, 01, 01)) # def test_string_to_object(self): "Make sure that string-to-object functions are properly recognized" - from datetime import date - import time - conv = StringConverter(lambda s: date(*(time.strptime(s)[:3]))) + conv = StringConverter(_bytes_to_date) assert_equal(conv._mapper[-2][0](0), 0j) assert(hasattr(conv, 'default')) # diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py index dd1bfbad8..2b4d542c7 100644 --- a/numpy/lib/tests/test_io.py +++ b/numpy/lib/tests/test_io.py @@ -15,7 +15,7 @@ from datetime import datetime from numpy.lib._iotools import ConverterError, ConverterLockError, \ ConversionWarning -from numpy.compat import asbytes +from numpy.compat import asbytes, asbytes_nested if sys.version_info[0] >= 3: from io import BytesIO @@ -31,7 +31,10 @@ def strptime(s, fmt=None): from Python >= 2.5. """ - return datetime(*time.strptime(s, fmt)[:3]) + if sys.version_info[0] >= 3: + return datetime(*time.strptime(s.decode('latin1'), fmt)[:3]) + else: + return datetime(*time.strptime(s, fmt)[:3]) class RoundtripTest(object): def roundtrip(self, save_func, *args, **kwargs): @@ -175,7 +178,7 @@ class TestSaveTxt(TestCase): c = StringIO() np.savetxt(c, a, fmt='%d') c.seek(0) - assert_equal(c.readlines(), ['1 2\n', '3 4\n']) + assert_equal(c.readlines(), asbytes_nested(['1 2\n', '3 4\n'])) def test_1D(self): a = np.array([1, 2, 3, 4], int) @@ -190,7 +193,7 @@ class TestSaveTxt(TestCase): c = StringIO() np.savetxt(c, a, fmt='%d') c.seek(0) - assert_equal(c.readlines(), ['1 2\n', '3 4\n']) + assert_equal(c.readlines(), asbytes_nested(['1 2\n', '3 4\n'])) def test_delimiter(self): a = np.array([[1., 2.], [3., 4.]]) @@ -205,34 +208,34 @@ class TestSaveTxt(TestCase): # Sequence of formats np.savetxt(c, a, fmt=['%02d', '%3.1f']) c.seek(0) - assert_equal(c.readlines(), ['01 2.0\n', '03 4.0\n']) + assert_equal(c.readlines(), asbytes_nested(['01 2.0\n', '03 4.0\n'])) # A single multiformat string c = StringIO() np.savetxt(c, a, fmt='%02d : %3.1f') c.seek(0) lines = c.readlines() - assert_equal(lines, ['01 : 2.0\n', '03 : 4.0\n']) + assert_equal(lines, asbytes_nested(['01 : 2.0\n', '03 : 4.0\n'])) # Specify delimiter, should be overiden c = StringIO() np.savetxt(c, a, fmt='%02d : %3.1f', delimiter=',') c.seek(0) lines = c.readlines() - assert_equal(lines, ['01 : 2.0\n', '03 : 4.0\n']) + assert_equal(lines, asbytes_nested(['01 : 2.0\n', '03 : 4.0\n'])) class TestLoadTxt(TestCase): def test_record(self): c = StringIO() - c.write('1 2\n3 4') + c.write(asbytes('1 2\n3 4')) c.seek(0) x = np.loadtxt(c, dtype=[('x', np.int32), ('y', np.int32)]) a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')]) assert_array_equal(x, a) d = StringIO() - d.write('M 64.0 75.0\nF 25.0 60.0') + d.write(asbytes('M 64.0 75.0\nF 25.0 60.0')) d.seek(0) mydescriptor = {'names': ('gender', 'age', 'weight'), 'formats': ('S1', @@ -244,7 +247,7 @@ class TestLoadTxt(TestCase): def test_array(self): c = StringIO() - c.write('1 2\n3 4') + c.write(asbytes('1 2\n3 4')) c.seek(0) x = np.loadtxt(c, dtype=int) @@ -258,14 +261,14 @@ class TestLoadTxt(TestCase): def test_1D(self): c = StringIO() - c.write('1\n2\n3\n4\n') + c.write(asbytes('1\n2\n3\n4\n')) c.seek(0) x = np.loadtxt(c, dtype=int) a = np.array([1, 2, 3, 4], int) assert_array_equal(x, a) c = StringIO() - c.write('1,2,3,4\n') + c.write(asbytes('1,2,3,4\n')) c.seek(0) x = np.loadtxt(c, dtype=int, delimiter=',') a = np.array([1, 2, 3, 4], int) @@ -273,7 +276,7 @@ class TestLoadTxt(TestCase): def test_missing(self): c = StringIO() - c.write('1,2,3,,5\n') + c.write(asbytes('1,2,3,,5\n')) c.seek(0) x = np.loadtxt(c, dtype=int, delimiter=',', \ converters={3:lambda s: int(s or - 999)}) @@ -282,7 +285,7 @@ class TestLoadTxt(TestCase): def test_converters_with_usecols(self): c = StringIO() - c.write('1,2,3,,5\n6,7,8,9,10\n') + c.write(asbytes('1,2,3,,5\n6,7,8,9,10\n')) c.seek(0) x = np.loadtxt(c, dtype=int, delimiter=',', \ converters={3:lambda s: int(s or - 999)}, \ @@ -292,7 +295,7 @@ class TestLoadTxt(TestCase): def test_comments(self): c = StringIO() - c.write('# comment\n1,2,3,5\n') + c.write(asbytes('# comment\n1,2,3,5\n')) c.seek(0) x = np.loadtxt(c, dtype=int, delimiter=',', \ comments='#') @@ -301,7 +304,7 @@ class TestLoadTxt(TestCase): def test_skiprows(self): c = StringIO() - c.write('comment\n1,2,3,5\n') + c.write(asbytes('comment\n1,2,3,5\n')) c.seek(0) x = np.loadtxt(c, dtype=int, delimiter=',', \ skiprows=1) @@ -309,7 +312,7 @@ class TestLoadTxt(TestCase): assert_array_equal(x, a) c = StringIO() - c.write('# comment\n1,2,3,5\n') + c.write(asbytes('# comment\n1,2,3,5\n')) c.seek(0) x = np.loadtxt(c, dtype=int, delimiter=',', \ skiprows=1) @@ -344,12 +347,12 @@ class TestLoadTxt(TestCase): names = ['stid', 'temp'] dtypes = ['S4', 'f8'] arr = np.loadtxt(c, usecols=(0, 2), dtype=zip(names, dtypes)) - assert_equal(arr['stid'], ["JOE", "BOB"]) + assert_equal(arr['stid'], asbytes_nested(["JOE", "BOB"])) assert_equal(arr['temp'], [25.3, 27.9]) def test_fancy_dtype(self): c = StringIO() - c.write('1,2,3.0\n4,5,6.0\n') + c.write(asbytes('1,2,3.0\n4,5,6.0\n')) c.seek(0) dt = np.dtype([('x', int), ('y', [('t', int), ('s', float)])]) x = np.loadtxt(c, dtype=dt, delimiter=',') @@ -371,7 +374,7 @@ class TestLoadTxt(TestCase): def test_unused_converter(self): c = StringIO() - c.writelines(['1 21\n', '3 42\n']) + c.writelines([asbytes('1 21\n'), asbytes('3 42\n')]) c.seek(0) data = np.loadtxt(c, usecols=(1,), converters={0: lambda s: int(s, 16)}) @@ -404,7 +407,7 @@ class TestLoadTxt(TestCase): class Testfromregex(TestCase): def test_record(self): c = StringIO() - c.write('1.312 foo\n1.534 bar\n4.444 qux') + c.write(asbytes('1.312 foo\n1.534 bar\n4.444 qux')) c.seek(0) dt = [('num', np.float64), ('val', 'S3')] @@ -415,7 +418,7 @@ class Testfromregex(TestCase): def test_record_2(self): c = StringIO() - c.write('1312 foo\n1534 bar\n4444 qux') + c.write(asbytes('1312 foo\n1534 bar\n4444 qux')) c.seek(0) dt = [('num', np.int32), ('val', 'S3')] @@ -426,7 +429,7 @@ class Testfromregex(TestCase): def test_record_3(self): c = StringIO() - c.write('1312 foo\n1534 bar\n4444 qux') + c.write(asbytes('1312 foo\n1534 bar\n4444 qux')) c.seek(0) dt = [('num', np.float64)] @@ -521,7 +524,7 @@ class TestFromTxt(TestCase): "Test retrieving a header" data = StringIO('gender age weight\nM 64.0 75.0\nF 25.0 60.0') test = np.ndfromtxt(data, dtype=None, names=True) - control = {'gender': np.array(['M', 'F']), + control = {'gender': np.array(asbytes_nested(['M', 'F'])), 'age': np.array([64.0, 25.0]), 'weight': np.array([75.0, 60.0])} assert_equal(test['gender'], control['gender']) @@ -532,7 +535,7 @@ class TestFromTxt(TestCase): "Test the automatic definition of the output dtype" data = StringIO('A 64 75.0 3+4j True\nBCD 25 60.0 5+6j False') test = np.ndfromtxt(data, dtype=None) - control = [np.array(['A', 'BCD']), + control = [np.array(asbytes_nested(['A', 'BCD'])), np.array([64, 25]), np.array([75.0, 60.0]), np.array([3 + 4j, 5 + 6j]), @@ -649,10 +652,10 @@ M 33 21.99 def test_invalid_converter(self): - strip_rand = lambda x : float(('r' in x.lower() and x.split()[-1]) or - (not 'r' in x.lower() and x.strip() or 0.0)) - strip_per = lambda x : float(('%' in x.lower() and x.split()[0]) or - (not '%' in x.lower() and x.strip() or 0.0)) + strip_rand = lambda x : float((asbytes('r') in x.lower() and x.split()[-1]) or + (not asbytes('r') in x.lower() and x.strip() or 0.0)) + strip_per = lambda x : float((asbytes('%') in x.lower() and x.split()[0]) or + (not asbytes('%') in x.lower() and x.strip() or 0.0)) s = StringIO("D01N01,10/1/2003 ,1 %,R 75,400,600\r\n" \ "L24U05,12/5/2003, 2 %,1,300, 150.5\r\n" "D02N03,10/10/2004,R 1,,7,145.55") @@ -678,10 +681,10 @@ M 33 21.99 "Test using an explicit dtype with an object" from datetime import date import time - data = """ + data = asbytes(""" 1; 2001-01-01 2; 2002-01-31 - """ + """) ndtype = [('idx', int), ('code', np.object)] func = lambda s: strptime(s.strip(), "%Y-%m-%d") converters = {1: func} @@ -775,7 +778,7 @@ M 33 21.99 names = ['stid', 'temp'] dtypes = ['S4', 'f8'] test = np.ndfromtxt(data, usecols=(0, 2), dtype=zip(names, dtypes)) - assert_equal(test['stid'], ["JOE", "BOB"]) + assert_equal(test['stid'], asbytes_nested(["JOE", "BOB"])) assert_equal(test['temp'], [25.3, 27.9]) def test_usecols_with_integer(self): @@ -1153,7 +1156,7 @@ def test_gzip_loadtxt(): # which is then read from by the loadtxt function s = StringIO() g = gzip.GzipFile(fileobj=s, mode='w') - g.write('1 2 3\n') + g.write(asbytes('1 2 3\n')) g.close() s.seek(0) @@ -1169,7 +1172,7 @@ def test_gzip_loadtxt(): def test_gzip_loadtxt_from_string(): s = StringIO() f = gzip.GzipFile(fileobj=s, mode="w") - f.write('1 2 3\n') + f.write(asbytes('1 2 3\n')) f.close() s.seek(0) |