diff options
author | pierregm <pierregm@localhost> | 2009-10-06 03:47:07 +0000 |
---|---|---|
committer | pierregm <pierregm@localhost> | 2009-10-06 03:47:07 +0000 |
commit | 6386708bcd6deaf3f6e5f145fb59c0d4b9af86d9 (patch) | |
tree | c5ef08d00870e076b37298cf37c9bb4ca868d5fb /numpy | |
parent | 9a41e774079340e1a8887d9f2310c2458580ec6c (diff) | |
download | numpy-6386708bcd6deaf3f6e5f145fb59c0d4b9af86d9.tar.gz |
* _iotools.StringConverter
- use '1' instead of '0' to test the update
- add `iterupgrade` to upgrade from an iterator
* io.genfromtxt (bug #1212)
- use `iterupgrade` to upgrade each converter over its whole column; if a converter is locked, reprocess the column value by value to report the offending line
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/lib/_iotools.py | 37 | ||||
-rw-r--r-- | numpy/lib/io.py | 40 | ||||
-rw-r--r-- | numpy/lib/tests/test_io.py | 25 |
3 files changed, 88 insertions, 14 deletions
diff --git a/numpy/lib/_iotools.py b/numpy/lib/_iotools.py index c06275d1b..02385305b 100644 --- a/numpy/lib/_iotools.py +++ b/numpy/lib/_iotools.py @@ -370,6 +370,12 @@ def str2bool(value): raise ValueError("Invalid boolean") +class ConverterError(Exception): + pass + +class ConverterLockError(ConverterError): + pass + class StringConverter: """ @@ -574,16 +580,39 @@ class StringConverter: except ValueError: # Raise an exception if we locked the converter... if self._locked: - raise ValueError("Converter is locked and cannot be upgraded") + errmsg = "Converter is locked and cannot be upgraded" + raise ConverterLockError(errmsg) _statusmax = len(self._mapper) # Complains if we try to upgrade by the maximum if self._status == _statusmax: - raise ValueError("Could not find a valid conversion function") + errmsg = "Could not find a valid conversion function" + raise ConverterError(errmsg) elif self._status < _statusmax - 1: self._status += 1 (self.type, self.func, self.default) = self._mapper[self._status] self.upgrade(value) - # + + def iterupgrade(self, value): + self._checked = True + if not hasattr(value, '__iter__'): + value = (value,) + _strict_call = self._strict_call + try: + map(_strict_call, value) + except ValueError: + # Raise an exception if we locked the converter... + if self._locked: + errmsg = "Converter is locked and cannot be upgraded" + raise ConverterLockError(errmsg) + _statusmax = len(self._mapper) + # Complains if we try to upgrade by the maximum + if self._status == _statusmax: + raise ConverterError("Could not find a valid conversion function") + elif self._status < _statusmax - 1: + self._status += 1 + (self.type, self.func, self.default) = self._mapper[self._status] + self.iterupgrade(value) + def update(self, func, default=None, missing_values='', locked=False): """ Set StringConverter attributes directly. 
@@ -617,7 +646,7 @@ class StringConverter: self.type = self._getsubdtype(default) else: try: - tester = func('0') + tester = func('1') except (TypeError, ValueError): tester = None self.type = self._getsubdtype(tester) diff --git a/numpy/lib/io.py b/numpy/lib/io.py index ba804e43f..255c5a7f5 100644 --- a/numpy/lib/io.py +++ b/numpy/lib/io.py @@ -12,12 +12,14 @@ import cStringIO import os import itertools import warnings +from operator import itemgetter from cPickle import load as _cload, loads from _datasource import DataSource from _compiled_base import packbits, unpackbits from _iotools import LineSplitter, NameValidator, StringConverter, \ + ConverterError, ConverterLockError, \ _is_string_like, has_nested_fields, flatten_dtype _file = file @@ -1176,16 +1178,29 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, skiprows=0, elif nbvalues != nbcols: append_to_invalid((i, nbvalues)) continue - # Check whether we need to update the converter - if dtype is None: - for (converter, item) in zip(converters, values): - converter.upgrade(item) # Store the values append_to_rows(tuple(values)) if usemask: append_to_masks(tuple([val.strip() in mss for (val, mss) in zip(values, missing)])) + # Upgrade the converters (if needed) + if dtype is None: + for (i, converter) in enumerate(converters): + current_column = map(itemgetter(i), rows) + try: + converter.iterupgrade(current_column) + except ConverterLockError: + errmsg = "Converter #%i is locked and cannot be upgraded: " % i + current_column = itertools.imap(itemgetter(i), rows) + for (j, value) in enumerate(current_column): + try: + converter.upgrade(value) + except (ConverterError, ValueError): + errmsg += "(occurred line #%i for value '%s')" + errmsg %= (j + 1 + skiprows, value) + raise ConverterError(errmsg) + # Check that we don't have invalid values if len(invalid) > 0: # Construct the error message @@ -1202,14 +1217,19 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, skiprows=0, # 
Convert each value according to the converter: # We want to modify the list in place to avoid creating a new one... +# if loose: +# conversionfuncs = [conv._loose_call for conv in converters] +# else: +# conversionfuncs = [conv._strict_call for conv in converters] +# for (i, vals) in enumerate(rows): +# rows[i] = tuple([convert(val) +# for (convert, val) in zip(conversionfuncs, vals)]) if loose: - conversionfuncs = [conv._loose_call for conv in converters] + rows = zip(*(map(converter._loose_call, map(itemgetter(i), rows)) + for (i, converter) in enumerate(converters))) else: - conversionfuncs = [conv._strict_call for conv in converters] - for (i, vals) in enumerate(rows): - rows[i] = tuple([convert(val) - for (convert, val) in zip(conversionfuncs, vals)]) - + rows = zip(*(map(converter._strict_call, map(itemgetter(i), rows)) + for (i, converter) in enumerate(converters))) # Reset the dtype data = rows if dtype is None: diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py index ecf87980b..bc05ed4d3 100644 --- a/numpy/lib/tests/test_io.py +++ b/numpy/lib/tests/test_io.py @@ -11,6 +11,8 @@ from tempfile import mkstemp, NamedTemporaryFile import sys, time from datetime import datetime +from numpy.lib._iotools import ConverterError, ConverterLockError + MAJVER, MINVER = sys.version_info[:2] @@ -624,6 +626,19 @@ M 33 21.99 assert_equal(test, [33, 66]) + def test_invalid_converter(self): + strip_rand = lambda x : float(('r' in x.lower() and x.split()[-1]) or + (not 'r' in x.lower() and x.strip() or 0.0)) + strip_per = lambda x : float(('%' in x.lower() and x.split()[0]) or + (not '%' in x.lower() and x.strip() or 0.0)) + s = StringIO.StringIO("D01N01,10/1/2003 ,1 %,R 75,400,600\r\n" \ + "L24U05,12/5/2003, 2 %,1,300, 150.5\r\n" + "D02N03,10/10/2004,R 1,,7,145.55") + kwargs = dict(converters={2 : strip_per, 3 : strip_rand}, delimiter=",", + dtype=None) + assert_raises(ConverterError, np.genfromtxt, s, **kwargs) + + def test_dtype_with_converters(self): dstr 
= "2009; 23; 46" test = np.ndfromtxt(StringIO.StringIO(dstr,), @@ -863,6 +878,16 @@ M 33 21.99 assert_equal(mtest, control) + def test_inconsistent_dtype(self): + data = ["1, 1, 1, 1, -1.1"] * 50 + mdata = StringIO.StringIO("\n".join(data)) + + converters = {4: lambda x:np.sqrt(float(x))} + kwargs = dict(delimiter=",", converters=converters, + dtype=[(_, int) for _ in 'abcde'],) + assert_raises(TypeError, np.genfromtxt, mdata, **kwargs) + + def test_recfromtxt(self): # data = StringIO.StringIO('A,B\n0,1\n2,3') |