diff options
| author | Gavin Wahl <gwahl@fusionbox.com> | 2014-06-24 15:22:12 -0600 |
|---|---|---|
| committer | Gavin Wahl <gwahl@fusionbox.com> | 2014-06-24 15:22:12 -0600 |
| commit | 7a2842a8af28eeb1be9fa25da008b2070a99c8ed (patch) | |
| tree | db71526abe4c7dcf2d7beb9393d80ced837f8185 /tablib/packages | |
| parent | 7acaa8460dc69f172dee4db6e0492af1db86492e (diff) | |
| download | tablib-7a2842a8af28eeb1be9fa25da008b2070a99c8ed.tar.gz | |
Update the vendored unicodecsv to fix None handling
The old version of unicodecsv incorrectly (according to
https://docs.python.org/2/library/csv.html#csv.writer) encoded None
values as the string 'None', instead of the empty string '' as the Python
documentation specifies.
The newest version of unicodecsv has fixed this.
Fixes #121
Diffstat (limited to 'tablib/packages')
| -rw-r--r-- | tablib/packages/unicodecsv/__init__.py | 194 |
1 files changed, 143 insertions, 51 deletions
diff --git a/tablib/packages/unicodecsv/__init__.py b/tablib/packages/unicodecsv/__init__.py index e640987..6a20118 100644 --- a/tablib/packages/unicodecsv/__init__.py +++ b/tablib/packages/unicodecsv/__init__.py @@ -1,22 +1,65 @@ # -*- coding: utf-8 -*- import csv -from csv import * +try: + from itertools import izip +except ImportError: + izip = zip #http://semver.org/ -VERSION = (0, 8, 0) +VERSION = (0, 10, 1) __version__ = ".".join(map(str,VERSION)) -def _stringify(s, encoding): - if type(s)==unicode: - return s.encode(encoding) +pass_throughs = [ + 'register_dialect', + 'unregister_dialect', + 'get_dialect', + 'list_dialects', + 'field_size_limit', + 'Dialect', + 'excel', + 'excel_tab', + 'Sniffer', + 'QUOTE_ALL', + 'QUOTE_MINIMAL', + 'QUOTE_NONNUMERIC', + 'QUOTE_NONE', + 'Error' +] +__all__ = [ + 'reader', + 'writer', + 'DictReader', + 'DictWriter', +] + pass_throughs + +for prop in pass_throughs: + globals()[prop]=getattr(csv, prop) + +def _stringify(s, encoding, errors): + if s is None: + return '' + if isinstance(s, unicode): + return s.encode(encoding, errors) elif isinstance(s, (int , float)): pass #let csv.QUOTE_NONNUMERIC do its thing. 
- elif type(s) != str: + elif not isinstance(s, str): s=str(s) return s -def _stringify_list(l, encoding): - return [_stringify(s, encoding) for s in l] +def _stringify_list(l, encoding, errors='strict'): + try: + return [_stringify(s, encoding, errors) for s in iter(l)] + except TypeError as e: + raise csv.Error(str(e)) + +def _unicodify(s, encoding): + if s is None: + return None + if isinstance(s, (unicode, int, float)): + return s + elif isinstance(s, str): + return s.decode(encoding) + return s class UnicodeWriter(object): """ @@ -28,78 +71,127 @@ class UnicodeWriter(object): >>> f.seek(0) >>> r = unicodecsv.reader(f, encoding='utf-8') >>> row = r.next() - >>> print row[0], row[1] - é ñ + >>> row[0] == u'é' + True + >>> row[1] == u'ñ' + True """ - def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): - self.writer = csv.writer(f) - self.dialect = dialect + def __init__(self, f, dialect=csv.excel, encoding='utf-8', errors='strict', + *args, **kwds): self.encoding = encoding - self.writer = csv.writer(f, dialect=dialect, **kwds) + self.writer = csv.writer(f, dialect, *args, **kwds) + self.encoding_errors = errors def writerow(self, row): - self.writer.writerow(_stringify_list(row, self.encoding)) + self.writer.writerow(_stringify_list(row, self.encoding, self.encoding_errors)) def writerows(self, rows): for row in rows: self.writerow(row) + + @property + def dialect(self): + return self.writer.dialect writer = UnicodeWriter class UnicodeReader(object): - def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): - self.reader = csv.reader(f, dialect=dialect, **kwds) + def __init__(self, f, dialect=None, encoding='utf-8', errors='strict', + **kwds): + format_params = ['delimiter', 'doublequote', 'escapechar', 'lineterminator', 'quotechar', 'quoting', 'skipinitialspace'] + if dialect is None: + if not any([kwd_name in format_params for kwd_name in kwds.keys()]): + dialect = csv.excel + self.reader = csv.reader(f, dialect, **kwds) 
self.encoding = encoding + self.encoding_errors = errors def next(self): row = self.reader.next() - return [unicode(s, self.encoding) for s in row] + encoding = self.encoding + encoding_errors = self.encoding_errors + float_ = float + unicode_ = unicode + return [(value if isinstance(value, float_) else + unicode_(value, encoding, encoding_errors)) for value in row] def __iter__(self): return self + + @property + def dialect(self): + return self.reader.dialect + + @property + def line_num(self): + return self.reader.line_num reader = UnicodeReader class DictWriter(csv.DictWriter): """ >>> from cStringIO import StringIO >>> f = StringIO() - >>> w = DictWriter(f, ['a', 'b'], restval=u'î') - >>> w.writerow({'a':'1'}) - >>> w.writerow({'a':'1', 'b':u'ø'}) - >>> w.writerow({'a':u'é'}) + >>> w = DictWriter(f, ['a', u'ñ', 'b'], restval=u'î') + >>> w.writerow({'a':'1', u'ñ':'2'}) + >>> w.writerow({'a':'1', u'ñ':'2', 'b':u'ø'}) + >>> w.writerow({'a':u'é', u'ñ':'2'}) >>> f.seek(0) - >>> r = DictReader(f, fieldnames=['a'], restkey='r') - >>> r.next() == {'a':u'1', 'r':[u"î"]} + >>> r = DictReader(f, fieldnames=['a', u'ñ'], restkey='r') + >>> r.next() == {'a': u'1', u'ñ':'2', 'r': [u'î']} True - >>> r.next() == {'a':u'1', 'r':[u"ø"]} + >>> r.next() == {'a': u'1', u'ñ':'2', 'r': [u'\xc3\xb8']} + True + >>> r.next() == {'a': u'\xc3\xa9', u'ñ':'2', 'r': [u'\xc3\xae']} True - >>> r.next() == {'a':u'é', 'r':[u"î"]} """ - def __init__(self, csvfile, fieldnames, restval='', extrasaction='raise', dialect='excel', encoding='utf-8', *args, **kwds): - self.fieldnames = fieldnames + def __init__(self, csvfile, fieldnames, restval='', extrasaction='raise', dialect='excel', encoding='utf-8', errors='strict', *args, **kwds): self.encoding = encoding - self.restval = restval - self.writer = csv.DictWriter(csvfile, fieldnames, restval, extrasaction, dialect, *args, **kwds) - def writerow(self, d): - for fieldname in self.fieldnames: - if fieldname in d: - d[fieldname] = 
_stringify(d[fieldname], self.encoding) - else: - d[fieldname] = _stringify(self.restval, self.encoding) - self.writer.writerow(d) + csv.DictWriter.__init__(self, csvfile, fieldnames, restval, extrasaction, dialect, *args, **kwds) + self.writer = UnicodeWriter(csvfile, dialect, encoding=encoding, errors=errors, *args, **kwds) + self.encoding_errors = errors + + def writeheader(self): + fieldnames = _stringify_list(self.fieldnames, self.encoding, self.encoding_errors) + header = dict(zip(self.fieldnames, self.fieldnames)) + self.writerow(header) class DictReader(csv.DictReader): - def __init__(self, csvfile, fieldnames=None, restkey=None, restval=None, dialect='excel', encoding='utf-8', *args, **kwds): - self.restkey = restkey - self.encoding = encoding - self.reader = csv.DictReader(csvfile, fieldnames, restkey, restval, dialect, *args, **kwds) + """ + >>> from cStringIO import StringIO + >>> f = StringIO() + >>> w = DictWriter(f, fieldnames=['name', 'place']) + >>> w.writerow({'name': 'Cary Grant', 'place': 'hollywood'}) + >>> w.writerow({'name': 'Nathan Brillstone', 'place': u'øLand'}) + >>> w.writerow({'name': u'Willam ø. Unicoder', 'place': u'éSpandland'}) + >>> f.seek(0) + >>> r = DictReader(f, fieldnames=['name', 'place']) + >>> print r.next() == {'name': 'Cary Grant', 'place': 'hollywood'} + True + >>> print r.next() == {'name': 'Nathan Brillstone', 'place': u'øLand'} + True + >>> print r.next() == {'name': u'Willam ø. 
Unicoder', 'place': u'éSpandland'} + True + """ + def __init__(self, csvfile, fieldnames=None, restkey=None, restval=None, + dialect='excel', encoding='utf-8', errors='strict', *args, + **kwds): + if fieldnames is not None: + fieldnames = _stringify_list(fieldnames, encoding) + csv.DictReader.__init__(self, csvfile, fieldnames, restkey, restval, dialect, *args, **kwds) + self.reader = UnicodeReader(csvfile, dialect, encoding=encoding, + errors=errors, *args, **kwds) + if fieldnames is None and not hasattr(csv.DictReader, 'fieldnames'): + # Python 2.5 fieldnames workaround. (http://bugs.python.org/issue3436) + reader = UnicodeReader(csvfile, dialect, encoding=encoding, *args, **kwds) + self.fieldnames = _stringify_list(reader.next(), reader.encoding) + self.unicode_fieldnames = [_unicodify(f, encoding) for f in + self.fieldnames] + self.unicode_restkey = _unicodify(restkey, encoding) + def next(self): - d = self.reader.next() - for k, v in d.items(): - if k == self.restkey: - rest = v - if rest: - d[self.restkey] = [unicode(v, self.encoding) for v in rest] - else: - if v is not None: - d[k] = unicode(v, self.encoding) - return d + row = csv.DictReader.next(self) + result = dict((uni_key, row[str_key]) for (str_key, uni_key) in + izip(self.fieldnames, self.unicode_fieldnames)) + rest = row.get(self.restkey) + if rest: + result[self.unicode_restkey] = rest + return result |
