summaryrefslogtreecommitdiff
path: root/tablib/packages
diff options
context:
space:
mode:
authorGavin Wahl <gwahl@fusionbox.com>2014-06-24 15:22:12 -0600
committerGavin Wahl <gwahl@fusionbox.com>2014-06-24 15:22:12 -0600
commit7a2842a8af28eeb1be9fa25da008b2070a99c8ed (patch)
treedb71526abe4c7dcf2d7beb9393d80ced837f8185 /tablib/packages
parent7acaa8460dc69f172dee4db6e0492af1db86492e (diff)
downloadtablib-7a2842a8af28eeb1be9fa25da008b2070a99c8ed.tar.gz
Update the vendored unicodecsv to fix None handling
The old version of unicodecsv incorrectly (according to https://docs.python.org/2/library/csv.html#csv.writer) encoded None values as the string 'None', instead of the empty string '' as the Python documentation specifies. The newest version of unicodecsv has fixed this. Fixes #121
Diffstat (limited to 'tablib/packages')
-rw-r--r--tablib/packages/unicodecsv/__init__.py194
1 files changed, 143 insertions, 51 deletions
diff --git a/tablib/packages/unicodecsv/__init__.py b/tablib/packages/unicodecsv/__init__.py
index e640987..6a20118 100644
--- a/tablib/packages/unicodecsv/__init__.py
+++ b/tablib/packages/unicodecsv/__init__.py
@@ -1,22 +1,65 @@
# -*- coding: utf-8 -*-
import csv
-from csv import *
+try:
+ from itertools import izip
+except ImportError:
+ izip = zip
#http://semver.org/
-VERSION = (0, 8, 0)
+VERSION = (0, 10, 1)
__version__ = ".".join(map(str,VERSION))
-def _stringify(s, encoding):
- if type(s)==unicode:
- return s.encode(encoding)
+pass_throughs = [
+ 'register_dialect',
+ 'unregister_dialect',
+ 'get_dialect',
+ 'list_dialects',
+ 'field_size_limit',
+ 'Dialect',
+ 'excel',
+ 'excel_tab',
+ 'Sniffer',
+ 'QUOTE_ALL',
+ 'QUOTE_MINIMAL',
+ 'QUOTE_NONNUMERIC',
+ 'QUOTE_NONE',
+ 'Error'
+]
+__all__ = [
+ 'reader',
+ 'writer',
+ 'DictReader',
+ 'DictWriter',
+] + pass_throughs
+
+for prop in pass_throughs:
+ globals()[prop]=getattr(csv, prop)
+
+def _stringify(s, encoding, errors):
+ if s is None:
+ return ''
+ if isinstance(s, unicode):
+ return s.encode(encoding, errors)
elif isinstance(s, (int , float)):
pass #let csv.QUOTE_NONNUMERIC do its thing.
- elif type(s) != str:
+ elif not isinstance(s, str):
s=str(s)
return s
-def _stringify_list(l, encoding):
- return [_stringify(s, encoding) for s in l]
+def _stringify_list(l, encoding, errors='strict'):
+ try:
+ return [_stringify(s, encoding, errors) for s in iter(l)]
+ except TypeError as e:
+ raise csv.Error(str(e))
+
+def _unicodify(s, encoding):
+ if s is None:
+ return None
+ if isinstance(s, (unicode, int, float)):
+ return s
+ elif isinstance(s, str):
+ return s.decode(encoding)
+ return s
class UnicodeWriter(object):
"""
@@ -28,78 +71,127 @@ class UnicodeWriter(object):
>>> f.seek(0)
>>> r = unicodecsv.reader(f, encoding='utf-8')
>>> row = r.next()
- >>> print row[0], row[1]
- é ñ
+ >>> row[0] == u'é'
+ True
+ >>> row[1] == u'ñ'
+ True
"""
- def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
- self.writer = csv.writer(f)
- self.dialect = dialect
+ def __init__(self, f, dialect=csv.excel, encoding='utf-8', errors='strict',
+ *args, **kwds):
self.encoding = encoding
- self.writer = csv.writer(f, dialect=dialect, **kwds)
+ self.writer = csv.writer(f, dialect, *args, **kwds)
+ self.encoding_errors = errors
def writerow(self, row):
- self.writer.writerow(_stringify_list(row, self.encoding))
+ self.writer.writerow(_stringify_list(row, self.encoding, self.encoding_errors))
def writerows(self, rows):
for row in rows:
self.writerow(row)
+
+ @property
+ def dialect(self):
+ return self.writer.dialect
writer = UnicodeWriter
class UnicodeReader(object):
- def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
- self.reader = csv.reader(f, dialect=dialect, **kwds)
+ def __init__(self, f, dialect=None, encoding='utf-8', errors='strict',
+ **kwds):
+ format_params = ['delimiter', 'doublequote', 'escapechar', 'lineterminator', 'quotechar', 'quoting', 'skipinitialspace']
+ if dialect is None:
+ if not any([kwd_name in format_params for kwd_name in kwds.keys()]):
+ dialect = csv.excel
+ self.reader = csv.reader(f, dialect, **kwds)
self.encoding = encoding
+ self.encoding_errors = errors
def next(self):
row = self.reader.next()
- return [unicode(s, self.encoding) for s in row]
+ encoding = self.encoding
+ encoding_errors = self.encoding_errors
+ float_ = float
+ unicode_ = unicode
+ return [(value if isinstance(value, float_) else
+ unicode_(value, encoding, encoding_errors)) for value in row]
def __iter__(self):
return self
+
+ @property
+ def dialect(self):
+ return self.reader.dialect
+
+ @property
+ def line_num(self):
+ return self.reader.line_num
reader = UnicodeReader
class DictWriter(csv.DictWriter):
"""
>>> from cStringIO import StringIO
>>> f = StringIO()
- >>> w = DictWriter(f, ['a', 'b'], restval=u'î')
- >>> w.writerow({'a':'1'})
- >>> w.writerow({'a':'1', 'b':u'ø'})
- >>> w.writerow({'a':u'é'})
+ >>> w = DictWriter(f, ['a', u'ñ', 'b'], restval=u'î')
+ >>> w.writerow({'a':'1', u'ñ':'2'})
+ >>> w.writerow({'a':'1', u'ñ':'2', 'b':u'ø'})
+ >>> w.writerow({'a':u'é', u'ñ':'2'})
>>> f.seek(0)
- >>> r = DictReader(f, fieldnames=['a'], restkey='r')
- >>> r.next() == {'a':u'1', 'r':[u"î"]}
+ >>> r = DictReader(f, fieldnames=['a', u'ñ'], restkey='r')
+ >>> r.next() == {'a': u'1', u'ñ':'2', 'r': [u'î']}
True
- >>> r.next() == {'a':u'1', 'r':[u"ø"]}
+ >>> r.next() == {'a': u'1', u'ñ':'2', 'r': [u'\xc3\xb8']}
+ True
+ >>> r.next() == {'a': u'\xc3\xa9', u'ñ':'2', 'r': [u'\xc3\xae']}
True
- >>> r.next() == {'a':u'é', 'r':[u"î"]}
"""
- def __init__(self, csvfile, fieldnames, restval='', extrasaction='raise', dialect='excel', encoding='utf-8', *args, **kwds):
- self.fieldnames = fieldnames
+ def __init__(self, csvfile, fieldnames, restval='', extrasaction='raise', dialect='excel', encoding='utf-8', errors='strict', *args, **kwds):
self.encoding = encoding
- self.restval = restval
- self.writer = csv.DictWriter(csvfile, fieldnames, restval, extrasaction, dialect, *args, **kwds)
- def writerow(self, d):
- for fieldname in self.fieldnames:
- if fieldname in d:
- d[fieldname] = _stringify(d[fieldname], self.encoding)
- else:
- d[fieldname] = _stringify(self.restval, self.encoding)
- self.writer.writerow(d)
+ csv.DictWriter.__init__(self, csvfile, fieldnames, restval, extrasaction, dialect, *args, **kwds)
+ self.writer = UnicodeWriter(csvfile, dialect, encoding=encoding, errors=errors, *args, **kwds)
+ self.encoding_errors = errors
+
+ def writeheader(self):
+ fieldnames = _stringify_list(self.fieldnames, self.encoding, self.encoding_errors)
+ header = dict(zip(self.fieldnames, self.fieldnames))
+ self.writerow(header)
class DictReader(csv.DictReader):
- def __init__(self, csvfile, fieldnames=None, restkey=None, restval=None, dialect='excel', encoding='utf-8', *args, **kwds):
- self.restkey = restkey
- self.encoding = encoding
- self.reader = csv.DictReader(csvfile, fieldnames, restkey, restval, dialect, *args, **kwds)
+ """
+ >>> from cStringIO import StringIO
+ >>> f = StringIO()
+ >>> w = DictWriter(f, fieldnames=['name', 'place'])
+ >>> w.writerow({'name': 'Cary Grant', 'place': 'hollywood'})
+ >>> w.writerow({'name': 'Nathan Brillstone', 'place': u'øLand'})
+ >>> w.writerow({'name': u'Willam ø. Unicoder', 'place': u'éSpandland'})
+ >>> f.seek(0)
+ >>> r = DictReader(f, fieldnames=['name', 'place'])
+ >>> print r.next() == {'name': 'Cary Grant', 'place': 'hollywood'}
+ True
+ >>> print r.next() == {'name': 'Nathan Brillstone', 'place': u'øLand'}
+ True
+ >>> print r.next() == {'name': u'Willam ø. Unicoder', 'place': u'éSpandland'}
+ True
+ """
+ def __init__(self, csvfile, fieldnames=None, restkey=None, restval=None,
+ dialect='excel', encoding='utf-8', errors='strict', *args,
+ **kwds):
+ if fieldnames is not None:
+ fieldnames = _stringify_list(fieldnames, encoding)
+ csv.DictReader.__init__(self, csvfile, fieldnames, restkey, restval, dialect, *args, **kwds)
+ self.reader = UnicodeReader(csvfile, dialect, encoding=encoding,
+ errors=errors, *args, **kwds)
+ if fieldnames is None and not hasattr(csv.DictReader, 'fieldnames'):
+ # Python 2.5 fieldnames workaround. (http://bugs.python.org/issue3436)
+ reader = UnicodeReader(csvfile, dialect, encoding=encoding, *args, **kwds)
+ self.fieldnames = _stringify_list(reader.next(), reader.encoding)
+ self.unicode_fieldnames = [_unicodify(f, encoding) for f in
+ self.fieldnames]
+ self.unicode_restkey = _unicodify(restkey, encoding)
+
def next(self):
- d = self.reader.next()
- for k, v in d.items():
- if k == self.restkey:
- rest = v
- if rest:
- d[self.restkey] = [unicode(v, self.encoding) for v in rest]
- else:
- if v is not None:
- d[k] = unicode(v, self.encoding)
- return d
+ row = csv.DictReader.next(self)
+ result = dict((uni_key, row[str_key]) for (str_key, uni_key) in
+ izip(self.fieldnames, self.unicode_fieldnames))
+ rest = row.get(self.restkey)
+ if rest:
+ result[self.unicode_restkey] = rest
+ return result