diff options
| author | Oleg Broytman <phd@phdru.name> | 2016-08-31 16:10:35 +0300 |
|---|---|---|
| committer | Oleg Broytman <phd@phdru.name> | 2016-08-31 16:11:22 +0300 |
| commit | 843499915e91e0ee324a0407c78ac6f570806370 (patch) | |
| tree | cda75d04543a02e20bf04ab01c8dfc5e670a269d /sqlparse | |
| parent | b05bc5ab586cb06d89c38e2eee7f77e1d3fc03c5 (diff) | |
| download | sqlparse-843499915e91e0ee324a0407c78ac6f570806370.tar.gz | |
Decode bytes to unicode in Lexer.get_tokens().
Raise TypeError if the input is neither bytes in a known encoding nor
unicode nor a file-like object (file, StringIO).
Remove function u(). Add bytes_type to compat. Add tests for non-ascii.
Diffstat (limited to 'sqlparse')
| -rw-r--r-- | sqlparse/compat.py | 16 |
| -rw-r--r-- | sqlparse/lexer.py | 21 |
2 files changed, 18 insertions, 19 deletions
```diff
diff --git a/sqlparse/compat.py b/sqlparse/compat.py
index d6a9144..933e0be 100644
--- a/sqlparse/compat.py
+++ b/sqlparse/compat.py
@@ -23,14 +23,10 @@ PY3 = sys.version_info[0] == 3
 
 
 if PY3:
-    def u(s, encoding=None):
-        return str(s)
-
-
     def unicode_compatible(cls):
         return cls
 
-
+    bytes_type = bytes
     text_type = str
     string_types = (str,)
     from io import StringIO
@@ -38,20 +34,12 @@ if PY3:
 
 
 elif PY2:
-    def u(s, encoding=None):
-        encoding = encoding or 'unicode-escape'
-        try:
-            return unicode(s)
-        except UnicodeDecodeError:
-            return unicode(s, encoding)
-
-
     def unicode_compatible(cls):
         cls.__unicode__ = cls.__str__
         cls.__str__ = lambda x: x.__unicode__().encode('utf-8')
         return cls
 
-
+    bytes_type = str
     text_type = unicode
     string_types = (str, unicode,)
     from StringIO import StringIO
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
index e7996b2..15a9aef 100644
--- a/sqlparse/lexer.py
+++ b/sqlparse/lexer.py
@@ -14,7 +14,7 @@
 
 from sqlparse import tokens
 from sqlparse.keywords import SQL_REGEX
-from sqlparse.compat import file_types, string_types, u
+from sqlparse.compat import bytes_type, text_type, file_types
 from sqlparse.utils import consume
 
 
@@ -37,10 +37,21 @@ class Lexer(object):
         ``stack`` is the inital stack (default: ``['root']``)
         """
 
-        if isinstance(text, string_types):
-            text = u(text, encoding)
-        elif isinstance(text, file_types):
-            text = u(text.read(), encoding)
+        if isinstance(text, file_types):
+            text = text.read()
+
+        if isinstance(text, text_type):
+            pass
+        elif isinstance(text, bytes_type):
+            try:
+                text = text.decode()
+            except UnicodeDecodeError:
+                if not encoding:
+                    encoding = 'unicode-escape'
+                text = text.decode(encoding)
+        else:
+            raise TypeError(u"Expected text or file-like object, got {!r}".
+                            format(type(text)))
 
         iterable = enumerate(text)
         for pos, char in iterable:
```
