summary | refs | log | tree | commit | diff
path: root/sqlparse
diff options
context:
space:
mode:
author: Oleg Broytman <phd@phdru.name> 2016-08-31 16:10:35 +0300
committer: Oleg Broytman <phd@phdru.name> 2016-08-31 16:11:22 +0300
commit: 843499915e91e0ee324a0407c78ac6f570806370 (patch)
tree: cda75d04543a02e20bf04ab01c8dfc5e670a269d /sqlparse
parent: b05bc5ab586cb06d89c38e2eee7f77e1d3fc03c5 (diff)
download: sqlparse-843499915e91e0ee324a0407c78ac6f570806370.tar.gz
Decode bytes to unicode in Lexer.get_tokens().
Raise TypeError if the input is neither bytes in a known encoding nor unicode nor a file-like object (file, StringIO). Remove function u(). Add bytes_type to compat. Add tests for non-ascii.
Diffstat (limited to 'sqlparse')
-rw-r--r-- sqlparse/compat.py 16
-rw-r--r-- sqlparse/lexer.py 21
2 files changed, 18 insertions, 19 deletions
diff --git a/sqlparse/compat.py b/sqlparse/compat.py
index d6a9144..933e0be 100644
--- a/sqlparse/compat.py
+++ b/sqlparse/compat.py
@@ -23,14 +23,10 @@ PY3 = sys.version_info[0] == 3
if PY3:
- def u(s, encoding=None):
- return str(s)
-
-
def unicode_compatible(cls):
return cls
-
+ bytes_type = bytes
text_type = str
string_types = (str,)
from io import StringIO
@@ -38,20 +34,12 @@ if PY3:
elif PY2:
- def u(s, encoding=None):
- encoding = encoding or 'unicode-escape'
- try:
- return unicode(s)
- except UnicodeDecodeError:
- return unicode(s, encoding)
-
-
def unicode_compatible(cls):
cls.__unicode__ = cls.__str__
cls.__str__ = lambda x: x.__unicode__().encode('utf-8')
return cls
-
+ bytes_type = str
text_type = unicode
string_types = (str, unicode,)
from StringIO import StringIO
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
index e7996b2..15a9aef 100644
--- a/sqlparse/lexer.py
+++ b/sqlparse/lexer.py
@@ -14,7 +14,7 @@
from sqlparse import tokens
from sqlparse.keywords import SQL_REGEX
-from sqlparse.compat import file_types, string_types, u
+from sqlparse.compat import bytes_type, text_type, file_types
from sqlparse.utils import consume
@@ -37,10 +37,21 @@ class Lexer(object):
``stack`` is the inital stack (default: ``['root']``)
"""
- if isinstance(text, string_types):
- text = u(text, encoding)
- elif isinstance(text, file_types):
- text = u(text.read(), encoding)
+ if isinstance(text, file_types):
+ text = text.read()
+
+ if isinstance(text, text_type):
+ pass
+ elif isinstance(text, bytes_type):
+ try:
+ text = text.decode()
+ except UnicodeDecodeError:
+ if not encoding:
+ encoding = 'unicode-escape'
+ text = text.decode(encoding)
+ else:
+ raise TypeError(u"Expected text or file-like object, got {!r}".
+ format(type(text)))
iterable = enumerate(text)
for pos, char in iterable: