summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--sqlparse/lexer.py15
-rw-r--r--tests/test_split.py5
2 files changed, 9 insertions, 11 deletions
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
index dd15212..0fb8936 100644
--- a/sqlparse/lexer.py
+++ b/sqlparse/lexer.py
@@ -14,7 +14,7 @@
from sqlparse import tokens
from sqlparse.keywords import SQL_REGEX
-from sqlparse.compat import StringIO, string_types, text_type
+from sqlparse.compat import StringIO, string_types, u
from sqlparse.utils import consume
@@ -37,17 +37,10 @@ class Lexer(object):
``stack`` is the inital stack (default: ``['root']``)
"""
- encoding = encoding or 'utf-8'
-
if isinstance(text, string_types):
- text = StringIO(text)
-
- text = text.read()
- if not isinstance(text, text_type):
- try:
- text = text.decode(encoding)
- except UnicodeDecodeError:
- text = text.decode('unicode-escape')
+ text = u(text, encoding)
+ elif isinstance(text, StringIO):
+ text = u(text.read(), encoding)
iterable = enumerate(text)
for pos, char in iterable:
diff --git a/tests/test_split.py b/tests/test_split.py
index f6d5f50..7c2645d 100644
--- a/tests/test_split.py
+++ b/tests/test_split.py
@@ -133,6 +133,11 @@ class SQLSplitTest(TestCaseBase):
stmts = list(sqlparse.parsestream(stream))
self.assertEqual(type(stmts[0].tokens[0].value), text_type)
+ def test_unicode_parsestream(self):
+ stream = StringIO(u"SELECT ö")
+ stmts = list(sqlparse.parsestream(stream))
+ self.assertEqual(str(stmts[0]), "SELECT ö")
+
def test_split_simple():
stmts = sqlparse.split('select * from foo; select * from bar;')