diff options
| -rw-r--r-- | sqlparse/lexer.py | 15 | ||||
| -rw-r--r-- | tests/test_split.py | 5 |
2 files changed, 9 insertions, 11 deletions
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py index dd15212..0fb8936 100644 --- a/sqlparse/lexer.py +++ b/sqlparse/lexer.py @@ -14,7 +14,7 @@ from sqlparse import tokens from sqlparse.keywords import SQL_REGEX -from sqlparse.compat import StringIO, string_types, text_type +from sqlparse.compat import StringIO, string_types, u from sqlparse.utils import consume @@ -37,17 +37,10 @@ class Lexer(object): ``stack`` is the inital stack (default: ``['root']``) """ - encoding = encoding or 'utf-8' - if isinstance(text, string_types): - text = StringIO(text) - - text = text.read() - if not isinstance(text, text_type): - try: - text = text.decode(encoding) - except UnicodeDecodeError: - text = text.decode('unicode-escape') + text = u(text, encoding) + elif isinstance(text, StringIO): + text = u(text.read(), encoding) iterable = enumerate(text) for pos, char in iterable: diff --git a/tests/test_split.py b/tests/test_split.py index f6d5f50..7c2645d 100644 --- a/tests/test_split.py +++ b/tests/test_split.py @@ -133,6 +133,11 @@ class SQLSplitTest(TestCaseBase): stmts = list(sqlparse.parsestream(stream)) self.assertEqual(type(stmts[0].tokens[0].value), text_type) + def test_unicode_parsestream(self): + stream = StringIO(u"SELECT ö") + stmts = list(sqlparse.parsestream(stream)) + self.assertEqual(str(stmts[0]), "SELECT ö") + def test_split_simple(): stmts = sqlparse.split('select * from foo; select * from bar;') |
