Decode bytes to unicode in Lexer.get_tokens().

Raise TypeError if the input is neither bytes in a known encoding, nor unicode, nor a file-like object (file, StringIO). Remove function u(). Add bytes_type to compat. Add tests for non-ASCII input.
author: Oleg Broytman <phd@phdru.name> 2016-08-31 16:10:35 +0300
committer: Oleg Broytman <phd@phdru.name> 2016-08-31 16:11:22 +0300
commit: 843499915e91e0ee324a0407c78ac6f570806370 (patch)
tree: cda75d04543a02e20bf04ab01c8dfc5e670a269d /tests/test_parse.py
parent: b05bc5ab586cb06d89c38e2eee7f77e1d3fc03c5 (diff)
download: sqlparse-843499915e91e0ee324a0407c78ac6f570806370.tar.gz
1 file changed, 19 insertions, 1 deletion
diff --git a/tests/test_parse.py b/tests/test_parse.py
index 8dd1150..0632889 100644
--- a/tests/test_parse.py
+++ b/tests/test_parse.py
@@ -6,7 +6,7 @@ import pytest
 
 import sqlparse
 from sqlparse import sql, tokens as T
-from sqlparse.compat import StringIO
+from sqlparse.compat import StringIO, text_type
 
 
 def test_parse_tokenize():
@@ -403,3 +403,21 @@ def test_dbldollar_as_literal(sql, is_literal):
     else:
         for token in p.tokens:
             assert token.ttype != T.Literal
+
+
+def test_non_ascii():
+    _test_non_ascii = u"insert into test (id, name) values (1, 'тест');"
+
+    s = _test_non_ascii
+    stmts = sqlparse.parse(s)
+    assert len(stmts) == 1
+    statement = stmts[0]
+    assert text_type(statement) == s
+    assert statement._pprint_tree() is None
+
+    s = _test_non_ascii.encode('utf-8')
+    stmts = sqlparse.parse(s, 'utf-8')
+    assert len(stmts) == 1
+    statement = stmts[0]
+    assert text_type(statement) == _test_non_ascii
+    assert statement._pprint_tree() is None
author	Oleg Broytman <phd@phdru.name>	2016-08-31 16:10:35 +0300
committer	Oleg Broytman <phd@phdru.name>	2016-08-31 16:11:22 +0300
commit	843499915e91e0ee324a0407c78ac6f570806370 (patch)
tree	cda75d04543a02e20bf04ab01c8dfc5e670a269d /tests/test_parse.py
parent	b05bc5ab586cb06d89c38e2eee7f77e1d3fc03c5 (diff)
download	sqlparse-843499915e91e0ee324a0407c78ac6f570806370.tar.gz