summaryrefslogtreecommitdiff
path: root/tests/test_parse.py
diff options
context:
space:
mode:
author Oleg Broytman <phd@phdru.name> 2016-08-31 16:10:35 +0300
committer Oleg Broytman <phd@phdru.name> 2016-08-31 16:11:22 +0300
commit 843499915e91e0ee324a0407c78ac6f570806370 (patch)
tree cda75d04543a02e20bf04ab01c8dfc5e670a269d /tests/test_parse.py
parent b05bc5ab586cb06d89c38e2eee7f77e1d3fc03c5 (diff)
download sqlparse-843499915e91e0ee324a0407c78ac6f570806370.tar.gz
Decode bytes to unicode in Lexer.get_tokens().
Raise TypeError if the input is neither bytes in a known encoding nor unicode nor a file-like object (file, StringIO). Remove function u(). Add bytes_type to compat. Add tests for non-ascii.
Diffstat (limited to 'tests/test_parse.py')
-rw-r--r-- tests/test_parse.py 20
1 files changed, 19 insertions, 1 deletions
diff --git a/tests/test_parse.py b/tests/test_parse.py
index 8dd1150..0632889 100644
--- a/tests/test_parse.py
+++ b/tests/test_parse.py
@@ -6,7 +6,7 @@ import pytest
import sqlparse
from sqlparse import sql, tokens as T
-from sqlparse.compat import StringIO
+from sqlparse.compat import StringIO, text_type
def test_parse_tokenize():
@@ -403,3 +403,21 @@ def test_dbldollar_as_literal(sql, is_literal):
else:
for token in p.tokens:
assert token.ttype != T.Literal
+
+
+def test_non_ascii():
+ _test_non_ascii = u"insert into test (id, name) values (1, 'тест');"
+
+ s = _test_non_ascii
+ stmts = sqlparse.parse(s)
+ assert len(stmts) == 1
+ statement = stmts[0]
+ assert text_type(statement) == s
+ assert statement._pprint_tree() is None
+
+ s = _test_non_ascii.encode('utf-8')
+ stmts = sqlparse.parse(s, 'utf-8')
+ assert len(stmts) == 1
+ statement = stmts[0]
+ assert text_type(statement) == _test_non_ascii
+ assert statement._pprint_tree() is None