diff options
| author | Andi Albrecht <albrecht.andi@gmail.com> | 2013-04-04 05:54:43 +0200 |
|---|---|---|
| committer | Andi Albrecht <albrecht.andi@gmail.com> | 2013-04-04 05:54:43 +0200 |
| commit | 081b23757c1a534baf42f7c099bab471bf20fe83 (patch) | |
| tree | f751d0eb5f754117bcdf6767cf2600b1f67def62 | |
| parent | e664ae1da02f87f720878c7699cc26d0a8e9e659 (diff) | |
| download | sqlparse-081b23757c1a534baf42f7c099bab471bf20fe83.tar.gz | |
Add encoding parameter to top-level functions (fixes issue20).
| -rw-r--r-- | CHANGES | 5 | ||||
| -rw-r--r-- | docs/source/api.rst | 4 | ||||
| -rw-r--r-- | sqlparse/__init__.py | 34 | ||||
| -rw-r--r-- | sqlparse/engine/__init__.py | 4 | ||||
| -rw-r--r-- | sqlparse/lexer.py | 7 | ||||
| -rw-r--r-- | tests/files/test_cp1251.sql | 1 | ||||
| -rw-r--r-- | tests/test_regressions.py | 12 |
7 files changed, 49 insertions, 18 deletions
@@ -10,6 +10,11 @@ Bug Fixes Enhancements * Improve parsing speed when SQL contains CLOBs or BLOBs (issue86). + * Top-level API functions now accept encoding keyword to parse + statements in certain encodings more reliable (issue20). + +Other + * Documentation updates. Release 0.1.6 (Jan 01, 2013) diff --git a/docs/source/api.rst b/docs/source/api.rst index 2531c9b..99e50e2 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -12,6 +12,10 @@ The :mod:`sqlparse` module provides the following functions on module-level. .. autofunction:: sqlparse.parse +In most cases there's no need to set the `encoding` parameter. If +`encoding` is not set, sqlparse assumes that the given SQL statement +is encoded either in utf-8 or latin-1. + .. _formatting: diff --git a/sqlparse/__init__.py b/sqlparse/__init__.py index b338350..e4de928 100644 --- a/sqlparse/__init__.py +++ b/sqlparse/__init__.py @@ -18,24 +18,26 @@ from sqlparse import formatter from sqlparse.exceptions import SQLParseError -def parse(sql): +def parse(sql, encoding=None): """Parse sql and return a list of statements. - *sql* is a single string containting one or more SQL statements. - - Returns a tuple of :class:`~sqlparse.sql.Statement` instances. + :param sql: A string containting one or more SQL statements. + :param encoding: The encoding of the statement (optional). + :returns: A tuple of :class:`~sqlparse.sql.Statement` instances. """ - return tuple(parsestream(sql)) + return tuple(parsestream(sql, encoding)) -def parsestream(stream): +def parsestream(stream, encoding=None): """Parses sql statements from file-like object. - Returns a generator of Statement instances. + :param stream: A file-like object. + :param encoding: The encoding of the stream contents (optional). + :returns: A generator of :class:`~sqlparse.sql.Statement` instances. """ stack = engine.FilterStack() stack.full_analyze() - return stack.run(stream) + return stack.run(stream, encoding) def format(sql, **options): @@ -43,23 +45,29 @@ def format(sql, **options): Available options are documented in :ref:`formatting`. - Returns the formatted SQL statement as string. + In addition to the formatting options this function accepts the + keyword "encoding" which determines the encoding of the statement. + + :returns: The formatted SQL statement as string. """ + encoding = options.pop('encoding', None) stack = engine.FilterStack() options = formatter.validate_options(options) stack = formatter.build_filter_stack(stack, options) stack.postprocess.append(filters.SerializerUnicode()) - return ''.join(stack.run(sql)) + return ''.join(stack.run(sql, encoding)) -def split(sql): +def split(sql, encoding=None): """Split *sql* into single statements. - Returns a list of strings. + :param sql: A string containting one or more SQL statements. + :param encoding: The encoding of the statement (optional). + :returns: A list of strings. """ stack = engine.FilterStack() stack.split_statements = True - return [unicode(stmt) for stmt in stack.run(sql)] + return [unicode(stmt) for stmt in stack.run(sql, encoding)] from sqlparse.engine.filter import StatementFilter diff --git a/sqlparse/engine/__init__.py b/sqlparse/engine/__init__.py index 3e2822b..62c82b8 100644 --- a/sqlparse/engine/__init__.py +++ b/sqlparse/engine/__init__.py @@ -36,8 +36,8 @@ class FilterStack(object): def full_analyze(self): self.enable_grouping() - def run(self, sql): - stream = lexer.tokenize(sql) + def run(self, sql, encoding=None): + stream = lexer.tokenize(sql, encoding) # Process token stream if self.preprocess: for filter_ in self.preprocess: diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py index e769d7b..4d200a6 100644 --- a/sqlparse/lexer.py +++ b/sqlparse/lexer.py @@ -224,7 +224,8 @@ class Lexer(object): def _decode(self, text): if sys.version_info[0] == 3: - return text + if isinstance(text, str): + return text if self.encoding == 'guess': try: text = text.decode('utf-8') @@ -355,11 +356,13 @@ class Lexer(object): break -def tokenize(sql): +def tokenize(sql, encoding=None): """Tokenize sql. Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream of ``(token type, value)`` items. """ lexer = Lexer() + if encoding is not None: + lexer.encoding = encoding return lexer.get_tokens(sql) diff --git a/tests/files/test_cp1251.sql b/tests/files/test_cp1251.sql new file mode 100644 index 0000000..6c0228b --- /dev/null +++ b/tests/files/test_cp1251.sql @@ -0,0 +1 @@ +insert into foo values (1); -- diff --git a/tests/test_regressions.py b/tests/test_regressions.py index 94d644f..e9d890b 100644 --- a/tests/test_regressions.py +++ b/tests/test_regressions.py @@ -2,7 +2,7 @@ import sys -from tests.utils import TestCaseBase +from tests.utils import TestCaseBase, load_file import sqlparse from sqlparse import sql @@ -188,3 +188,13 @@ def test_dont_alias_keywords(): assert len(p.tokens) == 5 assert p.tokens[0].ttype is T.Keyword assert p.tokens[2].ttype is T.Keyword + + +def test_format_accepts_encoding(): # issue20 + sql = load_file('test_cp1251.sql', 'cp1251') + formatted = sqlparse.format(sql, reindent=True, encoding='cp1251') + if sys.version_info < (3,): + tformatted = u'insert into foo\nvalues (1); -- Песня про надежду\n' + else: + tformatted = 'insert into foo\nvalues (1); -- Песня про надежду\n' + assert formatted == tformatted |
