author     Andi Albrecht <albrecht.andi@gmail.com>  2013-04-04 05:54:43 +0200
committer  Andi Albrecht <albrecht.andi@gmail.com>  2013-04-04 05:54:43 +0200
commit     081b23757c1a534baf42f7c099bab471bf20fe83 (patch)
tree       f751d0eb5f754117bcdf6767cf2600b1f67def62 /sqlparse
parent     e664ae1da02f87f720878c7699cc26d0a8e9e659 (diff)
download   sqlparse-081b23757c1a534baf42f7c099bab471bf20fe83.tar.gz
Add encoding parameter to top-level functions (fixes issue20).
Diffstat (limited to 'sqlparse')
-rw-r--r--  sqlparse/__init__.py          34
-rw-r--r--  sqlparse/engine/__init__.py    4
-rw-r--r--  sqlparse/lexer.py              7
3 files changed, 28 insertions, 17 deletions
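The change threads an explicit encoding from the public API down to the lexer. A minimal usage sketch based on the new signatures added below; the sample bytes and codec are illustrative, not part of the commit:

    import sqlparse

    # Bytes that only decode correctly with the right codec (illustrative).
    raw = b'select * from "caf\xe9" where id = 1;'

    statements = sqlparse.parse(raw, encoding='latin-1')
    # parse() returns a tuple of sqlparse.sql.Statement instances
    assert len(statements) == 1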
diff --git a/sqlparse/__init__.py b/sqlparse/__init__.py
index b338350..e4de928 100644
--- a/sqlparse/__init__.py
+++ b/sqlparse/__init__.py
@@ -18,24 +18,26 @@ from sqlparse import formatter
from sqlparse.exceptions import SQLParseError
-def parse(sql):
+def parse(sql, encoding=None):
"""Parse sql and return a list of statements.
- *sql* is a single string containting one or more SQL statements.
-
- Returns a tuple of :class:`~sqlparse.sql.Statement` instances.
+ :param sql: A string containting one or more SQL statements.
+ :param encoding: The encoding of the statement (optional).
+ :returns: A tuple of :class:`~sqlparse.sql.Statement` instances.
"""
- return tuple(parsestream(sql))
+ return tuple(parsestream(sql, encoding))
-def parsestream(stream):
+def parsestream(stream, encoding=None):
"""Parses sql statements from file-like object.
- Returns a generator of Statement instances.
+ :param stream: A file-like object.
+ :param encoding: The encoding of the stream contents (optional).
+ :returns: A generator of :class:`~sqlparse.sql.Statement` instances.
"""
stack = engine.FilterStack()
stack.full_analyze()
- return stack.run(stream)
+ return stack.run(stream, encoding)
def format(sql, **options):
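parsestream() accepts the same optional encoding for file-like input. A sketch reading statements from a file opened in binary mode; the file name and codec are placeholders:

    import io
    import sqlparse

    # File name and codec are placeholders for this sketch.
    with io.open('queries.sql', 'rb') as f:
        statements = list(sqlparse.parsestream(f, encoding='latin-1'))
    # statements holds sqlparse.sql.Statement objects, one per statement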
@@ -43,23 +45,29 @@ def format(sql, **options):
Available options are documented in :ref:`formatting`.
- Returns the formatted SQL statement as string.
+ In addition to the formatting options this function accepts the
+ keyword "encoding" which determines the encoding of the statement.
+
+ :returns: The formatted SQL statement as string.
"""
+ encoding = options.pop('encoding', None)
stack = engine.FilterStack()
options = formatter.validate_options(options)
stack = formatter.build_filter_stack(stack, options)
stack.postprocess.append(filters.SerializerUnicode())
- return ''.join(stack.run(sql))
+ return ''.join(stack.run(sql, encoding))
-def split(sql):
+def split(sql, encoding=None):
"""Split *sql* into single statements.
- Returns a list of strings.
+ :param sql: A string containting one or more SQL statements.
+ :param encoding: The encoding of the statement (optional).
+ :returns: A list of strings.
"""
stack = engine.FilterStack()
stack.split_statements = True
- return [unicode(stmt) for stmt in stack.run(sql)]
+ return [unicode(stmt) for stmt in stack.run(sql, encoding)]
from sqlparse.engine.filter import StatementFilter
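format() pops the "encoding" keyword before the remaining options are validated, and split() mirrors parse(); a sketch with illustrative input, codec, and formatting options:

    import sqlparse

    raw = b'select id, name from "caf\xe9"; insert into t values (1);'

    pretty = sqlparse.format(raw, reindent=True, keyword_case='upper',
                             encoding='latin-1')
    parts = sqlparse.split(raw, encoding='latin-1')
    # parts is a list of strings, one per statement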
diff --git a/sqlparse/engine/__init__.py b/sqlparse/engine/__init__.py
index 3e2822b..62c82b8 100644
--- a/sqlparse/engine/__init__.py
+++ b/sqlparse/engine/__init__.py
@@ -36,8 +36,8 @@ class FilterStack(object):
def full_analyze(self):
self.enable_grouping()
- def run(self, sql):
- stream = lexer.tokenize(sql)
+ def run(self, sql, encoding=None):
+ stream = lexer.tokenize(sql, encoding)
# Process token stream
if self.preprocess:
for filter_ in self.preprocess:
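FilterStack.run() simply forwards the codec to lexer.tokenize(), so driving the stack directly behaves the same way; a minimal sketch (statement and codec chosen for illustration):

    from sqlparse import engine

    stack = engine.FilterStack()
    stack.split_statements = True
    for stmt in stack.run(b'select 1; select 2;', 'utf-8'):
        # each stmt is a sqlparse.sql.Statement built from the decoded stream
        print(stmt)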
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
index e769d7b..4d200a6 100644
--- a/sqlparse/lexer.py
+++ b/sqlparse/lexer.py
@@ -224,7 +224,8 @@ class Lexer(object):
def _decode(self, text):
if sys.version_info[0] == 3:
- return text
+ if isinstance(text, str):
+ return text
if self.encoding == 'guess':
try:
text = text.decode('utf-8')
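Previously Python 3 returned the argument untouched, so bytes never reached the decoding code; with this change only str passes through and bytes fall into the branch below. A sketch of the resulting behaviour via the internal helper, with illustrative input:

    from sqlparse.lexer import Lexer

    lexer = Lexer()
    lexer.encoding = 'latin-1'
    # On Python 3, bytes are now decoded with the configured codec
    # instead of being returned unchanged (_decode is an internal helper).
    text = lexer._decode(b'select * from "caf\xe9";')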
@@ -355,11 +356,13 @@ class Lexer(object):
break
-def tokenize(sql):
+def tokenize(sql, encoding=None):
"""Tokenize sql.
Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream
of ``(token type, value)`` items.
"""
lexer = Lexer()
+ if encoding is not None:
+ lexer.encoding = encoding
return lexer.get_tokens(sql)
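tokenize() sets the encoding on the Lexer it creates when one is given; a minimal usage sketch (statement and codec are illustrative):

    from sqlparse import lexer

    tokens = list(lexer.tokenize(b'select "caf\xe9";', encoding='latin-1'))
    # each item is a (tokentype, value) pair with value already decoded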