From c43fccfefed0806cca52bea48d232ddf72f842cd Mon Sep 17 00:00:00 2001
From: Victor Uriarte
Date: Sun, 5 Jun 2016 13:10:04 -0700
Subject: Organize engine module

---
 sqlparse/engine/__init__.py           |  44 +++---------
 sqlparse/engine/filter.py             | 123 ----------------------------------
 sqlparse/engine/filter_stack.py       |  44 ++++++++++++
 sqlparse/engine/statement_splitter.py | 123 ++++++++++++++++++++++++++++++++++
 4 files changed, 175 insertions(+), 159 deletions(-)
 delete mode 100644 sqlparse/engine/filter.py
 create mode 100644 sqlparse/engine/filter_stack.py
 create mode 100644 sqlparse/engine/statement_splitter.py

diff --git a/sqlparse/engine/__init__.py b/sqlparse/engine/__init__.py
index 7f00c57..2c3599e 100644
--- a/sqlparse/engine/__init__.py
+++ b/sqlparse/engine/__init__.py
@@ -5,40 +5,12 @@
 # This module is part of python-sqlparse and is released under
 # the BSD License: http://www.opensource.org/licenses/bsd-license.php
 
-"""filter"""
-
-from sqlparse import lexer
 from sqlparse.engine import grouping
-from sqlparse.engine.filter import StatementFilter
-
-
-class FilterStack(object):
-    def __init__(self):
-        self.preprocess = []
-        self.stmtprocess = []
-        self.postprocess = []
-        self._grouping = False
-
-    def enable_grouping(self):
-        self._grouping = True
-
-    def run(self, sql, encoding=None):
-        stream = lexer.tokenize(sql, encoding)
-        # Process token stream
-        for filter_ in self.preprocess:
-            stream = filter_.process(stream)
-
-        stream = StatementFilter().process(stream)
-
-        # Output: Stream processed Statements
-        for stmt in stream:
-            if self._grouping:
-                stmt = grouping.group(stmt)
-
-            for filter_ in self.stmtprocess:
-                filter_.process(stmt)
-
-            for filter_ in self.postprocess:
-                stmt = filter_.process(stmt)
-
-            yield stmt
+from sqlparse.engine.filter_stack import FilterStack
+from sqlparse.engine.statement_splitter import StatementSplitter
+
+__all__ = [
+    'grouping',
+    'FilterStack',
+    'StatementSplitter',
+]

diff --git a/sqlparse/engine/filter.py b/sqlparse/engine/filter.py
deleted file mode 100644
index ea2033a..0000000
--- a/sqlparse/engine/filter.py
+++ /dev/null
@@ -1,123 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Copyright (C) 2016 Andi Albrecht, albrecht.andi@gmail.com
-#
-# This module is part of python-sqlparse and is released under
-# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-
-from sqlparse import sql, tokens as T
-
-
-class StatementFilter(object):
-    """Filter that split stream at individual statements"""
-
-    def __init__(self):
-        self._reset()
-
-    def _reset(self):
-        """Set the filter attributes to its default values"""
-        self._in_declare = False
-        self._in_dbldollar = False
-        self._is_create = False
-        self._begin_depth = 0
-
-        self.consume_ws = False
-        self.tokens = []
-        self.level = 0
-
-    def _change_splitlevel(self, ttype, value):
-        """Get the new split level (increase, decrease or remain equal)"""
-        # PostgreSQL
-        if ttype == T.Name.Builtin and value[0] == '$' and value[-1] == '$':
-
-            # 2nd dbldollar found. $quote$ completed
-            # decrease level
-            if self._in_dbldollar:
-                self._in_dbldollar = False
-                return -1
-            else:
-                self._in_dbldollar = True
-                return 1
-
-        # if inside $$ everything inside is defining function character.
-        # Nothing inside can create a new statement
-        elif self._in_dbldollar:
-            return 0
-
-        # ANSI
-        # if normal token return
-        # wouldn't parenthesis increase/decrease a level?
-        # no, inside a paranthesis can't start new statement
-        if ttype not in T.Keyword:
-            return 0
-
-        # Everything after here is ttype = T.Keyword
-        # Also to note, once entered an If statement you are done and basically
-        # returning
-        unified = value.upper()
-
-        # three keywords begin with CREATE, but only one of them is DDL
-        # DDL Create though can contain more words such as "or replace"
-        if ttype is T.Keyword.DDL and unified.startswith('CREATE'):
-            self._is_create = True
-            return 0
-
-        # can have nested declare inside of being...
-        if unified == 'DECLARE' and self._is_create and self._begin_depth == 0:
-            self._in_declare = True
-            return 1
-
-        if unified == 'BEGIN':
-            self._begin_depth += 1
-            if self._is_create:
-                # FIXME(andi): This makes no sense.
-                return 1
-            return 0
-
-        # Should this respect a preceeding BEGIN?
-        # In CASE ... WHEN ... END this results in a split level -1.
-        # Would having multiple CASE WHEN END and a Assigment Operator
-        # cause the statement to cut off prematurely?
-        if unified == 'END':
-            self._begin_depth = max(0, self._begin_depth - 1)
-            return -1
-
-        if (unified in ('IF', 'FOR', 'WHILE') and
-                self._is_create and self._begin_depth > 0):
-            return 1
-
-        if unified in ('END IF', 'END FOR', 'END WHILE'):
-            return -1
-
-        # Default
-        return 0
-
-    def process(self, stream):
-        """Process the stream"""
-        EOS_TTYPE = T.Whitespace, T.Comment.Single
-
-        # Run over all stream tokens
-        for ttype, value in stream:
-            # Yield token if we finished a statement and there's no whitespaces
-            # It will count newline token as a non whitespace. In this context
-            # whitespace ignores newlines.
-            # why don't multi line comments also count?
-            if self.consume_ws and ttype not in EOS_TTYPE:
-                yield sql.Statement(self.tokens)
-
-                # Reset filter and prepare to process next statement
-                self._reset()
-
-            # Change current split level (increase, decrease or remain equal)
-            self.level += self._change_splitlevel(ttype, value)
-
-            # Append the token to the current statement
-            self.tokens.append(sql.Token(ttype, value))
-
-            # Check if we get the end of a statement
-            if self.level <= 0 and ttype is T.Punctuation and value == ';':
-                self.consume_ws = True
-
-        # Yield pending statement (if any)
-        if self.tokens:
-            yield sql.Statement(self.tokens)

diff --git a/sqlparse/engine/filter_stack.py b/sqlparse/engine/filter_stack.py
new file mode 100644
index 0000000..e51c6f2
--- /dev/null
+++ b/sqlparse/engine/filter_stack.py
@@ -0,0 +1,44 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2016 Andi Albrecht, albrecht.andi@gmail.com
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+
+"""filter"""
+
+from sqlparse import lexer
+from sqlparse.engine import grouping
+from sqlparse.engine.statement_splitter import StatementSplitter
+
+
+class FilterStack(object):
+    def __init__(self):
+        self.preprocess = []
+        self.stmtprocess = []
+        self.postprocess = []
+        self._grouping = False
+
+    def enable_grouping(self):
+        self._grouping = True
+
+    def run(self, sql, encoding=None):
+        stream = lexer.tokenize(sql, encoding)
+        # Process token stream
+        for filter_ in self.preprocess:
+            stream = filter_.process(stream)
+
+        stream = StatementSplitter().process(stream)
+
+        # Output: stream of processed Statements
+        for stmt in stream:
+            if self._grouping:
+                stmt = grouping.group(stmt)
+
+            for filter_ in self.stmtprocess:
+                filter_.process(stmt)
+
+            for filter_ in self.postprocess:
+                stmt = filter_.process(stmt)
+
+            yield stmt
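Note: FilterStack.run() above is the whole engine pipeline: tokenize, split
into statements, optionally group each statement into a parse tree, then
apply the registered filter lists. The following is a minimal sketch of
driving the relocated class directly; the import path is the one this patch
introduces, while the sample SQL and the bare (filter-less) stack are
illustrative assumptions, since sqlparse.parse() normally configures the
stack for you.

    from sqlparse.engine import FilterStack

    # A bare stack: no preprocess/stmtprocess/postprocess filters are
    # registered, so run() only tokenizes, splits, and (with grouping
    # enabled) builds a parse tree for each statement.
    stack = FilterStack()
    stack.enable_grouping()

    for stmt in stack.run('select a from t; update t set a = 1;'):
        print(type(stmt).__name__, str(stmt).strip())
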
diff --git a/sqlparse/engine/statement_splitter.py b/sqlparse/engine/statement_splitter.py
new file mode 100644
index 0000000..1d1d842
--- /dev/null
+++ b/sqlparse/engine/statement_splitter.py
@@ -0,0 +1,123 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2016 Andi Albrecht, albrecht.andi@gmail.com
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+
+from sqlparse import sql, tokens as T
+
+
+class StatementSplitter(object):
+    """Filter that splits a stream into individual statements"""
+
+    def __init__(self):
+        self._reset()
+
+    def _reset(self):
+        """Set the filter attributes to their default values"""
+        self._in_declare = False
+        self._in_dbldollar = False
+        self._is_create = False
+        self._begin_depth = 0
+
+        self.consume_ws = False
+        self.tokens = []
+        self.level = 0
+
+    def _change_splitlevel(self, ttype, value):
+        """Get the new split level (increase, decrease or remain equal)"""
+        # PostgreSQL
+        if ttype == T.Name.Builtin and value[0] == '$' and value[-1] == '$':
+
+            # 2nd dbldollar found: the $quote$ is complete,
+            # so decrease the level
+            if self._in_dbldollar:
+                self._in_dbldollar = False
+                return -1
+            else:
+                self._in_dbldollar = True
+                return 1
+
+        # inside $$ everything is part of the function definition;
+        # nothing inside can start a new statement
+        elif self._in_dbldollar:
+            return 0
+
+        # ANSI
+        # if it's a normal (non-keyword) token, return early
+        # wouldn't parentheses increase/decrease a level?
+        # no, a new statement can't start inside a parenthesis
+        if ttype not in T.Keyword:
+            return 0
+
+        # Everything after here is ttype = T.Keyword
+        # Note: once one of the if checks below matches, we are done and
+        # return immediately
+        unified = value.upper()
+
+        # three keywords begin with CREATE, but only one of them is DDL
+        # DDL CREATE can also contain more words, such as "OR REPLACE"
+        if ttype is T.Keyword.DDL and unified.startswith('CREATE'):
+            self._is_create = True
+            return 0
+
+        # there can be nested declares inside of a begin...
+        if unified == 'DECLARE' and self._is_create and self._begin_depth == 0:
+            self._in_declare = True
+            return 1
+
+        if unified == 'BEGIN':
+            self._begin_depth += 1
+            if self._is_create:
+                # FIXME(andi): This makes no sense.
+                return 1
+            return 0
+
+        # Should this respect a preceding BEGIN?
+        # In CASE ... WHEN ... END this results in a split level -1.
+        # Would having multiple CASE WHEN END and an Assignment Operator
+        # cause the statement to cut off prematurely?
+        if unified == 'END':
+            self._begin_depth = max(0, self._begin_depth - 1)
+            return -1
+
+        if (unified in ('IF', 'FOR', 'WHILE') and
+                self._is_create and self._begin_depth > 0):
+            return 1
+
+        if unified in ('END IF', 'END FOR', 'END WHILE'):
+            return -1
+
+        # Default
+        return 0
+
+    def process(self, stream):
+        """Process the stream"""
+        EOS_TTYPE = T.Whitespace, T.Comment.Single
+
+        # Run over all stream tokens
+        for ttype, value in stream:
+            # Yield the statement if we have finished one and this token
+            # is not whitespace. Note that a newline token counts as
+            # non-whitespace in this context.
+            # why don't multi-line comments also count?
+            if self.consume_ws and ttype not in EOS_TTYPE:
+                yield sql.Statement(self.tokens)
+
+                # Reset filter and prepare to process next statement
+                self._reset()
+
+            # Change current split level (increase, decrease or remain equal)
+            self.level += self._change_splitlevel(ttype, value)
+
+            # Append the token to the current statement
+            self.tokens.append(sql.Token(ttype, value))
+
+            # Check if we reached the end of a statement
+            if self.level <= 0 and ttype is T.Punctuation and value == ';':
+                self.consume_ws = True
+
+        # Yield the pending statement (if any)
+        if self.tokens:
+            yield sql.Statement(self.tokens)
--
cgit v1.2.1
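Note: the split-level bookkeeping in _change_splitlevel() is what keeps
procedural bodies intact: BEGIN after a CREATE raises the level, so
semicolons inside the body do not terminate the statement, and the
dollar-quote branch does the same for PostgreSQL $$ bodies. A rough usage
sketch of the new module follows; the SQL sample is invented, and
lexer.tokenize is the same entry point FilterStack.run() uses.

    from sqlparse import lexer
    from sqlparse.engine import StatementSplitter

    sql = 'create procedure p() begin select 1; end; select 2;'

    # The inner "select 1;" sits at split level 1 (between BEGIN and END),
    # so only the two outer semicolons end statements: the splitter should
    # yield the whole CREATE PROCEDURE first, then "select 2;".
    for stmt in StatementSplitter().process(lexer.tokenize(sql)):
        print(repr(str(stmt)))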