From 62423c0d5e2e570341d5d0db74982712ff2348c7 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Thu, 2 Jun 2016 14:09:21 -0700 Subject: Remove undocumented features These features/function/classes were added for AntiORM. Quick look-up didn't show any usage outside of AntiORM. Closes #246 --- sqlparse/__init__.py | 6 -- sqlparse/filters.py | 222 -------------------------------------------------- sqlparse/functions.py | 44 ---------- sqlparse/pipeline.py | 31 ------- sqlparse/utils.py | 71 +--------------- 5 files changed, 1 insertion(+), 373 deletions(-) delete mode 100644 sqlparse/functions.py delete mode 100644 sqlparse/pipeline.py (limited to 'sqlparse') diff --git a/sqlparse/__init__.py b/sqlparse/__init__.py index 2943997..d69a3d9 100644 --- a/sqlparse/__init__.py +++ b/sqlparse/__init__.py @@ -68,9 +68,3 @@ def split(sql, encoding=None): stack = engine.FilterStack() stack.split_statements = True return [u(stmt).strip() for stmt in stack.run(sql, encoding)] - - -def split2(stream): - from sqlparse.engine.filter import StatementFilter - splitter = StatementFilter() - return list(splitter.process(None, stream)) diff --git a/sqlparse/filters.py b/sqlparse/filters.py index 1cb2f16..ccf8735 100644 --- a/sqlparse/filters.py +++ b/sqlparse/filters.py @@ -7,15 +7,8 @@ import re -from os.path import abspath, join - from sqlparse import sql, tokens as T from sqlparse.compat import u, text_type -from sqlparse.engine import FilterStack -from sqlparse.pipeline import Pipeline -from sqlparse.tokens import (Comment, Comparison, Keyword, Name, Punctuation, - String, Whitespace) -from sqlparse.utils import memoize_generator from sqlparse.utils import split_unquoted_newlines @@ -74,130 +67,6 @@ class TruncateStringFilter(object): yield ttype, value -class GetComments(object): - """Get the comments from a stack""" - def process(self, stack, stream): - for token_type, value in stream: - if token_type in Comment: - yield token_type, value - - -class StripComments(object): - """Strip the comments from a stack""" - def process(self, stack, stream): - for token_type, value in stream: - if token_type not in Comment: - yield token_type, value - - -def StripWhitespace(stream): - "Strip the useless whitespaces from a stream leaving only the minimal ones" - last_type = None - has_space = False - ignore_group = frozenset((Comparison, Punctuation)) - - for token_type, value in stream: - # We got a previous token (not empty first ones) - if last_type: - if token_type in Whitespace: - has_space = True - continue - - # Ignore first empty spaces and dot-commas - elif token_type in (Whitespace, Whitespace.Newline, ignore_group): - continue - - # Yield a whitespace if it can't be ignored - if has_space: - if not ignore_group.intersection((last_type, token_type)): - yield Whitespace, ' ' - has_space = False - - # Yield the token and set its type for checking with the next one - yield token_type, value - last_type = token_type - - -class IncludeStatement(object): - """Filter that enable a INCLUDE statement""" - - def __init__(self, dirpath=".", maxrecursive=10, raiseexceptions=False): - if maxrecursive <= 0: - raise ValueError('Max recursion limit reached') - - self.dirpath = abspath(dirpath) - self.maxRecursive = maxrecursive - self.raiseexceptions = raiseexceptions - - self.detected = False - - @memoize_generator - def process(self, stack, stream): - # Run over all tokens in the stream - for token_type, value in stream: - # INCLUDE statement found, set detected mode - if token_type in Name and value.upper() == 
'INCLUDE': - self.detected = True - continue - - # INCLUDE statement was found, parse it - elif self.detected: - # Omit whitespaces - if token_type in Whitespace: - continue - - # Found file path to include - if token_type in String.Symbol: - # Get path of file to include - path = join(self.dirpath, value[1:-1]) - - try: - f = open(path) - raw_sql = f.read() - f.close() - - # There was a problem loading the include file - except IOError as err: - # Raise the exception to the interpreter - if self.raiseexceptions: - raise - - # Put the exception as a comment on the SQL code - yield Comment, u'-- IOError: %s\n' % err - - else: - # Create new FilterStack to parse readed file - # and add all its tokens to the main stack recursively - try: - filtr = IncludeStatement(self.dirpath, - self.maxRecursive - 1, - self.raiseexceptions) - - # Max recursion limit reached - except ValueError as err: - # Raise the exception to the interpreter - if self.raiseexceptions: - raise - - # Put the exception as a comment on the SQL code - yield Comment, u'-- ValueError: %s\n' % err - - stack = FilterStack() - stack.preprocess.append(filtr) - - for tv in stack.run(raw_sql): - yield tv - - # Set normal mode - self.detected = False - - # Don't include any token while in detected mode - continue - - # Normal token - yield token_type, value - - # ---------------------- # statement process @@ -520,57 +389,6 @@ class RightMarginFilter(object): group.tokens = self._process(stack, group, group.tokens) -class ColumnsSelect(object): - """Get the columns names of a SELECT query""" - def process(self, stack, stream): - mode = 0 - oldValue = "" - parenthesis = 0 - - for token_type, value in stream: - # Ignore comments - if token_type in Comment: - continue - - # We have not detected a SELECT statement - if mode == 0: - if token_type in Keyword and value == 'SELECT': - mode = 1 - - # We have detected a SELECT statement - elif mode == 1: - if value == 'FROM': - if oldValue: - yield oldValue - - mode = 3 # Columns have been checked - - elif value == 'AS': - oldValue = "" - mode = 2 - - elif (token_type == Punctuation - and value == ',' and not parenthesis): - if oldValue: - yield oldValue - oldValue = "" - - elif token_type not in Whitespace: - if value == '(': - parenthesis += 1 - elif value == ')': - parenthesis -= 1 - - oldValue += value - - # We are processing an AS keyword - elif mode == 2: - # We check also for Keywords because a bug in SQLParse - if token_type == Name or token_type == Keyword: - yield value - mode = 1 - - # --------------------------- # postprocess @@ -583,15 +401,6 @@ class SerializerUnicode(object): return res -def Tokens2Unicode(stream): - result = "" - - for _, value in stream: - result += u(value) - - return result - - class OutputFilter(object): varname_prefix = '' @@ -704,34 +513,3 @@ class OutputPHPFilter(OutputFilter): # Close quote yield sql.Token(T.Text, '"') yield sql.Token(T.Punctuation, ';') - - -class Limit(object): - """Get the LIMIT of a query. 
- - If not defined, return -1 (SQL specification for no LIMIT query) - """ - def process(self, stack, stream): - index = 7 - stream = list(stream) - stream.reverse() - - # Run over all tokens in the stream from the end - for token_type, value in stream: - index -= 1 - -# if index and token_type in Keyword: - if index and token_type in Keyword and value == 'LIMIT': - return stream[4 - index][1] - - return -1 - - -def compact(stream): - """Function that return a compacted version of the stream""" - pipe = Pipeline() - - pipe.append(StripComments()) - pipe.append(StripWhitespace) - - return pipe(stream) diff --git a/sqlparse/functions.py b/sqlparse/functions.py deleted file mode 100644 index e54457e..0000000 --- a/sqlparse/functions.py +++ /dev/null @@ -1,44 +0,0 @@ -''' -Created on 17/05/2012 - -@author: piranna - -Several utility functions to extract info from the SQL sentences -''' - -from sqlparse.filters import ColumnsSelect, Limit -from sqlparse.pipeline import Pipeline -from sqlparse.tokens import Keyword, Whitespace - - -def getlimit(stream): - """Function that return the LIMIT of a input SQL """ - pipe = Pipeline() - - pipe.append(Limit()) - - result = pipe(stream) - try: - return int(result) - except ValueError: - return result - - -def getcolumns(stream): - """Function that return the colums of a SELECT query""" - pipe = Pipeline() - - pipe.append(ColumnsSelect()) - - return pipe(stream) - - -class IsType(object): - """Functor that return is the statement is of a specific type""" - def __init__(self, type): - self.type = type - - def __call__(self, stream): - for token_type, value in stream: - if token_type not in Whitespace: - return token_type in Keyword and value == self.type diff --git a/sqlparse/pipeline.py b/sqlparse/pipeline.py deleted file mode 100644 index 34dad19..0000000 --- a/sqlparse/pipeline.py +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright (C) 2011 Jesus Leganes "piranna", piranna@gmail.com -# -# This module is part of python-sqlparse and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php. 
- -from types import GeneratorType - - -class Pipeline(list): - """Pipeline to process filters sequentially""" - - def __call__(self, stream): - """Run the pipeline - - Return a static (non generator) version of the result - """ - - # Run the stream over all the filters on the pipeline - for filter in self: - # Functions and callable objects (objects with '__call__' method) - if callable(filter): - stream = filter(stream) - - # Normal filters (objects with 'process' method) - else: - stream = filter.process(None, stream) - - # If last filter return a generator, staticalize it inside a list - if isinstance(stream, GeneratorType): - return list(stream) - return stream diff --git a/sqlparse/utils.py b/sqlparse/utils.py index 2513c26..4da44c6 100644 --- a/sqlparse/utils.py +++ b/sqlparse/utils.py @@ -7,78 +7,9 @@ import itertools import re -from collections import OrderedDict, deque +from collections import deque from contextlib import contextmanager - -class Cache(OrderedDict): - """Cache with LRU algorithm using an OrderedDict as basis - """ - - def __init__(self, maxsize=100): - OrderedDict.__init__(self) - - self._maxsize = maxsize - - def __getitem__(self, key, *args, **kwargs): - # Get the key and remove it from the cache, or raise KeyError - value = OrderedDict.__getitem__(self, key) - del self[key] - - # Insert the (key, value) pair on the front of the cache - OrderedDict.__setitem__(self, key, value) - - # Return the value from the cache - return value - - def __setitem__(self, key, value, *args, **kwargs): - # Key was inserted before, remove it so we put it at front later - if key in self: - del self[key] - - # Too much items on the cache, remove the least recent used - elif len(self) >= self._maxsize: - self.popitem(False) - - # Insert the (key, value) pair on the front of the cache - OrderedDict.__setitem__(self, key, value, *args, **kwargs) - - -def memoize_generator(func): - """Memoize decorator for generators - - Store `func` results in a cache according to their arguments as 'memoize' - does but instead this works on decorators instead of regular functions. - Obviusly, this is only useful if the generator will always return the same - values for each specific parameters... - """ - cache = Cache() - - def wrapped_func(*args, **kwargs): - params = (args, tuple(sorted(kwargs.items()))) - - # Look if cached - try: - cached = cache[params] - - # Not cached, exec and store it - except KeyError: - cached = [] - - for item in func(*args, **kwargs): - cached.append(item) - yield item - - cache[params] = cached - - # Cached, yield its items - else: - for item in cached: - yield item - - return wrapped_func - - # This regular expression replaces the home-cooked parser that was here before. 
# It is much faster, but requires an extra post-processing step to get the # desired results (that are compatible with what you would expect from the -- cgit v1.2.1 From 5ce225522ba2b2a8af23c7efcbd6261bd9f09528 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Thu, 2 Jun 2016 21:25:01 -0700 Subject: Remove as parameter stack in filter.process --- sqlparse/engine/__init__.py | 8 ++++---- sqlparse/engine/filter.py | 2 +- sqlparse/filters.py | 34 +++++++++++++++++----------------- 3 files changed, 22 insertions(+), 22 deletions(-) (limited to 'sqlparse') diff --git a/sqlparse/engine/__init__.py b/sqlparse/engine/__init__.py index 1c2bf09..e69a138 100644 --- a/sqlparse/engine/__init__.py +++ b/sqlparse/engine/__init__.py @@ -29,12 +29,12 @@ class FilterStack(object): # Process token stream if self.preprocess: for filter_ in self.preprocess: - stream = filter_.process(self, stream) + stream = filter_.process(stream) if (self.stmtprocess or self.postprocess or self.split_statements or self._grouping): splitter = StatementFilter() - stream = splitter.process(self, stream) + stream = splitter.process(stream) if self._grouping: @@ -50,7 +50,7 @@ class FilterStack(object): ret = [] for stmt in stream: for filter_ in self.stmtprocess: - filter_.process(self, stmt) + filter_.process(stmt) ret.append(stmt) return ret stream = _run1(stream) @@ -61,7 +61,7 @@ class FilterStack(object): for stmt in stream: stmt.tokens = list(stmt.flatten()) for filter_ in self.postprocess: - stmt = filter_.process(self, stmt) + stmt = filter_.process(stmt) yield stmt stream = _run2(stream) diff --git a/sqlparse/engine/filter.py b/sqlparse/engine/filter.py index 71020e7..3847e3d 100644 --- a/sqlparse/engine/filter.py +++ b/sqlparse/engine/filter.py @@ -76,7 +76,7 @@ class StatementFilter(object): # Default return 0 - def process(self, stack, stream): + def process(self, stream): "Process the stream" consume_ws = False splitlevel = 0 diff --git a/sqlparse/filters.py b/sqlparse/filters.py index ccf8735..8376326 100644 --- a/sqlparse/filters.py +++ b/sqlparse/filters.py @@ -25,7 +25,7 @@ class _CaseFilter(object): assert case in ['lower', 'upper', 'capitalize'] self.convert = getattr(text_type, case) - def process(self, stack, stream): + def process(self, stream): for ttype, value in stream: if ttype in self.ttype: value = self.convert(value) @@ -39,7 +39,7 @@ class KeywordCaseFilter(_CaseFilter): class IdentifierCaseFilter(_CaseFilter): ttype = (T.Name, T.String.Symbol) - def process(self, stack, stream): + def process(self, stream): for ttype, value in stream: if ttype in self.ttype and not value.strip()[0] == '"': value = self.convert(value) @@ -52,7 +52,7 @@ class TruncateStringFilter(object): self.width = max(width, 1) self.char = u(char) - def process(self, stack, stream): + def process(self, stream): for ttype, value in stream: if ttype is T.Literal.String.Single: if value[:2] == '\'\'': @@ -94,8 +94,8 @@ class StripCommentsFilter(object): tlist.tokens.pop(tidx) token = self._get_next_comment(tlist) - def process(self, stack, stmt): - [self.process(stack, sgroup) for sgroup in stmt.get_sublists()] + def process(self, stmt): + [self.process(sgroup) for sgroup in stmt.get_sublists()] self._process(stmt) @@ -139,8 +139,8 @@ class StripWhitespaceFilter(object): tlist.tokens.pop(-2) self._stripws_default(tlist) - def process(self, stack, stmt, depth=0): - [self.process(stack, sgroup, depth + 1) + def process(self, stmt, depth=0): + [self.process(sgroup, depth + 1) for sgroup in stmt.get_sublists()] self._stripws(stmt) if 
( @@ -334,7 +334,7 @@ class ReindentFilter(object): self._split_kwds(tlist) [self._process(sgroup) for sgroup in tlist.get_sublists()] - def process(self, stack, stmt): + def process(self, stmt): if isinstance(stmt, sql.Statement): self._curr_stmt = stmt self._process(stmt) @@ -350,7 +350,7 @@ class ReindentFilter(object): self._last_stmt = stmt -# FIXME: Doesn't work ;) +# FIXME: Doesn't work class RightMarginFilter(object): keep_together = ( @@ -361,7 +361,7 @@ class RightMarginFilter(object): self.width = width self.line = '' - def _process(self, stack, group, stream): + def _process(self, group, stream): for token in stream: if token.is_whitespace() and '\n' in token.value: if token.value.endswith('\n'): @@ -370,7 +370,7 @@ class RightMarginFilter(object): self.line = token.value.splitlines()[-1] elif (token.is_group() and token.__class__ not in self.keep_together): - token.tokens = self._process(stack, token, token.tokens) + token.tokens = self._process(token, token.tokens) else: val = u(token) if len(self.line) + len(val) > self.width: @@ -384,17 +384,17 @@ class RightMarginFilter(object): self.line += val yield token - def process(self, stack, group): - return - group.tokens = self._process(stack, group, group.tokens) - + def process(self, group): + # return + # group.tokens = self._process(group, group.tokens) + raise NotImplementedError # --------------------------- # postprocess class SerializerUnicode(object): - def process(self, stack, stmt): + def process(self, stmt): raw = u(stmt) lines = split_unquoted_newlines(raw) res = '\n'.join(line.rstrip() for line in lines) @@ -411,7 +411,7 @@ class OutputFilter(object): def _process(self, stream, varname, has_nl): raise NotImplementedError - def process(self, stack, stmt): + def process(self, stmt): self.count += 1 if self.count > 1: varname = '%s%d' % (self.varname, self.count) -- cgit v1.2.1 From 3fed0393a80a40ea28e5fc0cea9b526630e9f42b Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Thu, 2 Jun 2016 21:38:23 -0700 Subject: Refactor filter-stack to simplify logic if (self.stmtprocess or self.postprocess or self.split_statements or self._grouping): always evaluates to true after removing unused features --- sqlparse/__init__.py | 1 - sqlparse/engine/__init__.py | 48 ++++++++++++--------------------------------- sqlparse/engine/grouping.py | 5 +++-- 3 files changed, 15 insertions(+), 39 deletions(-) (limited to 'sqlparse') diff --git a/sqlparse/__init__.py b/sqlparse/__init__.py index d69a3d9..cb83a71 100644 --- a/sqlparse/__init__.py +++ b/sqlparse/__init__.py @@ -66,5 +66,4 @@ def split(sql, encoding=None): :returns: A list of strings. 
""" stack = engine.FilterStack() - stack.split_statements = True return [u(stmt).strip() for stmt in stack.run(sql, encoding)] diff --git a/sqlparse/engine/__init__.py b/sqlparse/engine/__init__.py index e69a138..7f00c57 100644 --- a/sqlparse/engine/__init__.py +++ b/sqlparse/engine/__init__.py @@ -13,12 +13,10 @@ from sqlparse.engine.filter import StatementFilter class FilterStack(object): - def __init__(self): self.preprocess = [] self.stmtprocess = [] self.postprocess = [] - self.split_statements = False self._grouping = False def enable_grouping(self): @@ -27,42 +25,20 @@ class FilterStack(object): def run(self, sql, encoding=None): stream = lexer.tokenize(sql, encoding) # Process token stream - if self.preprocess: - for filter_ in self.preprocess: - stream = filter_.process(stream) - - if (self.stmtprocess or self.postprocess or - self.split_statements or self._grouping): - splitter = StatementFilter() - stream = splitter.process(stream) - - if self._grouping: - - def _group(stream): - for stmt in stream: - grouping.group(stmt) - yield stmt - stream = _group(stream) + for filter_ in self.preprocess: + stream = filter_.process(stream) - if self.stmtprocess: + stream = StatementFilter().process(stream) - def _run1(stream): - ret = [] - for stmt in stream: - for filter_ in self.stmtprocess: - filter_.process(stmt) - ret.append(stmt) - return ret - stream = _run1(stream) + # Output: Stream processed Statements + for stmt in stream: + if self._grouping: + stmt = grouping.group(stmt) - if self.postprocess: + for filter_ in self.stmtprocess: + filter_.process(stmt) - def _run2(stream): - for stmt in stream: - stmt.tokens = list(stmt.flatten()) - for filter_ in self.postprocess: - stmt = filter_.process(stmt) - yield stmt - stream = _run2(stream) + for filter_ in self.postprocess: + stmt = filter_.process(stmt) - return stream + yield stmt diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index 0ac1cb3..c680995 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -266,7 +266,7 @@ def align_comments(tlist): token = tlist.token_next_by(i=sql.Comment, idx=token) -def group(tlist): +def group(stmt): for func in [ group_comments, group_brackets, @@ -291,4 +291,5 @@ def group(tlist): group_foreach, group_begin, ]: - func(tlist) + func(stmt) + return stmt -- cgit v1.2.1 From f0a6af57b7a5c116528db73643b26f934477d350 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Thu, 2 Jun 2016 22:40:37 -0700 Subject: Minor refactor statement filter and add comments --- sqlparse/engine/filter.py | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) (limited to 'sqlparse') diff --git a/sqlparse/engine/filter.py b/sqlparse/engine/filter.py index 3847e3d..c7b3bf8 100644 --- a/sqlparse/engine/filter.py +++ b/sqlparse/engine/filter.py @@ -10,41 +10,51 @@ from sqlparse import tokens as T class StatementFilter(object): - "Filter that split stream at individual statements" + """Filter that split stream at individual statements""" def __init__(self): - self._in_declare = False - self._in_dbldollar = False - self._is_create = False - self._begin_depth = 0 + self._reset() def _reset(self): - "Set the filter attributes to its default values" + """Set the filter attributes to its default values""" self._in_declare = False self._in_dbldollar = False self._is_create = False self._begin_depth = 0 def _change_splitlevel(self, ttype, value): - "Get the new split level (increase, decrease or remain equal)" + """Get the new split level (increase, 
decrease or remain equal)""" # PostgreSQL if ttype == T.Name.Builtin \ and value.startswith('$') and value.endswith('$'): + + # 2nd dbldollar found. $quote$ completed + # decrease level if self._in_dbldollar: self._in_dbldollar = False return -1 else: self._in_dbldollar = True return 1 + + # if inside $$ everything inside is defining function character. + # Nothing inside can create a new statement elif self._in_dbldollar: return 0 # ANSI + # if normal token return + # wouldn't parenthesis increase/decrease a level? + # no, inside a paranthesis can't start new statement if ttype not in T.Keyword: return 0 + # Everything after here is ttype = T.Keyword + # Also to note, once entered an If statement you are done and basically + # returning unified = value.upper() + # can have nested declare inside of being... if unified == 'DECLARE' and self._is_create and self._begin_depth == 0: self._in_declare = True return 1 @@ -59,12 +69,16 @@ class StatementFilter(object): if unified in ('END IF', 'END FOR', 'END WHILE'): return -1 + # Should this respect a preceeding BEGIN? + # In CASE ... WHEN ... END this results in a split level -1. + # Would having multiple CASE WHEN END and a Assigment Operator + # cause the statement to cut off prematurely? if unified == 'END': - # Should this respect a preceeding BEGIN? - # In CASE ... WHEN ... END this results in a split level -1. self._begin_depth = max(0, self._begin_depth - 1) return -1 + # three keywords begin with CREATE, but only one of them is DDL + # DDL Create though can contain more words such as "or replace" if ttype is T.Keyword.DDL and unified.startswith('CREATE'): self._is_create = True return 0 @@ -77,7 +91,7 @@ class StatementFilter(object): return 0 def process(self, stream): - "Process the stream" + """Process the stream""" consume_ws = False splitlevel = 0 stmt = None @@ -86,6 +100,9 @@ class StatementFilter(object): # Run over all stream tokens for ttype, value in stream: # Yield token if we finished a statement and there's no whitespaces + # It will count newline token as a non whitespace. In this context + # whitespace ignores newlines. + # why don't multi line comments also count? if consume_ws and ttype not in (T.Whitespace, T.Comment.Single): stmt.tokens = stmt_tokens yield stmt -- cgit v1.2.1 From 2b8ede11388e81e0f6dc871a45c5327eaf456e44 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Thu, 2 Jun 2016 22:54:25 -0700 Subject: Refactor statement filter --- sqlparse/engine/filter.py | 65 ++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 38 deletions(-) (limited to 'sqlparse') diff --git a/sqlparse/engine/filter.py b/sqlparse/engine/filter.py index c7b3bf8..ea2033a 100644 --- a/sqlparse/engine/filter.py +++ b/sqlparse/engine/filter.py @@ -5,8 +5,7 @@ # This module is part of python-sqlparse and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -from sqlparse.sql import Statement, Token -from sqlparse import tokens as T +from sqlparse import sql, tokens as T class StatementFilter(object): @@ -22,11 +21,14 @@ class StatementFilter(object): self._is_create = False self._begin_depth = 0 + self.consume_ws = False + self.tokens = [] + self.level = 0 + def _change_splitlevel(self, ttype, value): """Get the new split level (increase, decrease or remain equal)""" # PostgreSQL - if ttype == T.Name.Builtin \ - and value.startswith('$') and value.endswith('$'): + if ttype == T.Name.Builtin and value[0] == '$' and value[-1] == '$': # 2nd dbldollar found. 
$quote$ completed # decrease level @@ -54,6 +56,12 @@ class StatementFilter(object): # returning unified = value.upper() + # three keywords begin with CREATE, but only one of them is DDL + # DDL Create though can contain more words such as "or replace" + if ttype is T.Keyword.DDL and unified.startswith('CREATE'): + self._is_create = True + return 0 + # can have nested declare inside of being... if unified == 'DECLARE' and self._is_create and self._begin_depth == 0: self._in_declare = True @@ -61,14 +69,11 @@ class StatementFilter(object): if unified == 'BEGIN': self._begin_depth += 1 - if self._in_declare or self._is_create: + if self._is_create: # FIXME(andi): This makes no sense. return 1 return 0 - if unified in ('END IF', 'END FOR', 'END WHILE'): - return -1 - # Should this respect a preceeding BEGIN? # In CASE ... WHEN ... END this results in a split level -1. # Would having multiple CASE WHEN END and a Assigment Operator @@ -77,25 +82,19 @@ class StatementFilter(object): self._begin_depth = max(0, self._begin_depth - 1) return -1 - # three keywords begin with CREATE, but only one of them is DDL - # DDL Create though can contain more words such as "or replace" - if ttype is T.Keyword.DDL and unified.startswith('CREATE'): - self._is_create = True - return 0 - - if unified in ('IF', 'FOR', 'WHILE') \ - and self._is_create and self._begin_depth > 0: + if (unified in ('IF', 'FOR', 'WHILE') and + self._is_create and self._begin_depth > 0): return 1 + if unified in ('END IF', 'END FOR', 'END WHILE'): + return -1 + # Default return 0 def process(self, stream): """Process the stream""" - consume_ws = False - splitlevel = 0 - stmt = None - stmt_tokens = [] + EOS_TTYPE = T.Whitespace, T.Comment.Single # Run over all stream tokens for ttype, value in stream: @@ -103,32 +102,22 @@ class StatementFilter(object): # It will count newline token as a non whitespace. In this context # whitespace ignores newlines. # why don't multi line comments also count? 
- if consume_ws and ttype not in (T.Whitespace, T.Comment.Single): - stmt.tokens = stmt_tokens - yield stmt + if self.consume_ws and ttype not in EOS_TTYPE: + yield sql.Statement(self.tokens) # Reset filter and prepare to process next statement self._reset() - consume_ws = False - splitlevel = 0 - stmt = None - - # Create a new statement if we are not currently in one of them - if stmt is None: - stmt = Statement() - stmt_tokens = [] # Change current split level (increase, decrease or remain equal) - splitlevel += self._change_splitlevel(ttype, value) + self.level += self._change_splitlevel(ttype, value) # Append the token to the current statement - stmt_tokens.append(Token(ttype, value)) + self.tokens.append(sql.Token(ttype, value)) # Check if we get the end of a statement - if splitlevel <= 0 and ttype is T.Punctuation and value == ';': - consume_ws = True + if self.level <= 0 and ttype is T.Punctuation and value == ';': + self.consume_ws = True # Yield pending statement (if any) - if stmt is not None: - stmt.tokens = stmt_tokens - yield stmt + if self.tokens: + yield sql.Statement(self.tokens) -- cgit v1.2.1 From be62c7a673b5f0fe973523d01e22b7ad0bb76600 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Thu, 2 Jun 2016 23:07:17 -0700 Subject: Refactor filters --- sqlparse/filters.py | 107 ++++++++++++++++++++++------------------------------ 1 file changed, 45 insertions(+), 62 deletions(-) (limited to 'sqlparse') diff --git a/sqlparse/filters.py b/sqlparse/filters.py index 8376326..95ac74c 100644 --- a/sqlparse/filters.py +++ b/sqlparse/filters.py @@ -8,7 +8,7 @@ import re from sqlparse import sql, tokens as T -from sqlparse.compat import u, text_type +from sqlparse.compat import text_type from sqlparse.utils import split_unquoted_newlines @@ -16,13 +16,10 @@ from sqlparse.utils import split_unquoted_newlines # token process class _CaseFilter(object): - ttype = None def __init__(self, case=None): - if case is None: - case = 'upper' - assert case in ['lower', 'upper', 'capitalize'] + case = case or 'upper' self.convert = getattr(text_type, case) def process(self, stream): @@ -37,33 +34,35 @@ class KeywordCaseFilter(_CaseFilter): class IdentifierCaseFilter(_CaseFilter): - ttype = (T.Name, T.String.Symbol) + ttype = T.Name, T.String.Symbol def process(self, stream): for ttype, value in stream: - if ttype in self.ttype and not value.strip()[0] == '"': + if ttype in self.ttype and value.strip()[0] != '"': value = self.convert(value) yield ttype, value class TruncateStringFilter(object): - def __init__(self, width, char): - self.width = max(width, 1) - self.char = u(char) + self.width = width + self.char = char def process(self, stream): for ttype, value in stream: - if ttype is T.Literal.String.Single: - if value[:2] == '\'\'': - inner = value[2:-2] - quote = u'\'\'' - else: - inner = value[1:-1] - quote = u'\'' - if len(inner) > self.width: - value = u''.join((quote, inner[:self.width], self.char, - quote)) + if ttype != T.Literal.String.Single: + yield ttype, value + continue + + if value[:2] == "''": + inner = value[2:-2] + quote = "''" + else: + inner = value[1:-1] + quote = "'" + + if len(inner) > self.width: + value = ''.join((quote, inner[:self.width], self.char, quote)) yield ttype, value @@ -71,7 +70,6 @@ class TruncateStringFilter(object): # statement process class StripCommentsFilter(object): - def _get_next_comment(self, tlist): # TODO(andi) Comment types should be unified, see related issue38 token = tlist.token_next_by(i=sql.Comment, t=T.Comment) @@ -81,8 +79,8 @@ class 
StripCommentsFilter(object): token = self._get_next_comment(tlist) while token: tidx = tlist.token_index(token) - prev = tlist.token_prev(tidx, False) - next_ = tlist.token_next(tidx, False) + prev = tlist.token_prev(tidx, skip_ws=False) + next_ = tlist.token_next(tidx, skip_ws=False) # Replace by whitespace if prev and next exist and if they're not # whitespaces. This doesn't apply if prev or next is a paranthesis. if (prev is not None and next_ is not None @@ -100,7 +98,6 @@ class StripCommentsFilter(object): class StripWhitespaceFilter(object): - def _stripws(self, tlist): func_name = '_stripws_%s' % tlist.__class__.__name__.lower() func = getattr(self, func_name, self._stripws_default) @@ -122,14 +119,10 @@ class StripWhitespaceFilter(object): # Removes newlines before commas, see issue140 last_nl = None for token in tlist.tokens[:]: - if token.ttype is T.Punctuation \ - and token.value == ',' \ - and last_nl is not None: + if last_nl and token.ttype is T.Punctuation and token.value == ',': tlist.tokens.remove(last_nl) - if token.is_whitespace(): - last_nl = token - else: - last_nl = None + + last_nl = token if token.is_whitespace() else None return self._stripws_default(tlist) def _stripws_parenthesis(self, tlist): @@ -140,19 +133,13 @@ class StripWhitespaceFilter(object): self._stripws_default(tlist) def process(self, stmt, depth=0): - [self.process(sgroup, depth + 1) - for sgroup in stmt.get_sublists()] + [self.process(sgroup, depth + 1) for sgroup in stmt.get_sublists()] self._stripws(stmt) - if ( - depth == 0 - and stmt.tokens - and stmt.tokens[-1].is_whitespace() - ): + if depth == 0 and stmt.tokens and stmt.tokens[-1].is_whitespace(): stmt.tokens.pop(-1) class ReindentFilter(object): - def __init__(self, width=2, char=' ', line_width=None, wrap_after=0): self.width = width self.char = char @@ -196,8 +183,7 @@ class ReindentFilter(object): 'SET', 'BETWEEN', 'EXCEPT', 'HAVING') def _next_token(i): - t = tlist.token_next_match(i, T.Keyword, split_words, - regex=True) + t = tlist.token_next_by(m=(T.Keyword, split_words, True), idx=i) if t and t.value.upper() == 'BETWEEN': t = _next_token(tlist.token_index(t) + 1) if t and t.value.upper() == 'AND': @@ -208,13 +194,13 @@ class ReindentFilter(object): token = _next_token(idx) added = set() while token: - prev = tlist.token_prev(tlist.token_index(token), False) + prev = tlist.token_prev(token, skip_ws=False) offset = 1 if prev and prev.is_whitespace() and prev not in added: tlist.tokens.pop(tlist.token_index(prev)) offset += 1 - uprev = u(prev) - if (prev and (uprev.endswith('\n') or uprev.endswith('\r'))): + uprev = text_type(prev) + if prev and (uprev.endswith('\n') or uprev.endswith('\r')): nl = tlist.token_next(token) else: nl = self.nl() @@ -224,18 +210,17 @@ class ReindentFilter(object): token = _next_token(tlist.token_index(nl) + offset) def _split_statements(self, tlist): - idx = 0 - token = tlist.token_next_by_type(idx, (T.Keyword.DDL, T.Keyword.DML)) + token = tlist.token_next_by(t=(T.Keyword.DDL, T.Keyword.DML)) while token: - prev = tlist.token_prev(tlist.token_index(token), False) + prev = tlist.token_prev(token, skip_ws=False) if prev and prev.is_whitespace(): tlist.tokens.pop(tlist.token_index(prev)) # only break if it's not the first token if prev: nl = self.nl() tlist.insert_before(token, nl) - token = tlist.token_next_by_type(tlist.token_index(token) + 1, - (T.Keyword.DDL, T.Keyword.DML)) + token = tlist.token_next_by(t=(T.Keyword.DDL, T.Keyword.DML), + idx=token) def _process(self, tlist): func_name = '_process_%s' % 
tlist.__class__.__name__.lower() @@ -243,7 +228,7 @@ class ReindentFilter(object): func(tlist) def _process_where(self, tlist): - token = tlist.token_next_match(0, T.Keyword, 'WHERE') + token = tlist.token_next_by(m=(T.Keyword, 'WHERE')) try: tlist.insert_before(token, self.nl()) except ValueError: # issue121, errors in statement @@ -253,7 +238,7 @@ class ReindentFilter(object): self.indent -= 1 def _process_having(self, tlist): - token = tlist.token_next_match(0, T.Keyword, 'HAVING') + token = tlist.token_next_by(m=(T.Keyword, 'HAVING')) try: tlist.insert_before(token, self.nl()) except ValueError: # issue121, errors in statement @@ -270,7 +255,7 @@ class ReindentFilter(object): tlist.tokens.insert(0, self.nl()) indented = True num_offset = self._get_offset( - tlist.token_next_match(0, T.Punctuation, '(')) + tlist.token_next_by(m=(T.Punctuation, '('))) self.offset += num_offset self._process_default(tlist, stmts=not indented) if indented: @@ -323,7 +308,7 @@ class ReindentFilter(object): self.offset -= 5 if num_offset is not None: self.offset -= num_offset - end = tlist.token_next_match(0, T.Keyword, 'END') + end = tlist.token_next_by(m=(T.Keyword, 'END')) tlist.insert_before(end, self.nl()) self.offset -= outer_offset @@ -340,7 +325,7 @@ class ReindentFilter(object): self._process(stmt) if isinstance(stmt, sql.Statement): if self._last_stmt is not None: - if u(self._last_stmt).endswith('\n'): + if text_type(self._last_stmt).endswith('\n'): nl = '\n' else: nl = '\n\n' @@ -352,7 +337,6 @@ class ReindentFilter(object): # FIXME: Doesn't work class RightMarginFilter(object): - keep_together = ( # sql.TypeCast, sql.Identifier, sql.Alias, ) @@ -368,13 +352,12 @@ class RightMarginFilter(object): self.line = '' else: self.line = token.value.splitlines()[-1] - elif (token.is_group() - and token.__class__ not in self.keep_together): + elif token.is_group() and type(token) not in self.keep_together: token.tokens = self._process(token, token.tokens) else: - val = u(token) + val = text_type(token) if len(self.line) + len(val) > self.width: - match = re.search('^ +', self.line) + match = re.search(r'^ +', self.line) if match is not None: indent = match.group() else: @@ -389,13 +372,13 @@ class RightMarginFilter(object): # group.tokens = self._process(group, group.tokens) raise NotImplementedError + # --------------------------- # postprocess class SerializerUnicode(object): - def process(self, stmt): - raw = u(stmt) + raw = text_type(stmt) lines = split_unquoted_newlines(raw) res = '\n'.join(line.rstrip() for line in lines) return res @@ -418,7 +401,7 @@ class OutputFilter(object): else: varname = self.varname - has_nl = len(u(stmt).strip().splitlines()) > 1 + has_nl = len(text_type(stmt).strip().splitlines()) > 1 stmt.tokens = self._process(stmt.tokens, varname, has_nl) return stmt -- cgit v1.2.1 From da914acdb20293b9a059bdb346221757907855a7 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Sun, 29 May 2016 13:31:59 -0700 Subject: Add unicode-str compatible cls decorator --- sqlparse/compat.py | 10 ++++++++++ sqlparse/sql.py | 37 +++++++++---------------------------- 2 files changed, 19 insertions(+), 28 deletions(-) (limited to 'sqlparse') diff --git a/sqlparse/compat.py b/sqlparse/compat.py index 0226a00..0defd86 100644 --- a/sqlparse/compat.py +++ b/sqlparse/compat.py @@ -25,6 +25,10 @@ if PY3: return str(s) + def unicode_compatible(cls): + return cls + + text_type = str string_types = (str,) from io import StringIO @@ -39,6 +43,12 @@ elif PY2: return unicode(s, encoding) + def 
unicode_compatible(cls): + cls.__unicode__ = cls.__str__ + cls.__str__ = lambda x: x.__unicode__().encode('utf-8') + return cls + + text_type = unicode string_types = (basestring,) from StringIO import StringIO diff --git a/sqlparse/sql.py b/sqlparse/sql.py index 6abc432..1162a65 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -8,13 +8,13 @@ """This module contains classes representing syntactical elements of SQL.""" import re -import sys from sqlparse import tokens as T -from sqlparse.compat import string_types, u +from sqlparse.compat import u, string_types, unicode_compatible from sqlparse.utils import imt, remove_quotes +@unicode_compatible class Token(object): """Base class for all other classes in this module. @@ -26,6 +26,7 @@ class Token(object): __slots__ = ('value', 'ttype', 'parent', 'normalized', 'is_keyword') def __init__(self, ttype, value): + value = u(value) self.value = value if ttype in T.Keyword: self.normalized = value.upper() @@ -36,30 +37,21 @@ class Token(object): self.parent = None def __str__(self): - if sys.version_info[0] == 3: - return self.value - else: - return u(self).encode('utf-8') + return self.value def __repr__(self): short = self._get_repr_value() - if sys.version_info[0] < 3: - short = short.encode('utf-8') return '<%s \'%s\' at 0x%07x>' % (self._get_repr_name(), short, id(self)) - def __unicode__(self): - """Returns a unicode representation of this object.""" - return self.value or '' - def _get_repr_name(self): return str(self.ttype).split('.')[-1] def _get_repr_value(self): - raw = u(self) + raw = self.value if len(raw) > 7: - raw = raw[:6] + u'...' - return re.sub('\s+', ' ', raw) + raw = raw[:6] + '...' + return re.sub(r'\s+', ' ', raw) def flatten(self): """Resolve subgroups.""" @@ -143,6 +135,7 @@ class Token(object): return False +@unicode_compatible class TokenList(Token): """A group of tokens. 
@@ -158,20 +151,8 @@ class TokenList(Token): self.tokens = tokens super(TokenList, self).__init__(None, self.__str__()) - def __unicode__(self): - return self._to_string() - def __str__(self): - str_ = self._to_string() - if sys.version_info[0] <= 2: - str_ = str_.encode('utf-8') - return str_ - - def _to_string(self): - if sys.version_info[0] == 3: - return ''.join(x.value for x in self.flatten()) - else: - return ''.join(u(x) for x in self.flatten()) + return ''.join(token.value for token in self.flatten()) def _get_repr_name(self): return self.__class__.__name__ -- cgit v1.2.1 From 4ef134f40d872cb88ac4da27a3cc8223cf8aec52 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Thu, 2 Jun 2016 19:16:57 -0700 Subject: Change pprint w new str format; can out to file --- sqlparse/sql.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) (limited to 'sqlparse') diff --git a/sqlparse/sql.py b/sqlparse/sql.py index 1162a65..8ed8061 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -6,6 +6,7 @@ # the BSD License: http://www.opensource.org/licenses/bsd-license.php """This module contains classes representing syntactical elements of SQL.""" +from __future__ import print_function import re @@ -40,9 +41,9 @@ class Token(object): return self.value def __repr__(self): - short = self._get_repr_value() - return '<%s \'%s\' at 0x%07x>' % (self._get_repr_name(), - short, id(self)) + cls = self._get_repr_name() + value = self._get_repr_value() + return "<{cls} '{value}' at 0x{id:2X}>".format(id=id(self), **locals()) def _get_repr_name(self): return str(self.ttype).split('.')[-1] @@ -157,19 +158,17 @@ class TokenList(Token): def _get_repr_name(self): return self.__class__.__name__ - def _pprint_tree(self, max_depth=None, depth=0): + def _pprint_tree(self, max_depth=None, depth=0, f=None): """Pretty-print the object tree.""" - indent = ' ' * (depth * 2) + ind = ' ' * (depth * 2) for idx, token in enumerate(self.tokens): - if token.is_group(): - pre = ' +-' - else: - pre = ' | ' - print('%s%s%d %s \'%s\'' % (indent, pre, idx, - token._get_repr_name(), - token._get_repr_value())) - if (token.is_group() and (max_depth is None or depth < max_depth)): - token._pprint_tree(max_depth, depth + 1) + pre = ' +-' if token.is_group() else ' | ' + cls = token._get_repr_name() + value = token._get_repr_value() + print("{ind}{pre}{idx} {cls} '{value}'".format(**locals()), file=f) + + if token.is_group() and (max_depth is None or depth < max_depth): + token._pprint_tree(max_depth, depth + 1, f) def get_token_at_offset(self, offset): """Returns the token that is on position offset.""" -- cgit v1.2.1 From 370152be4a1b2ab474469e494334b1446dadee9d Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Wed, 11 May 2016 04:29:58 -0700 Subject: Simplify sql.py naming/alias --- sqlparse/sql.py | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) (limited to 'sqlparse') diff --git a/sqlparse/sql.py b/sqlparse/sql.py index 8ed8061..a521bee 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -373,8 +373,6 @@ class TokenList(Token): if len(self.tokens) > 2 and self.token_next_by(t=T.Whitespace): return self._get_first_name(reverse=True) - return None - def get_name(self): """Returns the name of this identifier. @@ -382,32 +380,22 @@ class TokenList(Token): be considered as the name under which the object corresponding to this identifier is known within the current statement. 
""" - alias = self.get_alias() - if alias is not None: - return alias - return self.get_real_name() + return self.get_alias() or self.get_real_name() def get_real_name(self): """Returns the real name (object name) of this identifier.""" # a.b - dot = self.token_next_match(0, T.Punctuation, '.') - if dot is not None: - return self._get_first_name(self.token_index(dot)) - - return self._get_first_name() + dot = self.token_next_by(m=(T.Punctuation, '.')) + return self._get_first_name(dot) def get_parent_name(self): """Return name of the parent object if any. A parent object is identified by the first occuring dot. """ - dot = self.token_next_match(0, T.Punctuation, '.') - if dot is None: - return None - prev_ = self.token_prev(self.token_index(dot)) - if prev_ is None: # something must be verry wrong here.. - return None - return remove_quotes(prev_.value) + dot = self.token_next_by(m=(T.Punctuation, '.')) + prev_ = self.token_prev(dot) + return remove_quotes(prev_.value) if prev_ is not None else None def _get_first_name(self, idx=None, reverse=False, keywords=False): """Returns the name of the first token with a name""" @@ -427,7 +415,6 @@ class TokenList(Token): return remove_quotes(tok.value) elif isinstance(tok, Identifier) or isinstance(tok, Function): return tok.get_name() - return None class Statement(TokenList): -- cgit v1.2.1 From 7c2d56c8110e69c74538013703976fed0b497238 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Mon, 30 May 2016 11:40:45 -0700 Subject: Clean-up rename variables in loops to token sql.py --- sqlparse/sql.py | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'sqlparse') diff --git a/sqlparse/sql.py b/sqlparse/sql.py index a521bee..7bf01c1 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -202,9 +202,9 @@ class TokenList(Token): return True def get_sublists(self): - for x in self.tokens: - if isinstance(x, TokenList): - yield x + for token in self.tokens: + if isinstance(token, TokenList): + yield token @property def _groupable_tokens(self): @@ -410,11 +410,11 @@ class TokenList(Token): if keywords: types.append(T.Keyword) - for tok in tokens: - if tok.ttype in types: - return remove_quotes(tok.value) - elif isinstance(tok, Identifier) or isinstance(tok, Function): - return tok.get_name() + for token in tokens: + if token.ttype in types: + return remove_quotes(token.value) + elif isinstance(token, (Identifier, Function)): + return token.get_name() class Statement(TokenList): @@ -489,10 +489,10 @@ class Identifier(TokenList): def get_array_indices(self): """Returns an iterator of index token lists""" - for tok in self.tokens: - if isinstance(tok, SquareBrackets): + for token in self.tokens: + if isinstance(token, SquareBrackets): # Use [1:-1] index to discard the square brackets - yield tok.tokens[1:-1] + yield token.tokens[1:-1] class IdentifierList(TokenList): @@ -503,9 +503,9 @@ class IdentifierList(TokenList): Whitespaces and punctuations are not included in this generator. 
""" - for x in self.tokens: - if not x.is_whitespace() and not x.match(T.Punctuation, ','): - yield x + for token in self.tokens: + if not (token.is_whitespace() or token.match(T.Punctuation, ',')): + yield token class Parenthesis(TokenList): @@ -626,11 +626,11 @@ class Function(TokenList): def get_parameters(self): """Return a list of parameters.""" parenthesis = self.tokens[-1] - for t in parenthesis.tokens: - if imt(t, i=IdentifierList): - return t.get_identifiers() - elif imt(t, i=(Function, Identifier), t=T.Literal): - return [t, ] + for token in parenthesis.tokens: + if imt(token, i=IdentifierList): + return token.get_identifiers() + elif imt(token, i=(Function, Identifier), t=T.Literal): + return [token, ] return [] -- cgit v1.2.1 From 149519305864a6455638a5fa0984a99c3e54f1f3 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Mon, 30 May 2016 10:48:18 -0700 Subject: Clean Token/Tokenlist init's --- sqlparse/sql.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'sqlparse') diff --git a/sqlparse/sql.py b/sqlparse/sql.py index 7bf01c1..5f353b5 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -29,13 +29,10 @@ class Token(object): def __init__(self, ttype, value): value = u(value) self.value = value - if ttype in T.Keyword: - self.normalized = value.upper() - else: - self.normalized = value self.ttype = ttype - self.is_keyword = ttype in T.Keyword self.parent = None + self.is_keyword = ttype in T.Keyword + self.normalized = value.upper() if self.is_keyword else value def __str__(self): return self.value @@ -147,9 +144,7 @@ class TokenList(Token): __slots__ = ('value', 'ttype', 'tokens') def __init__(self, tokens=None): - if tokens is None: - tokens = [] - self.tokens = tokens + self.tokens = tokens or [] super(TokenList, self).__init__(None, self.__str__()) def __str__(self): -- cgit v1.2.1 From 658f13aacfd3b94398aaaf14f567564a7c1ec1de Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Mon, 30 May 2016 10:49:24 -0700 Subject: Clean-up code style sql.py other items inside slots are already defined in parent class --- sqlparse/sql.py | 66 +++++++++++++++++++++++++++------------------------------ 1 file changed, 31 insertions(+), 35 deletions(-) (limited to 'sqlparse') diff --git a/sqlparse/sql.py b/sqlparse/sql.py index 5f353b5..447de7e 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -11,7 +11,7 @@ from __future__ import print_function import re from sqlparse import tokens as T -from sqlparse.compat import u, string_types, unicode_compatible +from sqlparse.compat import string_types, text_type, unicode_compatible from sqlparse.utils import imt, remove_quotes @@ -27,7 +27,7 @@ class Token(object): __slots__ = ('value', 'ttype', 'parent', 'normalized', 'is_keyword') def __init__(self, ttype, value): - value = u(value) + value = text_type(value) self.value = value self.ttype = ttype self.parent = None @@ -46,7 +46,7 @@ class Token(object): return str(self.ttype).split('.')[-1] def _get_repr_value(self): - raw = self.value + raw = text_type(self) if len(raw) > 7: raw = raw[:6] + '...' return re.sub(r'\s+', ' ', raw) @@ -104,7 +104,7 @@ class Token(object): def is_whitespace(self): """Return ``True`` if this token is a whitespace token.""" - return self.ttype and self.ttype in T.Whitespace + return self.ttype in T.Whitespace def within(self, group_cls): """Returns ``True`` if this token is within *group_cls*. @@ -141,17 +141,17 @@ class TokenList(Token): list of child-tokens. 
""" - __slots__ = ('value', 'ttype', 'tokens') + __slots__ = 'tokens' def __init__(self, tokens=None): self.tokens = tokens or [] - super(TokenList, self).__init__(None, self.__str__()) + super(TokenList, self).__init__(None, text_type(self)) def __str__(self): return ''.join(token.value for token in self.flatten()) def _get_repr_name(self): - return self.__class__.__name__ + return type(self).__name__ def _pprint_tree(self, max_depth=None, depth=0, f=None): """Pretty-print the object tree.""" @@ -180,7 +180,7 @@ class TokenList(Token): This method is recursively called for all child tokens. """ for token in self.tokens: - if isinstance(token, TokenList): + if token.is_group(): for item in token.flatten(): yield item else: @@ -198,7 +198,7 @@ class TokenList(Token): def get_sublists(self): for token in self.tokens: - if isinstance(token, TokenList): + if token.is_group(): yield token @property @@ -217,7 +217,7 @@ class TokenList(Token): funcs = (funcs,) if reverse: - iterable = iter(reversed(self.tokens[end:start - 1])) + iterable = reversed(self.tokens[end:start - 1]) else: iterable = self.tokens[start:end] @@ -327,7 +327,7 @@ class TokenList(Token): left.parent = self tokens = tokens[1:] left.tokens.extend(tokens) - left.value = left.__str__() + left.value = str(left) else: left = grp_cls(tokens) @@ -415,8 +415,6 @@ class TokenList(Token): class Statement(TokenList): """Represents a SQL statement.""" - __slots__ = ('value', 'ttype', 'tokens') - def get_type(self): """Returns the type of a statement. @@ -440,16 +438,14 @@ class Statement(TokenList): # The WITH keyword should be followed by either an Identifier or # an IdentifierList containing the CTE definitions; the actual # DML keyword (e.g. SELECT, INSERT) will follow next. - idents = self.token_next( - self.token_index(first_token), skip_ws=True) - if isinstance(idents, (Identifier, IdentifierList)): - dml_keyword = self.token_next( - self.token_index(idents), skip_ws=True) + token = self.token_next(first_token, skip_ws=True) + if isinstance(token, (Identifier, IdentifierList)): + dml_keyword = self.token_next(token, skip_ws=True) + if dml_keyword.ttype == T.Keyword.DML: return dml_keyword.normalized - # Hmm, probably invalid syntax, so return unknown. - return 'UNKNOWN' + # Hmm, probably invalid syntax, so return unknown. 
return 'UNKNOWN' @@ -505,8 +501,8 @@ class IdentifierList(TokenList): class Parenthesis(TokenList): """Tokens between parenthesis.""" - M_OPEN = (T.Punctuation, '(') - M_CLOSE = (T.Punctuation, ')') + M_OPEN = T.Punctuation, '(' + M_CLOSE = T.Punctuation, ')' @property def _groupable_tokens(self): @@ -515,8 +511,8 @@ class Parenthesis(TokenList): class SquareBrackets(TokenList): """Tokens between square brackets""" - M_OPEN = (T.Punctuation, '[') - M_CLOSE = (T.Punctuation, ']') + M_OPEN = T.Punctuation, '[' + M_CLOSE = T.Punctuation, ']' @property def _groupable_tokens(self): @@ -529,14 +525,14 @@ class Assignment(TokenList): class If(TokenList): """An 'if' clause with possible 'else if' or 'else' parts.""" - M_OPEN = (T.Keyword, 'IF') - M_CLOSE = (T.Keyword, 'END IF') + M_OPEN = T.Keyword, 'IF' + M_CLOSE = T.Keyword, 'END IF' class For(TokenList): """A 'FOR' loop.""" - M_OPEN = (T.Keyword, ('FOR', 'FOREACH')) - M_CLOSE = (T.Keyword, 'END LOOP') + M_OPEN = T.Keyword, ('FOR', 'FOREACH') + M_CLOSE = T.Keyword, 'END LOOP' class Comparison(TokenList): @@ -560,15 +556,15 @@ class Comment(TokenList): class Where(TokenList): """A WHERE clause.""" - M_OPEN = (T.Keyword, 'WHERE') - M_CLOSE = (T.Keyword, - ('ORDER', 'GROUP', 'LIMIT', 'UNION', 'EXCEPT', 'HAVING')) + M_OPEN = T.Keyword, 'WHERE' + M_CLOSE = T.Keyword, ('ORDER', 'GROUP', 'LIMIT', 'UNION', 'EXCEPT', + 'HAVING') class Case(TokenList): """A CASE statement with one or more WHEN and possibly an ELSE part.""" - M_OPEN = (T.Keyword, 'CASE') - M_CLOSE = (T.Keyword, 'END') + M_OPEN = T.Keyword, 'CASE' + M_CLOSE = T.Keyword, 'END' def get_cases(self): """Returns a list of 2-tuples (condition, value). @@ -631,5 +627,5 @@ class Function(TokenList): class Begin(TokenList): """A BEGIN/END block.""" - M_OPEN = (T.Keyword, 'BEGIN') - M_CLOSE = (T.Keyword, 'END') + M_OPEN = T.Keyword, 'BEGIN' + M_CLOSE = T.Keyword, 'END' -- cgit v1.2.1 From fa8e962e3050308de4618f0a66b9534742d30119 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Mon, 30 May 2016 12:55:34 -0700 Subject: Allow tokenlists to skip over comments Rename ignore_cm to skip_cm for consistency --- sqlparse/sql.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'sqlparse') diff --git a/sqlparse/sql.py b/sqlparse/sql.py index 447de7e..dde362b 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -226,7 +226,7 @@ class TokenList(Token): if func(token): return token - def token_first(self, ignore_whitespace=True, ignore_comments=False): + def token_first(self, skip_ws=True, skip_cm=False): """Returns the first child token. If *ignore_whitespace* is ``True`` (the default), whitespace @@ -235,8 +235,9 @@ class TokenList(Token): if *ignore_comments* is ``True`` (default: ``False``), comments are ignored too. """ - funcs = lambda tk: not ((ignore_whitespace and tk.is_whitespace()) or - (ignore_comments and imt(tk, i=Comment))) + # this on is inconsistent, using Comment instead of T.Comment... + funcs = lambda tk: not ((skip_ws and tk.is_whitespace()) or + (skip_cm and imt(tk, i=Comment))) return self._token_matching(funcs) def token_next_by(self, i=None, m=None, t=None, idx=0, end=None): @@ -272,7 +273,7 @@ class TokenList(Token): def token_matching(self, idx, funcs): return self._token_matching(funcs, idx) - def token_prev(self, idx, skip_ws=True): + def token_prev(self, idx, skip_ws=True, skip_cm=False): """Returns the previous token relative to *idx*. If *skip_ws* is ``True`` (the default) whitespace tokens are ignored. 
@@ -280,10 +281,11 @@ class TokenList(Token): """ if isinstance(idx, int): idx += 1 # alot of code usage current pre-compensates for this - funcs = lambda tk: not (tk.is_whitespace() and skip_ws) + funcs = lambda tk: not ((skip_ws and tk.is_whitespace()) or + (skip_cm and imt(tk, t=T.Comment))) return self._token_matching(funcs, idx, reverse=True) - def token_next(self, idx, skip_ws=True): + def token_next(self, idx, skip_ws=True, skip_cm=False): """Returns the next token relative to *idx*. If *skip_ws* is ``True`` (the default) whitespace tokens are ignored. @@ -291,7 +293,8 @@ class TokenList(Token): """ if isinstance(idx, int): idx += 1 # alot of code usage current pre-compensates for this - funcs = lambda tk: not (tk.is_whitespace() and skip_ws) + funcs = lambda tk: not ((skip_ws and tk.is_whitespace()) or + (skip_cm and imt(tk, t=T.Comment))) return self._token_matching(funcs, idx) def token_index(self, token, start=0): @@ -309,9 +312,9 @@ class TokenList(Token): end_idx = include_end + self.token_index(end) return self.tokens[start_idx:end_idx] - def group_tokens(self, grp_cls, tokens, ignore_ws=False, extend=False): + def group_tokens(self, grp_cls, tokens, skip_ws=False, extend=False): """Replace tokens by an instance of *grp_cls*.""" - if ignore_ws: + if skip_ws: while tokens and tokens[-1].is_whitespace(): tokens = tokens[:-1] @@ -425,7 +428,7 @@ class Statement(TokenList): Whitespaces and comments at the beginning of the statement are ignored. """ - first_token = self.token_first(ignore_comments=True) + first_token = self.token_first(skip_cm=True) if first_token is None: # An "empty" statement that either has not tokens at all # or only whitespace tokens. -- cgit v1.2.1 From cfc22ad6ef8dc213ff562464ebdd67c81ab2554f Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Wed, 1 Jun 2016 12:58:39 -0700 Subject: Refactor one-time use functions --- sqlparse/sql.py | 27 +++------------------------ 1 file changed, 3 insertions(+), 24 deletions(-) (limited to 'sqlparse') diff --git a/sqlparse/sql.py b/sqlparse/sql.py index dde362b..b7096af 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -244,27 +244,6 @@ class TokenList(Token): funcs = lambda tk: imt(tk, i, m, t) return self._token_matching(funcs, idx, end) - def token_next_by_instance(self, idx, clss, end=None): - """Returns the next token matching a class. - - *idx* is where to start searching in the list of child tokens. - *clss* is a list of classes the token should be an instance of. - - If no matching token can be found ``None`` is returned. 
- """ - funcs = lambda tk: imt(tk, i=clss) - return self._token_matching(funcs, idx, end) - - def token_next_by_type(self, idx, ttypes): - """Returns next matching token by it's token type.""" - funcs = lambda tk: imt(tk, t=ttypes) - return self._token_matching(funcs, idx) - - def token_next_match(self, idx, ttype, value, regex=False): - """Returns next token where it's ``match`` method returns ``True``.""" - funcs = lambda tk: imt(tk, m=(ttype, value, regex)) - return self._token_matching(funcs, idx) - def token_not_matching(self, idx, funcs): funcs = (funcs,) if not isinstance(funcs, (list, tuple)) else funcs funcs = [lambda tk: not func(tk) for func in funcs] @@ -460,12 +439,12 @@ class Identifier(TokenList): def is_wildcard(self): """Return ``True`` if this identifier contains a wildcard.""" - token = self.token_next_by_type(0, T.Wildcard) + token = self.token_next_by(t=T.Wildcard) return token is not None def get_typecast(self): """Returns the typecast or ``None`` of this object as a string.""" - marker = self.token_next_match(0, T.Punctuation, '::') + marker = self.token_next_by(m=(T.Punctuation, '::')) if marker is None: return None next_ = self.token_next(self.token_index(marker), False) @@ -475,7 +454,7 @@ class Identifier(TokenList): def get_ordering(self): """Returns the ordering or ``None`` as uppercase string.""" - ordering = self.token_next_by_type(0, T.Keyword.Order) + ordering = self.token_next_by(t=T.Keyword.Order) if ordering is None: return None return ordering.value.upper() -- cgit v1.2.1 From ab1e8a1b823cb81bed7906463c6a44447b08f3b9 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Wed, 1 Jun 2016 15:00:31 -0700 Subject: Simplify index --- sqlparse/sql.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'sqlparse') diff --git a/sqlparse/sql.py b/sqlparse/sql.py index b7096af..5dbb9f4 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -150,6 +150,12 @@ class TokenList(Token): def __str__(self): return ''.join(token.value for token in self.flatten()) + def __iter__(self): + return iter(self.tokens) + + def __getitem__(self, item): + return self.tokens[item] + def _get_repr_name(self): return type(self).__name__ @@ -186,13 +192,6 @@ class TokenList(Token): else: yield token - # def __iter__(self): - # return self - # - # def next(self): - # for token in self.tokens: - # yield token - def is_group(self): return True @@ -447,17 +446,17 @@ class Identifier(TokenList): marker = self.token_next_by(m=(T.Punctuation, '::')) if marker is None: return None - next_ = self.token_next(self.token_index(marker), False) + next_ = self.token_next(marker, False) if next_ is None: return None - return u(next_) + return next_.value def get_ordering(self): """Returns the ordering or ``None`` as uppercase string.""" ordering = self.token_next_by(t=T.Keyword.Order) if ordering is None: return None - return ordering.value.upper() + return ordering.normalized def get_array_indices(self): """Returns an iterator of index token lists""" -- cgit v1.2.1 From 5747015634a39191511de8db576f2cd0aa5eafc9 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Wed, 1 Jun 2016 05:51:17 -0700 Subject: Refactor match logic --- sqlparse/sql.py | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) (limited to 'sqlparse') diff --git a/sqlparse/sql.py b/sqlparse/sql.py index 5dbb9f4..57bf1e7 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -71,32 +71,23 @@ class Token(object): if not type_matched or values is None: return type_matched - if 
regex: - if isinstance(values, string_types): - values = {values} + if isinstance(values, string_types): + values = (values,) - if self.ttype is T.Keyword: - values = set(re.compile(v, re.IGNORECASE) for v in values) - else: - values = set(re.compile(v) for v in values) + if regex: + # TODO: Add test for regex with is_keyboard = false + flag = re.IGNORECASE if self.is_keyword else 0 + values = (re.compile(v, flag) for v in values) for pattern in values: - if pattern.search(self.value): + if pattern.search(self.normalized): return True return False - if isinstance(values, string_types): - if self.is_keyword: - return values.upper() == self.normalized - return values == self.value - if self.is_keyword: - for v in values: - if v.upper() == self.normalized: - return True - return False + values = (v.upper() for v in values) - return self.value in values + return self.normalized in values def is_group(self): """Returns ``True`` if this object has children.""" -- cgit v1.2.1
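
The net effect of the series is easier to see from the outside than from the individual hunks: the public entry points keep working while `FilterStack.run()` becomes a plain generator and the filters' `process()` methods drop the unused `stack` argument. Below is a minimal sanity-check sketch using only the documented top-level API of the installed `sqlparse` package; the output strings in the comments are illustrative, not taken from the patches.

```python
import sqlparse

# split() now simply consumes FilterStack.run(); the removed
# split_statements flag is no longer needed.
print(sqlparse.split("select * from foo; select * from bar;"))
# e.g. ['select * from foo;', 'select * from bar;']

# format() drives the same stack; filters such as KeywordCaseFilter are
# now called as filter_.process(stream) rather than
# filter_.process(stack, stream).
print(sqlparse.format("select * from foo where bar = 1",
                      keyword_case='upper', reindent=True))
```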