From c9dc9c8b79a3e290c28761a786993b02eff705d6 Mon Sep 17 00:00:00 2001 From: Darik Gamble Date: Wed, 4 Mar 2015 10:34:05 -0500 Subject: get rid of tokens.Punctuation.ArrayIndex, add negative lookbehind for sqlite identifiers --- sqlparse/lexer.py | 6 ++++-- sqlparse/tokens.py | 1 - 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'sqlparse') diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py index 999eb2c..4707990 100644 --- a/sqlparse/lexer.py +++ b/sqlparse/lexer.py @@ -194,8 +194,10 @@ class Lexer(object): (r"'(''|\\\\|\\'|[^'])*'", tokens.String.Single), # not a real string literal in ANSI SQL: (r'(""|".*?[^\\]")', tokens.String.Symbol), - (r'(?<=[\w\]])(\[[^\]]*?\])', tokens.Punctuation.ArrayIndex), - (r'(\[[^\]]+\])', tokens.Name), + # sqlite names can be escaped with [square brackets]. left bracket + # cannot be preceded by word character or a right bracket -- + # otherwise it's probably an array index + (r'(? Date: Wed, 4 Mar 2015 10:34:47 -0500 Subject: Move _find_matching to a module-level function --- sqlparse/engine/grouping.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'sqlparse') diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index 9314b89..0be44da 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -51,19 +51,21 @@ def _group_left_right(tlist, ttype, value, cls, ttype, value) +def _find_matching(idx, tlist, start_ttype, start_value, end_ttype, end_value): + depth = 1 + for tok in tlist.tokens[idx:]: + if tok.match(start_ttype, start_value): + depth += 1 + elif tok.match(end_ttype, end_value): + depth -= 1 + if depth == 1: + return tok + return None + + def _group_matching(tlist, start_ttype, start_value, end_ttype, end_value, cls, include_semicolon=False, recurse=False): - def _find_matching(i, tl, stt, sva, ett, eva): - depth = 1 - for n in xrange(i, len(tl.tokens)): - t = tl.tokens[n] - if t.match(stt, sva): - depth += 1 - elif 
t.match(ett, eva): - depth -= 1 - if depth == 1: - return t - return None + [_group_matching(sgroup, start_ttype, start_value, end_ttype, end_value, cls, include_semicolon) for sgroup in tlist.get_sublists() if recurse] -- cgit v1.2.1 From a93029f38fc2a1a182da92cf361d700cbe2b79c2 Mon Sep 17 00:00:00 2001 From: Darik Gamble Date: Wed, 4 Mar 2015 10:39:53 -0500 Subject: Parse square brackets as a group just like parens - add class sql.SquareBrackets - replace group_parenthesis() with more generic group_brackets(), which groups square and round brackets, so each can contain groups of the other --- sqlparse/engine/grouping.py | 47 +++++++++++++++++++++++++++++++++++++++++---- sqlparse/sql.py | 9 +++++++++ 2 files changed, 52 insertions(+), 4 deletions(-) (limited to 'sqlparse') diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index 0be44da..73679e3 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -277,9 +277,48 @@ def group_identifier_list(tlist): tcomma = next_ -def group_parenthesis(tlist): - _group_matching(tlist, T.Punctuation, '(', T.Punctuation, ')', - sql.Parenthesis) +def group_brackets(tlist): + """Group parentheses () or square brackets [] + + This is just like _group_matching, but complicated by the fact that + round brackets can contain square bracket groups and vice versa + """ + + if isinstance(tlist, (sql.Parenthesis, sql.SquareBrackets)): + idx = 1 + else: + idx = 0 + + # Find the first opening bracket + token = tlist.token_next_match(idx, T.Punctuation, ['(', '[']) + + while token: + start_val = token.value # either '(' or '[' + if start_val == '(': + end_val = ')' + group_class = sql.Parenthesis + else: + end_val = ']' + group_class = sql.SquareBrackets + + tidx = tlist.token_index(token) + + # Find the corresponding closing bracket + end = _find_matching(tidx, tlist, T.Punctuation, start_val, + T.Punctuation, end_val) + + if end is None: + idx = tidx + 1 + else: + group = 
tlist.group_tokens(group_class, + tlist.tokens_between(token, end)) + + # Check for nested bracket groups within this group + group_brackets(group) + idx = tlist.token_index(group) + 1 + + # Find the next opening bracket + token = tlist.token_next_match(idx, T.Punctuation, ['(', '[']) def group_comments(tlist): @@ -395,7 +434,7 @@ def align_comments(tlist): def group(tlist): for func in [ group_comments, - group_parenthesis, + group_brackets, group_functions, group_where, group_case, diff --git a/sqlparse/sql.py b/sqlparse/sql.py index 8492c5e..25d5243 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -542,6 +542,15 @@ class Parenthesis(TokenList): return self.tokens[1:-1] +class SquareBrackets(TokenList): + """Tokens between square brackets""" + + __slots__ = ('value', 'ttype', 'tokens') + + @property + def _groupable_tokens(self): + return self.tokens[1:-1] + class Assignment(TokenList): """An assignment like 'var := val;'""" __slots__ = ('value', 'ttype', 'tokens') -- cgit v1.2.1 From b61fe36f718ca4f7c0b4e8d1cb81cc1370877905 Mon Sep 17 00:00:00 2001 From: Darik Gamble Date: Wed, 4 Mar 2015 10:41:34 -0500 Subject: Group square-brackets into identifiers Identifier.get_array_indices() looks for square brackets, and yields lists of bracket grouped tokens as array indices --- sqlparse/engine/grouping.py | 9 +++++---- sqlparse/sql.py | 9 +++++---- 2 files changed, 10 insertions(+), 8 deletions(-) (limited to 'sqlparse') diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index 73679e3..a317044 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -159,16 +159,17 @@ def group_identifier(tlist): lambda y: (y.match(T.Punctuation, '.') or y.ttype in (T.Operator, T.Wildcard, - T.ArrayIndex, - T.Name)), + T.Name) + or isinstance(y, sql.SquareBrackets)), lambda y: (y.ttype in (T.String.Symbol, T.Name, T.Wildcard, - T.ArrayIndex, T.Literal.String.Single, T.Literal.Number.Integer, T.Literal.Number.Float) - or isinstance(y, 
(sql.Parenthesis, sql.Function))))) + or isinstance(y, (sql.Parenthesis, + sql.SquareBrackets, + sql.Function))))) for t in tl.tokens[i:]: # Don't take whitespaces into account. if t.ttype is T.Whitespace: diff --git a/sqlparse/sql.py b/sqlparse/sql.py index 25d5243..9fcb546 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -511,11 +511,12 @@ class Identifier(TokenList): return ordering.value.upper() def get_array_indices(self): - """Returns an iterator of index expressions as strings""" + """Returns an iterator of index token lists""" - # Use [1:-1] index to discard the square brackets - return (tok.value[1:-1] for tok in self.tokens - if tok.ttype in T.ArrayIndex) + for tok in self.tokens: + if isinstance(tok, SquareBrackets): + # Use [1:-1] index to discard the square brackets + yield tok.tokens[1:-1] class IdentifierList(TokenList): -- cgit v1.2.1