diff options
| author | Victor Uriarte <victor.m.uriarte@intel.com> | 2016-05-10 21:50:46 -0700 |
|---|---|---|
| committer | Victor Uriarte <victor.m.uriarte@intel.com> | 2016-05-10 21:50:46 -0700 |
| commit | 1e770d9b99983fa82375b71c5b3f5d8c6bb1a72e (patch) | |
| tree | a7a21d6ac6561186d1e1a144f6e47e0107527fd2 /sqlparse/engine | |
| parent | 2d0c6d6d33e4bb974037f0eeb68d6a05262e4373 (diff) | |
| download | sqlparse-1e770d9b99983fa82375b71c5b3f5d8c6bb1a72e.tar.gz | |
separate identifier grouping into its components
leaving sql.Operations in for future PR
small behavior changed for invalid identifier
Diffstat (limited to 'sqlparse/engine')
| -rw-r--r-- | sqlparse/engine/grouping.py | 127 |
1 files changed, 50 insertions, 77 deletions
diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index 11c2b38..8fb4af1 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -1,7 +1,5 @@ # -*- coding: utf-8 -*- -import itertools - from sqlparse import sql from sqlparse import tokens as T from sqlparse.utils import recurse, imt, find_matching @@ -92,85 +90,57 @@ def group_case(tlist): _group_matching(tlist, sql.Case) +@recurse(sql.Identifier) def group_identifier(tlist): - def _consume_cycle(tl, i): - # TODO: Usage of Wildcard token is ambivalent here. - x = itertools.cycle(( - lambda y: (y.match(T.Punctuation, '.') - or y.ttype in (T.Operator, - T.Wildcard, - T.Name) - or isinstance(y, sql.SquareBrackets)), - lambda y: (y.ttype in (T.String.Symbol, - T.Name, - T.Wildcard, - T.Literal.String.Single, - T.Literal.Number.Integer, - T.Literal.Number.Float) - or isinstance(y, (sql.Parenthesis, - sql.SquareBrackets, - sql.Function))))) - for t in tl.tokens[i:]: - # Don't take whitespaces into account. - if t.ttype is T.Whitespace: - yield t - continue - if next(x)(t): - yield t - else: - if isinstance(t, sql.Comment) and t.is_multiline(): - yield t - if t.ttype is T.Keyword.Order: - yield t - return - - def _next_token(tl, i): - # chooses the next token. if two tokens are found then the - # first is returned. - t1 = tl.token_next_by_type( - i, (T.String.Symbol, T.Name, T.Literal.Number.Integer, - T.Literal.Number.Float)) - - i1 = tl.token_index(t1, start=i) if t1 else None - t2_end = None if i1 is None else i1 + 1 - t2 = tl.token_next_by_instance(i, (sql.Function, sql.Parenthesis), - end=t2_end) - - if t1 and t2: - i2 = tl.token_index(t2, start=i) - if i1 > i2: - return t2 - else: - return t1 - elif t1: - return t1 - else: - return t2 + T_IDENT = (T.String.Symbol, T.Name) + + token = tlist.token_next_by(t=T_IDENT) + while token: + token = tlist.group_tokens(sql.Identifier, [token, ]) + token = tlist.token_next_by(t=T_IDENT, idx=token) - # bottom up approach: group subgroups first - [group_identifier(sgroup) for sgroup in tlist.get_sublists() - if not isinstance(sgroup, sql.Identifier)] - # real processing - idx = 0 - token = _next_token(tlist, idx) +def group_period(tlist): + lfunc = lambda tk: imt(tk, i=(sql.SquareBrackets, sql.Identifier), + t=(T.Name, T.String.Symbol,)) + + rfunc = lambda tk: imt(tk, i=(sql.SquareBrackets, sql.Function), + t=(T.Name, T.String.Symbol, T.Wildcard)) + + _group_left_right(tlist, (T.Punctuation, '.'), sql.Identifier, + valid_left=lfunc, valid_right=rfunc) + + +def group_arrays(tlist): + token = tlist.token_next_by(i=sql.SquareBrackets) while token: - identifier_tokens = [token] + list( - _consume_cycle(tlist, - tlist.token_index(token, start=idx) + 1)) - # remove trailing whitespace - if identifier_tokens and identifier_tokens[-1].ttype is T.Whitespace: - identifier_tokens = identifier_tokens[:-1] - if not (len(identifier_tokens) == 1 - and (isinstance(identifier_tokens[0], (sql.Function, - sql.Parenthesis)) - or identifier_tokens[0].ttype in ( - T.Literal.Number.Integer, T.Literal.Number.Float))): - group = tlist.group_tokens(sql.Identifier, identifier_tokens) - idx = tlist.token_index(group, start=idx) + 1 - else: - idx += 1 - token = _next_token(tlist, idx) + prev = tlist.token_prev(idx=token) + if imt(prev, i=(sql.SquareBrackets, sql.Identifier, sql.Function), + t=(T.Name, T.String.Symbol,)): + tokens = tlist.tokens_between(prev, token) + token = tlist.group_tokens(sql.Identifier, tokens, extend=True) + token = tlist.token_next_by(i=sql.SquareBrackets, idx=token) + + +@recurse(sql.Identifier) +def group_operator(tlist): + I_CYCLE = (sql.SquareBrackets, sql.Parenthesis, sql.Function, + sql.Identifier,) # sql.Operation) + # wilcards wouldn't have operations next to them + T_CYCLE = T_NUMERICAL + T_STRING + T_NAME # + T.Wildcard + func = lambda tk: imt(tk, i=I_CYCLE, t=T_CYCLE) + + token = tlist.token_next_by(t=(T.Operator, T.Wildcard)) + while token: + left, right = tlist.token_prev(token), tlist.token_next(token) + + if func(left) and func(right): + token.ttype = T.Operator + tokens = tlist.tokens_between(left, right) + # token = tlist.group_tokens(sql.Operation, tokens) + token = tlist.group_tokens(sql.Identifier, tokens) + + token = tlist.token_next_by(t=(T.Operator, T.Wildcard), idx=token) @recurse(sql.IdentifierList) @@ -295,7 +265,10 @@ def group(tlist): group_functions, group_where, group_case, + group_period, + group_arrays, group_identifier, + group_operator, group_order, group_typecasts, group_as, |
