path: root/sqlparse/engine/grouping.py
Diffstat (limited to 'sqlparse/engine/grouping.py')
-rw-r--r--  sqlparse/engine/grouping.py | 524
1 file changed, 171 insertions(+), 353 deletions(-)
diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index 982488b..e30abab 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -1,450 +1,268 @@
# -*- coding: utf-8 -*-
-import itertools
-
from sqlparse import sql
from sqlparse import tokens as T
+from sqlparse.utils import recurse, imt, find_matching
+
+M_ROLE = (T.Keyword, ('null', 'role'))
+M_SEMICOLON = (T.Punctuation, ';')
+M_COMMA = (T.Punctuation, ',')
+
+T_NUMERICAL = (T.Number, T.Number.Integer, T.Number.Float)
+T_STRING = (T.String, T.String.Single, T.String.Symbol)
+T_NAME = (T.Name, T.Name.Placeholder)
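The helpers imported from sqlparse.utils are used throughout the rewritten functions below. As a rough sketch of the contract the call sites assume (the real imt lives in sqlparse/utils.py and may differ in detail), imt checks a token against an instance test (i), a ttype/value matcher pair (m), or a plain ttype (t):

    # Hedged sketch of imt() ("instance/match/ttype") as assumed by the
    # call sites in this module; not the actual sqlparse.utils code.
    def imt(token, i=None, m=None, t=None):
        if token is None:
            return False
        if i is not None and isinstance(token, i):
            return True
        if m is not None and token.match(*m):  # m is a (ttype, values) pair
            return True
        if t is not None and token.ttype is not None:
            ttypes = t if isinstance(t, tuple) else (t,)
            if any(token.ttype in tt for tt in ttypes):
                return True
        return False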
-def _group_left_right(tlist, ttype, value, cls,
- check_right=lambda t: True,
- check_left=lambda t: True,
- include_semicolon=False):
- [_group_left_right(sgroup, ttype, value, cls, check_right, check_left,
- include_semicolon) for sgroup in tlist.get_sublists()
- if not isinstance(sgroup, cls)]
- idx = 0
- token = tlist.token_next_match(idx, ttype, value)
+def _group_left_right(tlist, m, cls,
+ valid_left=lambda t: t is not None,
+ valid_right=lambda t: t is not None,
+ semicolon=False):
+ """Groups together tokens that are joined by a middle token. ie. x < y"""
+ [_group_left_right(sgroup, m, cls, valid_left, valid_right, semicolon)
+ for sgroup in tlist.get_sublists() if not isinstance(sgroup, cls)]
+
+ token = tlist.token_next_by(m=m)
while token:
- right = tlist.token_next(tlist.token_index(token))
- left = tlist.token_prev(tlist.token_index(token))
- if right is None or not check_right(right):
- token = tlist.token_next_match(tlist.token_index(token) + 1,
- ttype, value)
- elif left is None or not check_left(left):
- token = tlist.token_next_match(tlist.token_index(token) + 1,
- ttype, value)
- else:
- if include_semicolon:
- sright = tlist.token_next_match(tlist.token_index(right),
- T.Punctuation, ';')
- if sright is not None:
- # only overwrite "right" if a semicolon is actually
- # present.
- right = sright
- tokens = tlist.tokens_between(left, right)[1:]
- if not isinstance(left, cls):
- new = cls([left])
- new_idx = tlist.token_index(left)
- tlist.tokens.remove(left)
- tlist.tokens.insert(new_idx, new)
- left = new
- left.tokens.extend(tokens)
- for t in tokens:
- tlist.tokens.remove(t)
- token = tlist.token_next_match(tlist.token_index(left) + 1,
- ttype, value)
-
-
-def _find_matching(idx, tlist, start_ttype, start_value, end_ttype, end_value):
- depth = 1
- for tok in tlist.tokens[idx:]:
- if tok.match(start_ttype, start_value):
- depth += 1
- elif tok.match(end_ttype, end_value):
- depth -= 1
- if depth == 1:
- return tok
- return None
-
-
-def _group_matching(tlist, start_ttype, start_value, end_ttype, end_value,
- cls, include_semicolon=False, recurse=False):
-
- [_group_matching(sgroup, start_ttype, start_value, end_ttype, end_value,
- cls, include_semicolon) for sgroup in tlist.get_sublists()
- if recurse]
- if isinstance(tlist, cls):
- idx = 1
- else:
- idx = 0
- token = tlist.token_next_match(idx, start_ttype, start_value)
+ left, right = tlist.token_prev(token), tlist.token_next(token)
+
+ if valid_left(left) and valid_right(right):
+ if semicolon:
+ sright = tlist.token_next_by(m=M_SEMICOLON, idx=right)
+ right = sright or right  # only overwrite if a semicolon is present.
+ tokens = tlist.tokens_between(left, right)
+ token = tlist.group_tokens(cls, tokens, extend=True)
+ token = tlist.token_next_by(m=m, idx=token)
+
+
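To see what this helper produces in practice, here is a small usage demo (assuming an installed sqlparse with this grouping pipeline): the comparison inside the WHERE clause comes back as a single sql.Comparison node built around the middle token.

    import sqlparse
    from sqlparse import sql

    stmt = sqlparse.parse('SELECT * FROM t WHERE a = 1')[0]
    where = next(tok for tok in stmt.tokens if isinstance(tok, sql.Where))
    comparison = next(tok for tok in where.tokens
                      if isinstance(tok, sql.Comparison))
    print(str(comparison))  # 'a = 1'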
+def _group_matching(tlist, cls):
+ """Groups Tokens that have beginning and end. ie. parenthesis, brackets.."""
+ idx = 1 if imt(tlist, i=cls) else 0
+
+ token = tlist.token_next_by(m=cls.M_OPEN, idx=idx)
while token:
- tidx = tlist.token_index(token)
- end = _find_matching(tidx, tlist, start_ttype, start_value,
- end_ttype, end_value)
- if end is None:
- idx = tidx + 1
- else:
- if include_semicolon:
- next_ = tlist.token_next(tlist.token_index(end))
- if next_ and next_.match(T.Punctuation, ';'):
- end = next_
- group = tlist.group_tokens(cls, tlist.tokens_between(token, end))
- _group_matching(group, start_ttype, start_value,
- end_ttype, end_value, cls, include_semicolon)
- idx = tlist.token_index(group) + 1
- token = tlist.token_next_match(idx, start_ttype, start_value)
+ end = find_matching(tlist, token, cls.M_OPEN, cls.M_CLOSE)
+ if end is not None:
+ token = tlist.group_tokens(cls, tlist.tokens_between(token, end))
+ _group_matching(token, cls)
+ token = tlist.token_next_by(m=cls.M_OPEN, idx=token)
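The cls.M_OPEN/cls.M_CLOSE attributes consulted here replace the explicit ttype/value arguments of the old signature. Judging from the removed call sites in this diff, the pairs on the sql classes would look roughly like the sketch below (EXPECTED_MATCHERS is an illustrative name; the real attributes are defined in sqlparse/sql.py):

    from sqlparse import sql
    from sqlparse import tokens as T

    # Open/close matcher pairs implied by the removed call sites:
    EXPECTED_MATCHERS = {
        sql.Parenthesis:    ((T.Punctuation, '('), (T.Punctuation, ')')),
        sql.SquareBrackets: ((T.Punctuation, '['), (T.Punctuation, ']')),
        sql.If:    ((T.Keyword, 'IF'),    (T.Keyword, 'END IF')),
        sql.For:   ((T.Keyword, ('FOR', 'FOREACH')), (T.Keyword, 'END LOOP')),
        sql.Case:  ((T.Keyword, 'CASE'),  (T.Keyword, 'END')),
        sql.Begin: ((T.Keyword, 'BEGIN'), (T.Keyword, 'END')),
    }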
def group_if(tlist):
- _group_matching(tlist, T.Keyword, 'IF', T.Keyword, 'END IF', sql.If, True)
+ _group_matching(tlist, sql.If)
def group_for(tlist):
- _group_matching(tlist, T.Keyword, 'FOR', T.Keyword, 'END LOOP',
- sql.For, True)
+ _group_matching(tlist, sql.For)
def group_foreach(tlist):
- _group_matching(tlist, T.Keyword, 'FOREACH', T.Keyword, 'END LOOP',
- sql.For, True)
+ _group_matching(tlist, sql.For)
def group_begin(tlist):
- _group_matching(tlist, T.Keyword, 'BEGIN', T.Keyword, 'END',
- sql.Begin, True)
+ _group_matching(tlist, sql.Begin)
def group_as(tlist):
-
- def _right_valid(token):
- # Currently limited to DML/DDL. Maybe additional more non SQL reserved
- # keywords should appear here (see issue8).
- return token.ttype not in (T.DML, T.DDL)
-
- def _left_valid(token):
- if token.ttype is T.Keyword and token.value in ('NULL',):
- return True
- return token.ttype is not T.Keyword
-
- _group_left_right(tlist, T.Keyword, 'AS', sql.Identifier,
- check_right=_right_valid,
- check_left=_left_valid)
+ lfunc = lambda tk: not imt(tk, t=T.Keyword) or tk.value == 'NULL'
+ rfunc = lambda tk: not imt(tk, t=(T.DML, T.DDL))
+ _group_left_right(tlist, (T.Keyword, 'AS'), sql.Identifier,
+ valid_left=lfunc, valid_right=rfunc)
def group_assignment(tlist):
- _group_left_right(tlist, T.Assignment, ':=', sql.Assignment,
- include_semicolon=True)
+ _group_left_right(tlist, (T.Assignment, ':='), sql.Assignment,
+ semicolon=True)
def group_comparison(tlist):
+ I_COMPARABLE = (sql.Parenthesis, sql.Function, sql.Identifier)
+ T_COMPARABLE = T_NUMERICAL + T_STRING + T_NAME
- def _parts_valid(token):
- return (token.ttype in (T.String.Symbol, T.String.Single,
- T.Name, T.Number, T.Number.Float,
- T.Number.Integer, T.Literal,
- T.Literal.Number.Integer, T.Name.Placeholder)
- or isinstance(token, (sql.Identifier, sql.Parenthesis,
- sql.Function))
- or (token.ttype is T.Keyword
- and token.value.upper() in ['NULL', ]))
- _group_left_right(tlist, T.Operator.Comparison, None, sql.Comparison,
- check_left=_parts_valid, check_right=_parts_valid)
+ func = lambda tk: imt(tk, t=T_COMPARABLE, i=I_COMPARABLE) or (
+ imt(tk, t=T.Keyword) and tk.value.upper() == 'NULL')
+
+ _group_left_right(tlist, (T.Operator.Comparison, None), sql.Comparison,
+ valid_left=func, valid_right=func)
def group_case(tlist):
- _group_matching(tlist, T.Keyword, 'CASE', T.Keyword, 'END', sql.Case,
- include_semicolon=True, recurse=True)
+ _group_matching(tlist, sql.Case)
+@recurse(sql.Identifier)
def group_identifier(tlist):
- def _consume_cycle(tl, i):
- # TODO: Usage of Wildcard token is ambivalent here.
- x = itertools.cycle((
- lambda y: (y.match(T.Punctuation, '.')
- or y.ttype in (T.Operator,
- T.Wildcard,
- T.Name)
- or isinstance(y, sql.SquareBrackets)),
- lambda y: (y.ttype in (T.String.Symbol,
- T.Name,
- T.Wildcard,
- T.Literal.String.Single,
- T.Literal.Number.Integer,
- T.Literal.Number.Float)
- or isinstance(y, (sql.Parenthesis,
- sql.SquareBrackets,
- sql.Function)))))
- for t in tl.tokens[i:]:
- # Don't take whitespaces into account.
- if t.ttype is T.Whitespace:
- yield t
- continue
- if next(x)(t):
- yield t
- else:
- if isinstance(t, sql.Comment) and t.is_multiline():
- yield t
- if t.ttype is T.Keyword.Order:
- yield t
- return
-
- def _next_token(tl, i):
- # chooses the next token. if two tokens are found then the
- # first is returned.
- t1 = tl.token_next_by_type(
- i, (T.String.Symbol, T.Name, T.Literal.Number.Integer,
- T.Literal.Number.Float))
-
- i1 = tl.token_index(t1, start=i) if t1 else None
- t2_end = None if i1 is None else i1 + 1
- t2 = tl.token_next_by_instance(i, (sql.Function, sql.Parenthesis),
- end=t2_end)
-
- if t1 and t2:
- i2 = tl.token_index(t2, start=i)
- if i1 > i2:
- return t2
- else:
- return t1
- elif t1:
- return t1
- else:
- return t2
+ T_IDENT = (T.String.Symbol, T.Name)
- # bottom up approach: group subgroups first
- [group_identifier(sgroup) for sgroup in tlist.get_sublists()
- if not isinstance(sgroup, sql.Identifier)]
-
- # real processing
- idx = 0
- token = _next_token(tlist, idx)
+ token = tlist.token_next_by(t=T_IDENT)
while token:
- identifier_tokens = [token] + list(
- _consume_cycle(tlist,
- tlist.token_index(token, start=idx) + 1))
- # remove trailing whitespace
- if identifier_tokens and identifier_tokens[-1].ttype is T.Whitespace:
- identifier_tokens = identifier_tokens[:-1]
- if not (len(identifier_tokens) == 1
- and (isinstance(identifier_tokens[0], (sql.Function,
- sql.Parenthesis))
- or identifier_tokens[0].ttype in (
- T.Literal.Number.Integer, T.Literal.Number.Float))):
- group = tlist.group_tokens(sql.Identifier, identifier_tokens)
- idx = tlist.token_index(group, start=idx) + 1
- else:
- idx += 1
- token = _next_token(tlist, idx)
+ token = tlist.group_tokens(sql.Identifier, [token, ])
+ token = tlist.token_next_by(t=T_IDENT, idx=token)
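Identifier grouping is now reduced to wrapping each bare name token; dotted paths, subscripts, and operators are reattached by group_period, group_arrays, and group_operator below. The observable behavior, assuming an installed sqlparse:

    import sqlparse
    from sqlparse import sql

    stmt = sqlparse.parse('SELECT foo FROM bar')[0]
    idents = [tok for tok in stmt.tokens if isinstance(tok, sql.Identifier)]
    print([str(tok) for tok in idents])  # ['foo', 'bar']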
-def group_identifier_list(tlist):
- [group_identifier_list(sgroup) for sgroup in tlist.get_sublists()
- if not isinstance(sgroup, sql.IdentifierList)]
- # Allowed list items
- fend1_funcs = [lambda t: isinstance(t, (sql.Identifier, sql.Function,
- sql.Case)),
- lambda t: t.is_whitespace(),
- lambda t: t.ttype == T.Name,
- lambda t: t.ttype == T.Wildcard,
- lambda t: t.match(T.Keyword, 'null'),
- lambda t: t.match(T.Keyword, 'role'),
- lambda t: t.ttype == T.Number.Integer,
- lambda t: t.ttype == T.String.Single,
- lambda t: t.ttype == T.Name.Placeholder,
- lambda t: t.ttype == T.Keyword,
- lambda t: isinstance(t, sql.Comparison),
- lambda t: isinstance(t, sql.Comment),
- lambda t: t.ttype == T.Comment.Multiline,
- ]
- tcomma = tlist.token_next_match(0, T.Punctuation, ',')
- start = None
- while tcomma is not None:
- # Go back one idx to make sure to find the correct tcomma
- idx = tlist.token_index(tcomma)
- before = tlist.token_prev(idx)
- after = tlist.token_next(idx)
- # Check if the tokens around tcomma belong to a list
- bpassed = apassed = False
- for func in fend1_funcs:
- if before is not None and func(before):
- bpassed = True
- if after is not None and func(after):
- apassed = True
- if not bpassed or not apassed:
- # Something's wrong here, skip ahead to next ","
- start = None
- tcomma = tlist.token_next_match(idx + 1,
- T.Punctuation, ',')
- else:
- if start is None:
- start = before
- after_idx = tlist.token_index(after, start=idx)
- next_ = tlist.token_next(after_idx)
- if next_ is None or not next_.match(T.Punctuation, ','):
- # Reached the end of the list
- tokens = tlist.tokens_between(start, after)
- group = tlist.group_tokens(sql.IdentifierList, tokens)
- start = None
- tcomma = tlist.token_next_match(tlist.token_index(group) + 1,
- T.Punctuation, ',')
- else:
- tcomma = next_
+def group_period(tlist):
+ lfunc = lambda tk: imt(tk, i=(sql.SquareBrackets, sql.Identifier),
+ t=(T.Name, T.String.Symbol,))
+ rfunc = lambda tk: imt(tk, i=(sql.SquareBrackets, sql.Function),
+ t=(T.Name, T.String.Symbol, T.Wildcard))
-def group_brackets(tlist):
- """Group parentheses () or square brackets []
+ _group_left_right(tlist, (T.Punctuation, '.'), sql.Identifier,
+ valid_left=lfunc, valid_right=rfunc)
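group_period stitches dotted references such as schema.table into a single Identifier, which then answers the usual name queries. A short demo, assuming an installed sqlparse:

    import sqlparse

    stmt = sqlparse.parse('SELECT a.b FROM t')[0]
    ident = stmt.tokens[2]          # the Identifier built around '.'
    print(ident.get_parent_name())  # a
    print(ident.get_real_name())    # b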
- This is just like _group_matching, but complicated by the fact that
- round brackets can contain square bracket groups and vice versa
- """
- if isinstance(tlist, (sql.Parenthesis, sql.SquareBrackets)):
- idx = 1
- else:
- idx = 0
+def group_arrays(tlist):
+ token = tlist.token_next_by(i=sql.SquareBrackets)
+ while token:
+ prev = tlist.token_prev(idx=token)
+ if imt(prev, i=(sql.SquareBrackets, sql.Identifier, sql.Function),
+ t=(T.Name, T.String.Symbol,)):
+ tokens = tlist.tokens_between(prev, token)
+ token = tlist.group_tokens(sql.Identifier, tokens, extend=True)
+ token = tlist.token_next_by(i=sql.SquareBrackets, idx=token)
+
+
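group_arrays folds a trailing [...] subscript into the preceding name, so col[1] reads as one identifier. A quick check, assuming an installed sqlparse whose pipeline includes this step:

    import sqlparse
    from sqlparse import sql

    stmt = sqlparse.parse('SELECT col[1] FROM t')[0]
    ident = stmt.tokens[2]  # spans 'col[1]'
    print(isinstance(ident, sql.Identifier), str(ident))  # True col[1]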
+@recurse(sql.Identifier)
+def group_operator(tlist):
+ I_CYCLE = (sql.SquareBrackets, sql.Parenthesis, sql.Function,
+ sql.Identifier,) # sql.Operation)
+ # wildcards wouldn't have operations next to them
+ T_CYCLE = T_NUMERICAL + T_STRING + T_NAME # + T.Wildcard
+ func = lambda tk: imt(tk, i=I_CYCLE, t=T_CYCLE)
+
+ token = tlist.token_next_by(t=(T.Operator, T.Wildcard))
+ while token:
+ left, right = tlist.token_prev(token), tlist.token_next(token)
+
+ if func(left) and func(right):
+ token.ttype = T.Operator
+ tokens = tlist.tokens_between(left, right)
+ # token = tlist.group_tokens(sql.Operation, tokens)
+ token = tlist.group_tokens(sql.Identifier, tokens)
- # Find the first opening bracket
- token = tlist.token_next_match(idx, T.Punctuation, ['(', '['])
+ token = tlist.token_next_by(t=(T.Operator, T.Wildcard), idx=token)
+
+
+@recurse(sql.IdentifierList)
+def group_identifier_list(tlist):
+ I_IDENT_LIST = (sql.Function, sql.Case, sql.Identifier, sql.Comparison,
+ sql.IdentifierList) # sql.Operation
+ T_IDENT_LIST = (T_NUMERICAL + T_STRING + T_NAME +
+ (T.Keyword, T.Comment, T.Wildcard))
+
+ func = lambda t: imt(t, i=I_IDENT_LIST, m=M_ROLE, t=T_IDENT_LIST)
+ token = tlist.token_next_by(m=M_COMMA)
while token:
- start_val = token.value # either '(' or '['
- if start_val == '(':
- end_val = ')'
- group_class = sql.Parenthesis
- else:
- end_val = ']'
- group_class = sql.SquareBrackets
+ before, after = tlist.token_prev(token), tlist.token_next(token)
- tidx = tlist.token_index(token)
+ if func(before) and func(after):
+ tokens = tlist.tokens_between(before, after)
+ token = tlist.group_tokens(sql.IdentifierList, tokens, extend=True)
+ token = tlist.token_next_by(m=M_COMMA, idx=token)
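Comma-separated items now collapse into one sql.IdentifierList by extending the group at each comma; the result exposes get_identifiers(). For example, assuming an installed sqlparse:

    import sqlparse
    from sqlparse import sql

    stmt = sqlparse.parse('SELECT a, b, c FROM t')[0]
    ilist = next(tok for tok in stmt.tokens
                 if isinstance(tok, sql.IdentifierList))
    print([str(i) for i in ilist.get_identifiers()])  # ['a', 'b', 'c']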
- # Find the corresponding closing bracket
- end = _find_matching(tidx, tlist, T.Punctuation, start_val,
- T.Punctuation, end_val)
- if end is None:
- idx = tidx + 1
- else:
- group = tlist.group_tokens(group_class,
- tlist.tokens_between(token, end))
+def group_brackets(tlist):
+ _group_matching(tlist, sql.SquareBrackets)
- # Check for nested bracket groups within this group
- group_brackets(group)
- idx = tlist.token_index(group) + 1
- # Find the next opening bracket
- token = tlist.token_next_match(idx, T.Punctuation, ['(', '['])
+def group_parenthesis(tlist):
+ _group_matching(tlist, sql.Parenthesis)
+@recurse(sql.Comment)
def group_comments(tlist):
- [group_comments(sgroup) for sgroup in tlist.get_sublists()
- if not isinstance(sgroup, sql.Comment)]
- idx = 0
- token = tlist.token_next_by_type(idx, T.Comment)
+ token = tlist.token_next_by(t=T.Comment)
while token:
- tidx = tlist.token_index(token)
- end = tlist.token_not_matching(tidx + 1,
- [lambda t: t.ttype in T.Comment,
- lambda t: t.is_whitespace()])
- if end is None:
- idx = tidx + 1
- else:
- eidx = tlist.token_index(end)
- grp_tokens = tlist.tokens_between(token,
- tlist.token_prev(eidx, False))
- group = tlist.group_tokens(sql.Comment, grp_tokens)
- idx = tlist.token_index(group)
- token = tlist.token_next_by_type(idx, T.Comment)
+ end = tlist.token_not_matching(
+ token, lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace())
+ if end is not None:
+ end = tlist.token_prev(end, False)
+ tokens = tlist.tokens_between(token, end)
+ token = tlist.group_tokens(sql.Comment, tokens)
+
+ token = tlist.token_next_by(t=T.Comment, idx=token)
+@recurse(sql.Where)
def group_where(tlist):
- [group_where(sgroup) for sgroup in tlist.get_sublists()
- if not isinstance(sgroup, sql.Where)]
- idx = 0
- token = tlist.token_next_match(idx, T.Keyword, 'WHERE')
- stopwords = ('ORDER', 'GROUP', 'LIMIT', 'UNION', 'EXCEPT', 'HAVING')
+ token = tlist.token_next_by(m=sql.Where.M_OPEN)
while token:
- tidx = tlist.token_index(token)
- end = tlist.token_next_match(tidx + 1, T.Keyword, stopwords)
+ end = tlist.token_next_by(m=sql.Where.M_CLOSE, idx=token)
+
if end is None:
- end = tlist._groupable_tokens[-1]
+ tokens = tlist.tokens_between(token, tlist._groupable_tokens[-1])
else:
- end = tlist.tokens[tlist.token_index(end) - 1]
- group = tlist.group_tokens(sql.Where,
- tlist.tokens_between(token, end),
- ignore_ws=True)
- idx = tlist.token_index(group)
- token = tlist.token_next_match(idx, T.Keyword, 'WHERE')
+ tokens = tlist.tokens_between(
+ token, tlist.tokens[tlist.token_index(end) - 1])
+
+ token = tlist.group_tokens(sql.Where, tokens)
+ token = tlist.token_next_by(m=sql.Where.M_OPEN, idx=token)
+@recurse()
def group_aliased(tlist):
- clss = (sql.Identifier, sql.Function, sql.Case)
- [group_aliased(sgroup) for sgroup in tlist.get_sublists()
- if not isinstance(sgroup, clss)]
- idx = 0
- token = tlist.token_next_by_instance(idx, clss)
+ I_ALIAS = (sql.Parenthesis, sql.Function, sql.Case, sql.Identifier,
+ ) # sql.Operation)
+
+ token = tlist.token_next_by(i=I_ALIAS, t=T.Number)
while token:
- next_ = tlist.token_next(tlist.token_index(token))
- if next_ is not None and isinstance(next_, clss):
- if not next_.value.upper().startswith('VARCHAR'):
- grp = tlist.tokens_between(token, next_)[1:]
- token.tokens.extend(grp)
- for t in grp:
- tlist.tokens.remove(t)
- idx = tlist.token_index(token) + 1
- token = tlist.token_next_by_instance(idx, clss)
+ next_ = tlist.token_next(token)
+ if imt(next_, i=sql.Identifier):
+ tokens = tlist.tokens_between(token, next_)
+ token = tlist.group_tokens(sql.Identifier, tokens, extend=True)
+ token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=token)
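group_aliased attaches a bare, AS-less alias to the preceding expression. For example, assuming an installed sqlparse:

    import sqlparse

    stmt = sqlparse.parse('SELECT foo bar FROM t')[0]
    ident = stmt.tokens[2]        # Identifier 'foo bar'
    print(ident.get_real_name())  # foo
    print(ident.get_alias())      # bar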
def group_typecasts(tlist):
- _group_left_right(tlist, T.Punctuation, '::', sql.Identifier)
+ _group_left_right(tlist, (T.Punctuation, '::'), sql.Identifier)
+@recurse(sql.Function)
def group_functions(tlist):
- [group_functions(sgroup) for sgroup in tlist.get_sublists()
- if not isinstance(sgroup, sql.Function)]
- idx = 0
- token = tlist.token_next_by_type(idx, T.Name)
+ token = tlist.token_next_by(t=T.Name)
while token:
next_ = tlist.token_next(token)
- if not isinstance(next_, sql.Parenthesis):
- idx = tlist.token_index(token) + 1
- else:
- func = tlist.group_tokens(sql.Function,
- tlist.tokens_between(token, next_))
- idx = tlist.token_index(func) + 1
- token = tlist.token_next_by_type(idx, T.Name)
+ if imt(next_, i=sql.Parenthesis):
+ tokens = tlist.tokens_between(token, next_)
+ token = tlist.group_tokens(sql.Function, tokens)
+ token = tlist.token_next_by(t=T.Name, idx=token)
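group_functions pairs a Name token with the Parenthesis that immediately follows it, producing sql.Function nodes that know their parameters. For example, assuming an installed sqlparse:

    import sqlparse
    from sqlparse import sql

    stmt = sqlparse.parse('SELECT count(city) FROM t')[0]
    func = next(tok for tok in stmt.tokens if isinstance(tok, sql.Function))
    print([str(p) for p in func.get_parameters()])  # ['city']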
def group_order(tlist):
- idx = 0
- token = tlist.token_next_by_type(idx, T.Keyword.Order)
+ """Group together Identifier and Asc/Desc token"""
+ token = tlist.token_next_by(t=T.Keyword.Order)
while token:
prev = tlist.token_prev(token)
- if isinstance(prev, sql.Identifier):
- ido = tlist.group_tokens(sql.Identifier,
- tlist.tokens_between(prev, token))
- idx = tlist.token_index(ido) + 1
- else:
- idx = tlist.token_index(token) + 1
- token = tlist.token_next_by_type(idx, T.Keyword.Order)
+ if imt(prev, i=sql.Identifier, t=T.Number):
+ tokens = tlist.tokens_between(prev, token)
+ token = tlist.group_tokens(sql.Identifier, tokens)
+ token = tlist.token_next_by(t=T.Keyword.Order, idx=token)
+@recurse()
def align_comments(tlist):
- [align_comments(sgroup) for sgroup in tlist.get_sublists()]
- idx = 0
- token = tlist.token_next_by_instance(idx, sql.Comment)
+ token = tlist.token_next_by(i=sql.Comment)
while token:
- before = tlist.token_prev(tlist.token_index(token))
+ before = tlist.token_prev(token)
if isinstance(before, sql.TokenList):
- grp = tlist.tokens_between(before, token)[1:]
- before.tokens.extend(grp)
- for t in grp:
- tlist.tokens.remove(t)
- idx = tlist.token_index(before) + 1
- else:
- idx = tlist.token_index(token) + 1
- token = tlist.token_next_by_instance(idx, sql.Comment)
+ tokens = tlist.tokens_between(before, token)
+ token = tlist.group_tokens(sql.TokenList, tokens, extend=True)
+ token = tlist.token_next_by(i=sql.Comment, idx=token)
def group(tlist):
for func in [
group_comments,
group_brackets,
+ group_parenthesis,
group_functions,
group_where,
group_case,
+ group_period,
+ group_arrays,
group_identifier,
+ group_operator,
group_order,
group_typecasts,
group_as,