| author | Victor Uriarte <victor.m.uriarte@intel.com> | 2016-06-12 20:12:48 -0700 |
|---|---|---|
| committer | Victor Uriarte <victor.m.uriarte@intel.com> | 2016-06-14 03:25:23 -0700 |
| commit | a7ffa646e9c2839999217cc181544a8a4bb9a5fd (patch) | |
| tree | 087f2e53200a3ac28d2a1ce7c4b9f198696166dd /sqlparse | |
| parent | fae3d94f7f3039d5d7b264b6b4aad69c6b03c9a4 (diff) | |
| parent | 89d4f68ba5bbe78a9dd89257cbe4a9f3cfa76433 (diff) | |
| download | sqlparse-a7ffa646e9c2839999217cc181544a8a4bb9a5fd.tar.gz | |
Merge remote-tracking branch 'core/long_live_indexes' into develop
Diffstat (limited to 'sqlparse')
| -rw-r--r-- | sqlparse/engine/grouping.py | 136 |
| -rw-r--r-- | sqlparse/sql.py | 106 |
| -rw-r--r-- | sqlparse/utils.py | 2 |
3 files changed, 175 insertions, 69 deletions
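
Every hunk below follows the same refactoring theme: the grouping engine stops handing token *objects* back to the list helpers (each of which had to call `token_index`, a linear scan, to rediscover the position) and instead threads an integer index through each loop. The new `token_idx_*` helpers return an `(index, token)` pair so a caller can resume scanning at `idx + 1`. A toy sketch of that scanning style (hypothetical `find_next` over a plain list, not the sqlparse API):

```python
tokens = ['a', ',', 'b', ',', 'c']

def find_next(seq, value, idx=0):
    """Return (index, item) of the first match at or after idx."""
    for i, item in enumerate(seq[idx:], start=idx):
        if item == value:
            return i, item
    return None, None

# Carry the index along instead of calling seq.index(item) on every pass.
idx, token = find_next(tokens, ',')
while token:
    print('comma at', idx)                       # comma at 1, comma at 3
    idx, token = find_next(tokens, ',', idx + 1)
```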
diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index 6e414b8..cae5d23 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -7,7 +7,7 @@
 from sqlparse import sql
 from sqlparse import tokens as T
-from sqlparse.utils import recurse, imt, find_matching
+from sqlparse.utils import recurse, imt
 
 M_ROLE = (T.Keyword, ('null', 'role'))
 M_SEMICOLON = (T.Punctuation, ';')
@@ -23,40 +23,47 @@ def _group_left_right(tlist, m, cls,
                       valid_right=lambda t: t is not None,
                       semicolon=False):
     """Groups together tokens that are joined by a middle token. ie. x < y"""
+    [_group_left_right(sgroup, m, cls, valid_left, valid_right, semicolon)
+     for sgroup in tlist.get_sublists() if not isinstance(sgroup, cls)]
 
-    for token in list(tlist):
-        if token.is_group() and not isinstance(token, cls):
-            _group_left_right(token, m, cls, valid_left, valid_right,
-                              semicolon)
-
-        if not token.match(*m):
-            continue
-
-        left, right = tlist.token_prev(token), tlist.token_next(token)
+    token = tlist.token_next_by(m=m)
+    while token:
+        tidx = tlist.token_index(token)
+        left, right = tlist.token_prev(tidx), tlist.token_next(tidx)
 
         if valid_left(left) and valid_right(right):
            if semicolon:
                 # only overwrite if a semicolon present.
-                sright = tlist.token_next_by(m=M_SEMICOLON, idx=right)
+                sright = tlist.token_next_by(m=M_SEMICOLON, idx=tidx + 1)
                 right = sright or right
-            tokens = tlist.tokens_between(left, right)
-            tlist.group_tokens(cls, tokens, extend=True)
+            # Luckily, this leaves the position of `token` intact.
+            token = tlist.group_tokens_between(cls, left, right, extend=True)
+        token = tlist.token_next_by(m=m, idx=tidx + 1)
 
 
 def _group_matching(tlist, cls):
     """Groups Tokens that have beginning and end."""
-    [_group_matching(sgroup, cls) for sgroup in tlist.get_sublists()
-     if not isinstance(sgroup, cls)]
-    idx = 1 if isinstance(tlist, cls) else 0
+    idx = 1 if imt(tlist, i=cls) else 0
 
-    token = tlist.token_next_by(m=cls.M_OPEN, idx=idx)
-    while token:
-        end = find_matching(tlist, token, cls.M_OPEN, cls.M_CLOSE)
-        if end is not None:
-            tokens = tlist.tokens_between(token, end)
-            token = tlist.group_tokens(cls, tokens)
-            _group_matching(token, cls)
-        token = tlist.token_next_by(m=cls.M_OPEN, idx=token)
+    opens = []
+
+    while True:
+        try:
+            token = tlist.tokens[idx]
+        except IndexError:
+            break
+
+        if token.match(*cls.M_OPEN):
+            opens.append(idx)
+        elif token.match(*cls.M_CLOSE):
+            try:
+                open_idx = opens.pop()
+            except IndexError:
+                break
+            tlist.group_tokens_between(cls, open_idx, idx)
+            idx = open_idx
+
+        idx += 1
 
 
 def group_if(tlist):
@@ -109,8 +116,9 @@ def group_identifier(tlist):
 
     token = tlist.token_next_by(t=T_IDENT)
     while token:
-        token = tlist.group_tokens(sql.Identifier, [token, ])
-        token = tlist.token_next_by(t=T_IDENT, idx=token)
+        tidx = tlist.token_index(token)
+        token = tlist.group_tokens_between(sql.Identifier, tidx, tidx)
+        token = tlist.token_next_by(t=T_IDENT, idx=tidx + 1)
 
 
 def group_period(tlist):
@@ -127,12 +135,11 @@ def group_arrays(tlist):
     token = tlist.token_next_by(i=sql.SquareBrackets)
     while token:
-        prev = tlist.token_prev(token)
+        prev = tlist.token_prev(tlist.token_index(token))
         if imt(prev, i=(sql.SquareBrackets, sql.Identifier, sql.Function),
                t=(T.Name, T.String.Symbol,)):
-            tokens = tlist.tokens_between(prev, token)
-            token = tlist.group_tokens(sql.Identifier, tokens, extend=True)
-        token = tlist.token_next_by(i=sql.SquareBrackets, idx=token)
+            token = tlist.group_tokens_between(sql.Identifier, prev, token, extend=True)
+        token = tlist.token_next_by(i=sql.SquareBrackets, idx=tlist.token_index(token) + 1)
 
 
 @recurse(sql.Identifier)
@@ -145,14 +152,13 @@ def group_operator(tlist):
 
     token = tlist.token_next_by(t=(T.Operator, T.Wildcard))
     while token:
-        left, right = tlist.token_prev(token), tlist.token_next(token)
+        left, right = tlist.token_prev(tlist.token_index(token)), tlist.token_next(tlist.token_index(token))
 
         if func(left) and func(right):
             token.ttype = T.Operator
-            tokens = tlist.tokens_between(left, right)
-            token = tlist.group_tokens(sql.Operation, tokens)
+            token = tlist.group_tokens_between(sql.Operation, left, right)
 
-        token = tlist.token_next_by(t=(T.Operator, T.Wildcard), idx=token)
+        token = tlist.token_next_by(t=(T.Operator, T.Wildcard), idx=tlist.token_index(token) + 1)
 
 
 @recurse(sql.IdentifierList)
@@ -163,15 +169,17 @@
                     (T.Keyword, T.Comment, T.Wildcard))
     func = lambda t: imt(t, i=I_IDENT_LIST, m=M_ROLE, t=T_IDENT_LIST)
 
-    token = tlist.token_next_by(m=M_COMMA)
+    tidx, token = tlist.token_idx_next_by(m=M_COMMA)
     while token:
-        before, after = tlist.token_prev(token), tlist.token_next(token)
+        before_idx, before = tlist.token_idx_prev(tidx)
+        after_idx, after = tlist.token_idx_next(tidx)
 
         if func(before) and func(after):
-            tokens = tlist.tokens_between(before, after)
-            token = tlist.group_tokens(sql.IdentifierList, tokens, extend=True)
-        token = tlist.token_next_by(m=M_COMMA, idx=token)
+            tidx = before_idx
+            token = tlist.group_tokens_between(sql.IdentifierList, tidx, after_idx, extend=True)
+
+        tidx, token = tlist.token_idx_next_by(m=M_COMMA, idx=tidx + 1)
 
 
 def group_brackets(tlist):
@@ -187,29 +195,27 @@ def group_comments(tlist):
     token = tlist.token_next_by(t=T.Comment)
     while token:
         end = tlist.token_not_matching(
-            token, lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace())
+            tlist.token_index(token) + 1, lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace())
         if end is not None:
-            end = tlist.token_prev(end, False)
-            tokens = tlist.tokens_between(token, end)
-            token = tlist.group_tokens(sql.Comment, tokens)
+            end = tlist.token_prev(tlist.token_index(end), False)
+            token = tlist.group_tokens_between(sql.Comment, token, end)
 
-        token = tlist.token_next_by(t=T.Comment, idx=token)
+        token = tlist.token_next_by(t=T.Comment, idx=tlist.token_index(token) + 1)
 
 
 @recurse(sql.Where)
 def group_where(tlist):
     token = tlist.token_next_by(m=sql.Where.M_OPEN)
     while token:
-        end = tlist.token_next_by(m=sql.Where.M_CLOSE, idx=token)
+        end = tlist.token_next_by(m=sql.Where.M_CLOSE, idx=tlist.token_index(token) + 1)
 
         if end is None:
-            tokens = tlist.tokens_between(token, tlist._groupable_tokens[-1])
+            end = tlist._groupable_tokens[-1]
         else:
-            tokens = tlist.tokens_between(
-                token, tlist.tokens[tlist.token_index(end) - 1])
+            end = tlist.tokens[tlist.token_index(end) - 1]
 
-        token = tlist.group_tokens(sql.Where, tokens)
-        token = tlist.token_next_by(m=sql.Where.M_OPEN, idx=token)
+        token = tlist.group_tokens_between(sql.Where, token, end)
+        token = tlist.token_next_by(m=sql.Where.M_OPEN, idx=tlist.token_index(token) + 1)
 
 
 @recurse()
@@ -217,13 +223,12 @@ def group_aliased(tlist):
     I_ALIAS = (sql.Parenthesis, sql.Function, sql.Case, sql.Identifier,
                sql.Operation)
 
-    token = tlist.token_next_by(i=I_ALIAS, t=T.Number)
+    tidx, token = tlist.token_idx_next_by(i=I_ALIAS, t=T.Number)
     while token:
-        next_ = tlist.token_next(token)
+        next_index_, next_ = tlist.token_idx_next(tidx)
         if imt(next_, i=sql.Identifier):
-            tokens = tlist.tokens_between(token, next_)
-            token = tlist.group_tokens(sql.Identifier, tokens, extend=True)
-        token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=token)
+            token = tlist.group_tokens_between(sql.Identifier, tidx, next_index_, extend=True)
+        tidx, token = tlist.token_idx_next_by(i=I_ALIAS, t=T.Number, idx=tidx + 1)
 
 
 def group_typecasts(tlist):
@@ -243,33 +248,30 @@ def group_functions(tlist):
         return
     token = tlist.token_next_by(t=T.Name)
     while token:
-        next_ = tlist.token_next(token)
+        next_ = tlist.token_next(tlist.token_index(token))
        if imt(next_, i=sql.Parenthesis):
-            tokens = tlist.tokens_between(token, next_)
-            token = tlist.group_tokens(sql.Function, tokens)
-        token = tlist.token_next_by(t=T.Name, idx=token)
+            token = tlist.group_tokens_between(sql.Function, token, next_)
+        token = tlist.token_next_by(t=T.Name, idx=tlist.token_index(token) + 1)
 
 
 def group_order(tlist):
     """Group together Identifier and Asc/Desc token"""
     token = tlist.token_next_by(t=T.Keyword.Order)
     while token:
-        prev = tlist.token_prev(token)
+        prev = tlist.token_prev(tlist.token_index(token))
         if imt(prev, i=sql.Identifier, t=T.Number):
-            tokens = tlist.tokens_between(prev, token)
-            token = tlist.group_tokens(sql.Identifier, tokens)
-        token = tlist.token_next_by(t=T.Keyword.Order, idx=token)
+            token = tlist.group_tokens_between(sql.Identifier, prev, token)
+        token = tlist.token_next_by(t=T.Keyword.Order, idx=tlist.token_index(token) + 1)
 
 
 @recurse()
 def align_comments(tlist):
     token = tlist.token_next_by(i=sql.Comment)
     while token:
-        before = tlist.token_prev(token)
+        before = tlist.token_prev(tlist.token_index(token))
         if isinstance(before, sql.TokenList):
-            tokens = tlist.tokens_between(before, token)
-            token = tlist.group_tokens(sql.TokenList, tokens, extend=True)
-        token = tlist.token_next_by(i=sql.Comment, idx=token)
+            token = tlist.group_tokens_between(sql.TokenList, before, token, extend=True)
+        token = tlist.token_next_by(i=sql.Comment, idx=tlist.token_index(token) + 1)
 
 
 def group(stmt):
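
The `_group_matching` hunk is the heart of this file: the recursive `find_matching` scan is gone, replaced by one left-to-right pass that pushes each opener's index onto a stack and groups opener-to-closer on every pop; resetting `idx = open_idx` lets the freshly built group act as a single element of the still-open enclosing pair. The same stack technique in isolation (an illustrative sketch over a string, not the sqlparse code):

```python
def match_pairs(text, open_ch='(', close_ch=')'):
    """Return (open, close) index pairs, innermost pairs first."""
    opens, pairs = [], []
    for idx, ch in enumerate(text):
        if ch == open_ch:
            opens.append(idx)    # remember where the pair started
        elif ch == close_ch:
            if not opens:
                break            # unbalanced closer: give up, as the diff does
            pairs.append((opens.pop(), idx))
    return pairs

print(match_pairs('(a(b)c)'))    # [(2, 4), (0, 6)]
```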
diff --git a/sqlparse/sql.py b/sqlparse/sql.py
index 52b3bf1..54f7d4f 100644
--- a/sqlparse/sql.py
+++ b/sqlparse/sql.py
@@ -204,6 +204,28 @@ class TokenList(Token):
     def _groupable_tokens(self):
         return self.tokens
 
+    def _token_idx_matching(self, funcs, start=0, end=None, reverse=False):
+        """next token that match functions"""
+        if start is None:
+            return None
+
+        if not isinstance(funcs, (list, tuple)):
+            funcs = (funcs,)
+
+        if reverse:
+            assert end is None
+            for idx in range(start - 2, -1, -1):
+                token = self.tokens[idx]
+                for func in funcs:
+                    if func(token):
+                        return idx, token
+        else:
+            for idx, token in enumerate(self.tokens[start:end], start=start):
+                for func in funcs:
+                    if func(token):
+                        return idx, token
+        return None, None
+
     def _token_matching(self, funcs, start=0, end=None, reverse=False):
         """next token that match functions"""
         if start is None:
@@ -225,6 +247,24 @@
                 if func(token):
                     return token
 
+    def token_first(self, skip_ws=True, skip_cm=False):
+        """Returns the first child token.
+
+        If *skip_ws* is ``True`` (the default), whitespace
+        tokens are ignored.
+
+        if *skip_cm* is ``True`` (default: ``False``), comments are
+        ignored too.
+        """
+        # this on is inconsistent, using Comment instead of T.Comment...
+        funcs = lambda tk: not ((skip_ws and tk.is_whitespace()) or
+                                (skip_cm and imt(tk, t=T.Comment, i=Comment)))
+        return self._token_matching(funcs)
+
+    def token_idx_next_by(self, i=None, m=None, t=None, idx=0, end=None):
+        funcs = lambda tk: imt(tk, i, m, t)
+        return self._token_idx_matching(funcs, idx, end)
+
     def token_next_by(self, i=None, m=None, t=None, idx=0, end=None):
         funcs = lambda tk: imt(tk, i, m, t)
         return self._token_matching(funcs, idx, end)
@@ -237,12 +277,24 @@
     def token_matching(self, idx, funcs):
         return self._token_matching(funcs, idx)
 
+    def token_idx_prev(self, idx, skip_ws=True):
+        """Returns the previous token relative to *idx*.
+
+        If *skip_ws* is ``True`` (the default) whitespace tokens are ignored.
+        ``None`` is returned if there's no previous token.
+        """
+        idx += 1  # alot of code usage current pre-compensates for this
+        funcs = lambda tk: not (tk.is_whitespace() and skip_ws)
+        return self._token_idx_matching(funcs, idx, reverse=True)
+
     def token_prev(self, idx=0, skip_ws=True, skip_cm=False):
         """Returns the previous token relative to *idx*.
 
         If *skip_ws* is ``True`` (the default) whitespace tokens are ignored.
         ``None`` is returned if there's no previous token.
         """
+        if isinstance(idx, int):
+            idx += 1  # alot of code usage current pre-compensates for this
         funcs = lambda tk: not ((skip_ws and tk.is_whitespace()) or
                                 (skip_cm and imt(tk, t=T.Comment, i=Comment)))
         return self._token_matching(funcs, idx, reverse=True)
@@ -255,10 +307,32 @@
         If *skip_cm* is ``True`` (default: ``False``), comments are
         ignored. ``None`` is returned if there's no next token.
         """
+        if isinstance(idx, int):
+            idx += 1  # alot of code usage current pre-compensates for this
         funcs = lambda tk: not ((skip_ws and tk.is_whitespace()) or
                                 (skip_cm and imt(tk, t=T.Comment, i=Comment)))
         return self._token_matching(funcs, idx)
 
+    def token_idx_next(self, idx, skip_ws=True):
+        """Returns the next token relative to *idx*.
+
+        If *skip_ws* is ``True`` (the default) whitespace tokens are ignored.
+        ``None`` is returned if there's no next token.
+        """
+        if isinstance(idx, int):
+            idx += 1  # alot of code usage current pre-compensates for this
+        try:
+            if not skip_ws:
+                return idx, self.tokens[idx]
+            else:
+                while True:
+                    token = self.tokens[idx]
+                    if not token.is_whitespace():
+                        return idx, token
+                    idx += 1
+        except IndexError:
+            return None, None
+
     def token_index(self, token, start=0):
         """Return list index of token."""
         start = start if isinstance(start, int) else self.token_index(start)
@@ -274,6 +348,36 @@
             end_idx = include_end + self.token_index(end)
         return self.tokens[start_idx:end_idx]
 
+    def group_tokens_between(self, grp_cls, start, end, include_end=True,
+                             extend=False):
+        """Replace tokens by an instance of *grp_cls*."""
+        if isinstance(start, int):
+            start_idx = start
+            start = self.tokens[start_idx]
+        else:
+            start_idx = self.token_index(start)
+
+        end_idx = self.token_index(end) if not isinstance(end, int) else end
+        end_idx += include_end
+
+        if extend and isinstance(start, grp_cls):
+            subtokens = self.tokens[start_idx + 1:end_idx]
+
+            grp = start
+            grp.tokens.extend(subtokens)
+            del self.tokens[start_idx + 1:end_idx]
+            grp.value = start.__str__()
+        else:
+            subtokens = self.tokens[start_idx:end_idx]
+            grp = grp_cls(subtokens)
+            self.tokens[start_idx:end_idx] = [grp]
+            grp.parent = self
+
+        for token in subtokens:
+            token.parent = grp
+
+        return grp
+
     def group_tokens(self, grp_cls, tokens, skip_ws=False, extend=False):
         """Replace tokens by an instance of *grp_cls*."""
 
@@ -386,7 +490,7 @@ class Statement(TokenList):
         Whitespaces and comments at the beginning of the statement are
         ignored.
         """
-        first_token = self.token_next(skip_cm=True)
+        first_token = self.token_first(skip_cm=True)
         if first_token is None:
             # An "empty" statement that either has not tokens at all
             # or only whitespace tokens.
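
`group_tokens_between` is the primitive all the grouping loops above now rely on: it replaces the slice `tokens[start_idx:end_idx]` with a single group node in place (or, with `extend=True`, folds the slice into an existing group), reparenting the moved children; indexes before `start_idx` therefore stay valid across the call. A toy model of the splice with plain lists (hypothetical `Node`/`Group` classes, not sqlparse's types):

```python
class Node:
    def __init__(self, value):
        self.value, self.parent = value, None

class Group(Node):
    def __init__(self, children):
        super().__init__(''.join(c.value for c in children))
        self.children = children

def group_between(tokens, start_idx, end_idx, include_end=True):
    """Replace tokens[start_idx:end_idx(+1)] with one Group node."""
    end_idx += include_end
    sub = tokens[start_idx:end_idx]
    grp = Group(sub)
    tokens[start_idx:end_idx] = [grp]   # in-place splice: slice becomes one element
    for child in sub:
        child.parent = grp              # reparent, as the real method does
    return grp

tokens = [Node(c) for c in 'a,b']
grp = group_between(tokens, 0, 2)
print(len(tokens), grp.value)           # 1 a,b
```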
+ """ + if isinstance(idx, int): + idx += 1 # alot of code usage current pre-compensates for this + try: + if not skip_ws: + return idx, self.tokens[idx] + else: + while True: + token = self.tokens[idx] + if not token.is_whitespace(): + return idx, token + idx += 1 + except IndexError: + return None, None + def token_index(self, token, start=0): """Return list index of token.""" start = start if isinstance(start, int) else self.token_index(start) @@ -274,6 +348,36 @@ class TokenList(Token): end_idx = include_end + self.token_index(end) return self.tokens[start_idx:end_idx] + def group_tokens_between(self, grp_cls, start, end, include_end=True, + extend=False): + """Replace tokens by an instance of *grp_cls*.""" + if isinstance(start, int): + start_idx = start + start = self.tokens[start_idx] + else: + start_idx = self.token_index(start) + + end_idx = self.token_index(end) if not isinstance(end, int) else end + end_idx += include_end + + if extend and isinstance(start, grp_cls): + subtokens = self.tokens[start_idx + 1:end_idx] + + grp = start + grp.tokens.extend(subtokens) + del self.tokens[start_idx + 1:end_idx] + grp.value = start.__str__() + else: + subtokens = self.tokens[start_idx:end_idx] + grp = grp_cls(subtokens) + self.tokens[start_idx:end_idx] = [grp] + grp.parent = self + + for token in subtokens: + token.parent = grp + + return grp + def group_tokens(self, grp_cls, tokens, skip_ws=False, extend=False): """Replace tokens by an instance of *grp_cls*.""" @@ -386,7 +490,7 @@ class Statement(TokenList): Whitespaces and comments at the beginning of the statement are ignored. """ - first_token = self.token_next(skip_cm=True) + first_token = self.token_first(skip_cm=True) if first_token is None: # An "empty" statement that either has not tokens at all # or only whitespace tokens. diff --git a/sqlparse/utils.py b/sqlparse/utils.py index 4a8646d..59301ff 100644 --- a/sqlparse/utils.py +++ b/sqlparse/utils.py @@ -104,7 +104,7 @@ def imt(token, i=None, m=None, t=None): def find_matching(tlist, token, open_pattern, close_pattern): - idx = tlist.token_index(token) + idx = tlist.token_index(token) if not isinstance(token, int) else token depth = 0 for token in tlist.tokens[idx:]: if token.match(*open_pattern): |
