From 6145070d6590f1e8f7fc4d86fb0a1061bc1a47d9 Mon Sep 17 00:00:00 2001
From: Sjoerd Job Postmus
Date: Wed, 1 Jun 2016 20:23:19 +0200
Subject: Call `Token`-methods index based.

A lot of methods have token-to-index magic because `Token._find_matching`
converts tokens to indexes. Unknowingly, this turns innocent-looking
algorithms into O(n^2) (or worse).

This commit does not solve the problem, but it makes the problem more
visible by making the call to `Token.token_index` explicit at the call
site, at the cost of repeating it over and over.
---
 sqlparse/engine/grouping.py | 50 +++++++++++++++++++++++----------------------
 sqlparse/filters.py         |  2 +-
 2 files changed, 27 insertions(+), 25 deletions(-)

(limited to 'sqlparse')

diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index f7953e7..39bcf8e 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -23,15 +23,17 @@ def _group_left_right(tlist, m, cls,
 
     token = tlist.token_next_by(m=m)
     while token:
-        left, right = tlist.token_prev(token), tlist.token_next(token)
+        tidx = tlist.token_index(token)
+        left, right = tlist.token_prev(tidx), tlist.token_next(tidx)
 
         if valid_left(left) and valid_right(right):
             if semicolon:
-                sright = tlist.token_next_by(m=M_SEMICOLON, idx=right)
+                sright = tlist.token_next_by(m=M_SEMICOLON, idx=tidx + 1)
                 right = sright or right  # only overwrite if a semicolon present.
             tokens = tlist.tokens_between(left, right)
+            # Luckily, this leaves the position of `token` intact.
            token = tlist.group_tokens(cls, tokens, extend=True)
-        token = tlist.token_next_by(m=m, idx=token)
+        token = tlist.token_next_by(m=m, idx=tidx + 1)
 
 
 def _group_matching(tlist, cls):
@@ -44,7 +46,7 @@
         if end is not None:
             token = tlist.group_tokens(cls, tlist.tokens_between(token, end))
             _group_matching(token, cls)
-        token = tlist.token_next_by(m=cls.M_OPEN, idx=token)
+        token = tlist.token_next_by(m=cls.M_OPEN, idx=tlist.token_index(token) + 1)
 
 
 def group_if(tlist):
@@ -97,7 +99,7 @@ def group_identifier(tlist):
     token = tlist.token_next_by(t=T_IDENT)
     while token:
         token = tlist.group_tokens(sql.Identifier, [token, ])
-        token = tlist.token_next_by(t=T_IDENT, idx=token)
+        token = tlist.token_next_by(t=T_IDENT, idx=tlist.token_index(token) + 1)
 
 
 def group_period(tlist):
@@ -114,12 +116,12 @@ def group_arrays(tlist):
 
     token = tlist.token_next_by(i=sql.SquareBrackets)
     while token:
-        prev = tlist.token_prev(idx=token)
+        prev = tlist.token_prev(idx=tlist.token_index(token))
         if imt(prev, i=(sql.SquareBrackets, sql.Identifier, sql.Function),
                t=(T.Name, T.String.Symbol,)):
             tokens = tlist.tokens_between(prev, token)
             token = tlist.group_tokens(sql.Identifier, tokens, extend=True)
-        token = tlist.token_next_by(i=sql.SquareBrackets, idx=token)
+        token = tlist.token_next_by(i=sql.SquareBrackets, idx=tlist.token_index(token) + 1)
 
 
 @recurse(sql.Identifier)
@@ -132,7 +134,7 @@ def group_operator(tlist):
     token = tlist.token_next_by(t=(T.Operator, T.Wildcard))
     while token:
-        left, right = tlist.token_prev(token), tlist.token_next(token)
+        left, right = tlist.token_prev(tlist.token_index(token)), tlist.token_next(tlist.token_index(token))
 
         if func(left) and func(right):
             token.ttype = T.Operator
@@ -140,7 +142,7 @@
             # token = tlist.group_tokens(sql.Operation, tokens)
             token = tlist.group_tokens(sql.Identifier, tokens)
 
-        token = tlist.token_next_by(t=(T.Operator, T.Wildcard), idx=token)
+        token = tlist.token_next_by(t=(T.Operator, T.Wildcard), idx=tlist.token_index(token) + 1)
 
 
 @recurse(sql.IdentifierList)
@@ -154,12 +156,12 @@ def group_identifier_list(tlist):
     token = tlist.token_next_by(m=M_COMMA)
     while token:
-        before, after = tlist.token_prev(token), tlist.token_next(token)
+        before, after = tlist.token_prev(tlist.token_index(token)), tlist.token_next(tlist.token_index(token))
 
         if func(before) and func(after):
             tokens = tlist.tokens_between(before, after)
             token = tlist.group_tokens(sql.IdentifierList, tokens, extend=True)
 
-        token = tlist.token_next_by(m=M_COMMA, idx=token)
+        token = tlist.token_next_by(m=M_COMMA, idx=tlist.token_index(token) + 1)
 
 
 def group_brackets(tlist):
@@ -175,20 +177,20 @@ def group_comments(tlist):
     token = tlist.token_next_by(t=T.Comment)
     while token:
         end = tlist.token_not_matching(
-            token, lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace())
+            tlist.token_index(token) + 1, lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace())
         if end is not None:
-            end = tlist.token_prev(end, False)
+            end = tlist.token_prev(tlist.token_index(end), False)
             tokens = tlist.tokens_between(token, end)
             token = tlist.group_tokens(sql.Comment, tokens)
 
-        token = tlist.token_next_by(t=T.Comment, idx=token)
+        token = tlist.token_next_by(t=T.Comment, idx=tlist.token_index(token) + 1)
 
 
 @recurse(sql.Where)
 def group_where(tlist):
     token = tlist.token_next_by(m=sql.Where.M_OPEN)
     while token:
-        end = tlist.token_next_by(m=sql.Where.M_CLOSE, idx=token)
+        end = tlist.token_next_by(m=sql.Where.M_CLOSE, idx=tlist.token_index(token) + 1)
 
         if end is None:
             tokens = tlist.tokens_between(token, tlist._groupable_tokens[-1])
@@ -197,7 +199,7 @@
                 token, tlist.tokens[tlist.token_index(end) - 1])
 
         token = tlist.group_tokens(sql.Where, tokens)
-        token = tlist.token_next_by(m=sql.Where.M_OPEN, idx=token)
+        token = tlist.token_next_by(m=sql.Where.M_OPEN, idx=tlist.token_index(token) + 1)
 
 
 @recurse()
@@ -207,11 +209,11 @@
 
     token = tlist.token_next_by(i=I_ALIAS, t=T.Number)
     while token:
-        next_ = tlist.token_next(token)
+        next_ = tlist.token_next(tlist.token_index(token))
         if imt(next_, i=sql.Identifier):
             tokens = tlist.tokens_between(token, next_)
             token = tlist.group_tokens(sql.Identifier, tokens, extend=True)
-        token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=token)
+        token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=tlist.token_index(token) + 1)
 
 
 def group_typecasts(tlist):
@@ -231,33 +233,33 @@ def group_functions(tlist):
         return
     token = tlist.token_next_by(t=T.Name)
     while token:
-        next_ = tlist.token_next(token)
+        next_ = tlist.token_next(tlist.token_index(token))
         if imt(next_, i=sql.Parenthesis):
             tokens = tlist.tokens_between(token, next_)
             token = tlist.group_tokens(sql.Function, tokens)
-        token = tlist.token_next_by(t=T.Name, idx=token)
+        token = tlist.token_next_by(t=T.Name, idx=tlist.token_index(token) + 1)
 
 
 def group_order(tlist):
     """Group together Identifier and Asc/Desc token"""
     token = tlist.token_next_by(t=T.Keyword.Order)
     while token:
-        prev = tlist.token_prev(token)
+        prev = tlist.token_prev(tlist.token_index(token))
         if imt(prev, i=sql.Identifier, t=T.Number):
             tokens = tlist.tokens_between(prev, token)
             token = tlist.group_tokens(sql.Identifier, tokens)
-        token = tlist.token_next_by(t=T.Keyword.Order, idx=token)
+        token = tlist.token_next_by(t=T.Keyword.Order, idx=tlist.token_index(token) + 1)
 
 
 @recurse()
 def align_comments(tlist):
     token = tlist.token_next_by(i=sql.Comment)
     while token:
-        before = tlist.token_prev(token)
+        before = tlist.token_prev(tlist.token_index(token))
         if isinstance(before, sql.TokenList):
             tokens = tlist.tokens_between(before, token)
             token = tlist.group_tokens(sql.TokenList, tokens, extend=True)
-        token = tlist.token_next_by(i=sql.Comment, idx=token)
+        token = tlist.token_next_by(i=sql.Comment, idx=tlist.token_index(token) + 1)
 
 
 def group(tlist):
diff --git a/sqlparse/filters.py b/sqlparse/filters.py
index 72f17d0..095ee85 100644
--- a/sqlparse/filters.py
+++ b/sqlparse/filters.py
@@ -340,7 +340,7 @@ class ReindentFilter:
                     offset += 1
                 uprev = u(prev)
                 if (prev and (uprev.endswith('\n') or uprev.endswith('\r'))):
-                    nl = tlist.token_next(token)
+                    nl = tlist.token_next(tlist.token_index(token))
                 else:
                     nl = self.nl()
                 added.add(nl)
--
cgit v1.2.1
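An aside on the cost this commit message describes: `token_index` is a linear scan of the token list, so a loop that resolves a token back to its position on every iteration does O(n) work per step and O(n^2) overall. A minimal, self-contained sketch of the difference (the `Tok` and `TokList` names are hypothetical stand-ins, not sqlparse's real classes):

    class Tok:
        def __init__(self, value):
            self.value = value

    class TokList:
        def __init__(self, tokens):
            self.tokens = tokens

        def token_index(self, token):
            # Linear scan, like list.index(): O(n) per call.
            return self.tokens.index(token)

        def token_next(self, idx):
            # Index-based lookup: O(1).
            return self.tokens[idx + 1] if idx + 1 < len(self.tokens) else None

    tlist = TokList([Tok(i) for i in range(1000)])

    # Token-based loop: each iteration hides a linear scan -> O(n^2) total.
    for token in tlist.tokens[:-1]:
        nxt = tlist.token_next(tlist.token_index(token))

    # Index-based loop: each iteration is O(1) -> O(n) total.
    for idx in range(len(tlist.tokens) - 1):
        nxt = tlist.token_next(idx)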
From 896774cb5298924abbcea81b9b90f1c7c10b3d6a Mon Sep 17 00:00:00 2001
From: Sjoerd Job Postmus
Date: Thu, 2 Jun 2016 07:38:27 +0200
Subject: Special-case group_tokens(..., tokens_between())

When it is guaranteed that the tokens form a contiguous range, it is
possible to get rid of a lot of calls to `Token.tokens.remove(...)`,
which are expensive.
---
 sqlparse/engine/grouping.py | 38 ++++++++++++++------------------------
 sqlparse/sql.py             | 23 +++++++++++++++++++++++
 2 files changed, 37 insertions(+), 24 deletions(-)

(limited to 'sqlparse')

diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index 39bcf8e..ad7da9f 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -30,9 +30,8 @@ def _group_left_right(tlist, m, cls,
             if semicolon:
                 sright = tlist.token_next_by(m=M_SEMICOLON, idx=tidx + 1)
                 right = sright or right  # only overwrite if a semicolon present.
-            tokens = tlist.tokens_between(left, right)
             # Luckily, this leaves the position of `token` intact.
-            token = tlist.group_tokens(cls, tokens, extend=True)
+            token = tlist.group_tokens_between(cls, left, right, extend=True)
         token = tlist.token_next_by(m=m, idx=tidx + 1)
@@ -44,7 +43,7 @@ def _group_matching(tlist, cls):
     while token:
         end = find_matching(tlist, token, cls.M_OPEN, cls.M_CLOSE)
         if end is not None:
-            token = tlist.group_tokens(cls, tlist.tokens_between(token, end))
+            token = tlist.group_tokens_between(cls, token, end)
             _group_matching(token, cls)
         token = tlist.token_next_by(m=cls.M_OPEN, idx=tlist.token_index(token) + 1)
@@ -119,8 +118,7 @@ def group_arrays(tlist):
         prev = tlist.token_prev(idx=tlist.token_index(token))
         if imt(prev, i=(sql.SquareBrackets, sql.Identifier, sql.Function),
                t=(T.Name, T.String.Symbol,)):
-            tokens = tlist.tokens_between(prev, token)
-            token = tlist.group_tokens(sql.Identifier, tokens, extend=True)
+            token = tlist.group_tokens_between(sql.Identifier, prev, token, extend=True)
         token = tlist.token_next_by(i=sql.SquareBrackets, idx=tlist.token_index(token) + 1)
@@ -138,9 +136,8 @@
         if func(left) and func(right):
             token.ttype = T.Operator
-            tokens = tlist.tokens_between(left, right)
-            # token = tlist.group_tokens(sql.Operation, tokens)
-            token = tlist.group_tokens(sql.Identifier, tokens)
+            # token = tlist.group_tokens_between(sql.Operation, left, right)
+            token = tlist.group_tokens_between(sql.Identifier, left, right)
 
         token = tlist.token_next_by(t=(T.Operator, T.Wildcard), idx=tlist.token_index(token) + 1)
@@ -159,8 +156,7 @@
         before, after = tlist.token_prev(tlist.token_index(token)), tlist.token_next(tlist.token_index(token))
 
         if func(before) and func(after):
-            tokens = tlist.tokens_between(before, after)
-            token = tlist.group_tokens(sql.IdentifierList, tokens, extend=True)
+            token = tlist.group_tokens_between(sql.IdentifierList, before, after, extend=True)
         token = tlist.token_next_by(m=M_COMMA, idx=tlist.token_index(token) + 1)
@@ -180,8 +176,7 @@
             tlist.token_index(token) + 1, lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace())
         if end is not None:
             end = tlist.token_prev(tlist.token_index(end), False)
-            tokens = tlist.tokens_between(token, end)
-            token = tlist.group_tokens(sql.Comment, tokens)
+            token = tlist.group_tokens_between(sql.Comment, token, end)
 
         token = tlist.token_next_by(t=T.Comment, idx=tlist.token_index(token) + 1)
@@ -193,12 +188,11 @@
         end = tlist.token_next_by(m=sql.Where.M_CLOSE, idx=tlist.token_index(token) + 1)
 
         if end is None:
-            tokens = tlist.tokens_between(token, tlist._groupable_tokens[-1])
+            end = tlist._groupable_tokens[-1]
         else:
-            tokens = tlist.tokens_between(
-                token, tlist.tokens[tlist.token_index(end) - 1])
+            end = tlist.tokens[tlist.token_index(end) - 1]
 
-        token = tlist.group_tokens(sql.Where, tokens)
+        token = tlist.group_tokens_between(sql.Where, token, end)
         token = tlist.token_next_by(m=sql.Where.M_OPEN, idx=tlist.token_index(token) + 1)
@@ -211,8 +205,7 @@
     while token:
         next_ = tlist.token_next(tlist.token_index(token))
         if imt(next_, i=sql.Identifier):
-            tokens = tlist.tokens_between(token, next_)
-            token = tlist.group_tokens(sql.Identifier, tokens, extend=True)
+            token = tlist.group_tokens_between(sql.Identifier, token, next_, extend=True)
         token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=tlist.token_index(token) + 1)
@@ -235,8 +228,7 @@
     while token:
         next_ = tlist.token_next(tlist.token_index(token))
         if imt(next_, i=sql.Parenthesis):
-            tokens = tlist.tokens_between(token, next_)
-            token = tlist.group_tokens(sql.Function, tokens)
+            token = tlist.group_tokens_between(sql.Function, token, next_)
         token = tlist.token_next_by(t=T.Name, idx=tlist.token_index(token) + 1)
@@ -246,8 +238,7 @@
     while token:
         prev = tlist.token_prev(tlist.token_index(token))
         if imt(prev, i=sql.Identifier, t=T.Number):
-            tokens = tlist.tokens_between(prev, token)
-            token = tlist.group_tokens(sql.Identifier, tokens)
+            token = tlist.group_tokens_between(sql.Identifier, prev, token)
         token = tlist.token_next_by(t=T.Keyword.Order, idx=tlist.token_index(token) + 1)
@@ -257,8 +248,7 @@
     while token:
         before = tlist.token_prev(tlist.token_index(token))
         if isinstance(before, sql.TokenList):
-            tokens = tlist.tokens_between(before, token)
-            token = tlist.group_tokens(sql.TokenList, tokens, extend=True)
+            token = tlist.group_tokens_between(sql.TokenList, before, token, extend=True)
         token = tlist.token_next_by(i=sql.Comment, idx=tlist.token_index(token) + 1)
diff --git a/sqlparse/sql.py b/sqlparse/sql.py
index 9afdac3..81cd8e9 100644
--- a/sqlparse/sql.py
+++ b/sqlparse/sql.py
@@ -329,6 +329,29 @@ class TokenList(Token):
             end_idx = include_end + self.token_index(end)
         return self.tokens[start_idx:end_idx]
 
+    def group_tokens_between(self, grp_cls, start, end, include_end=True, extend=False):
+        """Replace tokens by an instance of *grp_cls*."""
+        start_idx = self.token_index(start)
+        end_idx = self.token_index(end) + include_end
+        tokens = self.tokens[start_idx:end_idx]
+
+        if extend and isinstance(start, grp_cls):
+            subtokens = self.tokens[start_idx+1:end_idx]
+
+            grp = start
+            grp.tokens.extend(subtokens)
+            del self.tokens[start_idx+1:end_idx]
+            grp.value = start.__str__()
+        else:
+            subtokens = self.tokens[start_idx:end_idx]
+            grp = grp_cls(tokens)
+            self.tokens[start_idx:end_idx] = [grp]
+            grp.parent = self
+
+        for token in subtokens:
+            token.parent = grp
+
+        return grp
+
     def group_tokens(self, grp_cls, tokens, ignore_ws=False, extend=False):
         """Replace tokens by an instance of *grp_cls*."""
         if ignore_ws:
--
cgit v1.2.1
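The core trick in `group_tokens_between`, reduced to plain lists: when the tokens to be grouped are known to be contiguous, a single slice assignment splices the group in and shifts the tail of the list once, whereas removing each grouped token with `list.remove()` rescans and reshifts the list for every token. A hedged sketch with illustrative names (`make_group` stands for any callable that builds the group object; this is not the sqlparse API):

    def group_by_remove(tokens, start_idx, end_idx, make_group):
        # The old pattern: one remove() per grouped token. Each remove()
        # is a linear scan plus a shift of the tail -> O(n^2) overall.
        grouped = tokens[start_idx:end_idx + 1]
        grp = make_group(grouped)
        for tok in grouped:
            tokens.remove(tok)
        tokens.insert(start_idx, grp)
        return grp

    def group_by_slice(tokens, start_idx, end_idx, make_group):
        # The new pattern: one slice assignment, the tail shifts once -> O(n).
        grp = make_group(tokens[start_idx:end_idx + 1])
        tokens[start_idx:end_idx + 1] = [grp]
        return grp

    toks = list("abcdefg")
    group_by_slice(toks, 2, 4, tuple)
    print(toks)   # ['a', 'b', ('c', 'd', 'e'), 'f', 'g']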
From d4cc0644c8348da5e49c58df5e26a3e969045249 Mon Sep 17 00:00:00 2001
From: Sjoerd Job Postmus
Date: Thu, 2 Jun 2016 08:30:27 +0200
Subject: Replace _group_matching with an inward-out grouping algorithm

Previously, all the matching between open- and close-tokens was redone
again and again: first finding the matching closing token, then
grouping the tokens in between, and then recursing over the newly
created list.

Instead, it is more efficient to look for the most recent open-token
when a closing-token is found, group the two together, and then
continue on.

squashed: Handle token indices in group_tokens_between and find_matching.
---
 sqlparse/engine/grouping.py | 28 ++++++++++++++++++++--------
 sqlparse/sql.py             | 13 +++++++++----
 sqlparse/utils.py           |  2 +-
 3 files changed, 30 insertions(+), 13 deletions(-)

(limited to 'sqlparse')

diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index ad7da9f..e004eae 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -2,7 +2,7 @@
 
 from sqlparse import sql
 from sqlparse import tokens as T
-from sqlparse.utils import recurse, imt, find_matching
+from sqlparse.utils import recurse, imt
 
 M_ROLE = (T.Keyword, ('null', 'role'))
 M_SEMICOLON = (T.Punctuation, ';')
@@ -39,13 +39,25 @@ def _group_matching(tlist, cls):
     """Groups Tokens that have beginning and end. ie. parenthesis, brackets.."""
     idx = 1 if imt(tlist, i=cls) else 0
 
-    token = tlist.token_next_by(m=cls.M_OPEN, idx=idx)
-    while token:
-        end = find_matching(tlist, token, cls.M_OPEN, cls.M_CLOSE)
-        if end is not None:
-            token = tlist.group_tokens_between(cls, token, end)
-            _group_matching(token, cls)
-        token = tlist.token_next_by(m=cls.M_OPEN, idx=tlist.token_index(token) + 1)
+    opens = []
+
+    while True:
+        try:
+            token = tlist.tokens[idx]
+        except IndexError:
+            break
+
+        if token.match(*cls.M_OPEN):
+            opens.append(idx)
+        elif token.match(*cls.M_CLOSE):
+            try:
+                open_idx = opens.pop()
+            except IndexError:
+                break
+            tlist.group_tokens_between(cls, open_idx, idx)
+            idx = open_idx
+
+        idx += 1
 
 
 def group_if(tlist):
diff --git a/sqlparse/sql.py b/sqlparse/sql.py
index 81cd8e9..dfe0430 100644
--- a/sqlparse/sql.py
+++ b/sqlparse/sql.py
@@ -331,9 +331,14 @@ class TokenList(Token):
 
     def group_tokens_between(self, grp_cls, start, end, include_end=True, extend=False):
         """Replace tokens by an instance of *grp_cls*."""
-        start_idx = self.token_index(start)
-        end_idx = self.token_index(end) + include_end
-        tokens = self.tokens[start_idx:end_idx]
+        if isinstance(start, int):
+            start_idx = start
+            start = self.tokens[start_idx]
+        else:
+            start_idx = self.token_index(start)
+
+        end_idx = self.token_index(end) if not isinstance(end, int) else end
+        end_idx += include_end
 
         if extend and isinstance(start, grp_cls):
             subtokens = self.tokens[start_idx+1:end_idx]
@@ -344,7 +349,7 @@
             grp.value = start.__str__()
         else:
             subtokens = self.tokens[start_idx:end_idx]
-            grp = grp_cls(tokens)
+            grp = grp_cls(subtokens)
             self.tokens[start_idx:end_idx] = [grp]
             grp.parent = self
 
diff --git a/sqlparse/utils.py b/sqlparse/utils.py
index 90acb5c..5e01f58 100644
--- a/sqlparse/utils.py
+++ b/sqlparse/utils.py
@@ -164,7 +164,7 @@ def imt(token, i=None, m=None, t=None):
 
 
 def find_matching(tlist, token, M1, M2):
-    idx = tlist.token_index(token)
+    idx = tlist.token_index(token) if not isinstance(token, int) else token
     depth = 0
     for token in tlist.tokens[idx:]:
         if token.match(*M1):
--
cgit v1.2.1
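The inward-out idea, sketched on a plain list of characters: push the index of every opener; on a closer, pop the most recent opener and splice that span into a nested list in place. Each pair is grouped exactly once, with no scan for the matching closer and no recursion. (Standalone sketch, not the sqlparse implementation; unmatched closers are simply skipped here.)

    def group_parens(items):
        opens = []
        idx = 0
        while idx < len(items):
            if items[idx] == '(':
                opens.append(idx)
            elif items[idx] == ')' and opens:
                open_idx = opens.pop()  # most recent unmatched opener
                items[open_idx:idx + 1] = [items[open_idx:idx + 1]]
                idx = open_idx          # the new group now sits at open_idx
            idx += 1
        return items

    print(group_parens(list("a(b(c)d)e")))
    # ['a', ['(', 'b', ['(', 'c', ')'], 'd', ')'], 'e']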
From 237575ef726e4232b60a5043177c43a72f370238 Mon Sep 17 00:00:00 2001
From: Sjoerd Job Postmus
Date: Thu, 2 Jun 2016 09:21:05 +0200
Subject: Re-use token index in group_identifier.

---
 sqlparse/engine/grouping.py | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

(limited to 'sqlparse')

diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index e004eae..77a53ad 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -109,8 +109,9 @@ def group_identifier(tlist):
 
     token = tlist.token_next_by(t=T_IDENT)
     while token:
-        token = tlist.group_tokens(sql.Identifier, [token, ])
-        token = tlist.token_next_by(t=T_IDENT, idx=tlist.token_index(token) + 1)
+        tidx = tlist.token_index(token)
+        token = tlist.group_tokens_between(sql.Identifier, tidx, tidx)
+        token = tlist.token_next_by(t=T_IDENT, idx=tidx + 1)
 
 
 def group_period(tlist):
@@ -165,11 +166,14 @@ def group_identifier_list(tlist):
 
     token = tlist.token_next_by(m=M_COMMA)
     while token:
-        before, after = tlist.token_prev(tlist.token_index(token)), tlist.token_next(tlist.token_index(token))
+        tidx = tlist.token_index(token)
+        before, after = tlist.token_prev(tidx), tlist.token_next(tidx)
 
         if func(before) and func(after):
-            token = tlist.group_tokens_between(sql.IdentifierList, before, after, extend=True)
-        token = tlist.token_next_by(m=M_COMMA, idx=tlist.token_index(token) + 1)
+            tidx = tlist.token_index(before)
+            token = tlist.group_tokens_between(sql.IdentifierList, tidx, after, extend=True)
+
+        token = tlist.token_next_by(m=M_COMMA, idx=tidx + 1)
 
 
 def group_brackets(tlist):
@@ -215,10 +219,11 @@ def group_aliased(tlist):
 
     token = tlist.token_next_by(i=I_ALIAS, t=T.Number)
     while token:
-        next_ = tlist.token_next(tlist.token_index(token))
+        tidx = tlist.token_index(token)
+        next_ = tlist.token_next(tidx)
         if imt(next_, i=sql.Identifier):
-            token = tlist.group_tokens_between(sql.Identifier, token, next_, extend=True)
-        token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=tlist.token_index(token) + 1)
+            token = tlist.group_tokens_between(sql.Identifier, tidx, next_, extend=True)
+        token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=tidx + 1)
--
cgit v1.2.1
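The shape of this change in miniature (illustrative code, not the sqlparse API): look the position up once per iteration, then derive the neighbours and the resume point from that single index instead of searching for the token again:

    def find_comma(tokens, start=0):
        for idx in range(start, len(tokens)):
            if tokens[idx] == ',':
                return idx
        return None

    tokens = list("a , b , c")
    tidx = find_comma(tokens)
    while tidx is not None:
        # The one index is reused for both neighbours and the resume
        # point; the list is never rescanned to relocate the token.
        before, after = tokens[tidx - 1], tokens[tidx + 1]
        tidx = find_comma(tokens, start=tidx + 1)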
From 67dc823e1174eee9ea2159674c8eb016b2f95b54 Mon Sep 17 00:00:00 2001
From: Sjoerd Job Postmus
Date: Thu, 2 Jun 2016 10:08:00 +0200
Subject: Use specialized token_idx_next_by in group_aliased.

The method group_aliased was making a lot of calls to token_index. By
specializing token_next_by to token_idx_next_by, the calls to
token_index became superfluous.

Also use token_idx_next_by in group_identifier_list. It was making a
lot of calls, which are now reduced by more than half.
---
 sqlparse/engine/grouping.py | 10 ++++------
 sqlparse/sql.py             | 20 ++++++++++++++++++++
 2 files changed, 24 insertions(+), 6 deletions(-)

(limited to 'sqlparse')

diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index 77a53ad..fddee0f 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -163,17 +163,16 @@ def group_identifier_list(tlist):
                      (T.Keyword, T.Comment, T.Wildcard))
     func = lambda t: imt(t, i=I_IDENT_LIST, m=M_ROLE, t=T_IDENT_LIST)
 
-    token = tlist.token_next_by(m=M_COMMA)
+    tidx, token = tlist.token_idx_next_by(m=M_COMMA)
     while token:
-        tidx = tlist.token_index(token)
         before, after = tlist.token_prev(tidx), tlist.token_next(tidx)
 
         if func(before) and func(after):
             tidx = tlist.token_index(before)
             token = tlist.group_tokens_between(sql.IdentifierList, tidx, after, extend=True)
 
-        token = tlist.token_next_by(m=M_COMMA, idx=tidx + 1)
+        tidx, token = tlist.token_idx_next_by(m=M_COMMA, idx=tidx + 1)
 
 
 def group_brackets(tlist):
@@ -217,13 +216,12 @@ def group_aliased(tlist):
     I_ALIAS = (sql.Parenthesis, sql.Function, sql.Case, sql.Identifier, )
     # sql.Operation)
 
-    token = tlist.token_next_by(i=I_ALIAS, t=T.Number)
+    tidx, token = tlist.token_idx_next_by(i=I_ALIAS, t=T.Number)
     while token:
-        tidx = tlist.token_index(token)
         next_ = tlist.token_next(tidx)
         if imt(next_, i=sql.Identifier):
             token = tlist.group_tokens_between(sql.Identifier, tidx, next_, extend=True)
-        token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=tidx + 1)
+        tidx, token = tlist.token_idx_next_by(i=I_ALIAS, t=T.Number, idx=tidx + 1)
diff --git a/sqlparse/sql.py b/sqlparse/sql.py
index dfe0430..928b784 100644
--- a/sqlparse/sql.py
+++ b/sqlparse/sql.py
@@ -225,6 +225,22 @@ class TokenList(Token):
     def _groupable_tokens(self):
         return self.tokens
 
+    def _token_idx_matching(self, funcs, start=0, end=None, reverse=False):
+        """next token that match functions"""
+        if start is None:
+            return None
+
+        if not isinstance(funcs, (list, tuple)):
+            funcs = (funcs,)
+
+        iterable = enumerate(self.tokens[start:end], start=start)
+
+        for idx, token in iterable:
+            for func in funcs:
+                if func(token):
+                    return idx, token
+        return None, None
+
     def _token_matching(self, funcs, start=0, end=None, reverse=False):
         """next token that match functions"""
         if start is None:
@@ -259,6 +275,10 @@ class TokenList(Token):
                      (ignore_comments and imt(tk, i=Comment)))
         return self._token_matching(funcs)
 
+    def token_idx_next_by(self, i=None, m=None, t=None, idx=0, end=None):
+        funcs = lambda tk: imt(tk, i, m, t)
+        return self._token_idx_matching(funcs, idx, end)
+
     def token_next_by(self, i=None, m=None, t=None, idx=0, end=None):
         funcs = lambda tk: imt(tk, i, m, t)
         return self._token_matching(funcs, idx, end)
--
cgit v1.2.1
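The pair-returning search, sketched standalone (a hypothetical `find_next`, not the sqlparse API): returning `(index, token)` together means the caller never has to call a linear `token_index` scan to recover where the match was:

    def find_next(tokens, pred, start=0):
        for idx, token in enumerate(tokens[start:], start=start):
            if pred(token):
                return idx, token
        return None, None

    tokens = ['SELECT', ' ', 'a', ',', ' ', 'b', ',', ' ', 'c']
    is_comma = lambda t: t == ','
    tidx, token = find_next(tokens, is_comma)
    while token:
        # Neighbours and the resume point come straight from tidx: O(1).
        tidx, token = find_next(tokens, is_comma, start=tidx + 1)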
From 8f7968ed5c649e5227e605ee272f59dd5ca75adb Mon Sep 17 00:00:00 2001
From: Sjoerd Job Postmus
Date: Thu, 2 Jun 2016 10:28:54 +0200
Subject: Index-based token_idx_prev

Prevent some more calls to token_index in group_identifier_list. They
are now all gone.
---
 sqlparse/engine/grouping.py |  5 +++--
 sqlparse/sql.py             | 28 ++++++++++++++++++++++------
 2 files changed, 25 insertions(+), 8 deletions(-)

(limited to 'sqlparse')

diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index fddee0f..6bdba2f 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -166,10 +166,11 @@ def group_identifier_list(tlist):
 
     tidx, token = tlist.token_idx_next_by(m=M_COMMA)
     while token:
-        before, after = tlist.token_prev(tidx), tlist.token_next(tidx)
+        before_idx, before = tlist.token_idx_prev(tidx)
+        after = tlist.token_next(tidx)
 
         if func(before) and func(after):
-            tidx = tlist.token_index(before)
+            tidx = before_idx
             token = tlist.group_tokens_between(sql.IdentifierList, tidx, after, extend=True)
 
         tidx, token = tlist.token_idx_next_by(m=M_COMMA, idx=tidx + 1)
diff --git a/sqlparse/sql.py b/sqlparse/sql.py
index 928b784..9782c33 100644
--- a/sqlparse/sql.py
+++ b/sqlparse/sql.py
@@ -233,12 +233,18 @@ class TokenList(Token):
         if not isinstance(funcs, (list, tuple)):
             funcs = (funcs,)
 
-        iterable = enumerate(self.tokens[start:end], start=start)
-
-        for idx, token in iterable:
-            for func in funcs:
-                if func(token):
-                    return idx, token
+        if reverse:
+            assert end is None
+            for idx in range(start - 2, -1, -1):
+                token = self.tokens[idx]
+                for func in funcs:
+                    if func(token):
+                        return idx, token
+        else:
+            for idx, token in enumerate(self.tokens[start:end], start=start):
+                for func in funcs:
+                    if func(token):
+                        return idx, token
         return None, None
 
     def _token_matching(self, funcs, start=0, end=None, reverse=False):
@@ -312,6 +318,16 @@ class TokenList(Token):
     def token_matching(self, idx, funcs):
         return self._token_matching(funcs, idx)
 
+    def token_idx_prev(self, idx, skip_ws=True):
+        """Returns the previous token relative to *idx*.
+
+        If *skip_ws* is ``True`` (the default) whitespace tokens are ignored.
+        ``None`` is returned if there's no previous token.
+        """
+        idx += 1  # a lot of code usage currently pre-compensates for this
+        funcs = lambda tk: not (tk.is_whitespace() and skip_ws)
+        return self._token_idx_matching(funcs, idx, reverse=True)
+
     def token_prev(self, idx, skip_ws=True):
         """Returns the previous token relative to *idx*.
--
cgit v1.2.1
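On the arithmetic in `token_idx_prev`: callers pass the index of the token they are currently on, but the existing call sites were written against an API that pre-compensates by one (hence the `idx += 1` and its comment), so `range(start - 2, -1, -1)` begins the backward scan exactly at the original `idx - 1`, the token immediately before the current one. The same arithmetic as a standalone sketch (hypothetical helper, not the sqlparse API):

    def prev_non_ws(tokens, idx):
        idx += 1                          # pre-compensate, as in the patch
        for i in range(idx - 2, -1, -1):  # i.e. scan back from the original idx - 1
            if not tokens[i].isspace():
                return i, tokens[i]
        return None, None

    tokens = ['a', ' ', ',', ' ', 'b']
    print(prev_non_ws(tokens, 2))         # (0, 'a'): the whitespace is skipped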
From 89d4f68ba5bbe78a9dd89257cbe4a9f3cfa76433 Mon Sep 17 00:00:00 2001
From: Sjoerd Job Postmus
Date: Thu, 2 Jun 2016 11:58:19 +0200
Subject: Use a specialized token_idx_next.

Prevent calling token_index.
---
 sqlparse/engine/grouping.py |  8 ++++----
 sqlparse/sql.py             | 20 ++++++++++++++++++++
 2 files changed, 24 insertions(+), 4 deletions(-)

(limited to 'sqlparse')

diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index 6bdba2f..0169830 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -167,11 +167,11 @@ def group_identifier_list(tlist):
     tidx, token = tlist.token_idx_next_by(m=M_COMMA)
     while token:
         before_idx, before = tlist.token_idx_prev(tidx)
-        after = tlist.token_next(tidx)
+        after_idx, after = tlist.token_idx_next(tidx)
 
         if func(before) and func(after):
             tidx = before_idx
-            token = tlist.group_tokens_between(sql.IdentifierList, tidx, after, extend=True)
+            token = tlist.group_tokens_between(sql.IdentifierList, tidx, after_idx, extend=True)
 
         tidx, token = tlist.token_idx_next_by(m=M_COMMA, idx=tidx + 1)
@@ -219,9 +219,9 @@ def group_aliased(tlist):
 
     tidx, token = tlist.token_idx_next_by(i=I_ALIAS, t=T.Number)
     while token:
-        next_ = tlist.token_next(tidx)
+        next_index_, next_ = tlist.token_idx_next(tidx)
         if imt(next_, i=sql.Identifier):
-            token = tlist.group_tokens_between(sql.Identifier, tidx, next_, extend=True)
+            token = tlist.group_tokens_between(sql.Identifier, tidx, next_index_, extend=True)
         tidx, token = tlist.token_idx_next_by(i=I_ALIAS, t=T.Number, idx=tidx + 1)
diff --git a/sqlparse/sql.py b/sqlparse/sql.py
index 9782c33..f3ef642 100644
--- a/sqlparse/sql.py
+++ b/sqlparse/sql.py
@@ -350,6 +350,26 @@ class TokenList(Token):
         funcs = lambda tk: not (tk.is_whitespace() and skip_ws)
         return self._token_matching(funcs, idx)
 
+    def token_idx_next(self, idx, skip_ws=True):
+        """Returns the next token relative to *idx*.
+
+        If *skip_ws* is ``True`` (the default) whitespace tokens are ignored.
+        ``None`` is returned if there's no next token.
+        """
+        if isinstance(idx, int):
+            idx += 1  # a lot of code usage currently pre-compensates for this
+        try:
+            if not skip_ws:
+                return idx, self.tokens[idx]
+            else:
+                while True:
+                    token = self.tokens[idx]
+                    if not token.is_whitespace():
+                        return idx, token
+                    idx += 1
+        except IndexError:
+            return None, None
+
     def token_index(self, token, start=0):
         """Return list index of token."""
         start = self.token_index(start) if not isinstance(start, int) else start
--
cgit v1.2.1
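The forward counterpart follows the same convention: step one past the given index, then walk forward skipping whitespace and return the `(index, token)` pair. A standalone sketch (hypothetical helper, not the sqlparse API):

    def next_non_ws(tokens, idx):
        idx += 1                          # same pre-compensation convention
        while idx < len(tokens):
            if not tokens[idx].isspace():
                return idx, tokens[idx]
            idx += 1
        return None, None

    tokens = ['a', ' ', ',', ' ', 'b']
    print(next_non_ws(tokens, 2))         # (4, 'b')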