From 6145070d6590f1e8f7fc4d86fb0a1061bc1a47d9 Mon Sep 17 00:00:00 2001 From: Sjoerd Job Postmus Date: Wed, 1 Jun 2016 20:23:19 +0200 Subject: Call `Token` methods index-based. A lot of methods have token-to-idx magic due to `Token._find_matching` converting tokens to indexes. Unknowingly, this turns innocent-looking algorithms into O(n^2) (or worse). This does not solve the problem, but it does make the problem more visible by making the call to `Token.token_index` explicit at the call-site, at the cost of repeating it over and over. --- sqlparse/engine/grouping.py | 50 +++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 24 deletions(-) (limited to 'sqlparse/engine') diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index f7953e7..39bcf8e 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -23,15 +23,17 @@ def _group_left_right(tlist, m, cls, token = tlist.token_next_by(m=m) while token: - left, right = tlist.token_prev(token), tlist.token_next(token) + tidx = tlist.token_index(token) + left, right = tlist.token_prev(tidx), tlist.token_next(tidx) if valid_left(left) and valid_right(right): if semicolon: - sright = tlist.token_next_by(m=M_SEMICOLON, idx=right) + sright = tlist.token_next_by(m=M_SEMICOLON, idx=tidx + 1) right = sright or right # only overwrite if a semicolon present. tokens = tlist.tokens_between(left, right) + # Luckily, this leaves the position of `token` intact. token = tlist.group_tokens(cls, tokens, extend=True) - token = tlist.token_next_by(m=m, idx=token) + token = tlist.token_next_by(m=m, idx=tidx + 1) def _group_matching(tlist, cls): @@ -44,7 +46,7 @@ def _group_matching(tlist, cls): if end is not None: token = tlist.group_tokens(cls, tlist.tokens_between(token, end)) _group_matching(token, cls) - token = tlist.token_next_by(m=cls.M_OPEN, idx=token) + token = tlist.token_next_by(m=cls.M_OPEN, idx=tlist.token_index(token) + 1) def group_if(tlist): @@ -97,7 +99,7 @@ def group_identifier(tlist): token = tlist.token_next_by(t=T_IDENT) while token: token = tlist.group_tokens(sql.Identifier, [token, ]) - token = tlist.token_next_by(t=T_IDENT, idx=token) + token = tlist.token_next_by(t=T_IDENT, idx=tlist.token_index(token) + 1) def group_period(tlist): @@ -114,12 +116,12 @@ def group_arrays(tlist): token = tlist.token_next_by(i=sql.SquareBrackets) while token: - prev = tlist.token_prev(idx=token) + prev = tlist.token_prev(idx=tlist.token_index(token)) if imt(prev, i=(sql.SquareBrackets, sql.Identifier, sql.Function), t=(T.Name, T.String.Symbol,)): tokens = tlist.tokens_between(prev, token) token = tlist.group_tokens(sql.Identifier, tokens, extend=True) - token = tlist.token_next_by(i=sql.SquareBrackets, idx=token) + token = tlist.token_next_by(i=sql.SquareBrackets, idx=tlist.token_index(token) + 1) @recurse(sql.Identifier) @@ -132,7 +134,7 @@ def group_operator(tlist): token = tlist.token_next_by(t=(T.Operator, T.Wildcard)) while token: - left, right = tlist.token_prev(token), tlist.token_next(token) + left, right = tlist.token_prev(tlist.token_index(token)), tlist.token_next(tlist.token_index(token)) if func(left) and func(right): token.ttype = T.Operator @@ -140,7 +142,7 @@ # token = tlist.group_tokens(sql.Operation, tokens) token = tlist.group_tokens(sql.Identifier, tokens) - token = tlist.token_next_by(t=(T.Operator, T.Wildcard), idx=token) + token = tlist.token_next_by(t=(T.Operator, T.Wildcard), idx=tlist.token_index(token) + 1) @recurse(sql.IdentifierList) @@ 
-154,12 +156,12 @@ def group_identifier_list(tlist): token = tlist.token_next_by(m=M_COMMA) while token: - before, after = tlist.token_prev(token), tlist.token_next(token) + before, after = tlist.token_prev(tlist.token_index(token)), tlist.token_next(tlist.token_index(token)) if func(before) and func(after): tokens = tlist.tokens_between(before, after) token = tlist.group_tokens(sql.IdentifierList, tokens, extend=True) - token = tlist.token_next_by(m=M_COMMA, idx=token) + token = tlist.token_next_by(m=M_COMMA, idx=tlist.token_index(token) + 1) def group_brackets(tlist): @@ -175,20 +177,20 @@ def group_comments(tlist): token = tlist.token_next_by(t=T.Comment) while token: end = tlist.token_not_matching( - token, lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace()) + tlist.token_index(token) + 1, lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace()) if end is not None: - end = tlist.token_prev(end, False) + end = tlist.token_prev(tlist.token_index(end), False) tokens = tlist.tokens_between(token, end) token = tlist.group_tokens(sql.Comment, tokens) - token = tlist.token_next_by(t=T.Comment, idx=token) + token = tlist.token_next_by(t=T.Comment, idx=tlist.token_index(token) + 1) @recurse(sql.Where) def group_where(tlist): token = tlist.token_next_by(m=sql.Where.M_OPEN) while token: - end = tlist.token_next_by(m=sql.Where.M_CLOSE, idx=token) + end = tlist.token_next_by(m=sql.Where.M_CLOSE, idx=tlist.token_index(token) + 1) if end is None: tokens = tlist.tokens_between(token, tlist._groupable_tokens[-1]) @@ -197,7 +199,7 @@ def group_where(tlist): token, tlist.tokens[tlist.token_index(end) - 1]) token = tlist.group_tokens(sql.Where, tokens) - token = tlist.token_next_by(m=sql.Where.M_OPEN, idx=token) + token = tlist.token_next_by(m=sql.Where.M_OPEN, idx=tlist.token_index(token) + 1) @recurse() @@ -207,11 +209,11 @@ def group_aliased(tlist): token = tlist.token_next_by(i=I_ALIAS, t=T.Number) while token: - next_ = tlist.token_next(token) + next_ = tlist.token_next(tlist.token_index(token)) if imt(next_, i=sql.Identifier): tokens = tlist.tokens_between(token, next_) token = tlist.group_tokens(sql.Identifier, tokens, extend=True) - token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=token) + token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=tlist.token_index(token) + 1) def group_typecasts(tlist): @@ -231,33 +233,33 @@ def group_functions(tlist): return token = tlist.token_next_by(t=T.Name) while token: - next_ = tlist.token_next(token) + next_ = tlist.token_next(tlist.token_index(token)) if imt(next_, i=sql.Parenthesis): tokens = tlist.tokens_between(token, next_) token = tlist.group_tokens(sql.Function, tokens) - token = tlist.token_next_by(t=T.Name, idx=token) + token = tlist.token_next_by(t=T.Name, idx=tlist.token_index(token) + 1) def group_order(tlist): """Group together Identifier and Asc/Desc token""" token = tlist.token_next_by(t=T.Keyword.Order) while token: - prev = tlist.token_prev(token) + prev = tlist.token_prev(tlist.token_index(token)) if imt(prev, i=sql.Identifier, t=T.Number): tokens = tlist.tokens_between(prev, token) token = tlist.group_tokens(sql.Identifier, tokens) - token = tlist.token_next_by(t=T.Keyword.Order, idx=token) + token = tlist.token_next_by(t=T.Keyword.Order, idx=tlist.token_index(token) + 1) @recurse() def align_comments(tlist): token = tlist.token_next_by(i=sql.Comment) while token: - before = tlist.token_prev(token) + before = tlist.token_prev(tlist.token_index(token)) if isinstance(before, sql.TokenList): tokens = tlist.tokens_between(before, token) 
token = tlist.group_tokens(sql.TokenList, tokens, extend=True) - token = tlist.token_next_by(i=sql.Comment, idx=token) + token = tlist.token_next_by(i=sql.Comment, idx=tlist.token_index(token) + 1) def group(tlist): -- cgit v1.2.1 From 896774cb5298924abbcea81b9b90f1c7c10b3d6a Mon Sep 17 00:00:00 2001 From: Sjoerd Job Postmus Date: Thu, 2 Jun 2016 07:38:27 +0200 Subject: Special-case group_tokens(..., tokens_between()) When the tokens are guaranteed to form a contiguous range, it is possible to get rid of a lot of calls to `Token.tokens.remove(...)`, which are expensive. --- sqlparse/engine/grouping.py | 38 ++++++++++++++------------------------ 1 file changed, 14 insertions(+), 24 deletions(-) (limited to 'sqlparse/engine') diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index 39bcf8e..ad7da9f 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -30,9 +30,8 @@ def _group_left_right(tlist, m, cls, if semicolon: sright = tlist.token_next_by(m=M_SEMICOLON, idx=tidx + 1) right = sright or right # only overwrite if a semicolon present. - tokens = tlist.tokens_between(left, right) # Luckily, this leaves the position of `token` intact. - token = tlist.group_tokens(cls, tokens, extend=True) + token = tlist.group_tokens_between(cls, left, right, extend=True) token = tlist.token_next_by(m=m, idx=tidx + 1) @@ -44,7 +43,7 @@ def _group_matching(tlist, cls): while token: end = find_matching(tlist, token, cls.M_OPEN, cls.M_CLOSE) if end is not None: - token = tlist.group_tokens(cls, tlist.tokens_between(token, end)) + token = tlist.group_tokens_between(cls, token, end) _group_matching(token, cls) token = tlist.token_next_by(m=cls.M_OPEN, idx=tlist.token_index(token) + 1) @@ -119,8 +118,7 @@ def group_arrays(tlist): prev = tlist.token_prev(idx=tlist.token_index(token)) if imt(prev, i=(sql.SquareBrackets, sql.Identifier, sql.Function), t=(T.Name, T.String.Symbol,)): - tokens = tlist.tokens_between(prev, token) - token = tlist.group_tokens(sql.Identifier, tokens, extend=True) + token = tlist.group_tokens_between(sql.Identifier, prev, token, extend=True) token = tlist.token_next_by(i=sql.SquareBrackets, idx=tlist.token_index(token) + 1) @@ -138,9 +136,8 @@ def group_operator(tlist): if func(left) and func(right): token.ttype = T.Operator - tokens = tlist.tokens_between(left, right) - # token = tlist.group_tokens(sql.Operation, tokens) - token = tlist.group_tokens(sql.Identifier, tokens) + # token = tlist.group_tokens_between(sql.Operation, left, right) + token = tlist.group_tokens_between(sql.Identifier, left, right) token = tlist.token_next_by(t=(T.Operator, T.Wildcard), idx=tlist.token_index(token) + 1) @@ -159,8 +156,7 @@ def group_identifier_list(tlist): before, after = tlist.token_prev(tlist.token_index(token)), tlist.token_next(tlist.token_index(token)) if func(before) and func(after): - tokens = tlist.tokens_between(before, after) - token = tlist.group_tokens(sql.IdentifierList, tokens, extend=True) + token = tlist.group_tokens_between(sql.IdentifierList, before, after, extend=True) token = tlist.token_next_by(m=M_COMMA, idx=tlist.token_index(token) + 1) @@ -180,8 +176,7 @@ def group_comments(tlist): tlist.token_index(token) + 1, lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace()) if end is not None: end = tlist.token_prev(tlist.token_index(end), False) - tokens = tlist.tokens_between(token, end) - token = tlist.group_tokens(sql.Comment, tokens) + token = tlist.group_tokens_between(sql.Comment, token, end) token = tlist.token_next_by(t=T.Comment, 
idx=tlist.token_index(token) + 1) @@ -193,12 +188,11 @@ def group_where(tlist): end = tlist.token_next_by(m=sql.Where.M_CLOSE, idx=tlist.token_index(token) + 1) if end is None: - tokens = tlist.tokens_between(token, tlist._groupable_tokens[-1]) + end = tlist._groupable_tokens[-1] else: - tokens = tlist.tokens_between( - token, tlist.tokens[tlist.token_index(end) - 1]) + end = tlist.tokens[tlist.token_index(end) - 1] - token = tlist.group_tokens(sql.Where, tokens) + token = tlist.group_tokens_between(sql.Where, token, end) token = tlist.token_next_by(m=sql.Where.M_OPEN, idx=tlist.token_index(token) + 1) @@ -211,8 +205,7 @@ def group_aliased(tlist): while token: next_ = tlist.token_next(tlist.token_index(token)) if imt(next_, i=sql.Identifier): - tokens = tlist.tokens_between(token, next_) - token = tlist.group_tokens(sql.Identifier, tokens, extend=True) + token = tlist.group_tokens_between(sql.Identifier, token, next_, extend=True) token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=tlist.token_index(token) + 1) @@ -235,8 +228,7 @@ def group_functions(tlist): while token: next_ = tlist.token_next(tlist.token_index(token)) if imt(next_, i=sql.Parenthesis): - tokens = tlist.tokens_between(token, next_) - token = tlist.group_tokens(sql.Function, tokens) + token = tlist.group_tokens_between(sql.Function, token, next_) token = tlist.token_next_by(t=T.Name, idx=tlist.token_index(token) + 1) @@ -246,8 +238,7 @@ def group_order(tlist): while token: prev = tlist.token_prev(tlist.token_index(token)) if imt(prev, i=sql.Identifier, t=T.Number): - tokens = tlist.tokens_between(prev, token) - token = tlist.group_tokens(sql.Identifier, tokens) + token = tlist.group_tokens_between(sql.Identifier, prev, token) token = tlist.token_next_by(t=T.Keyword.Order, idx=tlist.token_index(token) + 1) @@ -257,8 +248,7 @@ def align_comments(tlist): while token: before = tlist.token_prev(tlist.token_index(token)) if isinstance(before, sql.TokenList): - tokens = tlist.tokens_between(before, token) - token = tlist.group_tokens(sql.TokenList, tokens, extend=True) + token = tlist.group_tokens_between(sql.TokenList, before, token, extend=True) token = tlist.token_next_by(i=sql.Comment, idx=tlist.token_index(token) + 1) -- cgit v1.2.1 From d4cc0644c8348da5e49c58df5e26a3e969045249 Mon Sep 17 00:00:00 2001 From: Sjoerd Job Postmus Date: Thu, 2 Jun 2016 08:30:27 +0200 Subject: Replace _group_matching with an inward-out grouping algorithm Previously, all the matching between open and close tokens was done over and over: first finding the matching closing token, then grouping the tokens in between, and then recursing over the newly created list. Instead, it is more efficient to look for the previous open-token on finding a closing-token, group these two together, and then continue on. squashed: Handle token indices in group_tokens_between and find_matching. --- sqlparse/engine/grouping.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) (limited to 'sqlparse/engine') diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index ad7da9f..e004eae 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -2,7 +2,7 @@ from sqlparse import sql from sqlparse import tokens as T -from sqlparse.utils import recurse, imt, find_matching +from sqlparse.utils import recurse, imt M_ROLE = (T.Keyword, ('null', 'role')) M_SEMICOLON = (T.Punctuation, ';') @@ -39,13 +39,25 @@ def _group_matching(tlist, cls): """Groups Tokens that have beginning and end. ie. 
parenthesis, brackets.."" idx = 1 if imt(tlist, i=cls) else 0 - token = tlist.token_next_by(m=cls.M_OPEN, idx=idx) - while token: - end = find_matching(tlist, token, cls.M_OPEN, cls.M_CLOSE) - if end is not None: - token = tlist.group_tokens_between(cls, token, end) - _group_matching(token, cls) - token = tlist.token_next_by(m=cls.M_OPEN, idx=tlist.token_index(token) + 1) + opens = [] + + while True: + try: + token = tlist.tokens[idx] + except IndexError: + break + + if token.match(*cls.M_OPEN): + opens.append(idx) + elif token.match(*cls.M_CLOSE): + try: + open_idx = opens.pop() + except IndexError: + break + tlist.group_tokens_between(cls, open_idx, idx) + idx = open_idx + + idx += 1 def group_if(tlist): -- cgit v1.2.1 From 237575ef726e4232b60a5043177c43a72f370238 Mon Sep 17 00:00:00 2001 From: Sjoerd Job Postmus Date: Thu, 2 Jun 2016 09:21:05 +0200 Subject: Re-use token index in group_identifier. --- sqlparse/engine/grouping.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'sqlparse/engine') diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index e004eae..77a53ad 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -109,8 +109,9 @@ def group_identifier(tlist): token = tlist.token_next_by(t=T_IDENT) while token: - token = tlist.group_tokens(sql.Identifier, [token, ]) - token = tlist.token_next_by(t=T_IDENT, idx=tlist.token_index(token) + 1) + tidx = tlist.token_index(token) + token = tlist.group_tokens_between(sql.Identifier, tidx, tidx) + token = tlist.token_next_by(t=T_IDENT, idx=tidx + 1) def group_period(tlist): @@ -165,11 +166,14 @@ def group_identifier_list(tlist): token = tlist.token_next_by(m=M_COMMA) while token: - before, after = tlist.token_prev(tlist.token_index(token)), tlist.token_next(tlist.token_index(token)) + tidx = tlist.token_index(token) + before, after = tlist.token_prev(tidx), tlist.token_next(tidx) if func(before) and func(after): - token = tlist.group_tokens_between(sql.IdentifierList, before, after, extend=True) - token = tlist.token_next_by(m=M_COMMA, idx=tlist.token_index(token) + 1) + tidx = tlist.token_index(before) + token = tlist.group_tokens_between(sql.IdentifierList, tidx, after, extend=True) + + token = tlist.token_next_by(m=M_COMMA, idx=tidx + 1) def group_brackets(tlist): @@ -215,10 +219,11 @@ def group_aliased(tlist): token = tlist.token_next_by(i=I_ALIAS, t=T.Number) while token: - next_ = tlist.token_next(tlist.token_index(token)) + tidx = tlist.token_index(token) + next_ = tlist.token_next(tidx) if imt(next_, i=sql.Identifier): - token = tlist.group_tokens_between(sql.Identifier, token, next_, extend=True) - token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=tlist.token_index(token) + 1) + token = tlist.group_tokens_between(sql.Identifier, tidx, next_, extend=True) + token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=tidx + 1) def group_typecasts(tlist): -- cgit v1.2.1 From 67dc823e1174eee9ea2159674c8eb016b2f95b54 Mon Sep 17 00:00:00 2001 From: Sjoerd Job Postmus Date: Thu, 2 Jun 2016 10:08:00 +0200 Subject: Use specialized token_idx_next_by in group_aliased. The method group_aliased was making a lot of calls to token_index. By specializing token_next_by to token_idx_next_by, the calls to token_index became superfluous. Also use token_idx_next_by in group_identifier_list. It was making a lot of calls, and that number has now been cut by more than half. 
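The win here generalizes: any navigation call that takes a token must first recover that token's index with a linear scan, so a loop built on such calls is O(n^2) overall. A minimal, self-contained sketch of the difference (not sqlparse's real classes; `matches` stands in for the m=/t=/i= matching of the real API):

    class SketchList:
        def __init__(self, tokens):
            self.tokens = tokens

        def token_index(self, token):
            # Linear scan. One call per loop iteration is what turned
            # the grouping passes into O(n^2).
            return self.tokens.index(token)

        def token_next_by(self, matches, token):
            # Token-based lookup: must recover the index first - a hidden O(n).
            idx = self.token_index(token)
            for tk in self.tokens[idx + 1:]:
                if matches(tk):
                    return tk

        def token_idx_next_by(self, matches, idx=-1):
            # Index-based lookup: the caller carries the position along,
            # so each step starts where the previous one stopped.
            for i in range(idx + 1, len(self.tokens)):
                if matches(self.tokens[i]):
                    return i, self.tokens[i]
            return None, None

A loop driven by token_idx_next_by makes one pass over the list in total, while the token-based variant rescans from the start on every iteration.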
--- sqlparse/engine/grouping.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'sqlparse/engine') diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index 77a53ad..fddee0f 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -163,17 +163,16 @@ def group_identifier_list(tlist): (T.Keyword, T.Comment, T.Wildcard)) func = lambda t: imt(t, i=I_IDENT_LIST, m=M_ROLE, t=T_IDENT_LIST) - token = tlist.token_next_by(m=M_COMMA) + tidx, token = tlist.token_idx_next_by(m=M_COMMA) while token: - tidx = tlist.token_index(token) before, after = tlist.token_prev(tidx), tlist.token_next(tidx) if func(before) and func(after): tidx = tlist.token_index(before) token = tlist.group_tokens_between(sql.IdentifierList, tidx, after, extend=True) - token = tlist.token_next_by(m=M_COMMA, idx=tidx + 1) + tidx, token = tlist.token_idx_next_by(m=M_COMMA, idx=tidx + 1) def group_brackets(tlist): @@ -217,13 +216,12 @@ def group_aliased(tlist): I_ALIAS = (sql.Parenthesis, sql.Function, sql.Case, sql.Identifier, ) # sql.Operation) - token = tlist.token_next_by(i=I_ALIAS, t=T.Number) + tidx, token = tlist.token_idx_next_by(i=I_ALIAS, t=T.Number) while token: - tidx = tlist.token_index(token) next_ = tlist.token_next(tidx) if imt(next_, i=sql.Identifier): token = tlist.group_tokens_between(sql.Identifier, tidx, next_, extend=True) - token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=tidx + 1) + tidx, token = tlist.token_idx_next_by(i=I_ALIAS, t=T.Number, idx=tidx + 1) def group_typecasts(tlist): -- cgit v1.2.1 From 8f7968ed5c649e5227e605ee272f59dd5ca75adb Mon Sep 17 00:00:00 2001 From: Sjoerd Job Postmus Date: Thu, 2 Jun 2016 10:28:54 +0200 Subject: Index-based token_idx_prev Prevent some more calls to token_index in group_identifier_list. They are now all gone. --- sqlparse/engine/grouping.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'sqlparse/engine') diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index fddee0f..6bdba2f 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -166,10 +166,11 @@ def group_identifier_list(tlist): tidx, token = tlist.token_idx_next_by(m=M_COMMA) while token: - before, after = tlist.token_prev(tidx), tlist.token_next(tidx) + before_idx, before = tlist.token_idx_prev(tidx) + after = tlist.token_next(tidx) if func(before) and func(after): - tidx = tlist.token_index(before) + tidx = before_idx token = tlist.group_tokens_between(sql.IdentifierList, tidx, after, extend=True) tidx, token = tlist.token_idx_next_by(m=M_COMMA, idx=tidx + 1) -- cgit v1.2.1 From 89d4f68ba5bbe78a9dd89257cbe4a9f3cfa76433 Mon Sep 17 00:00:00 2001 From: Sjoerd Job Postmus Date: Thu, 2 Jun 2016 11:58:19 +0200 Subject: Use a specialized token_idx_next. Prevent calling token_index. 
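With token_idx_prev and token_idx_next returning (index, token) pairs as well, a grouping pass can stay index-based from start to finish. A sketch of the loop shape these commits converge on, reduced to plain lists and strings (hypothetical helpers, not the real sqlparse API):

    def token_idx_next(tokens, idx):
        """(index, token) of the first non-whitespace token after idx."""
        for i in range(idx + 1, len(tokens)):
            if tokens[i] != ' ':
                return i, tokens[i]
        return None, None

    def token_idx_prev(tokens, idx):
        """(index, token) of the first non-whitespace token before idx."""
        for i in range(idx - 1, -1, -1):
            if tokens[i] != ' ':
                return i, tokens[i]
        return None, None

    def group_commas(tokens):
        """Fold every "x , y" triple into a nested list, in place."""
        tidx = tokens.index(',') if ',' in tokens else None
        while tidx is not None:
            pidx, prev_ = token_idx_prev(tokens, tidx)
            nidx, next_ = token_idx_next(tokens, tidx)
            if prev_ is not None and next_ is not None:
                tokens[pidx:nidx + 1] = [tokens[pidx:nidx + 1]]
                tidx = pidx  # the new group sits where prev_ used to be
            try:
                tidx = tokens.index(',', tidx + 1)
            except ValueError:
                tidx = None
        return tokens

For example, group_commas(list("a ,b, c")) returns [[['a', ' ', ',', 'b'], ',', ' ', 'c']]. Each round resumes at tidx + 1 instead of rescanning from the front, mirroring token_idx_next_by(m=M_COMMA, idx=tidx + 1) above.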
--- sqlparse/engine/grouping.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'sqlparse/engine') diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index 6bdba2f..0169830 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -167,11 +167,11 @@ def group_identifier_list(tlist): tidx, token = tlist.token_idx_next_by(m=M_COMMA) while token: before_idx, before = tlist.token_idx_prev(tidx) - after = tlist.token_next(tidx) + after_idx, after = tlist.token_idx_next(tidx) if func(before) and func(after): tidx = before_idx - token = tlist.group_tokens_between(sql.IdentifierList, tidx, after, extend=True) + token = tlist.group_tokens_between(sql.IdentifierList, tidx, after_idx, extend=True) tidx, token = tlist.token_idx_next_by(m=M_COMMA, idx=tidx + 1) @@ -219,9 +219,9 @@ def group_aliased(tlist): tidx, token = tlist.token_idx_next_by(i=I_ALIAS, t=T.Number) while token: - next_ = tlist.token_next(tidx) + next_index_, next_ = tlist.token_idx_next(tidx) if imt(next_, i=sql.Identifier): - token = tlist.group_tokens_between(sql.Identifier, tidx, next_, extend=True) + token = tlist.group_tokens_between(sql.Identifier, tidx, next_index_, extend=True) tidx, token = tlist.token_idx_next_by(i=I_ALIAS, t=T.Number, idx=tidx + 1) -- cgit v1.2.1 From 405a66817f1b0789901adf9c81b96658a04e6950 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Sun, 12 Jun 2016 21:22:07 -0700 Subject: Reapply fix for case within parenthesis --- sqlparse/engine/grouping.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'sqlparse/engine') diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index cae5d23..f9ca6b4 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -43,7 +43,9 @@ def _group_left_right(tlist, m, cls, def _group_matching(tlist, cls): """Groups Tokens that have beginning and end.""" - idx = 1 if imt(tlist, i=cls) else 0 + [_group_matching(sgroup, cls) for sgroup in tlist.get_sublists() + if not isinstance(sgroup, cls)] + idx = 1 if isinstance(tlist, cls) else 0 opens = [] -- cgit v1.2.1 From c601435bde6afd32f93b7e19b17287ca9d3b02f9 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Sun, 12 Jun 2016 21:39:14 -0700 Subject: Apply alt style for grouping left/right --- sqlparse/engine/grouping.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'sqlparse/engine') diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index f9ca6b4..240ce5e 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -23,11 +23,14 @@ def _group_left_right(tlist, m, cls, valid_right=lambda t: t is not None, semicolon=False): """Groups together tokens that are joined by a middle token. ie. x < y""" - [_group_left_right(sgroup, m, cls, valid_left, valid_right, semicolon) - for sgroup in tlist.get_sublists() if not isinstance(sgroup, cls)] + for token in list(tlist): + if token.is_group() and not isinstance(token, cls): + _group_left_right(token, m, cls, valid_left, valid_right, + semicolon) + continue + if not token.match(*m): + continue - token = tlist.token_next_by(m=m) - while token: tidx = tlist.token_index(token) left, right = tlist.token_prev(tidx), tlist.token_next(tidx) @@ -37,15 +40,14 @@ def _group_left_right(tlist, m, cls, sright = tlist.token_next_by(m=M_SEMICOLON, idx=tidx + 1) right = sright or right # Luckily, this leaves the position of `token` intact. 
- token = tlist.group_tokens_between(cls, left, right, extend=True) - token = tlist.token_next_by(m=m, idx=tidx + 1) + tlist.group_tokens_between(cls, left, right, extend=True) def _group_matching(tlist, cls): """Groups Tokens that have beginning and end.""" [_group_matching(sgroup, cls) for sgroup in tlist.get_sublists() if not isinstance(sgroup, cls)] - idx = 1 if isinstance(tlist, cls) else 0 + idx = 0 # check no longer needed since not recursing. opens = [] -- cgit v1.2.1 From 954ba46e16af4e3c9b1302bbae95ebf2a4be2a8b Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Sun, 12 Jun 2016 22:07:07 -0700 Subject: Refactor _group_matching --- sqlparse/engine/grouping.py | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) (limited to 'sqlparse/engine') diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index 240ce5e..bf76119 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -45,29 +45,25 @@ def _group_left_right(tlist, m, cls, def _group_matching(tlist, cls): """Groups Tokens that have beginning and end.""" - [_group_matching(sgroup, cls) for sgroup in tlist.get_sublists() - if not isinstance(sgroup, cls)] - idx = 0 # check no longer needed since not recursing. - opens = [] - - while True: - try: - token = tlist.tokens[idx] - except IndexError: - break + for token in list(tlist): + if token.is_group() and not isinstance(token, cls): + # Check inside previously grouped (ie. parenthesis) if group + # of different type is inside (ie, case). though ideally it + # should check for all open/close tokens at once to avoid recursion + _group_matching(token, cls) + continue if token.match(*cls.M_OPEN): - opens.append(idx) + opens.append(token) elif token.match(*cls.M_CLOSE): try: - open_idx = opens.pop() + open_token = opens.pop() except IndexError: - break + # this indicates invalid sql and unbalanced tokens. 
+ # instead of break, continue in case other "valid" groups exist + continue + tlist.group_tokens_between(cls, open_token, token) def group_if(tlist): -- cgit v1.2.1 From 997f95b8b6ec5129362dcfe5deedaf50800e3afc Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Mon, 13 Jun 2016 10:50:58 -0700 Subject: Change argument order to match order of all other functions --- sqlparse/engine/grouping.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'sqlparse/engine') diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index bf76119..86c4bf2 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -195,7 +195,8 @@ def group_comments(tlist): token = tlist.token_next_by(t=T.Comment) while token: end = tlist.token_not_matching( - tlist.token_index(token) + 1, lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace()) + lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace(), + idx=tlist.token_index(token) + 1) if end is not None: end = tlist.token_prev(tlist.token_index(end), False) token = tlist.group_tokens_between(sql.Comment, token, end) -- cgit v1.2.1 From a795be1a70a241e177227b742269fb2df88af962 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Mon, 13 Jun 2016 13:21:20 -0700 Subject: Change token_ funcs to token_idx funcs --- sqlparse/engine/grouping.py | 111 +++++++++++++++++++++++--------------------- 1 file changed, 59 insertions(+), 52 deletions(-) (limited to 'sqlparse/engine') diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index 86c4bf2..88064cb 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -32,15 +32,16 @@ def _group_left_right(tlist, m, cls, continue tidx = tlist.token_index(token) - left, right = tlist.token_prev(tidx), tlist.token_next(tidx) + pidx, prev_ = tlist.token_idx_prev(tidx) + nidx, next_ = tlist.token_idx_next(tidx) - if valid_left(left) and valid_right(right): + if valid_left(prev_) and valid_right(next_): if semicolon: # only overwrite if a semicolon present. - sright = tlist.token_next_by(m=M_SEMICOLON, idx=tidx + 1) - right = sright or right + snidx, _ = tlist.token_idx_next_by(m=M_SEMICOLON, idx=nidx) + nidx = snidx or nidx # Luckily, this leaves the position of `token` intact. 
- tlist.group_tokens_between(cls, left, right, extend=True) + tlist.group_tokens_between(cls, pidx, nidx, extend=True) def _group_matching(tlist, cls): @@ -114,11 +115,10 @@ def group_case(tlist): def group_identifier(tlist): T_IDENT = (T.String.Symbol, T.Name) - token = tlist.token_next_by(t=T_IDENT) + tidx, token = tlist.token_idx_next_by(t=T_IDENT) while token: - tidx = tlist.token_index(token) - token = tlist.group_tokens_between(sql.Identifier, tidx, tidx) - token = tlist.token_next_by(t=T_IDENT, idx=tidx + 1) + tlist.group_tokens_between(sql.Identifier, tidx, tidx) + tidx, token = tlist.token_idx_next_by(t=T_IDENT, idx=tidx + 1) def group_period(tlist): @@ -133,13 +133,14 @@ def group_period(tlist): def group_arrays(tlist): - token = tlist.token_next_by(i=sql.SquareBrackets) + tidx, token = tlist.token_idx_next_by(i=sql.SquareBrackets) while token: - prev = tlist.token_prev(tlist.token_index(token)) + pidx, prev = tlist.token_idx_prev(tidx) if imt(prev, i=(sql.SquareBrackets, sql.Identifier, sql.Function), t=(T.Name, T.String.Symbol,)): - token = tlist.group_tokens_between(sql.Identifier, prev, token, extend=True) - token = tlist.token_next_by(i=sql.SquareBrackets, idx=tlist.token_index(token) + 1) + tlist.group_tokens_between(sql.Identifier, pidx, tidx, extend=True) + tidx = pidx + tidx, token = tlist.token_idx_next_by(i=sql.SquareBrackets, idx=tidx + 1) @recurse(sql.Identifier) @@ -150,15 +151,18 @@ def group_operator(tlist): T_CYCLE = T_NUMERICAL + T_STRING + T_NAME func = lambda tk: imt(tk, i=I_CYCLE, t=T_CYCLE) - token = tlist.token_next_by(t=(T.Operator, T.Wildcard)) + tidx, token = tlist.token_idx_next_by(t=(T.Operator, T.Wildcard)) while token: - left, right = tlist.token_prev(tlist.token_index(token)), tlist.token_next(tlist.token_index(token)) + pidx, prev_ = tlist.token_idx_prev(tidx) + nidx, next_ = tlist.token_idx_next(tidx) - if func(left) and func(right): + if func(prev_) and func(next_): token.ttype = T.Operator - token = tlist.group_tokens_between(sql.Operation, left, right) + tlist.group_tokens_between(sql.Operation, pidx, nidx) + tidx = pidx - token = tlist.token_next_by(t=(T.Operator, T.Wildcard), idx=tlist.token_index(token) + 1) + tidx, token = tlist.token_idx_next_by(t=(T.Operator, T.Wildcard), + idx=tidx + 1) @recurse(sql.IdentifierList) @@ -172,13 +176,12 @@ def group_identifier_list(tlist): tidx, token = tlist.token_idx_next_by(m=M_COMMA) while token: - before_idx, before = tlist.token_idx_prev(tidx) - after_idx, after = tlist.token_idx_next(tidx) - - if func(before) and func(after): - tidx = before_idx - token = tlist.group_tokens_between(sql.IdentifierList, tidx, after_idx, extend=True) + pidx, prev_ = tlist.token_idx_prev(tidx) + nidx, next_ = tlist.token_idx_next(tidx) + if func(prev_) and func(next_): + tlist.group_tokens_between(sql.IdentifierList, pidx, nidx, extend=True) + tidx = pidx tidx, token = tlist.token_idx_next_by(m=M_COMMA, idx=tidx + 1) @@ -192,31 +195,32 @@ def group_parenthesis(tlist): @recurse(sql.Comment) def group_comments(tlist): - token = tlist.token_next_by(t=T.Comment) + tidx, token = tlist.token_idx_next_by(t=T.Comment) while token: end = tlist.token_not_matching( - lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace(), - idx=tlist.token_index(token) + 1) + lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace(), idx=tidx + 1) if end is not None: - end = tlist.token_prev(tlist.token_index(end), False) - token = tlist.group_tokens_between(sql.Comment, token, end) + eidx = tlist.token_index(end) + eidx, end = 
tlist.token_idx_prev(eidx, skip_ws=False) + tlist.group_tokens_between(sql.Comment, tidx, eidx) - token = tlist.token_next_by(t=T.Comment, idx=tlist.token_index(token) + 1) + tidx, token = tlist.token_idx_next_by(t=T.Comment, idx=tidx + 1) @recurse(sql.Where) def group_where(tlist): - token = tlist.token_next_by(m=sql.Where.M_OPEN) + tidx, token = tlist.token_idx_next_by(m=sql.Where.M_OPEN) while token: - end = tlist.token_next_by(m=sql.Where.M_CLOSE, idx=tlist.token_index(token) + 1) + eidx, end = tlist.token_idx_next_by(m=sql.Where.M_CLOSE, idx=tidx + 1) if end is None: end = tlist._groupable_tokens[-1] else: - end = tlist.tokens[tlist.token_index(end) - 1] - - token = tlist.group_tokens_between(sql.Where, token, end) - token = tlist.token_next_by(m=sql.Where.M_OPEN, idx=tlist.token_index(token) + 1) + end = tlist.tokens[eidx - 1] + # TODO: convert this to eidx instead of end token. + # i think above values are len(tlist) and eidx-1 + tlist.group_tokens_between(sql.Where, tidx, end) + tidx, token = tlist.token_idx_next_by(m=sql.Where.M_OPEN, idx=tidx + 1) @recurse() @@ -226,9 +230,9 @@ def group_aliased(tlist): tidx, token = tlist.token_idx_next_by(i=I_ALIAS, t=T.Number) while token: - next_index_, next_ = tlist.token_idx_next(tidx) + nidx, next_ = tlist.token_idx_next(tidx) if imt(next_, i=sql.Identifier): - token = tlist.group_tokens_between(sql.Identifier, tidx, next_index_, extend=True) + tlist.group_tokens_between(sql.Identifier, tidx, nidx, extend=True) tidx, token = tlist.token_idx_next_by(i=I_ALIAS, t=T.Number, idx=tidx + 1) @@ -247,32 +251,35 @@ def group_functions(tlist): has_table = True if has_create and has_table: return - token = tlist.token_next_by(t=T.Name) + + tidx, token = tlist.token_idx_next_by(t=T.Name) while token: - next_ = tlist.token_next(tlist.token_index(token)) - if imt(next_, i=sql.Parenthesis): - token = tlist.group_tokens_between(sql.Function, token, next_) - token = tlist.token_next_by(t=T.Name, idx=tlist.token_index(token) + 1) + nidx, next_ = tlist.token_idx_next(tidx) + if isinstance(next_, sql.Parenthesis): + tlist.group_tokens_between(sql.Function, tidx, nidx) + tidx, token = tlist.token_idx_next_by(t=T.Name, idx=tidx + 1) def group_order(tlist): """Group together Identifier and Asc/Desc token""" - token = tlist.token_next_by(t=T.Keyword.Order) + tidx, token = tlist.token_idx_next_by(t=T.Keyword.Order) while token: - prev = tlist.token_prev(tlist.token_index(token)) + pidx, prev = tlist.token_idx_prev(tidx) if imt(prev, i=sql.Identifier, t=T.Number): - token = tlist.group_tokens_between(sql.Identifier, prev, token) - token = tlist.token_next_by(t=T.Keyword.Order, idx=tlist.token_index(token) + 1) + tlist.group_tokens_between(sql.Identifier, pidx, tidx) + tidx = pidx + tidx, token = tlist.token_idx_next_by(t=T.Keyword.Order, idx=tidx + 1) @recurse() def align_comments(tlist): - token = tlist.token_next_by(i=sql.Comment) + tidx, token = tlist.token_idx_next_by(i=sql.Comment) while token: - before = tlist.token_prev(tlist.token_index(token)) - if isinstance(before, sql.TokenList): - token = tlist.group_tokens_between(sql.TokenList, before, token, extend=True) - token = tlist.token_next_by(i=sql.Comment, idx=tlist.token_index(token) + 1) + pidx, prev = tlist.token_idx_prev(tidx) + if isinstance(prev, sql.TokenList): + tlist.group_tokens_between(sql.TokenList, pidx, tidx, extend=True) + tidx = pidx + tidx, token = tlist.token_idx_next_by(i=sql.Comment, idx=tidx + 1) def group(stmt): -- cgit v1.2.1 From 4f922d9b6fb68b8281c6b3d93a57a4c84860e06a Mon Sep 17 
00:00:00 2001 From: Victor Uriarte Date: Mon, 13 Jun 2016 22:01:53 -0700 Subject: Rename token_idx_ funcs to simply token_ funcs --- sqlparse/engine/grouping.py | 100 ++++++++++++++++++++++---------------------- 1 file changed, 50 insertions(+), 50 deletions(-) (limited to 'sqlparse/engine') diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index 88064cb..a229e3d 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -32,16 +32,16 @@ def _group_left_right(tlist, m, cls, continue tidx = tlist.token_index(token) - pidx, prev_ = tlist.token_idx_prev(tidx) - nidx, next_ = tlist.token_idx_next(tidx) + pidx, prev_ = tlist.token_prev(tidx) + nidx, next_ = tlist.token_next(tidx) if valid_left(prev_) and valid_right(next_): if semicolon: # only overwrite if a semicolon present. - snidx, _ = tlist.token_idx_next_by(m=M_SEMICOLON, idx=nidx) + snidx, _ = tlist.token_next_by(m=M_SEMICOLON, idx=nidx) nidx = snidx or nidx # Luckily, this leaves the position of `token` intact. - tlist.group_tokens_between(cls, pidx, nidx, extend=True) + tlist.group_tokens(cls, pidx, nidx, extend=True) def _group_matching(tlist, cls): @@ -64,7 +64,7 @@ def _group_matching(tlist, cls): # this indicates invalid sql and unbalanced tokens. # instead of break, continue in case other "valid" groups exist continue - tlist.group_tokens_between(cls, open_token, token) + tlist.group_tokens(cls, open_token, token) def group_if(tlist): @@ -115,10 +115,10 @@ def group_case(tlist): def group_identifier(tlist): T_IDENT = (T.String.Symbol, T.Name) - tidx, token = tlist.token_idx_next_by(t=T_IDENT) + tidx, token = tlist.token_next_by(t=T_IDENT) while token: - tlist.group_tokens_between(sql.Identifier, tidx, tidx) - tidx, token = tlist.token_idx_next_by(t=T_IDENT, idx=tidx + 1) + tlist.group_tokens(sql.Identifier, tidx, tidx) + tidx, token = tlist.token_next_by(t=T_IDENT, idx=tidx + 1) def group_period(tlist): @@ -133,14 +133,14 @@ def group_period(tlist): def group_arrays(tlist): - tidx, token = tlist.token_idx_next_by(i=sql.SquareBrackets) + tidx, token = tlist.token_next_by(i=sql.SquareBrackets) while token: - pidx, prev = tlist.token_idx_prev(tidx) - if imt(prev, i=(sql.SquareBrackets, sql.Identifier, sql.Function), + pidx, prev_ = tlist.token_prev(tidx) + if imt(prev_, i=(sql.SquareBrackets, sql.Identifier, sql.Function), t=(T.Name, T.String.Symbol,)): - tlist.group_tokens_between(sql.Identifier, pidx, tidx, extend=True) + tlist.group_tokens(sql.Identifier, pidx, tidx, extend=True) tidx = pidx - tidx, token = tlist.token_idx_next_by(i=sql.SquareBrackets, idx=tidx + 1) + tidx, token = tlist.token_next_by(i=sql.SquareBrackets, idx=tidx + 1) @recurse(sql.Identifier) @@ -151,18 +151,18 @@ def group_operator(tlist): T_CYCLE = T_NUMERICAL + T_STRING + T_NAME func = lambda tk: imt(tk, i=I_CYCLE, t=T_CYCLE) - tidx, token = tlist.token_idx_next_by(t=(T.Operator, T.Wildcard)) + tidx, token = tlist.token_next_by(t=(T.Operator, T.Wildcard)) while token: - pidx, prev_ = tlist.token_idx_prev(tidx) - nidx, next_ = tlist.token_idx_next(tidx) + pidx, prev_ = tlist.token_prev(tidx) + nidx, next_ = tlist.token_next(tidx) if func(prev_) and func(next_): token.ttype = T.Operator - tlist.group_tokens_between(sql.Operation, pidx, nidx) + tlist.group_tokens(sql.Operation, pidx, nidx) tidx = pidx - tidx, token = tlist.token_idx_next_by(t=(T.Operator, T.Wildcard), - idx=tidx + 1) + tidx, token = tlist.token_next_by(t=(T.Operator, T.Wildcard), + idx=tidx + 1) @recurse(sql.IdentifierList) @@ -174,15 +174,15 @@ def 
group_identifier_list(tlist): func = lambda t: imt(t, i=I_IDENT_LIST, m=M_ROLE, t=T_IDENT_LIST) - tidx, token = tlist.token_idx_next_by(m=M_COMMA) + tidx, token = tlist.token_next_by(m=M_COMMA) while token: - pidx, prev_ = tlist.token_idx_prev(tidx) - nidx, next_ = tlist.token_idx_next(tidx) + pidx, prev_ = tlist.token_prev(tidx) + nidx, next_ = tlist.token_next(tidx) if func(prev_) and func(next_): - tlist.group_tokens_between(sql.IdentifierList, pidx, nidx, extend=True) + tlist.group_tokens(sql.IdentifierList, pidx, nidx, extend=True) tidx = pidx - tidx, token = tlist.token_idx_next_by(m=M_COMMA, idx=tidx + 1) + tidx, token = tlist.token_next_by(m=M_COMMA, idx=tidx + 1) def group_brackets(tlist): @@ -195,23 +195,23 @@ def group_parenthesis(tlist): @recurse(sql.Comment) def group_comments(tlist): - tidx, token = tlist.token_idx_next_by(t=T.Comment) + tidx, token = tlist.token_next_by(t=T.Comment) while token: end = tlist.token_not_matching( lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace(), idx=tidx + 1) if end is not None: eidx = tlist.token_index(end) - eidx, end = tlist.token_idx_prev(eidx, skip_ws=False) - tlist.group_tokens_between(sql.Comment, tidx, eidx) + eidx, end = tlist.token_prev(eidx, skip_ws=False) + tlist.group_tokens(sql.Comment, tidx, eidx) - tidx, token = tlist.token_idx_next_by(t=T.Comment, idx=tidx + 1) + tidx, token = tlist.token_next_by(t=T.Comment, idx=tidx + 1) @recurse(sql.Where) def group_where(tlist): - tidx, token = tlist.token_idx_next_by(m=sql.Where.M_OPEN) + tidx, token = tlist.token_next_by(m=sql.Where.M_OPEN) while token: - eidx, end = tlist.token_idx_next_by(m=sql.Where.M_CLOSE, idx=tidx + 1) + eidx, end = tlist.token_next_by(m=sql.Where.M_CLOSE, idx=tidx + 1) if end is None: end = tlist._groupable_tokens[-1] @@ -219,8 +219,8 @@ def group_where(tlist): end = tlist.tokens[eidx - 1] # TODO: convert this to eidx instead of end token. 
# i think above values are len(tlist) and eidx-1 - tlist.group_tokens_between(sql.Where, tidx, end) - tidx, token = tlist.token_idx_next_by(m=sql.Where.M_OPEN, idx=tidx + 1) + tlist.group_tokens(sql.Where, tidx, end) + tidx, token = tlist.token_next_by(m=sql.Where.M_OPEN, idx=tidx + 1) @recurse() @@ -228,12 +228,12 @@ def group_aliased(tlist): I_ALIAS = (sql.Parenthesis, sql.Function, sql.Case, sql.Identifier, sql.Operation) - tidx, token = tlist.token_idx_next_by(i=I_ALIAS, t=T.Number) + tidx, token = tlist.token_next_by(i=I_ALIAS, t=T.Number) while token: - nidx, next_ = tlist.token_idx_next(tidx) + nidx, next_ = tlist.token_next(tidx) if imt(next_, i=sql.Identifier): - tlist.group_tokens_between(sql.Identifier, tidx, nidx, extend=True) - tidx, token = tlist.token_idx_next_by(i=I_ALIAS, t=T.Number, idx=tidx + 1) + tlist.group_tokens(sql.Identifier, tidx, nidx, extend=True) + tidx, token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=tidx + 1) def group_typecasts(tlist): @@ -252,34 +252,34 @@ def group_functions(tlist): if has_create and has_table: return - tidx, token = tlist.token_idx_next_by(t=T.Name) + tidx, token = tlist.token_next_by(t=T.Name) while token: - nidx, next_ = tlist.token_idx_next(tidx) + nidx, next_ = tlist.token_next(tidx) if isinstance(next_, sql.Parenthesis): - tlist.group_tokens_between(sql.Function, tidx, nidx) - tidx, token = tlist.token_idx_next_by(t=T.Name, idx=tidx + 1) + tlist.group_tokens(sql.Function, tidx, nidx) + tidx, token = tlist.token_next_by(t=T.Name, idx=tidx + 1) def group_order(tlist): """Group together Identifier and Asc/Desc token""" - tidx, token = tlist.token_idx_next_by(t=T.Keyword.Order) + tidx, token = tlist.token_next_by(t=T.Keyword.Order) while token: - pidx, prev = tlist.token_idx_prev(tidx) - if imt(prev, i=sql.Identifier, t=T.Number): - tlist.group_tokens_between(sql.Identifier, pidx, tidx) + pidx, prev_ = tlist.token_prev(tidx) + if imt(prev_, i=sql.Identifier, t=T.Number): + tlist.group_tokens(sql.Identifier, pidx, tidx) tidx = pidx - tidx, token = tlist.token_idx_next_by(t=T.Keyword.Order, idx=tidx + 1) + tidx, token = tlist.token_next_by(t=T.Keyword.Order, idx=tidx + 1) @recurse() def align_comments(tlist): - tidx, token = tlist.token_idx_next_by(i=sql.Comment) + tidx, token = tlist.token_next_by(i=sql.Comment) while token: - pidx, prev = tlist.token_idx_prev(tidx) - if isinstance(prev, sql.TokenList): - tlist.group_tokens_between(sql.TokenList, pidx, tidx, extend=True) + pidx, prev_ = tlist.token_prev(tidx) + if isinstance(prev_, sql.TokenList): + tlist.group_tokens(sql.TokenList, pidx, tidx, extend=True) tidx = pidx - tidx, token = tlist.token_idx_next_by(i=sql.Comment, idx=tidx + 1) + tidx, token = tlist.token_next_by(i=sql.Comment, idx=tidx + 1) def group(stmt): -- cgit v1.2.1 From 5002bfa36c4fa2ee72eff18648b6ddc616b718f0 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Mon, 13 Jun 2016 22:20:29 -0700 Subject: Normalize behavior between token_next and token_next_by both will now return the "next" token and not itself when passing own index --- sqlparse/engine/grouping.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) (limited to 'sqlparse/engine') diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index a229e3d..e7072d0 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -118,7 +118,7 @@ def group_identifier(tlist): tidx, token = tlist.token_next_by(t=T_IDENT) while token: tlist.group_tokens(sql.Identifier, tidx, tidx) - tidx, token = 
tlist.token_next_by(t=T_IDENT, idx=tidx + 1) + tidx, token = tlist.token_next_by(t=T_IDENT, idx=tidx) def group_period(tlist): @@ -140,7 +140,7 @@ def group_arrays(tlist): t=(T.Name, T.String.Symbol,)): tlist.group_tokens(sql.Identifier, pidx, tidx, extend=True) tidx = pidx - tidx, token = tlist.token_next_by(i=sql.SquareBrackets, idx=tidx + 1) + tidx, token = tlist.token_next_by(i=sql.SquareBrackets, idx=tidx) @recurse(sql.Identifier) @@ -161,8 +161,7 @@ def group_operator(tlist): tlist.group_tokens(sql.Operation, pidx, nidx) tidx = pidx - tidx, token = tlist.token_next_by(t=(T.Operator, T.Wildcard), - idx=tidx + 1) + tidx, token = tlist.token_next_by(t=(T.Operator, T.Wildcard), idx=tidx) @recurse(sql.IdentifierList) @@ -182,7 +181,7 @@ def group_identifier_list(tlist): if func(prev_) and func(next_): tlist.group_tokens(sql.IdentifierList, pidx, nidx, extend=True) tidx = pidx - tidx, token = tlist.token_next_by(m=M_COMMA, idx=tidx + 1) + tidx, token = tlist.token_next_by(m=M_COMMA, idx=tidx) def group_brackets(tlist): @@ -198,20 +197,20 @@ def group_comments(tlist): tidx, token = tlist.token_next_by(t=T.Comment) while token: end = tlist.token_not_matching( - lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace(), idx=tidx + 1) + lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace(), idx=tidx) if end is not None: eidx = tlist.token_index(end) eidx, end = tlist.token_prev(eidx, skip_ws=False) tlist.group_tokens(sql.Comment, tidx, eidx) - tidx, token = tlist.token_next_by(t=T.Comment, idx=tidx + 1) + tidx, token = tlist.token_next_by(t=T.Comment, idx=tidx) @recurse(sql.Where) def group_where(tlist): tidx, token = tlist.token_next_by(m=sql.Where.M_OPEN) while token: - eidx, end = tlist.token_next_by(m=sql.Where.M_CLOSE, idx=tidx + 1) + eidx, end = tlist.token_next_by(m=sql.Where.M_CLOSE, idx=tidx) if end is None: end = tlist._groupable_tokens[-1] @@ -220,7 +219,7 @@ def group_where(tlist): # TODO: convert this to eidx instead of end token. 
# i think above values are len(tlist) and eidx-1 tlist.group_tokens(sql.Where, tidx, end) - tidx, token = tlist.token_next_by(m=sql.Where.M_OPEN, idx=tidx + 1) + tidx, token = tlist.token_next_by(m=sql.Where.M_OPEN, idx=tidx) @recurse() @@ -233,7 +232,7 @@ def group_aliased(tlist): nidx, next_ = tlist.token_next(tidx) if imt(next_, i=sql.Identifier): tlist.group_tokens(sql.Identifier, tidx, nidx, extend=True) - tidx, token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=tidx + 1) + tidx, token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=tidx) def group_typecasts(tlist): @@ -257,7 +256,7 @@ def group_functions(tlist): nidx, next_ = tlist.token_next(tidx) if isinstance(next_, sql.Parenthesis): tlist.group_tokens(sql.Function, tidx, nidx) - tidx, token = tlist.token_next_by(t=T.Name, idx=tidx + 1) + tidx, token = tlist.token_next_by(t=T.Name, idx=tidx) def group_order(tlist): @@ -268,7 +267,7 @@ def group_order(tlist): if imt(prev_, i=sql.Identifier, t=T.Number): tlist.group_tokens(sql.Identifier, pidx, tidx) tidx = pidx - tidx, token = tlist.token_next_by(t=T.Keyword.Order, idx=tidx + 1) + tidx, token = tlist.token_next_by(t=T.Keyword.Order, idx=tidx) @recurse() @@ -279,7 +278,7 @@ def align_comments(tlist): if isinstance(prev_, sql.TokenList): tlist.group_tokens(sql.TokenList, pidx, tidx, extend=True) tidx = pidx - tidx, token = tlist.token_next_by(i=sql.Comment, idx=tidx + 1) + tidx, token = tlist.token_next_by(i=sql.Comment, idx=tidx) def group(stmt): -- cgit v1.2.1 From 56b28dc15023d36bab8764bea6df75e28651646e Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Tue, 14 Jun 2016 04:15:27 -0700 Subject: Make use of token_index more obvious --- sqlparse/engine/grouping.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'sqlparse/engine') diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index e7072d0..c52a759 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -64,7 +64,9 @@ def _group_matching(tlist, cls): # this indicates invalid sql and unbalanced tokens. # instead of break, continue in case other "valid" groups exist continue - tlist.group_tokens(cls, open_token, token) + oidx = tlist.token_index(open_token) + cidx = tlist.token_index(token) + tlist.group_tokens(cls, oidx, cidx) def group_if(tlist): @@ -196,10 +198,9 @@ def group_parenthesis(tlist): def group_comments(tlist): tidx, token = tlist.token_next_by(t=T.Comment) while token: - end = tlist.token_not_matching( + eidx, end = tlist.token_not_matching( lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace(), idx=tidx) if end is not None: - eidx = tlist.token_index(end) eidx, end = tlist.token_prev(eidx, skip_ws=False) tlist.group_tokens(sql.Comment, tidx, eidx) @@ -218,7 +219,8 @@ def group_where(tlist): end = tlist.tokens[eidx - 1] # TODO: convert this to eidx instead of end token. 
# i think above values are len(tlist) and eidx-1 - tlist.group_tokens(sql.Where, tidx, end) + eidx = tlist.token_index(end) + tlist.group_tokens(sql.Where, tidx, eidx) tidx, token = tlist.token_next_by(m=sql.Where.M_OPEN, idx=tidx) -- cgit v1.2.1 From af9b82e0b2d00732704fedf7d7b03dcb598dca84 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Tue, 14 Jun 2016 06:26:41 -0700 Subject: Reorder grouping code and func call order Remove repeated for-each/for grouping --- sqlparse/engine/grouping.py | 133 ++++++++++++++++++++++---------------------- 1 file changed, 66 insertions(+), 67 deletions(-) (limited to 'sqlparse/engine') diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index c52a759..7879f76 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -9,41 +9,11 @@ from sqlparse import sql from sqlparse import tokens as T from sqlparse.utils import recurse, imt -M_ROLE = (T.Keyword, ('null', 'role')) -M_SEMICOLON = (T.Punctuation, ';') -M_COMMA = (T.Punctuation, ',') - T_NUMERICAL = (T.Number, T.Number.Integer, T.Number.Float) T_STRING = (T.String, T.String.Single, T.String.Symbol) T_NAME = (T.Name, T.Name.Placeholder) -def _group_left_right(tlist, m, cls, - valid_left=lambda t: t is not None, - valid_right=lambda t: t is not None, - semicolon=False): - """Groups together tokens that are joined by a middle token. ie. x < y""" - for token in list(tlist): - if token.is_group() and not isinstance(token, cls): - _group_left_right(token, m, cls, valid_left, valid_right, - semicolon) - continue - if not token.match(*m): - continue - - tidx = tlist.token_index(token) - pidx, prev_ = tlist.token_prev(tidx) - nidx, next_ = tlist.token_next(tidx) - - if valid_left(prev_) and valid_right(next_): - if semicolon: - # only overwrite if a semicolon present. - snidx, _ = tlist.token_next_by(m=M_SEMICOLON, idx=nidx) - nidx = snidx or nidx - # Luckily, this leaves the position of `token` intact. - tlist.group_tokens(cls, pidx, nidx, extend=True) - - def _group_matching(tlist, cls): """Groups Tokens that have beginning and end.""" opens = [] @@ -69,6 +39,18 @@ def _group_matching(tlist, cls): tlist.group_tokens(cls, oidx, cidx) +def group_brackets(tlist): + _group_matching(tlist, sql.SquareBrackets) + + +def group_parenthesis(tlist): + _group_matching(tlist, sql.Parenthesis) + + +def group_case(tlist): + _group_matching(tlist, sql.Case) + + def group_if(tlist): _group_matching(tlist, sql.If) @@ -77,16 +59,54 @@ def group_for(tlist): _group_matching(tlist, sql.For) -def group_foreach(tlist): - _group_matching(tlist, sql.For) - - def group_begin(tlist): _group_matching(tlist, sql.Begin) +def _group_left_right(tlist, m, cls, + valid_left=lambda t: t is not None, + valid_right=lambda t: t is not None, + semicolon=False): + """Groups together tokens that are joined by a middle token. ie. x < y""" + for token in list(tlist): + if token.is_group() and not isinstance(token, cls): + _group_left_right(token, m, cls, valid_left, valid_right, + semicolon) + continue + if not token.match(*m): + continue + + tidx = tlist.token_index(token) + pidx, prev_ = tlist.token_prev(tidx) + nidx, next_ = tlist.token_next(tidx) + + if valid_left(prev_) and valid_right(next_): + if semicolon: + # only overwrite if a semicolon present. + m_semicolon = T.Punctuation, ';' + snidx, _ = tlist.token_next_by(m=m_semicolon, idx=nidx) + nidx = snidx or nidx + # Luckily, this leaves the position of `token` intact. 
+ tlist.group_tokens(cls, pidx, nidx, extend=True) + + +def group_typecasts(tlist): + _group_left_right(tlist, (T.Punctuation, '::'), sql.Identifier) + + +def group_period(tlist): + lfunc = lambda tk: imt(tk, i=(sql.SquareBrackets, sql.Identifier), + t=(T.Name, T.String.Symbol,)) + + rfunc = lambda tk: imt(tk, i=(sql.SquareBrackets, sql.Function), + t=(T.Name, T.String.Symbol, T.Wildcard)) + + _group_left_right(tlist, (T.Punctuation, '.'), sql.Identifier, + valid_left=lfunc, valid_right=rfunc) + + def group_as(tlist): - lfunc = lambda tk: not imt(tk, t=T.Keyword) or tk.value == 'NULL' + lfunc = lambda tk: not imt(tk, t=T.Keyword) or tk.normalized == 'NULL' rfunc = lambda tk: not imt(tk, t=(T.DML, T.DDL)) _group_left_right(tlist, (T.Keyword, 'AS'), sql.Identifier, valid_left=lfunc, valid_right=rfunc) @@ -109,10 +129,6 @@ def group_comparison(tlist): valid_left=func, valid_right=func) -def group_case(tlist): - _group_matching(tlist, sql.Case) - - @recurse(sql.Identifier) def group_identifier(tlist): T_IDENT = (T.String.Symbol, T.Name) @@ -123,17 +139,6 @@ def group_identifier(tlist): tidx, token = tlist.token_next_by(t=T_IDENT, idx=tidx) -def group_period(tlist): - lfunc = lambda tk: imt(tk, i=(sql.SquareBrackets, sql.Identifier), - t=(T.Name, T.String.Symbol,)) - - rfunc = lambda tk: imt(tk, i=(sql.SquareBrackets, sql.Function), - t=(T.Name, T.String.Symbol, T.Wildcard)) - - _group_left_right(tlist, (T.Punctuation, '.'), sql.Identifier, - valid_left=lfunc, valid_right=rfunc) - - def group_arrays(tlist): tidx, token = tlist.token_next_by(i=sql.SquareBrackets) while token: @@ -168,6 +173,9 @@ def group_operator(tlist): @recurse(sql.IdentifierList) def group_identifier_list(tlist): + M_ROLE = T.Keyword, ('null', 'role') + M_COMMA = T.Punctuation, ',' + I_IDENT_LIST = (sql.Function, sql.Case, sql.Identifier, sql.Comparison, sql.IdentifierList, sql.Operation) T_IDENT_LIST = (T_NUMERICAL + T_STRING + T_NAME + @@ -186,14 +194,6 @@ def group_identifier_list(tlist): tidx, token = tlist.token_next_by(m=M_COMMA, idx=tidx) -def group_brackets(tlist): - _group_matching(tlist, sql.SquareBrackets) - - -def group_parenthesis(tlist): - _group_matching(tlist, sql.Parenthesis) - - @recurse(sql.Comment) def group_comments(tlist): tidx, token = tlist.token_next_by(t=T.Comment) @@ -237,10 +237,6 @@ def group_aliased(tlist): tidx, token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=tidx) -def group_typecasts(tlist): - _group_left_right(tlist, (T.Punctuation, '::'), sql.Identifier) - - @recurse(sql.Function) def group_functions(tlist): has_create = False @@ -286,11 +282,17 @@ def align_comments(tlist): def group(stmt): for func in [ group_comments, + + # _group_matching group_brackets, group_parenthesis, + group_case, + group_if, + group_for, + group_begin, + group_functions, group_where, - group_case, group_period, group_arrays, group_identifier, @@ -301,12 +303,9 @@ def group(stmt): group_aliased, group_assignment, group_comparison, + align_comments, group_identifier_list, - group_if, - group_for, - group_foreach, - group_begin, ]: func(stmt) return stmt -- cgit v1.2.1 From 74b3464d781cbad4c39cd082daa80334aa7aed78 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Tue, 14 Jun 2016 21:20:31 -0700 Subject: Re-Write grouping functions --- sqlparse/engine/grouping.py | 76 ++++++++++++++++++++++++++++----------------- 1 file changed, 47 insertions(+), 29 deletions(-) (limited to 'sqlparse/engine') diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index 7879f76..ae214c2 100644 --- 
a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -152,46 +152,42 @@ def group_arrays(tlist): @recurse(sql.Identifier) def group_operator(tlist): - I_CYCLE = (sql.SquareBrackets, sql.Parenthesis, sql.Function, + ttypes = T_NUMERICAL + T_STRING + T_NAME + clss = (sql.SquareBrackets, sql.Parenthesis, sql.Function, sql.Identifier, sql.Operation) - # wilcards wouldn't have operations next to them - T_CYCLE = T_NUMERICAL + T_STRING + T_NAME - func = lambda tk: imt(tk, i=I_CYCLE, t=T_CYCLE) - tidx, token = tlist.token_next_by(t=(T.Operator, T.Wildcard)) - while token: - pidx, prev_ = tlist.token_prev(tidx) - nidx, next_ = tlist.token_next(tidx) + def match(token): + return imt(token, t=(T.Operator, T.Wildcard)) - if func(prev_) and func(next_): - token.ttype = T.Operator - tlist.group_tokens(sql.Operation, pidx, nidx) - tidx = pidx + def valid(token): + return imt(token, i=clss, t=ttypes) + + def post(tlist, pidx, tidx, nidx): + tlist[tidx].ttype = T.Operator + return pidx, nidx - tidx, token = tlist.token_next_by(t=(T.Operator, T.Wildcard), idx=tidx) + _group(tlist, sql.Operation, match, valid, valid, post, extend=False) -@recurse(sql.IdentifierList) def group_identifier_list(tlist): - M_ROLE = T.Keyword, ('null', 'role') - M_COMMA = T.Punctuation, ',' + m_role = T.Keyword, ('null', 'role') + m_comma = T.Punctuation, ',' + clss = (sql.Function, sql.Case, sql.Identifier, sql.Comparison, + sql.IdentifierList, sql.Operation) + ttypes = (T_NUMERICAL + T_STRING + T_NAME + + (T.Keyword, T.Comment, T.Wildcard)) - I_IDENT_LIST = (sql.Function, sql.Case, sql.Identifier, sql.Comparison, - sql.IdentifierList, sql.Operation) - T_IDENT_LIST = (T_NUMERICAL + T_STRING + T_NAME + - (T.Keyword, T.Comment, T.Wildcard)) + def match(token): + return imt(token, m=m_comma) - func = lambda t: imt(t, i=I_IDENT_LIST, m=M_ROLE, t=T_IDENT_LIST) + def func(token): + return imt(token, i=clss, m=m_role, t=ttypes) - tidx, token = tlist.token_next_by(m=M_COMMA) - while token: - pidx, prev_ = tlist.token_prev(tidx) - nidx, next_ = tlist.token_next(tidx) + def post(tlist, pidx, tidx, nidx): + return pidx, nidx - if func(prev_) and func(next_): - tlist.group_tokens(sql.IdentifierList, pidx, nidx, extend=True) - tidx = pidx - tidx, token = tlist.token_next_by(m=M_COMMA, idx=tidx) + _group(tlist, sql.IdentifierList, match, + valid_left=func, valid_right=func, post=post, extend=True) @recurse(sql.Comment) @@ -309,3 +305,25 @@ def group(stmt): ]: func(stmt) return stmt + + +def _group(tlist, cls, match, + valid_left=lambda t: True, + valid_right=lambda t: True, + post=None, + extend=True): + """Groups together tokens that are joined by a middle token. ie. 
x < y""" + for token in list(tlist): + if token.is_group() and not isinstance(token, cls): + _group(token, cls, match, valid_left, valid_right, post, extend) + continue + if not match(token): + continue + + tidx = tlist.token_index(token) + pidx, prev_ = tlist.token_prev(tidx) + nidx, next_ = tlist.token_next(tidx) + + if valid_left(prev_) and valid_right(next_): + from_idx, to_idx = post(tlist, pidx, tidx, nidx) + tlist.group_tokens(cls, from_idx, to_idx, extend=extend) -- cgit v1.2.1 From 0acaa4c57d7169f3903af3c3df4faf95d2cbea84 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Wed, 15 Jun 2016 02:39:54 -0700 Subject: Reduce calls by _group to get tk idx --- sqlparse/engine/grouping.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'sqlparse/engine') diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index ae214c2..a74f6f8 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -313,17 +313,28 @@ def _group(tlist, cls, match, post=None, extend=True): """Groups together tokens that are joined by a middle token. ie. x < y""" - for token in list(tlist): + + tidx_offset = 0 + pidx, prev_ = None, None + for idx, token in enumerate(list(tlist)): + tidx = idx - tidx_offset + + if token.is_whitespace(): + continue if token.is_group() and not isinstance(token, cls): _group(token, cls, match, valid_left, valid_right, post, extend) + pidx, prev_ = tidx, token continue if not match(token): + pidx, prev_ = tidx, token continue - tidx = tlist.token_index(token) - pidx, prev_ = tlist.token_prev(tidx) nidx, next_ = tlist.token_next(tidx) if valid_left(prev_) and valid_right(next_): from_idx, to_idx = post(tlist, pidx, tidx, nidx) - tlist.group_tokens(cls, from_idx, to_idx, extend=extend) + grp = tlist.group_tokens(cls, from_idx, to_idx, extend=extend) + tidx_offset += to_idx - from_idx + pidx, prev_ = from_idx, grp + else: + pidx, prev_ = tidx, token -- cgit v1.2.1 From 49979e9ca1159190320e2faad989c8bd267c8000 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Wed, 15 Jun 2016 12:36:45 -0700 Subject: Refactor _group's prev token logic --- sqlparse/engine/grouping.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'sqlparse/engine') diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index a74f6f8..1be4f53 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -321,20 +321,18 @@ def _group(tlist, cls, match, if token.is_whitespace(): continue + if token.is_group() and not isinstance(token, cls): _group(token, cls, match, valid_left, valid_right, post, extend) - pidx, prev_ = tidx, token - continue - if not match(token): - pidx, prev_ = tidx, token - continue - nidx, next_ = tlist.token_next(tidx) + if match(token): + nidx, next_ = tlist.token_next(tidx) + if valid_left(prev_) and valid_right(next_): + from_idx, to_idx = post(tlist, pidx, tidx, nidx) + grp = tlist.group_tokens(cls, from_idx, to_idx, extend=extend) - if valid_left(prev_) and valid_right(next_): - from_idx, to_idx = post(tlist, pidx, tidx, nidx) - grp = tlist.group_tokens(cls, from_idx, to_idx, extend=extend) - tidx_offset += to_idx - from_idx - pidx, prev_ = from_idx, grp - else: - pidx, prev_ = tidx, token + tidx_offset += to_idx - from_idx + pidx, prev_ = from_idx, grp + continue + + pidx, prev_ = tidx, token -- cgit v1.2.1 From a653650432b76447255e69cd93ba2d2e2c34d037 Mon Sep 17 00:00:00 2001 From: Victor Uriarte Date: Wed, 15 Jun 2016 03:04:41 -0700 Subject: remove 
From a653650432b76447255e69cd93ba2d2e2c34d037 Mon Sep 17 00:00:00 2001
From: Victor Uriarte
Date: Wed, 15 Jun 2016 03:04:41 -0700
Subject: remove extra recurse and rename vars

# Conflicts:
#	sqlparse/engine/grouping.py
---
 sqlparse/engine/grouping.py | 73 ++++++++++++++++++++++++++++-----------------
 1 file changed, 43 insertions(+), 30 deletions(-)

(limited to 'sqlparse/engine')

diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index 1be4f53..b0b8836 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -118,11 +118,11 @@ def group_assignment(tlist):


 def group_comparison(tlist):
-    I_COMPERABLE = (sql.Parenthesis, sql.Function, sql.Identifier,
+    sqlcls = (sql.Parenthesis, sql.Function, sql.Identifier,
                     sql.Operation)
-    T_COMPERABLE = T_NUMERICAL + T_STRING + T_NAME
+    ttypes = T_NUMERICAL + T_STRING + T_NAME

-    func = lambda tk: (imt(tk, t=T_COMPERABLE, i=I_COMPERABLE) or
+    func = lambda tk: (imt(tk, t=ttypes, i=sqlcls) or
                        (tk and tk.is_keyword and tk.normalized == 'NULL'))

     _group_left_right(tlist, (T.Operator.Comparison, None), sql.Comparison,
@@ -131,63 +131,74 @@ def group_comparison(tlist):

 @recurse(sql.Identifier)
 def group_identifier(tlist):
-    T_IDENT = (T.String.Symbol, T.Name)
+    ttypes = (T.String.Symbol, T.Name)

-    tidx, token = tlist.token_next_by(t=T_IDENT)
+    tidx, token = tlist.token_next_by(t=ttypes)
     while token:
         tlist.group_tokens(sql.Identifier, tidx, tidx)
-        tidx, token = tlist.token_next_by(t=T_IDENT, idx=tidx)
+        tidx, token = tlist.token_next_by(t=ttypes, idx=tidx)


 def group_arrays(tlist):
-    tidx, token = tlist.token_next_by(i=sql.SquareBrackets)
-    while token:
-        pidx, prev_ = tlist.token_prev(tidx)
-        if imt(prev_, i=(sql.SquareBrackets, sql.Identifier, sql.Function),
-               t=(T.Name, T.String.Symbol,)):
-            tlist.group_tokens(sql.Identifier, pidx, tidx, extend=True)
-            tidx = pidx
-        tidx, token = tlist.token_next_by(i=sql.SquareBrackets, idx=tidx)
+    sqlcls = sql.SquareBrackets, sql.Identifier, sql.Function
+    ttypes = T.Name, T.String.Symbol
+
+    def match(token):
+        return isinstance(token, sql.SquareBrackets)
+
+    def valid_prev(token):
+        return imt(token, i=sqlcls, t=ttypes)
+
+    def valid_next(token):
+        return True
+
+    def post(tlist, pidx, tidx, nidx):
+        return pidx, tidx
+
+    _group(tlist, sql.Identifier, match,
+           valid_prev, valid_next, post, extend=True, recurse=False)


-@recurse(sql.Identifier)
 def group_operator(tlist):
     ttypes = T_NUMERICAL + T_STRING + T_NAME
-    clss = (sql.SquareBrackets, sql.Parenthesis, sql.Function,
-            sql.Identifier, sql.Operation)
+    sqlcls = (sql.SquareBrackets, sql.Parenthesis, sql.Function,
+              sql.Identifier, sql.Operation)

     def match(token):
         return imt(token, t=(T.Operator, T.Wildcard))

     def valid(token):
-        return imt(token, i=clss, t=ttypes)
+        return imt(token, i=sqlcls, t=ttypes)

     def post(tlist, pidx, tidx, nidx):
         tlist[tidx].ttype = T.Operator
         return pidx, nidx

-    _group(tlist, sql.Operation, match, valid, valid, post, extend=False)
+    valid_prev = valid_next = valid
+    _group(tlist, sql.Operation, match,
+           valid_prev, valid_next, post, extend=False)


 def group_identifier_list(tlist):
     m_role = T.Keyword, ('null', 'role')
     m_comma = T.Punctuation, ','
-    clss = (sql.Function, sql.Case, sql.Identifier, sql.Comparison,
-            sql.IdentifierList, sql.Operation)
+    sqlcls = (sql.Function, sql.Case, sql.Identifier, sql.Comparison,
+              sql.IdentifierList, sql.Operation)
     ttypes = (T_NUMERICAL + T_STRING + T_NAME +
               (T.Keyword, T.Comment, T.Wildcard))

     def match(token):
         return imt(token, m=m_comma)

-    def func(token):
-        return imt(token, i=clss, m=m_role, t=ttypes)
+    def valid(token):
+        return imt(token, i=sqlcls, m=m_role, t=ttypes)

     def post(tlist, pidx, tidx, nidx):
         return pidx, nidx

+    valid_prev = valid_next = valid
     _group(tlist, sql.IdentifierList, match,
-           valid_left=func, valid_right=func, post=post, extend=True)
+           valid_prev, valid_next, post, extend=True)


 @recurse(sql.Comment)
@@ -308,10 +319,12 @@ def group(stmt):


 def _group(tlist, cls, match,
-           valid_left=lambda t: True,
-           valid_right=lambda t: True,
+           valid_prev=lambda t: True,
+           valid_next=lambda t: True,
            post=None,
-           extend=True):
+           extend=True,
+           recurse=True
+           ):
     """Groups together tokens that are joined by a middle token. ie. x < y"""

     tidx_offset = 0
@@ -322,12 +335,12 @@ def _group(tlist, cls, match,
         if token.is_whitespace():
             continue

-        if token.is_group() and not isinstance(token, cls):
-            _group(token, cls, match, valid_left, valid_right, post, extend)
+        if recurse and token.is_group() and not isinstance(token, cls):
+            _group(token, cls, match, valid_prev, valid_next, post, extend)

         if match(token):
             nidx, next_ = tlist.token_next(tidx)
-            if valid_left(prev_) and valid_right(next_):
+            if valid_prev(prev_) and valid_next(next_):
                 from_idx, to_idx = post(tlist, pidx, tidx, nidx)
                 grp = tlist.group_tokens(cls, from_idx, to_idx, extend=extend)
--
cgit v1.2.1
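A toy model of the new recurse switch, assuming nothing from sqlparse (plain nested lists stand in for grouped tokens): a rule that only inspects siblings, like group_arrays, can skip the descent entirely.

    def walk(tlist, visit, recurse=True):
        # Toy model of _group's traversal: visit every child, and only
        # descend into sub-lists when the rule actually needs it.
        for token in tlist:
            if recurse and isinstance(token, list):
                walk(token, visit, recurse)
            visit(token)

    seen = []
    walk(['a', ['b', 'c'], 'd'], seen.append, recurse=False)
    assert seen == ['a', ['b', 'c'], 'd']    # nested tokens left untouched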
From d6763dc7592f6e60c3e2e712b39a1b865fc8485e Mon Sep 17 00:00:00 2001
From: Victor Uriarte
Date: Wed, 15 Jun 2016 03:50:36 -0700
Subject: Change grouping from _left_right to _group

---
 sqlparse/engine/grouping.py | 89 ++++++++++++++++++++++++++++++++++++---------
 1 file changed, 71 insertions(+), 18 deletions(-)

(limited to 'sqlparse/engine')

diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index b0b8836..2148b50 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -91,42 +91,95 @@


 def group_typecasts(tlist):
-    _group_left_right(tlist, (T.Punctuation, '::'), sql.Identifier)
+    def match(token):
+        return token.match(T.Punctuation, '::')
+
+    def valid(token):
+        return token is not None
+
+    def post(tlist, pidx, tidx, nidx):
+        return pidx, nidx
+
+    valid_prev = valid_next = valid
+    _group(tlist, sql.Identifier, match, valid_prev, valid_next, post)


 def group_period(tlist):
-    lfunc = lambda tk: imt(tk, i=(sql.SquareBrackets, sql.Identifier),
-                           t=(T.Name, T.String.Symbol,))
+    def match(token):
+        return token.match(T.Punctuation, '.')
+
+    def valid_prev(token):
+        sqlcls = sql.SquareBrackets, sql.Identifier
+        ttypes = T.Name, T.String.Symbol
+        return imt(token, i=sqlcls, t=ttypes)
+
+    def valid_next(token):
+        sqlcls = sql.SquareBrackets, sql.Function
+        ttypes = T.Name, T.String.Symbol, T.Wildcard
+        return imt(token, i=sqlcls, t=ttypes)

-    rfunc = lambda tk: imt(tk, i=(sql.SquareBrackets, sql.Function),
-                           t=(T.Name, T.String.Symbol, T.Wildcard))
+    def post(tlist, pidx, tidx, nidx):
+        return pidx, nidx

-    _group_left_right(tlist, (T.Punctuation, '.'), sql.Identifier,
-                      valid_left=lfunc, valid_right=rfunc)
+    _group(tlist, sql.Identifier, match, valid_prev, valid_next, post)


 def group_as(tlist):
-    lfunc = lambda tk: not imt(tk, t=T.Keyword) or tk.normalized == 'NULL'
-    rfunc = lambda tk: not imt(tk, t=(T.DML, T.DDL))
-    _group_left_right(tlist, (T.Keyword, 'AS'), sql.Identifier,
-                      valid_left=lfunc, valid_right=rfunc)
+    def match(token):
+        return token.is_keyword and token.normalized == 'AS'
+
+    def valid_prev(token):
+        return token.normalized == 'NULL' or not token.is_keyword
+
+    def valid_next(token):
+        ttypes = T.DML, T.DDL
+        return not imt(token, t=ttypes)
+
+    def post(tlist, pidx, tidx, nidx):
+        return pidx, nidx
+
+    _group(tlist, sql.Identifier, match, valid_prev, valid_next, post)


 def group_assignment(tlist):
-    _group_left_right(tlist, (T.Assignment, ':='), sql.Assignment,
-                      semicolon=True)
+    def match(token):
+        return token.match(T.Assignment, ':=')
+
+    def valid(token):
+        return token is not None
+
+    def post(tlist, pidx, tidx, nidx):
+        m_semicolon = T.Punctuation, ';'
+        snidx, _ = tlist.token_next_by(m=m_semicolon, idx=nidx)
+        nidx = snidx or nidx
+        return pidx, nidx
+
+    valid_prev = valid_next = valid
+    _group(tlist, sql.Assignment, match, valid_prev, valid_next, post)


 def group_comparison(tlist):
     sqlcls = (sql.Parenthesis, sql.Function, sql.Identifier,
-                  sql.Operation)
+              sql.Operation)
     ttypes = T_NUMERICAL + T_STRING + T_NAME

-    func = lambda tk: (imt(tk, t=ttypes, i=sqlcls) or
-                       (tk and tk.is_keyword and tk.normalized == 'NULL'))
+    def match(token):
+        return token.ttype == T.Operator.Comparison
+
+    def valid(token):
+        if imt(token, t=ttypes, i=sqlcls):
+            return True
+        elif token and token.is_keyword and token.normalized == 'NULL':
+            return True
+        else:
+            return False
+
+    def post(tlist, pidx, tidx, nidx):
+        return pidx, nidx

-    _group_left_right(tlist, (T.Operator.Comparison, None), sql.Comparison,
-                      valid_left=func, valid_right=func)
+    valid_prev = valid_next = valid
+    _group(tlist, sql.Comparison, match,
+           valid_prev, valid_next, post, extend=False)


 @recurse(sql.Identifier)
--
cgit v1.2.1
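If the rewrite above preserves behaviour, the typecast rule should still fire through the public API; a quick probe (the exact printed shape may vary across sqlparse versions):

    import sqlparse

    # 'amount::integer' should come back as a single Identifier group
    # once group_typecasts has run as part of the default pipeline.
    stmt = sqlparse.parse('SELECT amount::integer FROM data')[0]
    print([(type(tok).__name__, str(tok)) for tok in stmt.tokens])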
From 228059eb4da5ed2389fc7e987dba37c6d05ea3ea Mon Sep 17 00:00:00 2001
From: Victor Uriarte
Date: Wed, 15 Jun 2016 13:59:29 -0700
Subject: remove group left_right

---
 sqlparse/engine/grouping.py | 27 ---------------------------
 1 file changed, 27 deletions(-)

(limited to 'sqlparse/engine')

diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index 2148b50..b2b46bb 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -63,33 +63,6 @@ def group_begin(tlist):
     _group_matching(tlist, sql.Begin)


-def _group_left_right(tlist, m, cls,
-                      valid_left=lambda t: t is not None,
-                      valid_right=lambda t: t is not None,
-                      semicolon=False):
-    """Groups together tokens that are joined by a middle token. ie. x < y"""
-    for token in list(tlist):
-        if token.is_group() and not isinstance(token, cls):
-            _group_left_right(token, m, cls, valid_left, valid_right,
-                              semicolon)
-            continue
-        if not token.match(*m):
-            continue
-
-        tidx = tlist.token_index(token)
-        pidx, prev_ = tlist.token_prev(tidx)
-        nidx, next_ = tlist.token_next(tidx)
-
-        if valid_left(prev_) and valid_right(next_):
-            if semicolon:
-                # only overwrite if a semicolon present.
-                m_semicolon = T.Punctuation, ';'
-                snidx, _ = tlist.token_next_by(m=m_semicolon, idx=nidx)
-                nidx = snidx or nidx
-            # Luckily, this leaves the position of `token` intact.
-            tlist.group_tokens(cls, pidx, nidx, extend=True)
-
-
 def group_typecasts(tlist):
--
cgit v1.2.1
From caefd84779d1115b6775ea7c0039f969f3b10d8a Mon Sep 17 00:00:00 2001
From: Victor Uriarte
Date: Wed, 15 Jun 2016 10:52:35 -0700
Subject: Change group_matching back to idx

---
 sqlparse/engine/grouping.py | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

(limited to 'sqlparse/engine')

diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index b2b46bb..62f37a6 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -17,7 +17,16 @@ T_NAME = (T.Name, T.Name.Placeholder)
 def _group_matching(tlist, cls):
     """Groups Tokens that have beginning and end."""
     opens = []
-    for token in list(tlist):
+    tidx_offset = 0
+    for idx, token in enumerate(list(tlist)):
+        tidx = idx - tidx_offset
+
+        if token.is_whitespace():
+            # ~50% of tokens will be whitespace. Checking early for them
+            # avoids 3 comparisons per whitespace token, but then adds 1
+            # more comparison for the other ~50% of tokens...
+            continue
+
         if token.is_group() and not isinstance(token, cls):
             # Check inside previously grouped (ie. parenthesis) if group
             # of different type is inside (ie, case). though ideally should
@@ -26,17 +35,18 @@ def _group_matching(tlist, cls):
             continue

         if token.match(*cls.M_OPEN):
-            opens.append(token)
+            opens.append(tidx)
+
         elif token.match(*cls.M_CLOSE):
             try:
-                open_token = opens.pop()
+                open_idx = opens.pop()
             except IndexError:
                 # this indicates invalid sql and unbalanced tokens.
                 # instead of break, continue in case other "valid" groups exist
                 continue
-            oidx = tlist.token_index(open_token)
-            cidx = tlist.token_index(token)
-            tlist.group_tokens(cls, oidx, cidx)
+            close_idx = tidx
+            tlist.group_tokens(cls, open_idx, close_idx)
+            tidx_offset += close_idx - open_idx
--
cgit v1.2.1
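The opens stack in this last patch is classic bracket matching phrased over live indexes; the same idea on a plain string, as a standalone hypothetical helper (not sqlparse code):

    def match_brackets(tokens, open_tok='(', close_tok=')'):
        # Push the index of every opener and pair it with the next closer;
        # unbalanced closers are skipped, mirroring _group_matching's
        # tolerance of invalid SQL.
        opens, pairs = [], []
        for idx, tok in enumerate(tokens):
            if tok == open_tok:
                opens.append(idx)
            elif tok == close_tok:
                if not opens:
                    continue          # unbalanced: ignore, keep scanning
                pairs.append((opens.pop(), idx))
        return pairs

    # Inner pair (3, 5) is emitted before the enclosing pair (1, 7),
    # just as nested Parenthesis groups are formed innermost-first.
    assert match_brackets(list("a(b(c)d)e")) == [(3, 5), (1, 7)]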