author     Victor Uriarte <victor.m.uriarte@intel.com>  2016-06-12 20:12:48 -0700
committer  Victor Uriarte <victor.m.uriarte@intel.com>  2016-06-14 03:25:23 -0700
commit     a7ffa646e9c2839999217cc181544a8a4bb9a5fd (patch)
tree       087f2e53200a3ac28d2a1ce7c4b9f198696166dd /sqlparse
parent     fae3d94f7f3039d5d7b264b6b4aad69c6b03c9a4 (diff)
parent     89d4f68ba5bbe78a9dd89257cbe4a9f3cfa76433 (diff)
download   sqlparse-a7ffa646e9c2839999217cc181544a8a4bb9a5fd.tar.gz
Merge remote-tracking branch 'core/long_live_indexes' into develop
Diffstat (limited to 'sqlparse')
-rw-r--r--  sqlparse/engine/grouping.py  136
-rw-r--r--  sqlparse/sql.py              106
-rw-r--r--  sqlparse/utils.py              2
3 files changed, 175 insertions(+), 69 deletions(-)
diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index 6e414b8..cae5d23 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -7,7 +7,7 @@
from sqlparse import sql
from sqlparse import tokens as T
-from sqlparse.utils import recurse, imt, find_matching
+from sqlparse.utils import recurse, imt
M_ROLE = (T.Keyword, ('null', 'role'))
M_SEMICOLON = (T.Punctuation, ';')
@@ -23,40 +23,47 @@ def _group_left_right(tlist, m, cls,
valid_right=lambda t: t is not None,
semicolon=False):
"""Groups together tokens that are joined by a middle token. ie. x < y"""
+ [_group_left_right(sgroup, m, cls, valid_left, valid_right, semicolon)
+ for sgroup in tlist.get_sublists() if not isinstance(sgroup, cls)]
- for token in list(tlist):
- if token.is_group() and not isinstance(token, cls):
- _group_left_right(token, m, cls, valid_left, valid_right,
- semicolon)
-
- if not token.match(*m):
- continue
-
- left, right = tlist.token_prev(token), tlist.token_next(token)
+ token = tlist.token_next_by(m=m)
+ while token:
+ tidx = tlist.token_index(token)
+ left, right = tlist.token_prev(tidx), tlist.token_next(tidx)
if valid_left(left) and valid_right(right):
if semicolon:
# only overwrite if a semicolon is present.
- sright = tlist.token_next_by(m=M_SEMICOLON, idx=right)
+ sright = tlist.token_next_by(m=M_SEMICOLON, idx=tidx + 1)
right = sright or right
- tokens = tlist.tokens_between(left, right)
- tlist.group_tokens(cls, tokens, extend=True)
+ # Luckily, this leaves the position of `token` intact.
+ token = tlist.group_tokens_between(cls, left, right, extend=True)
+ token = tlist.token_next_by(m=m, idx=tidx + 1)
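The rewritten loop drives the scan by list index instead of re-finding tokens by identity. A minimal standalone sketch of the pattern, using plain Python lists in place of sqlparse's TokenList (names and shapes here are illustrative assumptions, not the project's API):

    # Hypothetical sketch: group each (left, middle, right) triple around a
    # middle token, then resume scanning just past the new group.
    def group_left_right(tokens, middle):
        i = 0
        while i < len(tokens):
            if tokens[i] == middle and 0 < i < len(tokens) - 1:
                # Splice left, middle, right into one grouped node; the
                # group lands at the position `left` occupied.
                tokens[i - 1:i + 2] = [tokens[i - 1:i + 2]]
                i -= 1
            i += 1
        return tokens

    assert group_left_right(['x', '<', 'y', 'and', 'a', '<', 'b'], '<') == \
        [['x', '<', 'y'], 'and', ['a', '<', 'b']]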
def _group_matching(tlist, cls):
"""Groups Tokens that have beginning and end."""
- [_group_matching(sgroup, cls) for sgroup in tlist.get_sublists()
- if not isinstance(sgroup, cls)]
- idx = 1 if isinstance(tlist, cls) else 0
+ idx = 1 if imt(tlist, i=cls) else 0
- token = tlist.token_next_by(m=cls.M_OPEN, idx=idx)
- while token:
- end = find_matching(tlist, token, cls.M_OPEN, cls.M_CLOSE)
- if end is not None:
- tokens = tlist.tokens_between(token, end)
- token = tlist.group_tokens(cls, tokens)
- _group_matching(token, cls)
- token = tlist.token_next_by(m=cls.M_OPEN, idx=token)
+ opens = []
+
+ while True:
+ try:
+ token = tlist.tokens[idx]
+ except IndexError:
+ break
+
+ if token.match(*cls.M_OPEN):
+ opens.append(idx)
+ elif token.match(*cls.M_CLOSE):
+ try:
+ open_idx = opens.pop()
+ except IndexError:
+ break
+ tlist.group_tokens_between(cls, open_idx, idx)
+ idx = open_idx
+
+ idx += 1
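This hunk drops the recursive find_matching call in favor of a single pass that stacks the index of every opener and, on each closer, groups back to the most recent one. A hedged standalone sketch of that idea, with bare strings standing in for tokens and match patterns:

    # Illustrative only: pair open/close markers in one pass with a stack.
    def group_matching(tokens, open_tok='(', close_tok=')'):
        opens = []
        idx = 0
        while idx < len(tokens):
            token = tokens[idx]
            if token == open_tok:
                opens.append(idx)
            elif token == close_tok:
                if not opens:      # unbalanced close: give up, as above
                    break
                open_idx = opens.pop()
                # Replace the span with one nested group, rescan from it.
                tokens[open_idx:idx + 1] = [tokens[open_idx:idx + 1]]
                idx = open_idx
            idx += 1
        return tokens

    assert group_matching(list('a(b(c)d)e')) == \
        ['a', ['(', 'b', ['(', 'c', ')'], 'd', ')'], 'e']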
def group_if(tlist):
@@ -109,8 +116,9 @@ def group_identifier(tlist):
token = tlist.token_next_by(t=T_IDENT)
while token:
- token = tlist.group_tokens(sql.Identifier, [token, ])
- token = tlist.token_next_by(t=T_IDENT, idx=token)
+ tidx = tlist.token_index(token)
+ token = tlist.group_tokens_between(sql.Identifier, tidx, tidx)
+ token = tlist.token_next_by(t=T_IDENT, idx=tidx + 1)
def group_period(tlist):
@@ -127,12 +135,11 @@ def group_period(tlist):
def group_arrays(tlist):
token = tlist.token_next_by(i=sql.SquareBrackets)
while token:
- prev = tlist.token_prev(token)
+ prev = tlist.token_prev(tlist.token_index(token))
if imt(prev, i=(sql.SquareBrackets, sql.Identifier, sql.Function),
t=(T.Name, T.String.Symbol,)):
- tokens = tlist.tokens_between(prev, token)
- token = tlist.group_tokens(sql.Identifier, tokens, extend=True)
- token = tlist.token_next_by(i=sql.SquareBrackets, idx=token)
+ token = tlist.group_tokens_between(sql.Identifier, prev, token, extend=True)
+ token = tlist.token_next_by(i=sql.SquareBrackets, idx=tlist.token_index(token) + 1)
@recurse(sql.Identifier)
@@ -145,14 +152,13 @@ def group_operator(tlist):
token = tlist.token_next_by(t=(T.Operator, T.Wildcard))
while token:
- left, right = tlist.token_prev(token), tlist.token_next(token)
+ left, right = tlist.token_prev(tlist.token_index(token)), tlist.token_next(tlist.token_index(token))
if func(left) and func(right):
token.ttype = T.Operator
- tokens = tlist.tokens_between(left, right)
- token = tlist.group_tokens(sql.Operation, tokens)
+ token = tlist.group_tokens_between(sql.Operation, left, right)
- token = tlist.token_next_by(t=(T.Operator, T.Wildcard), idx=token)
+ token = tlist.token_next_by(t=(T.Operator, T.Wildcard), idx=tlist.token_index(token) + 1)
@recurse(sql.IdentifierList)
@@ -163,15 +169,17 @@ def group_identifier_list(tlist):
(T.Keyword, T.Comment, T.Wildcard))
func = lambda t: imt(t, i=I_IDENT_LIST, m=M_ROLE, t=T_IDENT_LIST)
- token = tlist.token_next_by(m=M_COMMA)
+ tidx, token = tlist.token_idx_next_by(m=M_COMMA)
while token:
- before, after = tlist.token_prev(token), tlist.token_next(token)
+ before_idx, before = tlist.token_idx_prev(tidx)
+ after_idx, after = tlist.token_idx_next(tidx)
if func(before) and func(after):
- tokens = tlist.tokens_between(before, after)
- token = tlist.group_tokens(sql.IdentifierList, tokens, extend=True)
- token = tlist.token_next_by(m=M_COMMA, idx=token)
+ tidx = before_idx
+ token = tlist.group_tokens_between(sql.IdentifierList, tidx, after_idx, extend=True)
+
+ tidx, token = tlist.token_idx_next_by(m=M_COMMA, idx=tidx + 1)
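group_identifier_list is the first caller of the new token_idx_next_by, which hands back an (index, token) pair so the loop never has to recover a position with token_index. A rough standalone model of that convention (assumed names, not sqlparse's exact signatures):

    def token_idx_next_by(tokens, pred, idx=0):
        # Forward scan from `idx`, returning position and token together.
        for i, tok in enumerate(tokens[idx:], start=idx):
            if pred(tok):
                return i, tok
        return None, None

    tokens = ['a', ',', 'b', ',', 'c']
    is_comma = lambda t: t == ','
    positions = []
    tidx, token = token_idx_next_by(tokens, is_comma)
    while token is not None:
        positions.append(tidx)
        # tidx is authoritative; tokens.index(token) would cost O(n) and
        # misfire as soon as equal values repeat.
        tidx, token = token_idx_next_by(tokens, is_comma, tidx + 1)
    assert positions == [1, 3]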
def group_brackets(tlist):
@@ -187,29 +195,27 @@ def group_comments(tlist):
token = tlist.token_next_by(t=T.Comment)
while token:
end = tlist.token_not_matching(
- token, lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace())
+ tlist.token_index(token) + 1, lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace())
if end is not None:
- end = tlist.token_prev(end, False)
- tokens = tlist.tokens_between(token, end)
- token = tlist.group_tokens(sql.Comment, tokens)
+ end = tlist.token_prev(tlist.token_index(end), False)
+ token = tlist.group_tokens_between(sql.Comment, token, end)
- token = tlist.token_next_by(t=T.Comment, idx=token)
+ token = tlist.token_next_by(t=T.Comment, idx=tlist.token_index(token) + 1)
@recurse(sql.Where)
def group_where(tlist):
token = tlist.token_next_by(m=sql.Where.M_OPEN)
while token:
- end = tlist.token_next_by(m=sql.Where.M_CLOSE, idx=token)
+ end = tlist.token_next_by(m=sql.Where.M_CLOSE, idx=tlist.token_index(token) + 1)
if end is None:
- tokens = tlist.tokens_between(token, tlist._groupable_tokens[-1])
+ end = tlist._groupable_tokens[-1]
else:
- tokens = tlist.tokens_between(
- token, tlist.tokens[tlist.token_index(end) - 1])
+ end = tlist.tokens[tlist.token_index(end) - 1]
- token = tlist.group_tokens(sql.Where, tokens)
- token = tlist.token_next_by(m=sql.Where.M_OPEN, idx=token)
+ token = tlist.group_tokens_between(sql.Where, token, end)
+ token = tlist.token_next_by(m=sql.Where.M_OPEN, idx=tlist.token_index(token) + 1)
@recurse()
@@ -217,13 +223,12 @@ def group_aliased(tlist):
I_ALIAS = (sql.Parenthesis, sql.Function, sql.Case, sql.Identifier,
sql.Operation)
- token = tlist.token_next_by(i=I_ALIAS, t=T.Number)
+ tidx, token = tlist.token_idx_next_by(i=I_ALIAS, t=T.Number)
while token:
- next_ = tlist.token_next(token)
+ next_index_, next_ = tlist.token_idx_next(tidx)
if imt(next_, i=sql.Identifier):
- tokens = tlist.tokens_between(token, next_)
- token = tlist.group_tokens(sql.Identifier, tokens, extend=True)
- token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=token)
+ token = tlist.group_tokens_between(sql.Identifier, tidx, next_index_, extend=True)
+ tidx, token = tlist.token_idx_next_by(i=I_ALIAS, t=T.Number, idx=tidx + 1)
def group_typecasts(tlist):
@@ -243,33 +248,30 @@ def group_functions(tlist):
return
token = tlist.token_next_by(t=T.Name)
while token:
- next_ = tlist.token_next(token)
+ next_ = tlist.token_next(tlist.token_index(token))
if imt(next_, i=sql.Parenthesis):
- tokens = tlist.tokens_between(token, next_)
- token = tlist.group_tokens(sql.Function, tokens)
- token = tlist.token_next_by(t=T.Name, idx=token)
+ token = tlist.group_tokens_between(sql.Function, token, next_)
+ token = tlist.token_next_by(t=T.Name, idx=tlist.token_index(token) + 1)
def group_order(tlist):
"""Group together Identifier and Asc/Desc token"""
token = tlist.token_next_by(t=T.Keyword.Order)
while token:
- prev = tlist.token_prev(token)
+ prev = tlist.token_prev(tlist.token_index(token))
if imt(prev, i=sql.Identifier, t=T.Number):
- tokens = tlist.tokens_between(prev, token)
- token = tlist.group_tokens(sql.Identifier, tokens)
- token = tlist.token_next_by(t=T.Keyword.Order, idx=token)
+ token = tlist.group_tokens_between(sql.Identifier, prev, token)
+ token = tlist.token_next_by(t=T.Keyword.Order, idx=tlist.token_index(token) + 1)
@recurse()
def align_comments(tlist):
token = tlist.token_next_by(i=sql.Comment)
while token:
- before = tlist.token_prev(token)
+ before = tlist.token_prev(tlist.token_index(token))
if isinstance(before, sql.TokenList):
- tokens = tlist.tokens_between(before, token)
- token = tlist.group_tokens(sql.TokenList, tokens, extend=True)
- token = tlist.token_next_by(i=sql.Comment, idx=token)
+ token = tlist.group_tokens_between(sql.TokenList, before, token, extend=True)
+ token = tlist.token_next_by(i=sql.Comment, idx=tlist.token_index(token) + 1)
def group(stmt):
diff --git a/sqlparse/sql.py b/sqlparse/sql.py
index 52b3bf1..54f7d4f 100644
--- a/sqlparse/sql.py
+++ b/sqlparse/sql.py
@@ -204,6 +204,28 @@ class TokenList(Token):
def _groupable_tokens(self):
return self.tokens
+ def _token_idx_matching(self, funcs, start=0, end=None, reverse=False):
+ """next token that match functions"""
+ if start is None:
+ return None
+
+ if not isinstance(funcs, (list, tuple)):
+ funcs = (funcs,)
+
+ if reverse:
+ assert end is None
+ for idx in range(start - 2, -1, -1):
+ token = self.tokens[idx]
+ for func in funcs:
+ if func(token):
+ return idx, token
+ else:
+ for idx, token in enumerate(self.tokens[start:end], start=start):
+ for func in funcs:
+ if func(token):
+ return idx, token
+ return None, None
+
def _token_matching(self, funcs, start=0, end=None, reverse=False):
"""next token that match functions"""
if start is None:
@@ -225,6 +247,24 @@ class TokenList(Token):
if func(token):
return token
+ def token_first(self, skip_ws=True, skip_cm=False):
+ """Returns the first child token.
+
+ If *skip_ws* is ``True`` (the default), whitespace
+ tokens are ignored.
+
+ If *skip_cm* is ``True`` (default: ``False``), comments are
+ ignored too.
+ """
+ # this one is inconsistent, using Comment instead of T.Comment...
+ funcs = lambda tk: not ((skip_ws and tk.is_whitespace()) or
+ (skip_cm and imt(tk, t=T.Comment, i=Comment)))
+ return self._token_matching(funcs)
+
+ def token_idx_next_by(self, i=None, m=None, t=None, idx=0, end=None):
+ funcs = lambda tk: imt(tk, i, m, t)
+ return self._token_idx_matching(funcs, idx, end)
+
def token_next_by(self, i=None, m=None, t=None, idx=0, end=None):
funcs = lambda tk: imt(tk, i, m, t)
return self._token_matching(funcs, idx, end)
@@ -237,12 +277,24 @@ class TokenList(Token):
def token_matching(self, idx, funcs):
return self._token_matching(funcs, idx)
+ def token_idx_prev(self, idx, skip_ws=True):
+ """Returns the previous token relative to *idx*.
+
+ If *skip_ws* is ``True`` (the default) whitespace tokens are ignored.
+ ``None`` is returned if there's no previous token.
+ """
+ idx += 1 # a lot of calling code currently pre-compensates for this
+ funcs = lambda tk: not (tk.is_whitespace() and skip_ws)
+ return self._token_idx_matching(funcs, idx, reverse=True)
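Combined with the range(start - 2, -1, -1) in _token_idx_matching, the +1 compensation means the reverse scan starts at idx - 1, the token immediately before the one the caller holds. A small illustrative sketch of that bookkeeping on a flat list (hypothetical names):

    def token_idx_prev(tokens, idx, skip_ws=True):
        idx += 1                   # compensate, as the comment above notes
        # start - 2 == original idx - 1: the immediate predecessor.
        for i in range(idx - 2, -1, -1):
            if skip_ws and tokens[i].isspace():
                continue
            return i, tokens[i]
        return None, None

    assert token_idx_prev(['a', ' ', 'b'], 2) == (0, 'a')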
+
def token_prev(self, idx=0, skip_ws=True, skip_cm=False):
"""Returns the previous token relative to *idx*.
If *skip_ws* is ``True`` (the default) whitespace tokens are ignored.
``None`` is returned if there's no previous token.
"""
+ if isinstance(idx, int):
+ idx += 1 # a lot of calling code currently pre-compensates for this
funcs = lambda tk: not ((skip_ws and tk.is_whitespace()) or
(skip_cm and imt(tk, t=T.Comment, i=Comment)))
return self._token_matching(funcs, idx, reverse=True)
@@ -255,10 +307,32 @@ class TokenList(Token):
If *skip_cm* is ``True`` (default: ``False``), comments are ignored.
``None`` is returned if there's no next token.
"""
+ if isinstance(idx, int):
+ idx += 1 # a lot of calling code currently pre-compensates for this
funcs = lambda tk: not ((skip_ws and tk.is_whitespace()) or
(skip_cm and imt(tk, t=T.Comment, i=Comment)))
return self._token_matching(funcs, idx)
+ def token_idx_next(self, idx, skip_ws=True):
+ """Returns the next token relative to *idx*.
+
+ If *skip_ws* is ``True`` (the default) whitespace tokens are ignored.
+ ``None`` is returned if there's no next token.
+ """
+ if isinstance(idx, int):
+ idx += 1 # a lot of calling code currently pre-compensates for this
+ try:
+ if not skip_ws:
+ return idx, self.tokens[idx]
+ else:
+ while True:
+ token = self.tokens[idx]
+ if not token.is_whitespace():
+ return idx, token
+ idx += 1
+ except IndexError:
+ return None, None
+
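token_idx_next is the forward counterpart: step past idx, walk until a non-whitespace token, and translate the IndexError from running off the end into (None, None). A standalone sketch under the same assumptions (plain strings as tokens):

    def token_idx_next(tokens, idx, skip_ws=True):
        idx += 1                   # same pre-compensation as token_idx_prev
        try:
            while skip_ws and tokens[idx].isspace():
                idx += 1
            return idx, tokens[idx]
        except IndexError:         # ran past the end of the list
            return None, None

    assert token_idx_next(['a', ' ', 'b'], 0) == (2, 'b')
    assert token_idx_next(['a'], 0) == (None, None)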
def token_index(self, token, start=0):
"""Return list index of token."""
start = start if isinstance(start, int) else self.token_index(start)
@@ -274,6 +348,36 @@ class TokenList(Token):
end_idx = include_end + self.token_index(end)
return self.tokens[start_idx:end_idx]
+ def group_tokens_between(self, grp_cls, start, end, include_end=True,
+ extend=False):
+ """Replace tokens by an instance of *grp_cls*."""
+ if isinstance(start, int):
+ start_idx = start
+ start = self.tokens[start_idx]
+ else:
+ start_idx = self.token_index(start)
+
+ end_idx = self.token_index(end) if not isinstance(end, int) else end
+ end_idx += include_end
+
+ if extend and isinstance(start, grp_cls):
+ subtokens = self.tokens[start_idx + 1:end_idx]
+
+ grp = start
+ grp.tokens.extend(subtokens)
+ del self.tokens[start_idx + 1:end_idx]
+ grp.value = str(grp)
+ else:
+ subtokens = self.tokens[start_idx:end_idx]
+ grp = grp_cls(subtokens)
+ self.tokens[start_idx:end_idx] = [grp]
+ grp.parent = self
+
+ for token in subtokens:
+ token.parent = grp
+
+ return grp
+
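group_tokens_between accepts either a token or a bare index for each endpoint and splices the group in with a single slice assignment; with extend=True it folds the span into an existing group instead of nesting. A rough index-only model of both paths (a sketch, not the real method):

    def group_tokens_between(tokens, start_idx, end_idx, extend=False):
        if extend and isinstance(tokens[start_idx], list):
            # Fold the following span into the existing group in place.
            grp = tokens[start_idx]
            grp.extend(tokens[start_idx + 1:end_idx + 1])
            del tokens[start_idx + 1:end_idx + 1]
        else:
            # Replace the whole span with one new group node.
            grp = tokens[start_idx:end_idx + 1]
            tokens[start_idx:end_idx + 1] = [grp]
        return grp

    toks = ['a', '.', 'b', ' ', 'c']
    group_tokens_between(toks, 0, 2)
    assert toks == [['a', '.', 'b'], ' ', 'c']
    group_tokens_between(toks, 0, 2, extend=True)   # fold ' ', 'c' in
    assert toks == [['a', '.', 'b', ' ', 'c']]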
def group_tokens(self, grp_cls, tokens, skip_ws=False, extend=False):
"""Replace tokens by an instance of *grp_cls*."""
@@ -386,7 +490,7 @@ class Statement(TokenList):
Whitespaces and comments at the beginning of the statement
are ignored.
"""
- first_token = self.token_next(skip_cm=True)
+ first_token = self.token_first(skip_cm=True)
if first_token is None:
# An "empty" statement that either has not tokens at all
# or only whitespace tokens.
diff --git a/sqlparse/utils.py b/sqlparse/utils.py
index 4a8646d..59301ff 100644
--- a/sqlparse/utils.py
+++ b/sqlparse/utils.py
@@ -104,7 +104,7 @@ def imt(token, i=None, m=None, t=None):
def find_matching(tlist, token, open_pattern, close_pattern):
- idx = tlist.token_index(token)
+ idx = tlist.token_index(token) if not isinstance(token, int) else token
depth = 0
for token in tlist.tokens[idx:]:
if token.match(*open_pattern):
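For reference, a hedged standalone sketch of the depth-counting search that the rest of find_matching performs once idx is resolved (plain strings stand in for tokens and match patterns, and the match's index is returned here for demonstration):

    def find_matching(tokens, idx, open_tok='(', close_tok=')'):
        depth = 0
        for i, token in enumerate(tokens[idx:], start=idx):
            if token == open_tok:
                depth += 1
            elif token == close_tok:
                depth -= 1
                if depth == 0:     # this close balances the first open
                    return i
        return None

    assert find_matching(list('f(a(b)c)x'), 1) == 7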