From 5ee6aed6aef8f8ffeeef67e3909bf8a72e9213b8 Mon Sep 17 00:00:00 2001
From: Andi Albrecht
Date: Sun, 3 May 2009 21:32:27 +0200
Subject: Improved parsing of identifier lists (targets issue2).

---
 sqlparse/engine/grouping.py | 61 +++++++++++++++++++++++++++------------------
 sqlparse/keywords.py        |  2 +-
 sqlparse/sql.py             | 16 ++++++++++++
 3 files changed, 54 insertions(+), 25 deletions(-)

(limited to 'sqlparse')

diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index 8068015..bdf2cd0 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -132,31 +132,44 @@ def group_identifier_list(tlist):
     [group_identifier_list(sgroup) for sgroup in tlist.get_sublists()
      if not isinstance(sgroup, (Identifier, IdentifierList))]
     idx = 0
-    token = tlist.token_next_by_instance(idx, Identifier)
-    while token:
-        tidx = tlist.token_index(token)
-        end = tlist.token_not_matching(tidx+1,
-                                       [lambda t: isinstance(t, Identifier),
-                                        lambda t: t.is_whitespace(),
-                                        lambda t: t.match(T.Punctuation,
-                                                          ',')
-                                        ])
-        if end is None:
-            end = tlist.tokens[-1]
-            exclude_end = False
+    # Allowed list items
+    fend1_funcs = [lambda t: isinstance(t, Identifier),
+                   lambda t: t.is_whitespace(),
+                   lambda t: t.ttype == T.Wildcard,
+                   lambda t: t.match(T.Keyword, 'null'),
+                   lambda t: t.ttype == T.Number.Integer,
+                   lambda t: t.ttype == T.String.Single,
+                   ]
+    tcomma = tlist.token_next_match(idx, T.Punctuation, ',')
+    start = None
+    while tcomma is not None:
+        before = tlist.token_prev(tcomma)
+        after = tlist.token_next(tcomma)
+        # Check if the tokens around tcomma belong to a list
+        bpassed = apassed = False
+        for func in fend1_funcs:
+            if before is not None and func(before):
+                bpassed = True
+            if after is not None and func(after):
+                apassed = True
+        if not bpassed or not apassed:
+            # Something's wrong here, skip ahead to next ","
+            start = None
+            tcomma = tlist.token_next_match(tlist.token_index(tcomma)+1,
+                                            T.Punctuation, ',')
         else:
-            exclude_end = True
-        grp_tokens = tlist.tokens_between(token, end,
-                                          exclude_end=exclude_end)
-        while grp_tokens and (grp_tokens[-1].is_whitespace()
-                              or grp_tokens[-1].match(T.Punctuation, ',')):
-            grp_tokens.pop()
-        if len(grp_tokens) <= 1:
-            idx = tidx + 1
-        else:
-            group = tlist.group_tokens(IdentifierList, grp_tokens)
-            idx = tlist.token_index(group)
-        token = tlist.token_next_by_instance(idx, Identifier)
+            if start is None:
+                start = before
+            next_ = tlist.token_next(after)
+            if next_ is None or not next_.match(T.Punctuation, ','):
+                # Reached the end of the list
+                tokens = tlist.tokens_between(start, after)
+                group = tlist.group_tokens(IdentifierList, tokens)
+                start = None
+                tcomma = tlist.token_next_match(tlist.token_index(group)+1,
+                                                T.Punctuation, ',')
+            else:
+                tcomma = next_
 
 
 def group_parenthesis(tlist):
diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py
index be20326..578f54f 100644
--- a/sqlparse/keywords.py
+++ b/sqlparse/keywords.py
@@ -39,7 +39,7 @@ KEYWORDS = {
     'BREADTH': Keyword,
     'BY': Keyword,
-    'C': Keyword,
+#    'C': Keyword,  # most likely this is an alias
     'CACHE': Keyword,
     'CALL': Keyword,
     'CALLED': Keyword,
diff --git a/sqlparse/sql.py b/sqlparse/sql.py
index e17285c..d1ee143 100644
--- a/sqlparse/sql.py
+++ b/sqlparse/sql.py
@@ -204,12 +204,24 @@ class TokenList(Token):
                 return token
         return None
 
+    def token_matching(self, idx, funcs):
+        for token in self.tokens[idx:]:
+            for i, func in enumerate(funcs):
+                if func(token):
+                    print 'MATCHED', i, token
+                    return token
+        return None
+
     def token_prev(self, idx, skip_ws=True):
         """Returns the previous token relative to *idx*.
 
         If *skip_ws* is ``True`` (the default) whitespace tokens are
         ignored. ``None`` is returned if there's no previous token.
         """
+        if idx is None:
+            return None
+        if not isinstance(idx, int):
+            idx = self.token_index(idx)
         while idx != 0:
             idx -= 1
             if self.tokens[idx].is_whitespace() and skip_ws:
@@ -222,6 +234,10 @@ class TokenList(Token):
         If *skip_ws* is ``True`` (the default) whitespace tokens are
         ignored. ``None`` is returned if there's no next token.
         """
+        if idx is None:
+            return None
+        if not isinstance(idx, int):
+            idx = self.token_index(idx)
         while idx < len(self.tokens)-1:
             idx += 1
             if self.tokens[idx].is_whitespace() and skip_ws:
-- 
cgit v1.2.1
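
A quick way to see the behavior this patch targets (a minimal sketch, not
part of the commit; it assumes only the public sqlparse.parse() entry point
and the IdentifierList class touched above):

    import sqlparse
    from sqlparse.sql import IdentifierList

    # The comma-driven scan accepts every token kind listed in fend1_funcs,
    # so mixed items -- identifiers, NULL, an integer, a string literal --
    # end up in one IdentifierList instead of the grouping stopping at the
    # first non-identifier item.
    parsed = sqlparse.parse("select foo, null, 42, 'bar' from tab")[0]
    idlists = [t for t in parsed.tokens if isinstance(t, IdentifierList)]
    print(repr(idlists))

The guards added to token_prev() and token_next() are what make the new loop
work: group_identifier_list() passes tcomma and after as token instances,
which the helpers now resolve to an index via token_index(), and a None
argument short-circuits to None instead of raising.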