Improved parsing of identifier lists (targets issue2).

author: Andi Albrecht <albrecht.andi@gmail.com> 2009-05-03 21:32:27 +0200
committer: Andi Albrecht <albrecht.andi@gmail.com> 2009-05-03 21:32:27 +0200
commit: 5ee6aed6aef8f8ffeeef67e3909bf8a72e9213b8 (patch)
tree: 27d8f5ac36ed9300407ba7bbeaf433abdf7617ed /sqlparse
parent: 118d6bb96a03003460d2cea1af74f2b362458037 (diff)
download: sqlparse-5ee6aed6aef8f8ffeeef67e3909bf8a72e9213b8.tar.gz
3 files changed, 54 insertions, 25 deletions
diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index 8068015..bdf2cd0 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -132,31 +132,44 @@ def group_identifier_list(tlist):
     [group_identifier_list(sgroup) for sgroup in tlist.get_sublists()
      if not isinstance(sgroup, (Identifier, IdentifierList))]
     idx = 0
-    token = tlist.token_next_by_instance(idx, Identifier)
-    while token:
-        tidx = tlist.token_index(token)
-        end = tlist.token_not_matching(tidx+1,
-                                       [lambda t: isinstance(t, Identifier),
-                                        lambda t: t.is_whitespace(),
-                                        lambda t: t.match(T.Punctuation,
-                                                          ',')
-                                        ])
-        if end is None:
-            end = tlist.tokens[-1]
-            exclude_end = False
+    # Allowed list items
+    fend1_funcs = [lambda t: isinstance(t, Identifier),
+                   lambda t: t.is_whitespace(),
+                   lambda t: t.ttype == T.Wildcard,
+                   lambda t: t.match(T.Keyword, 'null'),
+                   lambda t: t.ttype == T.Number.Integer,
+                   lambda t: t.ttype == T.String.Single,
+                   ]
+    tcomma = tlist.token_next_match(idx, T.Punctuation, ',')
+    start = None
+    while tcomma is not None:
+        before = tlist.token_prev(tcomma)
+        after = tlist.token_next(tcomma)
+        # Check if the tokens around tcomma belong to a list
+        bpassed = apassed = False
+        for func in fend1_funcs:
+            if before is not None and func(before):
+                bpassed = True
+            if after is not None and func(after):
+                apassed = True
+        if not bpassed or not apassed:
+            # Something's wrong here, skip ahead to next ","
+            start = None
+            tcomma = tlist.token_next_match(tlist.token_index(tcomma)+1,
+                                            T.Punctuation, ',')
         else:
-            exclude_end = True
-        grp_tokens = tlist.tokens_between(token, end,
-                                          exclude_end=exclude_end)
-        while grp_tokens and (grp_tokens[-1].is_whitespace()
-                              or grp_tokens[-1].match(T.Punctuation, ',')):
-            grp_tokens.pop()
-        if len(grp_tokens) <= 1:
-            idx = tidx + 1
-        else:
-            group = tlist.group_tokens(IdentifierList, grp_tokens)
-            idx = tlist.token_index(group)
-        token = tlist.token_next_by_instance(idx, Identifier)
+            if start is None:
+                start = before
+            next_ = tlist.token_next(after)
+            if next_ is None or not next_.match(T.Punctuation, ','):
+                # Reached the end of the list
+                tokens = tlist.tokens_between(start, after)
+                group = tlist.group_tokens(IdentifierList, tokens)
+                start = None
+                tcomma = tlist.token_next_match(tlist.token_index(group)+1,
+                                                T.Punctuation, ',')
+            else:
+                tcomma = next_
 
 
 def group_parenthesis(tlist):
diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py
index be20326..578f54f 100644
--- a/sqlparse/keywords.py
+++ b/sqlparse/keywords.py
@@ -39,7 +39,7 @@ KEYWORDS = {
     'BREADTH': Keyword,
     'BY': Keyword,
 
-    'C': Keyword,
+#    'C': Keyword,  # most likely this is an alias
     'CACHE': Keyword,
     'CALL': Keyword,
     'CALLED': Keyword,
diff --git a/sqlparse/sql.py b/sqlparse/sql.py
index e17285c..d1ee143 100644
--- a/sqlparse/sql.py
+++ b/sqlparse/sql.py
@@ -204,12 +204,24 @@ class TokenList(Token):
                 return token
         return None
 
+    def token_matching(self, idx, funcs):  # first token at/after idx accepted by any func
+        for token in self.tokens[idx:]:  # scan self.tokens starting at position idx
+            for i, func in enumerate(funcs):  # i is only used by the print below
+                if func(token):
+                    print 'MATCHED', i, token  # NOTE(review): looks like leftover debug output (Python 2 print statement) - confirm and remove
+                    return token  # stop at the first predicate that accepts the token
+        return None  # no token from idx onward satisfied any predicate
+
     def token_prev(self, idx, skip_ws=True):
         """Returns the previous token relative to *idx*.
 
         If *skip_ws* is ``True`` (the default) whitespace tokens are ignored.
         ``None`` is returned if there's no previous token.
         """
+        if idx is None:
+            return None
+        if not isinstance(idx, int):
+            idx = self.token_index(idx)
         while idx != 0:
             idx -= 1
             if self.tokens[idx].is_whitespace() and skip_ws:
@@ -222,6 +234,10 @@ class TokenList(Token):
         If *skip_ws* is ``True`` (the default) whitespace tokens are ignored.
         ``None`` is returned if there's no next token.
         """
+        if idx is None:
+            return None
+        if not isinstance(idx, int):
+            idx = self.token_index(idx)
         while idx < len(self.tokens)-1:
             idx += 1
             if self.tokens[idx].is_whitespace() and skip_ws:
author	Andi Albrecht <albrecht.andi@gmail.com>	2009-05-03 21:32:27 +0200
committer	Andi Albrecht <albrecht.andi@gmail.com>	2009-05-03 21:32:27 +0200
commit	5ee6aed6aef8f8ffeeef67e3909bf8a72e9213b8 (patch)
tree	27d8f5ac36ed9300407ba7bbeaf433abdf7617ed /sqlparse
parent	118d6bb96a03003460d2cea1af74f2b362458037 (diff)
download	sqlparse-5ee6aed6aef8f8ffeeef67e3909bf8a72e9213b8.tar.gz