Moved syntactical units to sql module.

author: Andi Albrecht <albrecht.andi@gmail.com> 2009-04-06 08:24:29 +0200
committer: Andi Albrecht <albrecht.andi@gmail.com> 2009-04-06 08:24:29 +0200
commit: 56d27222b173013afe74a6bbe0da5eebf29ef559 (patch)
tree: 4482d389d807f0e5c0fbb203d0f0906dfafb6f30 /sqlparse
parent: 59ab1c932d8ec3c49ea4aa7b581202bc98c4e4a0 (diff)
download: sqlparse-56d27222b173013afe74a6bbe0da5eebf29ef559.tar.gz
2 files changed, 319 insertions, 308 deletions
diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index 433f539..9176fbe 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -5,316 +5,9 @@ import re
 import types
 
 from sqlparse import tokens as T
+from sqlparse.sql import *
 
 
-class Token(object):
-
-    __slots__ = ('value', 'ttype')
-
-    def __init__(self, ttype, value):
-        self.value = value
-        self.ttype = ttype
-
-    def __str__(self):
-        return unicode(self).encode('latin-1')
-
-    def __repr__(self):
-        short = self._get_repr_value()
-        return '<%s \'%s\' at 0x%07x>' % (self._get_repr_name(),
-                                          short, id(self))
-
-    def __unicode__(self):
-        return self.value
-
-    def to_unicode(self):
-        return unicode(self)
-
-    def _get_repr_name(self):
-        return str(self.ttype).split('.')[-1]
-
-    def _get_repr_value(self):
-        raw = unicode(self)
-        if len(raw) > 7:
-            short = raw[:6]+u'...'
-        else:
-            short = raw
-        return re.sub('\s+', ' ', short)
-
-    def match(self, ttype, values, regex=False):
-        if self.ttype is not ttype:
-            return False
-        if values is None:
-            return self.ttype is ttype
-        if isinstance(values, basestring):
-            values = [values]
-        if regex:
-            if self.ttype is T.Keyword:
-                values = [re.compile(v, re.IGNORECASE) for v in values]
-            else:
-                values = [re.compile(v) for v in values]
-            for pattern in values:
-                if pattern.search(self.value):
-                    return True
-            return False
-        else:
-            if self.ttype is T.Keyword:
-                return self.value.upper() in [v.upper() for v in values]
-            else:
-                return self.value in values
-
-    def is_group(self):
-        return False
-
-    def is_whitespace(self):
-        return self.ttype and self.ttype in T.Whitespace
-
-
-class TokenList(Token):
-
-    __slots__ = ('value', 'ttype', 'tokens')
-
-    def __init__(self, tokens=None):
-        if tokens is None:
-            tokens = []
-        self.tokens = tokens
-        Token.__init__(self, None, None)
-
-    def __unicode__(self):
-        return ''.join(unicode(x) for x in self.flatten())
-
-    def __str__(self):
-        return unicode(self).encode('latin-1')
-
-    def _get_repr_name(self):
-        return self.__class__.__name__
-
-    def _pprint_tree(self, max_depth=None, depth=0):
-        """Pretty-print the object tree."""
-        indent = ' '*(depth*2)
-        for token in self.tokens:
-            if token.is_group():
-                pre = ' | '
-            else:
-                pre = ' | '
-            print '%s%s%s \'%s\'' % (indent, pre, token._get_repr_name(),
-                                     token._get_repr_value())
-            if (token.is_group() and max_depth is not None
-                and depth < max_depth):
-                token._pprint_tree(max_depth, depth+1)
-
-    def flatten(self):
-        for token in self.tokens:
-            if isinstance(token, TokenList):
-                for item in token.flatten():
-                    yield item
-            else:
-                yield token
-
-    def is_group(self):
-        return True
-
-    def get_sublists(self):
-        return [x for x in self.tokens if isinstance(x, TokenList)]
-
-    def token_first(self, ignore_whitespace=True):
-        for token in self.tokens:
-            if ignore_whitespace and token.is_whitespace():
-                continue
-            return token
-        return None
-
-    def token_next_by_instance(self, idx, clss):
-        if type(clss) not in (types.ListType, types.TupleType):
-            clss = (clss,)
-        if type(clss) is not types.TupleType:
-            clss = tuple(clss)
-        for token in self.tokens[idx:]:
-            if isinstance(token, clss):
-                return token
-        return None
-
-    def token_next_by_type(self, idx, ttypes):
-        if not isinstance(ttypes, (types.TupleType, types.ListType)):
-            ttypes = [ttypes]
-        for token in self.tokens[idx:]:
-            if token.ttype in ttypes:
-                return token
-        return None
-
-    def token_next_match(self, idx, ttype, value, regex=False):
-        if type(idx) != types.IntType:
-            idx = self.token_index(idx)
-        for token in self.tokens[idx:]:
-            if token.match(ttype, value, regex):
-                return token
-        return None
-
-    def token_not_matching(self, idx, funcs):
-        for token in self.tokens[idx:]:
-            passed = False
-            for func in funcs:
-                if func(token):
-                   passed = True
-                   break
-            if not passed:
-                return token
-        return None
-
-    def token_prev(self, idx, skip_ws=True):
-        while idx != 0:
-            idx -= 1
-            if self.tokens[idx].is_whitespace() and skip_ws:
-                continue
-            return self.tokens[idx]
-
-    def token_next(self, idx, skip_ws=True):
-        while idx < len(self.tokens)-1:
-            idx += 1
-            if self.tokens[idx].is_whitespace() and skip_ws:
-                continue
-            return self.tokens[idx]
-
-    def token_index(self, token):
-        """Return list index of token."""
-        return self.tokens.index(token)
-
-    def tokens_between(self, start, end, exclude_end=False):
-        """Return all tokens between (and including) start and end."""
-        if exclude_end:
-            offset = 0
-        else:
-            offset = 1
-        return self.tokens[self.token_index(start):self.token_index(end)+offset]
-
-    def group_tokens(self, grp_cls, tokens):
-        """Replace tokens by instance of grp_cls."""
-        idx = self.token_index(tokens[0])
-        for t in tokens:
-            self.tokens.remove(t)
-        grp = grp_cls(tokens)
-        self.tokens.insert(idx, grp)
-        return grp
-
-    def insert_before(self, where, token):
-        self.tokens.insert(self.token_index(where), token)
-
-
-class Statement(TokenList):
-
-    __slots__ = ('value', 'ttype', 'tokens')
-
-    def get_type(self):
-        first_token = self.token_first()
-        if first_token.ttype in (T.Keyword.DML, T.Keyword.DDL):
-            return first_token.value.upper()
-        else:
-            return 'UNKNOWN'
-
-
-class Identifier(TokenList):
-
-    __slots__ = ('value', 'ttype', 'tokens')
-
-    def has_alias(self):
-        return self.get_alias() is not None
-
-    def get_alias(self):
-        kw = self.token_next_match(0, T.Keyword, 'AS')
-        if kw is not None:
-            alias = self.token_next(self.token_index(kw))
-            if alias is None:
-                return None
-        else:
-            next_ = self.token_next(0)
-            if next_ is None or not isinstance(next_, Identifier):
-                return None
-            alias = next_
-        if isinstance(alias, Identifier):
-            return alias.get_name()
-        else:
-            return alias.to_unicode()
-
-    def get_name(self):
-        alias = self.get_alias()
-        if alias is not None:
-            return alias
-        return self.get_real_name()
-
-    def get_real_name(self):
-        return self.token_next_by_type(0, T.Name).value
-
-    def get_typecast(self):
-        marker = self.token_next_match(0, T.Punctuation, '::')
-        if marker is None:
-            return None
-        next_ = self.token_next(self.token_index(marker), False)
-        if next_ is None:
-            return None
-        return next_.to_unicode()
-
-
-class IdentifierList(TokenList):
-
-    __slots__ = ('value', 'ttype', 'tokens')
-
-    def get_identifiers(self):
-        return [x for x in self.tokens if isinstance(x, Identifier)]
-
-
-class Parenthesis(TokenList):
-    __slots__ = ('value', 'ttype', 'tokens')
-
-
-class Assignment(TokenList):
-    __slots__ = ('value', 'ttype', 'tokens')
-
-class If(TokenList):
-    __slots__ = ('value', 'ttype', 'tokens')
-
-class For(TokenList):
-    __slots__ = ('value', 'ttype', 'tokens')
-
-class Comparsion(TokenList):
-    __slots__ = ('value', 'ttype', 'tokens')
-
-class Comment(TokenList):
-    __slots__ = ('value', 'ttype', 'tokens')
-
-class Where(TokenList):
-    __slots__ = ('value', 'ttype', 'tokens')
-
-
-class Case(TokenList):
-
-    __slots__ = ('value', 'ttype', 'tokens')
-
-    def get_cases(self):
-        """Returns a list of 2-tuples (condition, value).
-
-        If an ELSE exists condition is None.
-        """
-        ret = []
-        in_condition = in_value = False
-        for token in self.tokens:
-            if token.match(T.Keyword, 'WHEN'):
-                ret.append(([], []))
-                in_condition = True
-                in_value = False
-            elif token.match(T.Keyword, 'ELSE'):
-                ret.append((None, []))
-                in_condition = False
-                in_value = True
-            elif token.match(T.Keyword, 'THEN'):
-                in_condition = False
-                in_value = True
-            elif token.match(T.Keyword, 'END'):
-                in_condition = False
-                in_value = False
-            if in_condition:
-                ret[-1][0].append(token)
-            elif in_value:
-                ret[-1][1].append(token)
-        return ret
 
 def _group_left_right(tlist, ttype, value, cls,
                       check_right=lambda t: True,
diff --git a/sqlparse/sql.py b/sqlparse/sql.py
new file mode 100644
index 0000000..372a3c7
--- /dev/null
+++ b/sqlparse/sql.py
@@ -0,0 +1,318 @@
+# -*- coding: utf-8 -*-
+
+"""This module contains classes representing syntactical elements of SQL."""
+
+import re
+import types
+
+from sqlparse import tokens as T
+
+
+class Token(object):
+
+    __slots__ = ('value', 'ttype')
+
+    def __init__(self, ttype, value):
+        self.value = value
+        self.ttype = ttype
+
+    def __str__(self):
+        return unicode(self).encode('latin-1')
+
+    def __repr__(self):
+        short = self._get_repr_value()
+        return '<%s \'%s\' at 0x%07x>' % (self._get_repr_name(),
+                                          short, id(self))
+
+    def __unicode__(self):
+        return self.value
+
+    def to_unicode(self):
+        return unicode(self)
+
+    def _get_repr_name(self):
+        return str(self.ttype).split('.')[-1]
+
+    def _get_repr_value(self):
+        raw = unicode(self)
+        if len(raw) > 7:
+            short = raw[:6]+u'...'
+        else:
+            short = raw
+        return re.sub('\s+', ' ', short)
+
+    def match(self, ttype, values, regex=False):
+        if self.ttype is not ttype:
+            return False
+        if values is None:
+            return self.ttype is ttype
+        if isinstance(values, basestring):
+            values = [values]
+        if regex:
+            if self.ttype is T.Keyword:
+                values = [re.compile(v, re.IGNORECASE) for v in values]
+            else:
+                values = [re.compile(v) for v in values]
+            for pattern in values:
+                if pattern.search(self.value):
+                    return True
+            return False
+        else:
+            if self.ttype is T.Keyword:
+                return self.value.upper() in [v.upper() for v in values]
+            else:
+                return self.value in values
+
+    def is_group(self):
+        return False
+
+    def is_whitespace(self):
+        return self.ttype and self.ttype in T.Whitespace
+
+
+class TokenList(Token):
+
+    __slots__ = ('value', 'ttype', 'tokens')
+
+    def __init__(self, tokens=None):
+        if tokens is None:
+            tokens = []
+        self.tokens = tokens
+        Token.__init__(self, None, None)
+
+    def __unicode__(self):
+        return ''.join(unicode(x) for x in self.flatten())
+
+    def __str__(self):
+        return unicode(self).encode('latin-1')
+
+    def _get_repr_name(self):
+        return self.__class__.__name__
+
+    def _pprint_tree(self, max_depth=None, depth=0):
+        """Pretty-print the object tree."""
+        indent = ' '*(depth*2)
+        for token in self.tokens:
+            if token.is_group():
+                pre = ' | '
+            else:
+                pre = ' | '
+            print '%s%s%s \'%s\'' % (indent, pre, token._get_repr_name(),
+                                     token._get_repr_value())
+            if (token.is_group() and max_depth is not None
+                and depth < max_depth):
+                token._pprint_tree(max_depth, depth+1)
+
+    def flatten(self):
+        for token in self.tokens:
+            if isinstance(token, TokenList):
+                for item in token.flatten():
+                    yield item
+            else:
+                yield token
+
+    def is_group(self):
+        return True
+
+    def get_sublists(self):
+        return [x for x in self.tokens if isinstance(x, TokenList)]
+
+    def token_first(self, ignore_whitespace=True):
+        for token in self.tokens:
+            if ignore_whitespace and token.is_whitespace():
+                continue
+            return token
+        return None
+
+    def token_next_by_instance(self, idx, clss):
+        if type(clss) not in (types.ListType, types.TupleType):
+            clss = (clss,)
+        if type(clss) is not types.TupleType:
+            clss = tuple(clss)
+        for token in self.tokens[idx:]:
+            if isinstance(token, clss):
+                return token
+        return None
+
+    def token_next_by_type(self, idx, ttypes):
+        if not isinstance(ttypes, (types.TupleType, types.ListType)):
+            ttypes = [ttypes]
+        for token in self.tokens[idx:]:
+            if token.ttype in ttypes:
+                return token
+        return None
+
+    def token_next_match(self, idx, ttype, value, regex=False):
+        if type(idx) != types.IntType:
+            idx = self.token_index(idx)
+        for token in self.tokens[idx:]:
+            if token.match(ttype, value, regex):
+                return token
+        return None
+
+    def token_not_matching(self, idx, funcs):
+        for token in self.tokens[idx:]:
+            passed = False
+            for func in funcs:
+                if func(token):
+                   passed = True
+                   break
+            if not passed:
+                return token
+        return None
+
+    def token_prev(self, idx, skip_ws=True):
+        while idx != 0:
+            idx -= 1
+            if self.tokens[idx].is_whitespace() and skip_ws:
+                continue
+            return self.tokens[idx]
+
+    def token_next(self, idx, skip_ws=True):
+        while idx < len(self.tokens)-1:
+            idx += 1
+            if self.tokens[idx].is_whitespace() and skip_ws:
+                continue
+            return self.tokens[idx]
+
+    def token_index(self, token):
+        """Return list index of token."""
+        return self.tokens.index(token)
+
+    def tokens_between(self, start, end, exclude_end=False):
+        """Return all tokens between (and including) start and end."""
+        if exclude_end:
+            offset = 0
+        else:
+            offset = 1
+        return self.tokens[self.token_index(start):self.token_index(end)+offset]
+
+    def group_tokens(self, grp_cls, tokens):
+        """Replace tokens by instance of grp_cls."""
+        idx = self.token_index(tokens[0])
+        for t in tokens:
+            self.tokens.remove(t)
+        grp = grp_cls(tokens)
+        self.tokens.insert(idx, grp)
+        return grp
+
+    def insert_before(self, where, token):
+        self.tokens.insert(self.token_index(where), token)
+
+
+class Statement(TokenList):
+
+    __slots__ = ('value', 'ttype', 'tokens')
+
+    def get_type(self):
+        first_token = self.token_first()
+        if first_token.ttype in (T.Keyword.DML, T.Keyword.DDL):
+            return first_token.value.upper()
+        else:
+            return 'UNKNOWN'
+
+
+class Identifier(TokenList):
+
+    __slots__ = ('value', 'ttype', 'tokens')
+
+    def has_alias(self):
+        return self.get_alias() is not None
+
+    def get_alias(self):
+        kw = self.token_next_match(0, T.Keyword, 'AS')
+        if kw is not None:
+            alias = self.token_next(self.token_index(kw))
+            if alias is None:
+                return None
+        else:
+            next_ = self.token_next(0)
+            if next_ is None or not isinstance(next_, Identifier):
+                return None
+            alias = next_
+        if isinstance(alias, Identifier):
+            return alias.get_name()
+        else:
+            return alias.to_unicode()
+
+    def get_name(self):
+        alias = self.get_alias()
+        if alias is not None:
+            return alias
+        return self.get_real_name()
+
+    def get_real_name(self):
+        return self.token_next_by_type(0, T.Name).value
+
+    def get_typecast(self):
+        marker = self.token_next_match(0, T.Punctuation, '::')
+        if marker is None:
+            return None
+        next_ = self.token_next(self.token_index(marker), False)
+        if next_ is None:
+            return None
+        return next_.to_unicode()
+
+
+class IdentifierList(TokenList):
+
+    __slots__ = ('value', 'ttype', 'tokens')
+
+    def get_identifiers(self):
+        return [x for x in self.tokens if isinstance(x, Identifier)]
+
+
+class Parenthesis(TokenList):
+    __slots__ = ('value', 'ttype', 'tokens')
+
+
+class Assignment(TokenList):
+    __slots__ = ('value', 'ttype', 'tokens')
+
+class If(TokenList):
+    __slots__ = ('value', 'ttype', 'tokens')
+
+class For(TokenList):
+    __slots__ = ('value', 'ttype', 'tokens')
+
+class Comparsion(TokenList):
+    __slots__ = ('value', 'ttype', 'tokens')
+
+class Comment(TokenList):
+    __slots__ = ('value', 'ttype', 'tokens')
+
+class Where(TokenList):
+    __slots__ = ('value', 'ttype', 'tokens')
+
+
+class Case(TokenList):
+
+    __slots__ = ('value', 'ttype', 'tokens')
+
+    def get_cases(self):
+        """Returns a list of 2-tuples (condition, value).
+
+        If an ELSE exists condition is None.
+        """
+        ret = []
+        in_condition = in_value = False
+        for token in self.tokens:
+            if token.match(T.Keyword, 'WHEN'):
+                ret.append(([], []))
+                in_condition = True
+                in_value = False
+            elif token.match(T.Keyword, 'ELSE'):
+                ret.append((None, []))
+                in_condition = False
+                in_value = True
+            elif token.match(T.Keyword, 'THEN'):
+                in_condition = False
+                in_value = True
+            elif token.match(T.Keyword, 'END'):
+                in_condition = False
+                in_value = False
+            if in_condition:
+                ret[-1][0].append(token)
+            elif in_value:
+                ret[-1][1].append(token)
+        return ret
author: Andi Albrecht <albrecht.andi@gmail.com> 2009-04-06 08:24:29 +0200
committer: Andi Albrecht <albrecht.andi@gmail.com> 2009-04-06 08:24:29 +0200
commit: 56d27222b173013afe74a6bbe0da5eebf29ef559 (patch)
tree: 4482d389d807f0e5c0fbb203d0f0906dfafb6f30 /sqlparse
parent: 59ab1c932d8ec3c49ea4aa7b581202bc98c4e4a0 (diff)
download: sqlparse-56d27222b173013afe74a6bbe0da5eebf29ef559.tar.gz