# -*- coding: utf-8 -*-

import re
import types

from sqlparse.engine.filter import TokenFilter
from sqlparse import tokens as T


class _Base(object):
    __slots__ = ()

    def __unicode__(self):
        return u'Unknown _Base object'

    def __str__(self):
        return unicode(self).encode('utf-8')

    def __repr__(self):
        raw = unicode(self)
        if len(raw) > 7:
            short = raw[:6] + u'...'
        else:
            short = raw
        short = re.sub(r'\s+', ' ', short)
        return '<%s \'%s\' at 0x%07x>' % (self._get_repr_name(),
                                          short, id(self))

    def _get_repr_name(self):
        return self.__class__.__name__

    def to_unicode(self):
        return unicode(self)

    def to_str(self):
        return str(self)


class Token(_Base):
    __slots__ = ('value', 'ttype')

    def __init__(self, ttype, value):
        self.value = value
        self.ttype = ttype

    def __unicode__(self):
        return self.value

    def _get_repr_name(self):
        return str(self.ttype).split('.')[-1]

    def match(self, ttype, values):
        if self.ttype is not ttype:
            return False
        if isinstance(values, basestring):
            values = [values]
        if self.ttype is T.Keyword:
            return self.value.upper() in [v.upper() for v in values]
        else:
            return self.value in values

    def is_group(self):
        return False

    def is_whitespace(self):
        return self.ttype is T.Whitespace


class _Group(Token):
    __slots__ = ('_tokens',)

    def __init__(self, tokens=None):
        super(_Group, self).__init__(None, None)
        if tokens is None:
            tokens = []
        self._tokens = tokens

    def _set_tokens(self, tokens):
        self._tokens = tokens

    def _get_tokens(self):
        # The filters assign generators; materialize them on first access.
        if type(self._tokens) is not types.TupleType:
            self._tokens = tuple(self._tokens)
        return self._tokens

    tokens = property(fget=_get_tokens, fset=_set_tokens)

    def _get_repr_name(self):
        return self.__class__.__name__

    def _pprint_tree(self, depth=0):
        """Pretty-print the object tree."""
        indent = ' ' * (depth * 2)
        for token in self.tokens:
            print '%s%r' % (indent, token)
            if token.is_group():
                token._pprint_tree(depth + 1)

    def __unicode__(self):
        return u''.join(unicode(t) for t in self.tokens)

    @property
    def subgroups(self):
        #return [x for x in self.tokens if isinstance(x, _Group)]
        for item in self.tokens:
            if item.is_group():
                yield item

    def is_group(self):
        return True


class Statement(_Group):
    __slots__ = ()


class Parenthesis(_Group):
    __slots__ = ()


class Where(_Group):
    __slots__ = ()


class CommentMulti(_Group):
    __slots__ = ()


class Identifier(_Group):
    __slots__ = ()


class TypeCast(_Group):
    __slots__ = ()

    @property
    def casted_object(self):
        return self.tokens[0]

    @property
    def casted_type(self):
        return self.tokens[-1]


class Alias(_Group):
    __slots__ = ()

    @property
    def aliased_object(self):
        return self.tokens[0]

    @property
    def alias(self):
        return self.tokens[-1]
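
# --- Illustrative sketch, not part of the original module: builds a tiny
# token tree by hand and pretty-prints it.  The helper name `_demo_tree`
# is an assumption added for illustration; it only uses the node classes
# and token types defined/imported above.
def _demo_tree():
    par = Parenthesis([Token(T.Punctuation, u'('),
                       Token(T.Name, u'id'),
                       Token(T.Punctuation, u')')])
    stmt = Statement([Token(T.Keyword.DML, u'SELECT'),
                      Token(T.Whitespace, u' '),
                      par])
    # Prints one token per line; nested groups are indented two spaces.
    stmt._pprint_tree()
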

# - Filter


class StatementFilter(TokenFilter):

    def __init__(self):
        self._in_declare = False
        self._in_dbldollar = False
        self._is_create = False

    def _reset(self):
        self._in_declare = False
        self._in_dbldollar = False
        self._is_create = False

    def _change_splitlevel(self, ttype, value):
        # PostgreSQL dollar-quoted strings ($$ ... $$)
        if (ttype == T.Name.Builtin
            and value.startswith('$') and value.endswith('$')):
            if self._in_dbldollar:
                self._in_dbldollar = False
                return -1
            else:
                self._in_dbldollar = True
                return 1
        elif self._in_dbldollar:
            return 0

        # ANSI
        if ttype is not T.Keyword:
            return 0

        unified = value.upper()

        if unified == 'DECLARE':
            self._in_declare = True
            return 1

        if unified == 'BEGIN':
            # BEGIN doesn't change the nesting level, whether or not a
            # DECLARE block is open.
            return 0

        if unified == 'END':
            return -1

        if ttype is T.Keyword.DDL and unified.startswith('CREATE'):
            self._is_create = True

        if unified in ('IF', 'FOR') and self._is_create:
            return 1

        # Default
        return 0

    def process(self, stack, stream):
        splitlevel = 0
        stmt = None
        consume_ws = False
        stmt_tokens = []
        for ttype, value in stream:
            # Before appending the token
            if (consume_ws and ttype is not T.Whitespace
                and ttype is not T.Comment.Single):
                consume_ws = False
                stmt.tokens = stmt_tokens
                yield stmt
                self._reset()
                stmt = None
                splitlevel = 0
            if stmt is None:
                stmt = Statement()
                stmt_tokens = []
            splitlevel += self._change_splitlevel(ttype, value)
            # Append the token
            stmt_tokens.append(Token(ttype, value))
            # After appending the token
            if (not splitlevel and ttype is T.Punctuation
                and value == ';'):
                consume_ws = True
        if stmt is not None:
            stmt.tokens = stmt_tokens
            yield stmt


class GroupFilter(object):

    def process(self, stream):
        pass


class GroupParenthesis(GroupFilter):
    """Group parenthesis groups."""

    def _finish_group(self, group):
        start = group[0]
        end = group[-1]
        tokens = list(self._process(group[1:-1]))
        return [start] + tokens + [end]

    def _process(self, stream):
        group = None
        depth = 0
        for token in stream:
            if token.is_group():
                token.tokens = self._process(token.tokens)
            if token.match(T.Punctuation, '('):
                if depth == 0:
                    group = []
                depth += 1
            if group is not None:
                group.append(token)
            if token.match(T.Punctuation, ')'):
                depth -= 1
                if depth == 0:
                    yield Parenthesis(self._finish_group(group))
                    group = None
                    continue
            if group is None:
                yield token

    def process(self, group):
        if not isinstance(group, Parenthesis):
            group.tokens = self._process(group.tokens)
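
# --- Illustrative sketch on a hand-built flat stream; the helper name
# `_demo_parenthesis` is not part of the original module.  Shows how
# GroupParenthesis folds a '(' ... ')' run into a single Parenthesis node.
def _demo_parenthesis():
    stmt = Statement([Token(T.Name, u'f'),
                      Token(T.Punctuation, u'('),
                      Token(T.Number, u'1'),
                      Token(T.Punctuation, u')')])
    GroupParenthesis().process(stmt)
    # stmt.tokens is now (Name 'f', Parenthesis), where the Parenthesis
    # group holds the '(', '1' and ')' tokens.
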

class GroupWhere(GroupFilter):

    def _process(self, stream):
        group = None
        depth = 0
        for token in stream:
            if token.is_group():
                token.tokens = self._process(token.tokens)
            if token.match(T.Keyword, 'WHERE'):
                if depth == 0:
                    group = []
                depth += 1
            # Process conditions here? E.g. "A =|!=|in|is|... B"...
            elif (token.ttype is T.Keyword
                  and token.value.upper() in ('ORDER', 'GROUP',
                                              'LIMIT', 'UNION')):
                depth -= 1
                if depth == 0:
                    yield Where(group)
                    group = None
                if depth < 0:
                    depth = 0
            if group is not None:
                group.append(token)
            else:
                yield token
        if group is not None:
            yield Where(group)

    def process(self, group):
        if not isinstance(group, Where):
            group.tokens = self._process(group.tokens)


class GroupMultiComments(GroupFilter):
    """Groups Comment.Multiline and adds trailing whitespace up to first lb."""

    def _process(self, stream):
        grp = None
        consume_ws = False
        for token in stream:
            if token.is_group():
                token.tokens = self._process(token.tokens)
            if token.ttype is T.Comment.Multiline:
                if grp is None:
                    grp = []
                    consume_ws = True
                grp.append(token)
            elif consume_ws and token.ttype is not T.Whitespace:
                yield CommentMulti(grp)
                grp = None
                consume_ws = False
                yield token
            elif consume_ws:
                # Attach whitespace up to (and including) the first
                # line break to the comment group.
                lines = token.value.splitlines(True)
                grp.append(Token(T.Whitespace, lines[0]))
                if lines[0].endswith('\n'):
                    yield CommentMulti(grp)
                    grp = None
                    consume_ws = False
                if lines[1:]:
                    yield Token(T.Whitespace, ''.join(lines[1:]))
            else:
                yield token
        if grp is not None:
            # Don't drop a comment that ends the stream.
            yield CommentMulti(grp)

    def process(self, group):
        if not isinstance(group, CommentMulti):
            group.tokens = self._process(group.tokens)


class GroupIdentifier(GroupFilter):

    def _process(self, stream):
        buff = []
        expect_dot = False
        for token in stream:
            if token.is_group():
                token.tokens = self._process(token.tokens)
            if ((token.ttype is T.String.Symbol or token.ttype is T.Name)
                and not expect_dot):
                buff.append(token)
                expect_dot = True
            elif expect_dot and token.match(T.Punctuation, '.'):
                buff.append(token)
                expect_dot = False
            else:
                if not expect_dot:
                    # something's wrong, it ends with a dot...
                    while buff:
                        yield buff.pop(0)
                elif buff:
                    idt = Identifier()
                    idt.tokens = buff
                    yield idt
                    buff = []
                yield token
        if buff and expect_dot:
            idt = Identifier()
            idt.tokens = buff
            buff = []
            yield idt
        while buff:
            yield buff.pop(0)

    def process(self, group):
        if not isinstance(group, Identifier):
            group.tokens = self._process(group.tokens)


class AddTypeCastFilter(GroupFilter):

    def _process(self, stream):
        buff = []
        expect_colon = False
        has_colons = False
        for token in stream:
            if token.is_group():
                token.tokens = self._process(token.tokens)
            if ((isinstance(token, Parenthesis)
                 or isinstance(token, Identifier)) and not expect_colon):
                buff.append(token)
                expect_colon = True
            elif expect_colon and token.match(T.Punctuation, ':'):
                buff.append(token)
                has_colons = True
            elif (expect_colon and (token.ttype in T.Name
                                    or isinstance(token, Identifier))):
                if not has_colons:
                    while buff:
                        yield buff.pop(0)
                    yield token
                else:
                    buff.append(token)
                    grp = TypeCast()
                    grp.tokens = buff
                    buff = []
                    yield grp
                expect_colon = has_colons = False
            else:
                while buff:
                    yield buff.pop(0)
                yield token
                expect_colon = has_colons = False
        while buff:
            yield buff.pop(0)

    def process(self, group):
        if not isinstance(group, TypeCast):
            group.tokens = self._process(group.tokens)
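
# --- Illustrative sketch on a hand-built stream; the helper name
# `_demo_typecast` is not part of the original module.  A PostgreSQL
# style cast 'foo::int4' becomes a single TypeCast node whose
# casted_object/casted_type properties expose both ends.
def _demo_typecast():
    stmt = Statement([Identifier([Token(T.Name, u'foo')]),
                      Token(T.Punctuation, u':'),
                      Token(T.Punctuation, u':'),
                      Token(T.Name, u'int4')])
    AddTypeCastFilter().process(stmt)
    # stmt.tokens is now a single TypeCast: its casted_object is the
    # Identifier 'foo', its casted_type the Name token 'int4'.
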

class AddAliasFilter(GroupFilter):

    def _process(self, stream):
        buff = []
        search_alias = False
        lazy = False
        for token in stream:
            if token.is_group():
                token.tokens = self._process(token.tokens)
            if search_alias and (isinstance(token, Identifier)
                                 or token.ttype in (T.Name, T.String.Symbol)
                                 or (lazy and not token.is_whitespace())):
                buff.append(token)
                search_alias = lazy = False
                grp = Alias()
                grp.tokens = buff
                buff = []
                yield grp
            elif (isinstance(token, (Identifier, TypeCast))
                  or token.ttype in (T.Name, T.String.Symbol)):
                buff.append(token)
                search_alias = True
            elif search_alias and (token.is_whitespace()
                                   or token.match(T.Keyword, 'as')):
                buff.append(token)
                if token.match(T.Keyword, 'as'):
                    lazy = True
            else:
                while buff:
                    yield buff.pop(0)
                yield token
                search_alias = lazy = False
        while buff:
            yield buff.pop(0)

    def process(self, group):
        if not isinstance(group, Alias):
            group.tokens = self._process(group.tokens)


GROUP_FILTER = (GroupParenthesis(),
                GroupMultiComments(),
                GroupWhere(),
                GroupIdentifier(),
                AddTypeCastFilter(),
                AddAliasFilter(),
                )


def group_tokens(group):

    def _materialize(g):
        if type(g.tokens) is not types.TupleType:
            g.tokens = tuple(g.tokens)
        for sg in g.subgroups:
            _materialize(sg)

    for groupfilter in GROUP_FILTER:
        groupfilter.process(group)
    # _materialize(group)
    # group.tokens = tuple(group.tokens)
    # for subgroup in group.subgroups:
    #     group_tokens(subgroup)
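
# --- Illustrative end-to-end sketch.  Assumptions: sqlparse 0.1.x ships a
# pygments-style lexer whose get_tokens() yields (ttype, value) pairs, and
# StatementFilter.process() ignores its `stack` argument; neither is
# guaranteed API here, this only sketches how the pieces would fit.
if __name__ == '__main__':
    from sqlparse import lexer
    stream = lexer.Lexer().get_tokens(u'select foo::int4 as f from bar;')
    for statement in StatementFilter().process(None, stream):
        group_tokens(statement)
        statement._pprint_tree()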