| field | value |
|---|---|
| author | Andi Albrecht <albrecht.andi@gmail.com>, 2015-10-26 19:40:29 +0100 |
| committer | Andi Albrecht <albrecht.andi@gmail.com>, 2015-10-26 19:40:29 +0100 |
| commit | e6a51a0bc3f87e284de74cec838d3ee98c2f9cf5 (patch) |
| tree | 8ac469820a09c31e9e49543ecbbbdeebad53c85e /sqlparse |
| parent | 8bfdaf3cc37ffe48a60c7f4ee5d5e99d0b07e696 (diff) |
| download | sqlparse-e6a51a0bc3f87e284de74cec838d3ee98c2f9cf5.tar.gz |
Use compat module for single Python 2/3 code base.
This change also includes minor fixes and code cleanup.
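
The core of the change is routing all text handling through sqlparse/compat.py instead of calling the Python 2 builtins (`unicode`, `basestring`, `xrange`, `cStringIO`) directly. Below is a minimal sketch of that pattern: the names `u`, `text_type`, `string_types` and `StringIO` and the PY2/PY3 branches match the hunks in this diff, while the exact PY3 definitions of `text_type`/`string_types` are assumptions made for illustration and may differ from the real compat.py.

```python
# Sketch of the compat-module pattern used in this commit (illustrative, not
# the exact contents of sqlparse/compat.py): one module hides the Python 2/3
# differences, and the rest of the code base imports u, text_type,
# string_types and StringIO instead of branching on sys.version_info.
import sys

PY2 = sys.version_info[0] == 2
PY3 = sys.version_info[0] == 3

if PY3:
    text_type = str           # assumed PY3 definition
    string_types = (str,)     # assumed PY3 definition
    from io import StringIO

    def u(s):
        return str(s)

elif PY2:
    text_type = unicode            # noqa: F821 (Python 2 builtin)
    string_types = (basestring,)   # noqa: F821 (Python 2 builtin)
    from StringIO import StringIO  # noqa

    def u(s):
        return unicode(s)          # noqa: F821 (Python 2 builtin)
```

Call sites then read the same on both interpreters, e.g. `return [u(stmt).strip() for stmt in stack.run(sql, encoding)]` in `split()` below.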
Diffstat (limited to 'sqlparse')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | sqlparse/__init__.py | 3 |
| -rw-r--r-- | sqlparse/compat.py | 4 |
| -rw-r--r-- | sqlparse/filters.py | 23 |
| -rw-r--r-- | sqlparse/lexer.py | 29 |
| -rw-r--r-- | sqlparse/sql.py | 23 |
5 files changed, 44 insertions, 38 deletions
diff --git a/sqlparse/__init__.py b/sqlparse/__init__.py
index 77a09f4..e8252d7 100644
--- a/sqlparse/__init__.py
+++ b/sqlparse/__init__.py
@@ -14,6 +14,7 @@
 from sqlparse import engine
 from sqlparse import filters
 from sqlparse import formatter
+from sqlparse.compat import u
 
 # Deprecated in 0.1.5. Will be removed in 0.2.0
 from sqlparse.exceptions import SQLParseError
@@ -67,7 +68,7 @@ def split(sql, encoding=None):
     """
     stack = engine.FilterStack()
     stack.split_statements = True
-    return [unicode(stmt).strip() for stmt in stack.run(sql, encoding)]
+    return [u(stmt).strip() for stmt in stack.run(sql, encoding)]
 
 
 from sqlparse.engine.filter import StatementFilter
diff --git a/sqlparse/compat.py b/sqlparse/compat.py
index 9efae26..6b26384 100644
--- a/sqlparse/compat.py
+++ b/sqlparse/compat.py
@@ -19,7 +19,7 @@ if PY3:
     from io import StringIO
 
     def u(s):
-        return s
+        return str(s)
 
 elif PY2:
     text_type = unicode
@@ -27,7 +27,7 @@ elif PY2:
     from StringIO import StringIO  # flake8: noqa
 
     def u(s):
-        return unicode(s, 'unicode_escape')
+        return unicode(s)
 
 
 # Directly copied from six:
diff --git a/sqlparse/filters.py b/sqlparse/filters.py
index 676344f..eabf863 100644
--- a/sqlparse/filters.py
+++ b/sqlparse/filters.py
@@ -5,6 +5,7 @@ import re
 from os.path import abspath, join
 
 from sqlparse import sql, tokens as T
+from sqlparse.compat import u, text_type
 from sqlparse.engine import FilterStack
 from sqlparse.lexer import tokenize
 from sqlparse.pipeline import Pipeline
@@ -25,7 +26,7 @@ class _CaseFilter:
         if case is None:
             case = 'upper'
         assert case in ['lower', 'upper', 'capitalize']
-        self.convert = getattr(unicode, case)
+        self.convert = getattr(text_type, case)
 
     def process(self, stack, stream):
         for ttype, value in stream:
@@ -52,7 +53,7 @@ class TruncateStringFilter:
 
     def __init__(self, width, char):
         self.width = max(width, 1)
-        self.char = unicode(char)
+        self.char = u(char)
 
     def process(self, stack, stream):
         for ttype, value in stream:
@@ -154,7 +155,7 @@ class IncludeStatement:
                         f.close()
 
                     # There was a problem loading the include file
-                    except IOError, err:
+                    except IOError as err:
                         # Raise the exception to the interpreter
                         if self.raiseexceptions:
                             raise
@@ -171,7 +172,7 @@ class IncludeStatement:
                                                self.raiseexceptions)
 
                     # Max recursion limit reached
-                    except ValueError, err:
+                    except ValueError as err:
                         # Raise the exception to the interpreter
                         if self.raiseexceptions:
                             raise
@@ -300,7 +301,7 @@ class ReindentFilter:
             raise StopIteration
 
     def _get_offset(self, token):
-        raw = ''.join(map(unicode, self._flatten_up_to_token(token)))
+        raw = ''.join(map(text_type, self._flatten_up_to_token(token)))
         line = raw.splitlines()[-1]
         # Now take current offset into account and return relative offset.
         full_offset = len(line) - len(self.char * (self.width * self.indent))
@@ -340,7 +341,7 @@ class ReindentFilter:
             if prev and prev.is_whitespace() and prev not in added:
                 tlist.tokens.pop(tlist.token_index(prev))
                 offset += 1
-            uprev = unicode(prev)
+            uprev = u(prev)
             if (prev and (uprev.endswith('\n') or uprev.endswith('\r'))):
                 nl = tlist.token_next(token)
             else:
@@ -462,7 +463,7 @@ class ReindentFilter:
         self._process(stmt)
         if isinstance(stmt, sql.Statement):
             if self._last_stmt is not None:
-                if unicode(self._last_stmt).endswith('\n'):
+                if u(self._last_stmt).endswith('\n'):
                     nl = '\n'
                 else:
                     nl = '\n\n'
@@ -494,7 +495,7 @@ class RightMarginFilter:
                     and not token.__class__ in self.keep_together):
                 token.tokens = self._process(stack, token, token.tokens)
             else:
-                val = unicode(token)
+                val = u(token)
                 if len(self.line) + len(val) > self.width:
                     match = re.search('^ +', self.line)
                     if match is not None:
@@ -568,7 +569,7 @@ class ColumnsSelect:
 class SerializerUnicode:
 
     def process(self, stack, stmt):
-        raw = unicode(stmt)
+        raw = u(stmt)
         lines = split_unquoted_newlines(raw)
         res = '\n'.join(line.rstrip() for line in lines)
         return res
@@ -578,7 +579,7 @@ def Tokens2Unicode(stream):
     result = ""
 
     for _, value in stream:
-        result += unicode(value)
+        result += u(value)
 
     return result
 
@@ -600,7 +601,7 @@ class OutputFilter:
         else:
             varname = self.varname
 
-        has_nl = len(unicode(stmt).strip().splitlines()) > 1
+        has_nl = len(u(stmt).strip().splitlines()) > 1
         stmt.tokens = self._process(stmt.tokens, varname, has_nl)
         return stmt
 
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
index fd29f5c..2b0688a 100644
--- a/sqlparse/lexer.py
+++ b/sqlparse/lexer.py
@@ -17,7 +17,7 @@ import sys
 
 from sqlparse import tokens
 from sqlparse.keywords import KEYWORDS, KEYWORDS_COMMON
-from cStringIO import StringIO
+from sqlparse.compat import StringIO, string_types, with_metaclass, text_type
 
 
 class include(str):
@@ -81,14 +81,14 @@ class LexerMeta(type):
 
                 try:
                     rex = re.compile(tdef[0], rflags).match
-                except Exception, err:
+                except Exception as err:
                     raise ValueError(("uncompilable regex %r in state"
                                       " %r of %r: %s"
                                       % (tdef[0], state, cls, err)))
 
                 assert type(tdef[1]) is tokens._TokenType or callable(tdef[1]), \
-                       ('token type must be simple type or callable, not %r'
-                        % (tdef[1],))
+                    ('token type must be simple type or callable, not %r'
+                     % (tdef[1],))
 
                 if len(tdef) == 2:
                     new_state = None
@@ -113,7 +113,7 @@ class LexerMeta(type):
                     itokens = []
                     for istate in tdef2:
                         assert istate != state, \
-                               'circular state ref %r' % istate
+                            'circular state ref %r' % istate
                         itokens.extend(cls._process_state(unprocessed,
                                                           processed, istate))
                     processed[new_state] = itokens
@@ -123,7 +123,7 @@ class LexerMeta(type):
                     for state in tdef2:
                         assert (state in unprocessed or
                                 state in ('#pop', '#push')), \
-                               'unknown new state ' + state
+                            'unknown new state ' + state
                     new_state = tdef2
                 else:
                     assert False, 'unknown new state def %r' % tdef2
@@ -134,7 +134,7 @@ class LexerMeta(type):
         cls._all_tokens = {}
         cls._tmpname = 0
         processed = cls._all_tokens[cls.__name__] = {}
-        #tokendefs = tokendefs or cls.tokens[name]
+        # tokendefs = tokendefs or cls.tokens[name]
         for state in cls.tokens.keys():
             cls._process_state(cls.tokens, processed, state)
         return processed
@@ -152,9 +152,7 @@ class LexerMeta(type):
             return type.__call__(cls, *args, **kwds)
 
 
-class Lexer(object):
-
-    __metaclass__ = LexerMeta
+class _Lexer(object):
 
     encoding = 'utf-8'
     stripall = False
@@ -201,7 +199,8 @@ class Lexer(object):
            # cannot be preceded by word character or a right bracket --
            # otherwise it's probably an array index
            (r'(?<![\w\])])(\[[^\]]+\])', tokens.Name),
-           (r'((LEFT\s+|RIGHT\s+|FULL\s+)?(INNER\s+|OUTER\s+|STRAIGHT\s+)?|(CROSS\s+|NATURAL\s+)?)?JOIN\b', tokens.Keyword),
+           (r'((LEFT\s+|RIGHT\s+|FULL\s+)?(INNER\s+|OUTER\s+|STRAIGHT\s+)?'
+            r'|(CROSS\s+|NATURAL\s+)?)?JOIN\b', tokens.Keyword),
            (r'END(\s+IF|\s+LOOP)?\b', tokens.Keyword),
            (r'NOT NULL\b', tokens.Keyword),
            (r'CREATE(\s+OR\s+REPLACE)?\b', tokens.Keyword.DDL),
@@ -258,13 +257,13 @@ class Lexer(object):
         Also preprocess the text, i.e. expand tabs and strip it
         if wanted and applies registered filters.
         """
-        if isinstance(text, basestring):
+        if isinstance(text, string_types):
             if self.stripall:
                 text = text.strip()
             elif self.stripnl:
                 text = text.strip('\n')
 
-            if sys.version_info[0] < 3 and isinstance(text, unicode):
+            if sys.version_info[0] < 3 and isinstance(text, text_type):
                 text = StringIO(text.encode('utf-8'))
                 self.encoding = 'utf-8'
             else:
@@ -350,6 +349,10 @@ class Lexer(object):
                     break
 
 
+class Lexer(with_metaclass(LexerMeta, _Lexer)):
+    pass
+
+
 def tokenize(sql, encoding=None):
     """Tokenize sql.
 
diff --git a/sqlparse/sql.py b/sqlparse/sql.py
index 7325712..97dd24e 100644
--- a/sqlparse/sql.py
+++ b/sqlparse/sql.py
@@ -6,6 +6,7 @@ import re
 import sys
 
 from sqlparse import tokens as T
+from sqlparse.compat import string_types, u
 
 
 class Token(object):
@@ -32,7 +33,7 @@ class Token(object):
         if sys.version_info[0] == 3:
             return self.value
         else:
-            return unicode(self).encode('utf-8')
+            return u(self).encode('utf-8')
 
     def __repr__(self):
         short = self._get_repr_value()
@@ -51,13 +52,13 @@ class Token(object):
         .. deprecated:: 0.1.5
            Use ``unicode(token)`` (for Python 3: ``str(token)``) instead.
         """
-        return unicode(self)
+        return u(self)
 
     def _get_repr_name(self):
         return str(self.ttype).split('.')[-1]
 
     def _get_repr_value(self):
-        raw = unicode(self)
+        raw = u(self)
         if len(raw) > 7:
             raw = raw[:6] + u'...'
         return re.sub('\s+', ' ', raw)
@@ -83,7 +84,7 @@ class Token(object):
             return type_matched
 
         if regex:
-            if isinstance(values, basestring):
+            if isinstance(values, string_types):
                 values = set([values])
 
             if self.ttype is T.Keyword:
@@ -96,7 +97,7 @@ class Token(object):
                     return True
             return False
 
-        if isinstance(values, basestring):
+        if isinstance(values, string_types):
             if self.is_keyword:
                 return values.upper() == self.normalized
             return values == self.value
@@ -172,7 +173,7 @@ class TokenList(Token):
         if sys.version_info[0] == 3:
             return ''.join(x.value for x in self.flatten())
         else:
-            return ''.join(unicode(x) for x in self.flatten())
+            return ''.join(u(x) for x in self.flatten())
 
     def _get_repr_name(self):
         return self.__class__.__name__
@@ -185,9 +186,9 @@ class TokenList(Token):
                 pre = ' +-'
             else:
                 pre = ' | '
-            print '%s%s%d %s \'%s\'' % (indent, pre, idx,
+            print('%s%s%d %s \'%s\'' % (indent, pre, idx,
                   token._get_repr_name(),
-                  token._get_repr_value())
+                  token._get_repr_value()))
             if (token.is_group()
                 and (max_depth is None or depth < max_depth)):
                 token._pprint_tree(max_depth, depth + 1)
@@ -285,7 +286,7 @@ class TokenList(Token):
         if not isinstance(idx, int):
             idx = self.token_index(idx)
 
-        for n in xrange(idx, len(self.tokens)):
+        for n in range(idx, len(self.tokens)):
             token = self.tokens[n]
             if token.match(ttype, value, regex):
                 return token
@@ -349,7 +350,7 @@ class TokenList(Token):
         # Performing `index` manually is much faster when starting in the middle
         # of the list of tokens and expecting to find the token near to the starting
         # index.
-        for i in xrange(start, len(self.tokens)):
+        for i in range(start, len(self.tokens)):
             if self.tokens[i] == token:
                 return i
         return -1
@@ -518,7 +519,7 @@ class Identifier(TokenList):
         next_ = self.token_next(self.token_index(marker), False)
         if next_ is None:
             return None
-        return unicode(next_)
+        return u(next_)
 
     def get_ordering(self):
         """Returns the ordering or ``None`` as uppercase string."""
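
One piece the lexer hunk relies on but this diff does not show is `with_metaclass`; compat.py only notes that it is directly copied from six. Below is a sketch of the six-style helper, under the assumption that sqlparse uses the same trick; the exact body in sqlparse/compat.py may differ in detail.

```python
# Sketch of the six-style with_metaclass helper (assumed, "directly copied
# from six" per compat.py). It builds a throwaway base class whose metaclass
# creates the real class with `meta`, so the same class statement works on
# Python 2 (which would otherwise need __metaclass__) and on Python 3.
def with_metaclass(meta, *bases):
    class metaclass(meta):
        def __new__(cls, name, this_bases, d):
            # Invoked when the real class body is executed; create that class
            # with the intended metaclass and the intended bases.
            return meta(name, bases, d)
    return type.__new__(metaclass, 'temporary_class', (), {})


# Usage as in the lexer hunk above:
# class Lexer(with_metaclass(LexerMeta, _Lexer)):
#     pass
```

Splitting the class body into `_Lexer` and re-declaring `Lexer` as a thin subclass keeps the `LexerMeta` machinery working on both interpreters without the Python 2-only `__metaclass__` attribute.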
