summaryrefslogtreecommitdiff
path: root/sqlparse
diff options
context:
space:
mode:
authorJesús Leganés Combarro "Piranna" <piranna@gmail.com>2012-06-03 13:25:55 +0200
committerJesús Leganés Combarro "Piranna" <piranna@gmail.com>2012-06-03 13:25:55 +0200
commit5771365dee7f434a58f885cccfdcf6696335542b (patch)
tree0c501922b5bc34f0e3e1ef07bc4dd4dc83c138e7 /sqlparse
parent7728323fb24d825a47ff29d85d9a64667de7e47f (diff)
parent49b41027a3cb88b74f9d4ffb1adb34367c7763ce (diff)
downloadsqlparse-5771365dee7f434a58f885cccfdcf6696335542b.tar.gz
Merge branch 'issue_50' into milestone_0.1.5
Diffstat (limited to 'sqlparse')
-rw-r--r--sqlparse/engine/grouping.py80
-rw-r--r--sqlparse/filters.py257
-rw-r--r--sqlparse/formatter.py18
3 files changed, 269 insertions, 86 deletions
diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index 1487c24..b82a317 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -185,9 +185,11 @@ def group_identifier(tlist):
def group_identifier_list(tlist):
- [group_identifier_list(sgroup) for sgroup in tlist.get_sublists()
- if not isinstance(sgroup, sql.IdentifierList)]
- idx = 0
+ # First group the `tlist` sublists
+ for sgroup in tlist.get_sublists():
+ if not isinstance(sgroup, sql.IdentifierList):
+ group_identifier_list(sgroup)
+
# Allowed list items
fend1_funcs = [lambda t: isinstance(t, (sql.Identifier, sql.Function,
sql.Case)),
@@ -202,36 +204,64 @@ def group_identifier_list(tlist):
lambda t: isinstance(t, sql.Comparison),
lambda t: isinstance(t, sql.Comment),
]
- tcomma = tlist.token_next_match(idx, T.Punctuation, ',')
+
+ def group_identifierlist(start, after):
+ """
+ Create and group the identifiers list
+ """
+ tokens = tlist.tokens_between(start, after)
+ return tlist.group_tokens(sql.IdentifierList, tokens)
+
+ # Search for the first identifier list
start = None
- while tcomma is not None:
+ tcomma = tlist.token_next_match(0, T.Punctuation, ',')
+
+ while tcomma:
before = tlist.token_prev(tcomma)
after = tlist.token_next(tcomma)
- # Check if the tokens around tcomma belong to a list
+
+ # Check if the tokens around tcomma belong to an identifier list
bpassed = apassed = False
for func in fend1_funcs:
- if before is not None and func(before):
+ if before and func(before):
bpassed = True
- if after is not None and func(after):
+ if after and func(after):
apassed = True
- if not bpassed or not apassed:
- # Something's wrong here, skip ahead to next ","
- start = None
- tcomma = tlist.token_next_match(tlist.token_index(tcomma) + 1,
- T.Punctuation, ',')
- else:
- if start is None:
+
+ # Both tokens around tcomma belong to a list
+ if bpassed and apassed:
+ # Set the start of the identifier list if not defined before
+ if start == None:
start = before
- next_ = tlist.token_next(after)
- if next_ is None or not next_.match(T.Punctuation, ','):
- # Reached the end of the list
- tokens = tlist.tokens_between(start, after)
- group = tlist.group_tokens(sql.IdentifierList, tokens)
- start = None
- tcomma = tlist.token_next_match(tlist.token_index(group) + 1,
- T.Punctuation, ',')
- else:
- tcomma = next_
+
+ # Check the next token
+ next_ = tlist.token_next(after)
+ while next_:
+ if next_.value != ',':
+ passed = False
+ for func in fend1_funcs:
+ if func(next_):
+ passed = True
+ break
+
+ if not passed:
+ break
+
+ after = next_
+ next_ = tlist.token_next(next_)
+
+ # Reached the end of the list
+ # Create and group the identifiers list
+ tcomma = group_identifierlist(start, after)
+
+ # Skip ahead to next identifier list
+ start = None
+ tcomma = tlist.token_next_match(tlist.token_index(tcomma) + 1,
+ T.Punctuation, ',')
+
+ # There's an open identifier list, create and group the identifiers list
+ if start:
+ group_identifierlist(start, after)
def group_parenthesis(tlist):
diff --git a/sqlparse/filters.py b/sqlparse/filters.py
index fd1bb2b..fcd1c8d 100644
--- a/sqlparse/filters.py
+++ b/sqlparse/filters.py
@@ -262,6 +262,9 @@ class StripWhitespaceFilter:
class ReindentFilter:
+ """
+ Filter that returns a correctly indented version of the SQL string
+ """
def __init__(self, width=2, char=' ', line_width=None):
self.width = width
@@ -273,159 +276,284 @@ class ReindentFilter:
self._last_stmt = None
def _get_offset(self, token):
+ """
+ Return the offset where the token should be indented
+ """
+ # Get last processed line (the current one) up to the next token
all_ = list(self._curr_stmt.flatten())
idx = all_.index(token)
raw = ''.join(unicode(x) for x in all_[:idx + 1])
line = raw.splitlines()[-1]
+
# Now take current offset into account and return relative offset.
- full_offset = len(line) - len(self.char * (self.width * self.indent))
+ full_offset = len(line) - len(self.char * self.width * self.indent)
return full_offset - self.offset
def nl(self):
+ """
+ Return an indented new line token
+ """
# TODO: newline character should be configurable
- ws = '\n' + (self.char * ((self.indent * self.width) + self.offset))
+ ws = '\n' + self.char * (self.indent * self.width + self.offset)
return sql.Token(T.Whitespace, ws)
def _split_kwds(self, tlist):
+ """
+ Split `tlist` by its keywords
+ """
split_words = ('FROM', 'JOIN$', 'AND', 'OR',
'GROUP', 'ORDER', 'UNION', 'VALUES',
'SET', 'BETWEEN')
def _next_token(i):
- t = tlist.token_next_match(i, T.Keyword, split_words,
- regex=True)
+ """
+ Get next keyword where to split
+ """
+ # Search for the first keyword token
+ t = tlist.token_next_match(i, T.Keyword, split_words, regex=True)
+
+ # Treat the BETWEEN ... AND ... construct as an unsplittable statement
if t and t.value.upper() == 'BETWEEN':
t = _next_token(tlist.token_index(t) + 1)
if t and t.value.upper() == 'AND':
t = _next_token(tlist.token_index(t) + 1)
+
+ # Return the token
return t
- idx = 0
- token = _next_token(idx)
+ # Get first token
+ token = _next_token(0)
while token:
- prev = tlist.token_prev(tlist.token_index(token), False)
offset = 1
- if prev and prev.is_whitespace():
- tlist.tokens.pop(tlist.token_index(prev))
- offset += 1
- if (prev
- and isinstance(prev, sql.Comment)
- and (str(prev).endswith('\n')
- or str(prev).endswith('\r'))):
- nl = tlist.token_next(token)
- else:
+ nl = None
+
+ # Check if we have any token before
+ prev = tlist.token_prev(tlist.token_index(token), False)
+ if prev:
+ # Previous token was a whitespace, increase offset
+ if prev.is_whitespace():
+ tlist.tokens.pop(tlist.token_index(prev))
+ offset += 1
+
+ # Previous token was a comment, add new line if necessary
+ if isinstance(prev, sql.Comment):
+ prev = str(prev)
+ if prev.endswith('\n') or prev.endswith('\r'):
+ nl = tlist.token_next(token)
+
+ # New line was not added, set it now
+ if nl == None:
nl = self.nl()
tlist.insert_before(token, nl)
+
+ # Get the next keyword token
token = _next_token(tlist.token_index(nl) + offset)
def _split_statements(self, tlist):
- idx = 0
- token = tlist.token_next_by_type(idx, (T.Keyword.DDL, T.Keyword.DML))
+ """
+ Split tlist on statements
+ """
+ # Search for the first statement
+ token = tlist.token_next_by_type(0, (T.Keyword.DDL, T.Keyword.DML))
+
while token:
prev = tlist.token_prev(tlist.token_index(token), False)
- if prev and prev.is_whitespace():
- tlist.tokens.pop(tlist.token_index(prev))
- # only break if it's not the first token
if prev:
+ if prev.is_whitespace():
+ tlist.tokens.pop(tlist.token_index(prev))
+
+ # only break if it's not the first token
nl = self.nl()
tlist.insert_before(token, nl)
+
+ # Go to the next statement
token = tlist.token_next_by_type(tlist.token_index(token) + 1,
(T.Keyword.DDL, T.Keyword.DML))
def _process(self, tlist):
+ """
+ Proxy to other methods based on `tlist` class
+ """
func_name = '_process_%s' % tlist.__class__.__name__.lower()
func = getattr(self, func_name, self._process_default)
func(tlist)
def _process_where(self, tlist):
+ """
+ Process WHERE statement
+ """
+ # Look for the next WHERE keyword and add a new line
token = tlist.token_next_match(0, T.Keyword, 'WHERE')
tlist.insert_before(token, self.nl())
+
+ # Indent and process the (indented) WHERE statement as usual
self.indent += 1
self._process_default(tlist)
self.indent -= 1
def _process_parenthesis(self, tlist):
+ """
+ Process parenthesis
+ """
+ # Omit the 'open parenthesis' token
+ # and check if the next one tells us we should indent
first = tlist.token_next(0)
- indented = False
- if first and first.ttype in (T.Keyword.DML, T.Keyword.DDL):
+ indented = first and first.ttype in (T.Keyword.DML, T.Keyword.DDL)
+
+ # If we should indent, increase indent and add a new line
+ if indented:
self.indent += 1
tlist.tokens.insert(0, self.nl())
- indented = True
- num_offset = self._get_offset(tlist.token_next_match(0,
- T.Punctuation, '('))
+
+ # Get indentation offset
+ token = tlist.token_next_match(0, T.Punctuation, '(')
+ num_offset = self._get_offset(token)
+
+ # Increase indentation offset and process the statement as usual
self.offset += num_offset
self._process_default(tlist, stmts=not indented)
+ self.offset -= num_offset
+
+ # If we indented, decrease indent to previous state
if indented:
self.indent -= 1
- self.offset -= num_offset
def _process_identifierlist(self, tlist):
- identifiers = list(tlist.get_identifiers())
- if len(identifiers) > 1 and not tlist.within(sql.Function):
- first = list(identifiers[0].flatten())[0]
- num_offset = self._get_offset(first) - len(first.value)
- self.offset += num_offset
- for token in identifiers[1:]:
- tlist.insert_before(token, self.nl())
- for token in tlist.tokens:
- if isinstance(token, sql.Comment):
- tlist.insert_after(token, self.nl())
- self.offset -= num_offset
+ """
+ Process an identifier list
+
+ If there is more than one identifier, put each one on its own line
+ """
+ # Split the identifier list if we are not in a function
+ if not tlist.within(sql.Function):
+ # Get identifiers from the tlist
+ identifiers = list(tlist.get_identifiers())
+
+ # Split the identifier list if we have more than one identifier
+ if len(identifiers) > 1:
+ # Get first token
+ first = list(identifiers[0].flatten())[0]
+
+ # Increase offset by the size of the first token
+ num_offset = self._get_offset(first) - len(first.value)
+
+ # Increase offset and insert new lines
+ self.offset += num_offset
+ offset = 0
+
+ # Insert a new line between the tokens
+ ignore = False
+ for token in identifiers[1:]:
+ if not ignore:
+ tlist.insert_before(token, self.nl())
+ ignore = token.ttype
+
+ # Check identifiers offset
+ if token.ttype:
+ l = len(token.value)
+ if offset < l:
+ offset = l
+
+ # Insert another new line after comment tokens
+ for token in tlist.tokens:
+ if isinstance(token, sql.Comment):
+ tlist.insert_after(token, self.nl())
+
+ # Update identifiers offset
+ if offset:
+ offset += 1
+
+ ignore = False
+ for token in identifiers:
+ if not ignore and not token.ttype:
+ tlist.insert_before(token, sql.Token(T.Whitespace,
+ " " * offset))
+ ignore = token.ttype
+
+ # Decrease offset by the size of the first token
+ self.offset -= num_offset
+
+ # Process the identifier list as usual
self._process_default(tlist)
def _process_case(self, tlist):
- is_first = True
- num_offset = None
+ """
+ Process a CASE statement
+ """
+ # Increase the offset by the size of the CASE keyword
case = tlist.tokens[0]
outer_offset = self._get_offset(case) - len(case.value)
self.offset += outer_offset
- for cond, value in tlist.get_cases():
- if is_first:
- tcond = list(cond[0].flatten())[0]
- is_first = False
- num_offset = self._get_offset(tcond) - len(tcond.value)
- self.offset += num_offset
- continue
+
+ # Get the case conditions
+ cases = tlist.get_cases()
+
+ # Get the condition selector and increase the offset by its size
+ cond, value = cases[0]
+ tcond = list(cond[0].flatten())[0]
+ num_offset = self._get_offset(tcond) - len(tcond.value)
+ self.offset += num_offset
+
+ # Insert a new line before each condition
+ for cond, value in cases[1:]:
if cond is None:
token = value[0]
else:
token = cond[0]
+
tlist.insert_before(token, self.nl())
+
# Line breaks on group level are done. Now let's add an offset of
# 5 (=length of "when", "then", "else") and process subgroups.
self.offset += 5
self._process_default(tlist)
self.offset -= 5
- if num_offset is not None:
- self.offset -= num_offset
+
+ # Decrease the offset by the size of the condition selector
+ self.offset -= num_offset
+
+ # Insert a new line before the case END keyword
end = tlist.token_next_match(0, T.Keyword, 'END')
tlist.insert_before(end, self.nl())
+
+ # Decrease the offset by the size of the CASE keyword
self.offset -= outer_offset
def _process_default(self, tlist, stmts=True, kwds=True):
+ """
+ Generic processing of `tlist` statements
+ """
if stmts:
self._split_statements(tlist)
if kwds:
self._split_kwds(tlist)
- [self._process(sgroup) for sgroup in tlist.get_sublists()]
+
+ for sgroup in tlist.get_sublists():
+ self._process(sgroup)
def process(self, stack, stmt):
warn("Deprecated, use callable objects. This will be removed at 0.2.0",
DeprecationWarning)
+ # If we are processing a statement, set it as the current one
if isinstance(stmt, sql.Statement):
self._curr_stmt = stmt
+
+ # Process the statement
self._process(stmt)
+
+ # If we are processing a statement, check if we should add a new line
if isinstance(stmt, sql.Statement):
- if self._last_stmt is not None:
+ if self._last_stmt:
if unicode(self._last_stmt).endswith('\n'):
nl = '\n'
else:
nl = '\n\n'
- stmt.tokens.insert(0,
- sql.Token(T.Whitespace, nl))
- if self._last_stmt != stmt:
- self._last_stmt = stmt
+
+ stmt.tokens.insert(0, sql.Token(T.Whitespace, nl))
+
+ # Set the statement as the current one
+ self._last_stmt = stmt
# FIXME: Doesn't work ;)
@@ -491,17 +619,21 @@ class ColumnsSelect:
mode = 1
# We have detected a SELECT statement
- elif mode == 1:
- if value == 'FROM':
+ elif mode in (1, 3):
+ if value in ('FROM', 'WHERE', 'GROUP'):
if oldValue:
yield oldValue
+ oldValue = ""
- mode = 3 # Columns have been checked
+ break # Columns have been checked
elif value == 'AS':
oldValue = ""
mode = 2
+ elif token_type in Whitespace:
+ mode = 3
+
elif (token_type == Punctuation
and value == ',' and not parenthesis):
if oldValue:
@@ -514,7 +646,11 @@ class ColumnsSelect:
elif value == ')':
parenthesis -= 1
- oldValue += value
+ if mode == 3:
+ oldValue = value
+ mode = 1
+ else:
+ oldValue += value
# We are processing an AS keyword
elif mode == 2:
@@ -523,6 +659,9 @@ class ColumnsSelect:
yield value
mode = 1
+ if oldValue:
+ yield oldValue
+
# ---------------------------
# postprocess
diff --git a/sqlparse/formatter.py b/sqlparse/formatter.py
index 5be6652..f182850 100644
--- a/sqlparse/formatter.py
+++ b/sqlparse/formatter.py
@@ -11,34 +11,43 @@ from sqlparse import filters
def validate_options(options):
"""Validates options."""
+
+ # keyword_case
kwcase = options.get('keyword_case', None)
if kwcase not in [None, 'upper', 'lower', 'capitalize']:
raise SQLParseError('Invalid value for keyword_case: %r' % kwcase)
+ # identifier_case
idcase = options.get('identifier_case', None)
if idcase not in [None, 'upper', 'lower', 'capitalize']:
raise SQLParseError('Invalid value for identifier_case: %r' % idcase)
+ # output_format
ofrmt = options.get('output_format', None)
if ofrmt not in [None, 'sql', 'python', 'php']:
raise SQLParseError('Unknown output format: %r' % ofrmt)
+ # strip_comments
strip_comments = options.get('strip_comments', False)
if strip_comments not in [True, False]:
raise SQLParseError('Invalid value for strip_comments: %r'
% strip_comments)
+ # strip_whitespace
strip_ws = options.get('strip_whitespace', False)
if strip_ws not in [True, False]:
raise SQLParseError('Invalid value for strip_whitespace: %r'
% strip_ws)
+ # reindent
reindent = options.get('reindent', False)
if reindent not in [True, False]:
raise SQLParseError('Invalid value for reindent: %r'
% reindent)
elif reindent:
options['strip_whitespace'] = True
+
+ # indent_tabs
indent_tabs = options.get('indent_tabs', False)
if indent_tabs not in [True, False]:
raise SQLParseError('Invalid value for indent_tabs: %r' % indent_tabs)
@@ -46,15 +55,20 @@ def validate_options(options):
options['indent_char'] = '\t'
else:
options['indent_char'] = ' '
+
+ # indent_width
indent_width = options.get('indent_width', 2)
try:
indent_width = int(indent_width)
except (TypeError, ValueError):
raise SQLParseError('indent_width requires an integer')
+
if indent_width < 1:
raise SQLParseError('indent_width requires an positive integer')
+
options['indent_width'] = indent_width
+ # right_margin
right_margin = options.get('right_margin', None)
if right_margin is not None:
try:
@@ -65,6 +79,7 @@ def validate_options(options):
raise SQLParseError('right_margin requires an integer > 10')
options['right_margin'] = right_margin
+ # return the processed options
return options
@@ -89,8 +104,7 @@ def build_filter_stack(stack, options):
stack.enable_grouping()
stack.stmtprocess.append(filters.StripCommentsFilter())
- if (options.get('strip_whitespace', False)
- or options.get('reindent', False)):
+ if options.get('strip_whitespace', False):
stack.enable_grouping()
stack.stmtprocess.append(filters.StripWhitespaceFilter())