summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndi Albrecht <albrecht.andi@gmail.com>2016-08-13 17:38:21 +0200
committerAndi Albrecht <albrecht.andi@gmail.com>2016-08-13 17:38:21 +0200
commit2893bd1857d685cf892beac3a7429d03cf1a09f1 (patch)
tree1fc1a427841391137820355f33cdaac119c080b6
parentb7a30d04427e4e4cbc66d08b780ffbb23ab44931 (diff)
downloadsqlparse-2893bd1857d685cf892beac3a7429d03cf1a09f1.tar.gz
Parse double dollars (PostgreSQL) as literal strings (fixes #277).
-rw-r--r--CHANGELOG7
-rw-r--r--sqlparse/keywords.py13
-rw-r--r--sqlparse/lexer.py7
-rw-r--r--tests/files/function_psql4.sql12
-rw-r--r--tests/test_parse.py19
-rw-r--r--tests/test_split.py3
6 files changed, 56 insertions, 5 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 13a36ed..a7014c5 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,12 +1,19 @@
Development Version
-------------------
+Notable Changes
+
+* PostgreSQL: Function bodies are parsed as literal strings. Previously
+ sqlparse assumed that all function bodies are parsable psql
+ strings (see issue277).
+
Bug Fixes
* Fix a regression to parse streams again (issue273, reported and
test case by gmccreight).
* Improve Python 2/3 compatibility when using parsestream (issue190,
by phdru).
+* Improve splitting of PostgreSQL functions (issue277).
Release 0.2.0 (Jul 20, 2016)
diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py
index a6ee1d6..eef0d02 100644
--- a/sqlparse/keywords.py
+++ b/sqlparse/keywords.py
@@ -10,13 +10,22 @@ import re
from sqlparse import tokens
-def is_keyword(value):
+def is_keyword(value, remaining):
val = value.upper()
return (KEYWORDS_COMMON.get(val) or
KEYWORDS_ORACLE.get(val) or
KEYWORDS.get(val, tokens.Name)), value
+def parse_literal_string(value, remaining):
+ try:
+ end = remaining[len(value):].index(value)
+ except ValueError:
+ return tokens.Name.Builtin, value
+ literal = remaining[:end + (len(value) * 2)]
+ return tokens.Literal, literal
+
+
SQL_REGEX = {
'root': [
(r'(--|# )\+.*?(\r\n|\r|\n|$)', tokens.Comment.Single.Hint),
@@ -35,7 +44,7 @@ SQL_REGEX = {
(r"`(``|[^`])*`", tokens.Name),
(r"´(´´|[^´])*´", tokens.Name),
- (r'\$([_A-Z]\w*)?\$', tokens.Name.Builtin),
+ (r'\$([_A-Z]\w*)?\$', parse_literal_string),
(r'\?', tokens.Name.Placeholder),
(r'%(\(\w+\))?s', tokens.Name.Placeholder),
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
index e7996b2..1979550 100644
--- a/sqlparse/lexer.py
+++ b/sqlparse/lexer.py
@@ -50,11 +50,14 @@ class Lexer(object):
if not m:
continue
elif isinstance(action, tokens._TokenType):
+ consume_pos = m.end() - pos - 1
yield action, m.group()
elif callable(action):
- yield action(m.group())
+ ttype, value = action(m.group(), text[pos:])
+ consume_pos = len(value) - 1
+ yield ttype, value
- consume(iterable, m.end() - pos - 1)
+ consume(iterable, consume_pos)
break
else:
yield tokens.Error, char
diff --git a/tests/files/function_psql4.sql b/tests/files/function_psql4.sql
new file mode 100644
index 0000000..02900a6
--- /dev/null
+++ b/tests/files/function_psql4.sql
@@ -0,0 +1,12 @@
+CREATE FUNCTION doubledollarinbody(var1 text) RETURNS text
+/* see issue277 */
+LANGUAGE plpgsql
+AS $_$
+DECLARE
+ str text;
+ BEGIN
+ str = $$'foo'$$||var1;
+ execute 'select '||str into str;
+ return str;
+ END
+$_$;
diff --git a/tests/test_parse.py b/tests/test_parse.py
index 2d23425..8dd1150 100644
--- a/tests/test_parse.py
+++ b/tests/test_parse.py
@@ -384,3 +384,22 @@ def test_stmt_tokens_parents():
stmt = sqlparse.parse(s)[0]
for token in stmt.tokens:
assert token.has_ancestor(stmt)
+
+
+@pytest.mark.parametrize('sql, is_literal', [
+ ('$$foo$$', True),
+ ('$_$foo$_$', True),
+ ('$token$ foo $token$', True),
+ # don't parse inner tokens
+ ('$_$ foo $token$bar$token$ baz$_$', True),
+ ('$A$ foo $B$', False) # tokens don't match
+])
+def test_dbldollar_as_literal(sql, is_literal):
+ # see issue 277
+ p = sqlparse.parse(sql)[0]
+ if is_literal:
+ assert len(p.tokens) == 1
+ assert p.tokens[0].ttype == T.Literal
+ else:
+ for token in p.tokens:
+ assert token.ttype != T.Literal
diff --git a/tests/test_split.py b/tests/test_split.py
index af7c9ce..5d846bf 100644
--- a/tests/test_split.py
+++ b/tests/test_split.py
@@ -27,7 +27,8 @@ def test_split_backslash():
@pytest.mark.parametrize('fn', ['function.sql',
'function_psql.sql',
'function_psql2.sql',
- 'function_psql3.sql'])
+ 'function_psql3.sql',
+ 'function_psql4.sql'])
def test_split_create_function(load_file, fn):
sql = load_file(fn)
stmts = sqlparse.parse(sql)