Parse double dollars (PostgreSQL) as literal strings (fixes #277).

author: Andi Albrecht <albrecht.andi@gmail.com> 2016-08-13 17:38:21 +0200
committer: Andi Albrecht <albrecht.andi@gmail.com> 2016-08-13 17:38:21 +0200
commit: 2893bd1857d685cf892beac3a7429d03cf1a09f1 (patch)
tree: 1fc1a427841391137820355f33cdaac119c080b6
parent: b7a30d04427e4e4cbc66d08b780ffbb23ab44931 (diff)
download: sqlparse-2893bd1857d685cf892beac3a7429d03cf1a09f1.tar.gz
6 files changed, 56 insertions, 5 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 13a36ed..a7014c5 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,12 +1,19 @@
 Development Version
 -------------------
 
+Notable Changes
+
+* PostgreSQL: Function bodies are parsed as literal strings. Previously
+  sqlparse assumed that all function bodies are parsable psql
+  strings (see issue277).
+
 Bug Fixes
 
 * Fix a regression to parse streams again (issue273, reported and
   test case by gmccreight).
 * Improve Python 2/3 compatibility when using parsestream (isseu190,
   by phdru).
+* Improve splitting of PostgreSQL functions (issue277).
 
 
 Release 0.2.0 (Jul 20, 2016)
diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py
index a6ee1d6..eef0d02 100644
--- a/sqlparse/keywords.py
+++ b/sqlparse/keywords.py
@@ -10,13 +10,22 @@ import re
 from sqlparse import tokens
 
 
-def is_keyword(value):
+def is_keyword(value, remaining):
     val = value.upper()
     return (KEYWORDS_COMMON.get(val) or
             KEYWORDS_ORACLE.get(val) or
             KEYWORDS.get(val, tokens.Name)), value
 
 
+def parse_literal_string(value, remaining):
+    try:
+        end = remaining[len(value):].index(value)
+    except ValueError:
+        return tokens.Name.Builtin, value
+    literal = remaining[:end + (len(value) * 2)]
+    return tokens.Literal, literal
+
+
 SQL_REGEX = {
     'root': [
         (r'(--|# )\+.*?(\r\n|\r|\n|$)', tokens.Comment.Single.Hint),
@@ -35,7 +44,7 @@ SQL_REGEX = {
 
         (r"`(``|[^`])*`", tokens.Name),
         (r"´(´´|[^´])*´", tokens.Name),
-        (r'\$([_A-Z]\w*)?\$', tokens.Name.Builtin),
+        (r'\$([_A-Z]\w*)?\$', parse_literal_string),
 
         (r'\?', tokens.Name.Placeholder),
         (r'%(\(\w+\))?s', tokens.Name.Placeholder),
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
index e7996b2..1979550 100644
--- a/sqlparse/lexer.py
+++ b/sqlparse/lexer.py
@@ -50,11 +50,14 @@ class Lexer(object):
                 if not m:
                     continue
                 elif isinstance(action, tokens._TokenType):
+                    consume_pos = m.end() - pos - 1
                     yield action, m.group()
                 elif callable(action):
-                    yield action(m.group())
+                    ttype, value = action(m.group(), text[pos:])
+                    consume_pos = len(value) - 1
+                    yield ttype, value
 
-                consume(iterable, m.end() - pos - 1)
+                consume(iterable, consume_pos)
                 break
             else:
                 yield tokens.Error, char
diff --git a/tests/files/function_psql4.sql b/tests/files/function_psql4.sql
new file mode 100644
index 0000000..02900a6
--- /dev/null
+++ b/tests/files/function_psql4.sql
@@ -0,0 +1,12 @@
+CREATE FUNCTION doubledollarinbody(var1 text) RETURNS text
+/* see issue277 */
+LANGUAGE plpgsql
+AS $_$
+DECLARE
+  str text;
+  BEGIN
+    str = $$'foo'$$||var1;
+    execute 'select '||str into str;
+    return str;
+  END
+$_$;
diff --git a/tests/test_parse.py b/tests/test_parse.py
index 2d23425..8dd1150 100644
--- a/tests/test_parse.py
+++ b/tests/test_parse.py
@@ -384,3 +384,22 @@ def test_stmt_tokens_parents():
     stmt = sqlparse.parse(s)[0]
     for token in stmt.tokens:
         assert token.has_ancestor(stmt)
+
+
+@pytest.mark.parametrize('sql, is_literal', [
+    ('$$foo$$', True),
+    ('$_$foo$_$', True),
+    ('$token$ foo $token$', True),
+    # don't parse inner tokens
+    ('$_$ foo $token$bar$token$ baz$_$', True),
+    ('$A$ foo $B$', False)  # tokens don't match
+])
+def test_dbldollar_as_literal(sql, is_literal):
+    # see issue 277
+    p = sqlparse.parse(sql)[0]
+    if is_literal:
+        assert len(p.tokens) == 1
+        assert p.tokens[0].ttype == T.Literal
+    else:
+        for token in p.tokens:
+            assert token.ttype != T.Literal
diff --git a/tests/test_split.py b/tests/test_split.py
index af7c9ce..5d846bf 100644
--- a/tests/test_split.py
+++ b/tests/test_split.py
@@ -27,7 +27,8 @@ def test_split_backslash():
 @pytest.mark.parametrize('fn', ['function.sql',
                                 'function_psql.sql',
                                 'function_psql2.sql',
-                                'function_psql3.sql'])
+                                'function_psql3.sql',
+                                'function_psql4.sql'])
 def test_split_create_function(load_file, fn):
     sql = load_file(fn)
     stmts = sqlparse.parse(sql)
author	Andi Albrecht <albrecht.andi@gmail.com>	2016-08-13 17:38:21 +0200
committer	Andi Albrecht <albrecht.andi@gmail.com>	2016-08-13 17:38:21 +0200
commit	2893bd1857d685cf892beac3a7429d03cf1a09f1 (patch)
tree	1fc1a427841391137820355f33cdaac119c080b6
parent	b7a30d04427e4e4cbc66d08b780ffbb23ab44931 (diff)
download	sqlparse-2893bd1857d685cf892beac3a7429d03cf1a09f1.tar.gz