diff options
author | Robert Coup <robert@coup.net.nz> | 2019-11-19 04:33:05 +0000 |
---|---|---|
committer | Paul McGuire <ptmcg@users.noreply.github.com> | 2019-11-18 22:33:05 -0600 |
commit | bea48a41d40f1c37bea7a718cc06e9b858c8ccbf (patch) | |
tree | e4003d769ebdc2e6d174657b201389f8b0672744 /examples/select_parser.py | |
parent | 094ffc815863fa9aaad48de5ce89477019e1e26c (diff) | |
download | pyparsing-git-bea48a41d40f1c37bea7a718cc06e9b858c8ccbf.tar.gz |
select_parser example: misc improvements (#157)
* select_parser example: misc improvements
* sqlite now supports TRUE and FALSE as literal values
* use common numeric expressions
* fix identifier quoting
* downcase identifiers unless they're quoted
* fix string quoting
* add support for sql comments
* additional test cases
* Reformat test-runner aspects
* Improve support for NOT expressions (eg. NOT IN, NOT LIKE)
Diffstat (limited to 'examples/select_parser.py')
-rw-r--r-- | examples/select_parser.py | 246 |
1 files changed, 94 insertions, 152 deletions
diff --git a/examples/select_parser.py b/examples/select_parser.py index 723f8b2..fd0e680 100644 --- a/examples/select_parser.py +++ b/examples/select_parser.py @@ -4,6 +4,7 @@ # a simple SELECT statement parser, taken from SQLite's SELECT statement # definition at https://www.sqlite.org/lang_select.html # +import sys from pyparsing import * ParserElement.enablePackrat() @@ -13,132 +14,24 @@ DOT, STAR = map(Literal, ".*") select_stmt = Forward().setName("select statement") # keywords -( - UNION, - ALL, - AND, - INTERSECT, - EXCEPT, - COLLATE, - ASC, - DESC, - ON, - USING, - NATURAL, - INNER, - CROSS, - LEFT, - OUTER, - JOIN, - AS, - INDEXED, - NOT, - SELECT, - DISTINCT, - FROM, - WHERE, - GROUP, - BY, - HAVING, - ORDER, - BY, - LIMIT, - OFFSET, - OR, -) = map( - CaselessKeyword, - """UNION, ALL, AND, INTERSECT, - EXCEPT, COLLATE, ASC, DESC, ON, USING, NATURAL, INNER, CROSS, LEFT, OUTER, JOIN, AS, INDEXED, NOT, SELECT, - DISTINCT, FROM, WHERE, GROUP, BY, HAVING, ORDER, BY, LIMIT, OFFSET, OR""".replace( - ",", "" - ).split(), -) -( - CAST, - ISNULL, - NOTNULL, - NULL, - IS, - BETWEEN, - ELSE, - END, - CASE, - WHEN, - THEN, - EXISTS, - IN, - LIKE, - GLOB, - REGEXP, - MATCH, - ESCAPE, - CURRENT_TIME, - CURRENT_DATE, - CURRENT_TIMESTAMP, -) = map( - CaselessKeyword, - """CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END, CASE, WHEN, THEN, EXISTS, IN, LIKE, GLOB, - REGEXP, MATCH, ESCAPE, CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP""".replace( - ",", "" - ).split(), -) -keyword = MatchFirst( - ( - UNION, - ALL, - INTERSECT, - EXCEPT, - COLLATE, - ASC, - DESC, - ON, - USING, - NATURAL, - INNER, - CROSS, - LEFT, - OUTER, - JOIN, - AS, - INDEXED, - NOT, - SELECT, - DISTINCT, - FROM, - WHERE, - GROUP, - BY, - HAVING, - ORDER, - BY, - LIMIT, - OFFSET, - CAST, - ISNULL, - NOTNULL, - NULL, - IS, - BETWEEN, - ELSE, - END, - CASE, - WHEN, - THEN, - EXISTS, - COLLATE, - IN, - LIKE, - GLOB, - REGEXP, - MATCH, - ESCAPE, - CURRENT_TIME, - CURRENT_DATE, - CURRENT_TIMESTAMP, - ) -) +keywords = { + k: CaselessKeyword(k) + for k in """\ + UNION ALL AND INTERSECT EXCEPT COLLATE ASC DESC ON USING NATURAL INNER CROSS LEFT OUTER JOIN AS INDEXED NOT + SELECT DISTINCT FROM WHERE GROUP BY HAVING ORDER LIMIT OFFSET OR CAST ISNULL NOTNULL NULL IS BETWEEN ELSE END + CASE WHEN THEN EXISTS IN LIKE GLOB REGEXP MATCH ESCAPE CURRENT_TIME CURRENT_DATE CURRENT_TIMESTAMP TRUE FALSE + """.split() +} +vars().update(keywords) + +keyword = MatchFirst(keywords.values()) identifier = ~keyword + Word(alphas, alphanums + "_") + +quoted_identifier = QuotedString('"', escQuote='""') +identifier = (~keyword + Word(alphas, alphanums + "_")).setParseAction( + pyparsing_common.downcaseTokens +) | quoted_identifier collation_name = identifier.copy() column_name = identifier.copy() column_alias = identifier.copy() @@ -149,17 +42,20 @@ function_name = identifier.copy() parameter_name = identifier.copy() database_name = identifier.copy() +comment = "--" + restOfLine + # expression expr = Forward().setName("expression") -integer = Regex(r"[+-]?\d+") -numeric_literal = Regex(r"\d+(\.\d*)?([eE][+-]?\d+)?") -string_literal = QuotedString("'") +numeric_literal = pyparsing_common.number +string_literal = QuotedString("'", escQuote="''") blob_literal = Regex(r"[xX]'[0-9A-Fa-f]+'") literal_value = ( numeric_literal | string_literal | blob_literal + | TRUE + | FALSE | NULL | CURRENT_TIME | CURRENT_DATE @@ -184,25 +80,44 @@ expr_term = ( | Group(identifier("col")) ) +NOT_NULL = Group(NOT + NULL) +NOT_BETWEEN = Group(NOT + BETWEEN) +NOT_IN = Group(NOT + IN) +NOT_LIKE = Group(NOT + LIKE) +NOT_MATCH = Group(NOT + MATCH) +NOT_GLOB = Group(NOT + GLOB) +NOT_REGEXP = Group(NOT + REGEXP) + UNARY, BINARY, TERNARY = 1, 2, 3 expr << infixNotation( expr_term, [ (oneOf("- + ~") | NOT, UNARY, opAssoc.RIGHT), - (ISNULL | NOTNULL | NOT + NULL, UNARY, opAssoc.LEFT), + (ISNULL | NOTNULL | NOT_NULL, UNARY, opAssoc.LEFT), ("||", BINARY, opAssoc.LEFT), (oneOf("* / %"), BINARY, opAssoc.LEFT), (oneOf("+ -"), BINARY, opAssoc.LEFT), (oneOf("<< >> & |"), BINARY, opAssoc.LEFT), (oneOf("< <= > >="), BINARY, opAssoc.LEFT), ( - oneOf("= == != <>") | IS | IN | LIKE | GLOB | MATCH | REGEXP, + oneOf("= == != <>") + | IS + | IN + | LIKE + | GLOB + | MATCH + | REGEXP + | NOT_IN + | NOT_LIKE + | NOT_GLOB + | NOT_MATCH + | NOT_REGEXP, BINARY, opAssoc.LEFT, ), - ((BETWEEN, AND), TERNARY, opAssoc.LEFT), + ((BETWEEN | NOT_BETWEEN, AND), TERNARY, opAssoc.LEFT), ( - IN + LPAR + Group(select_stmt | delimitedList(expr)) + RPAR, + (IN | NOT_IN) + LPAR + Group(select_stmt | delimitedList(expr)) + RPAR, UNARY, opAssoc.LEFT, ), @@ -272,24 +187,51 @@ select_stmt << ( ) ) -tests = """\ - select * from xyzzy where z > 100 - select * from xyzzy where z > 100 order by zz - select * from xyzzy - select z.* from xyzzy - select a, b from test_table where 1=1 and b='yes' - select a, b from test_table where 1=1 and b in (select bb from foo) - select z.a, b from test_table where 1=1 and b in (select bb from foo) - select z.a, b from test_table where 1=1 and b in (select bb from foo) order by b,c desc,d - select z.a, b from test_table left join test2_table where 1=1 and b in (select bb from foo) - select a, db.table.b as BBB from db.table where 1=1 and BBB='yes' - select a, db.table.b as BBB from test_table,db.table where 1=1 and BBB='yes' - select a, db.table.b as BBB from test_table,db.table where 1=1 and BBB='yes' limit 50 - select a, b from test_table where (1=1 or 2=3) and b='yes' group by zx having b=2 order by 1 - SELECT emp.ename as e FROM scott.employee as emp - SELECT ename as e, fname as f FROM scott.employee as emp - SELECT emp.eid, fname,lname FROM scott.employee as emp - SELECT ename, lname, emp.eid FROM scott.employee as emp - select emp.salary * (1.0 + emp.bonus) as salary_plus_bonus from scott.employee as emp -""" -select_stmt.runTests(tests) +select_stmt.ignore(comment) + +if __name__ == "__main__": + tests = """\ + select * from xyzzy where z > 100 + select * from xyzzy where z > 100 order by zz + select * from xyzzy + select z.* from xyzzy + select a, b from test_table where 1=1 and b='yes' + select a, b from test_table where 1=1 and b in (select bb from foo) + select z.a, b from test_table where 1=1 and b in (select bb from foo) + select z.a, b from test_table where 1=1 and b in (select bb from foo) order by b,c desc,d + select z.a, b from test_table left join test2_table where 1=1 and b in (select bb from foo) + select a, db.table.b as BBB from db.table where 1=1 and BBB='yes' + select a, db.table.b as BBB from test_table,db.table where 1=1 and BBB='yes' + select a, db.table.b as BBB from test_table,db.table where 1=1 and BBB='yes' limit 50 + select a, b from test_table where (1=1 or 2=3) and b='yes' group by zx having b=2 order by 1 + SELECT emp.ename as e FROM scott.employee as emp + SELECT ename as e, fname as f FROM scott.employee as emp + SELECT emp.eid, fname,lname FROM scott.employee as emp + SELECT ename, lname, emp.eid FROM scott.employee as emp + select emp.salary * (1.0 + emp.bonus) as salary_plus_bonus from scott.employee as emp + SELECT * FROM abcd WHERE (ST_Overlaps("GEOM", 'POINT(0 0)')) + SELECT * FROM abcd WHERE CAST(foo AS REAL) > -999.123 + SELECT * FROM abcd WHERE bar BETWEEN +180 AND +10E9 + SELECT * FROM abcd WHERE CAST(foo AS REAL) < (4 + -9.876E-4) + SELECT SomeFunc(99) + SELECT * FROM abcd WHERE ST_X(ST_Centroid(geom)) BETWEEN (-180*2) AND (180*2) + SELECT * FROM abcd WHERE a + SELECT * FROM abcd WHERE snowy_things REGEXP '[⛄️☃️☃🎿🏂🌨❄️⛷🏔🗻❄︎❆❅]' + SELECT * FROM abcd WHERE a."b" IN 4 + SELECT * FROM abcd WHERE a."b" In ('4') + SELECT * FROM "a".b AS "E" WHERE "E"."C" >= CURRENT_Time + SELECT * FROM abcd WHERE "dave" != "Dave" -- names & things ☃️ + SELECT * FROM a WHERE a.dave is not null + SELECT * FROM abcd WHERE pete == FALSE or peter is true + SELECT * FROM abcd WHERE a >= 10 * (2 + 3) + SELECT * FROM abcd WHERE frank = 'is ''scary''' + SELECT * FROM abcd WHERE "identifier with ""quotes"" and a trailing space " IS NOT FALSE + SELECT * FROM abcd WHERE blobby == x'C0FFEE' -- hex + SELECT * FROM abcd WHERE ff NOT IN (1,2,4,5) + SELECT * FROM abcd WHERE ff not between 3 and 9 + SELECT * FROM abcd WHERE ff not like 'bob%' + """ + + success, _ = select_stmt.runTests(tests) + print("\n{}".format("OK" if success else "FAIL")) + sys.exit(0 if success else 1) |