summaryrefslogtreecommitdiff
path: root/examples/select_parser.py
diff options
context:
space:
mode:
Diffstat (limited to 'examples/select_parser.py')
-rw-r--r--examples/select_parser.py308
1 files changed, 226 insertions, 82 deletions
diff --git a/examples/select_parser.py b/examples/select_parser.py
index 7f9273c..723f8b2 100644
--- a/examples/select_parser.py
+++ b/examples/select_parser.py
@@ -5,29 +5,140 @@
# definition at https://www.sqlite.org/lang_select.html
#
from pyparsing import *
+
ParserElement.enablePackrat()
-LPAR,RPAR,COMMA = map(Suppress,"(),")
-DOT,STAR = map(Literal, ".*")
+LPAR, RPAR, COMMA = map(Suppress, "(),")
+DOT, STAR = map(Literal, ".*")
select_stmt = Forward().setName("select statement")
# keywords
-(UNION, ALL, AND, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, USING, NATURAL, INNER,
- CROSS, LEFT, OUTER, JOIN, AS, INDEXED, NOT, SELECT, DISTINCT, FROM, WHERE, GROUP, BY,
- HAVING, ORDER, BY, LIMIT, OFFSET, OR) = map(CaselessKeyword, """UNION, ALL, AND, INTERSECT,
+(
+ UNION,
+ ALL,
+ AND,
+ INTERSECT,
+ EXCEPT,
+ COLLATE,
+ ASC,
+ DESC,
+ ON,
+ USING,
+ NATURAL,
+ INNER,
+ CROSS,
+ LEFT,
+ OUTER,
+ JOIN,
+ AS,
+ INDEXED,
+ NOT,
+ SELECT,
+ DISTINCT,
+ FROM,
+ WHERE,
+ GROUP,
+ BY,
+ HAVING,
+ ORDER,
+ BY,
+ LIMIT,
+ OFFSET,
+ OR,
+) = map(
+ CaselessKeyword,
+ """UNION, ALL, AND, INTERSECT,
EXCEPT, COLLATE, ASC, DESC, ON, USING, NATURAL, INNER, CROSS, LEFT, OUTER, JOIN, AS, INDEXED, NOT, SELECT,
- DISTINCT, FROM, WHERE, GROUP, BY, HAVING, ORDER, BY, LIMIT, OFFSET, OR""".replace(",","").split())
-(CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END, CASE, WHEN, THEN, EXISTS, IN, LIKE, GLOB, REGEXP,
- MATCH, ESCAPE, CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP) = map(CaselessKeyword,
- """CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END, CASE, WHEN, THEN, EXISTS, IN, LIKE, GLOB,
- REGEXP, MATCH, ESCAPE, CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP""".replace(",","").split())
-keyword = MatchFirst((UNION, ALL, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, USING, NATURAL, INNER,
- CROSS, LEFT, OUTER, JOIN, AS, INDEXED, NOT, SELECT, DISTINCT, FROM, WHERE, GROUP, BY,
- HAVING, ORDER, BY, LIMIT, OFFSET, CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END,
- CASE, WHEN, THEN, EXISTS, COLLATE, IN, LIKE, GLOB, REGEXP, MATCH, ESCAPE,
- CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP))
-
-identifier = ~keyword + Word(alphas, alphanums+"_")
+ DISTINCT, FROM, WHERE, GROUP, BY, HAVING, ORDER, BY, LIMIT, OFFSET, OR""".replace(
+ ",", ""
+ ).split(),
+)
+(
+ CAST,
+ ISNULL,
+ NOTNULL,
+ NULL,
+ IS,
+ BETWEEN,
+ ELSE,
+ END,
+ CASE,
+ WHEN,
+ THEN,
+ EXISTS,
+ IN,
+ LIKE,
+ GLOB,
+ REGEXP,
+ MATCH,
+ ESCAPE,
+ CURRENT_TIME,
+ CURRENT_DATE,
+ CURRENT_TIMESTAMP,
+) = map(
+ CaselessKeyword,
+ """CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END, CASE, WHEN, THEN, EXISTS, IN, LIKE, GLOB,
+ REGEXP, MATCH, ESCAPE, CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP""".replace(
+ ",", ""
+ ).split(),
+)
+keyword = MatchFirst(
+ (
+ UNION,
+ ALL,
+ INTERSECT,
+ EXCEPT,
+ COLLATE,
+ ASC,
+ DESC,
+ ON,
+ USING,
+ NATURAL,
+ INNER,
+ CROSS,
+ LEFT,
+ OUTER,
+ JOIN,
+ AS,
+ INDEXED,
+ NOT,
+ SELECT,
+ DISTINCT,
+ FROM,
+ WHERE,
+ GROUP,
+ BY,
+ HAVING,
+ ORDER,
+ BY,
+ LIMIT,
+ OFFSET,
+ CAST,
+ ISNULL,
+ NOTNULL,
+ NULL,
+ IS,
+ BETWEEN,
+ ELSE,
+ END,
+ CASE,
+ WHEN,
+ THEN,
+ EXISTS,
+ COLLATE,
+ IN,
+ LIKE,
+ GLOB,
+ REGEXP,
+ MATCH,
+ ESCAPE,
+ CURRENT_TIME,
+ CURRENT_DATE,
+ CURRENT_TIMESTAMP,
+ )
+)
+
+identifier = ~keyword + Word(alphas, alphanums + "_")
collation_name = identifier.copy()
column_name = identifier.copy()
column_alias = identifier.copy()
@@ -46,87 +157,120 @@ numeric_literal = Regex(r"\d+(\.\d*)?([eE][+-]?\d+)?")
string_literal = QuotedString("'")
blob_literal = Regex(r"[xX]'[0-9A-Fa-f]+'")
literal_value = (
- numeric_literal
- | string_literal
- | blob_literal
- | NULL
- | CURRENT_TIME
- | CURRENT_DATE
- | CURRENT_TIMESTAMP
- )
-bind_parameter = (
- Word("?",nums)
- | Combine(oneOf(": @ $") + parameter_name)
- )
+ numeric_literal
+ | string_literal
+ | blob_literal
+ | NULL
+ | CURRENT_TIME
+ | CURRENT_DATE
+ | CURRENT_TIMESTAMP
+)
+bind_parameter = Word("?", nums) | Combine(oneOf(": @ $") + parameter_name)
type_name = oneOf("TEXT REAL INTEGER BLOB NULL")
expr_term = (
CAST + LPAR + expr + AS + type_name + RPAR
| EXISTS + LPAR + select_stmt + RPAR
- | function_name.setName("function_name") + LPAR + Optional(STAR | delimitedList(expr)) + RPAR
+ | function_name.setName("function_name")
+ + LPAR
+ + Optional(STAR | delimitedList(expr))
+ + RPAR
| literal_value
| bind_parameter
- | Group(identifier('col_db') + DOT + identifier('col_tab') + DOT + identifier('col'))
- | Group(identifier('col_tab') + DOT + identifier('col'))
- | Group(identifier('col'))
+ | Group(
+ identifier("col_db") + DOT + identifier("col_tab") + DOT + identifier("col")
)
+ | Group(identifier("col_tab") + DOT + identifier("col"))
+ | Group(identifier("col"))
+)
-UNARY,BINARY,TERNARY=1,2,3
-expr << infixNotation(expr_term,
+UNARY, BINARY, TERNARY = 1, 2, 3
+expr << infixNotation(
+ expr_term,
[
- (oneOf('- + ~') | NOT, UNARY, opAssoc.RIGHT),
- (ISNULL | NOTNULL | NOT + NULL, UNARY, opAssoc.LEFT),
- ('||', BINARY, opAssoc.LEFT),
- (oneOf('* / %'), BINARY, opAssoc.LEFT),
- (oneOf('+ -'), BINARY, opAssoc.LEFT),
- (oneOf('<< >> & |'), BINARY, opAssoc.LEFT),
- (oneOf('< <= > >='), BINARY, opAssoc.LEFT),
- (oneOf('= == != <>') | IS | IN | LIKE | GLOB | MATCH | REGEXP, BINARY, opAssoc.LEFT),
- ((BETWEEN,AND), TERNARY, opAssoc.LEFT),
- (IN + LPAR + Group(select_stmt | delimitedList(expr)) + RPAR, UNARY, opAssoc.LEFT),
- (AND, BINARY, opAssoc.LEFT),
- (OR, BINARY, opAssoc.LEFT),
- ])
-
-compound_operator = (UNION + Optional(ALL) | INTERSECT | EXCEPT)
-
-ordering_term = Group(expr('order_key')
- + Optional(COLLATE + collation_name('collate'))
- + Optional(ASC | DESC)('direction'))
-
-join_constraint = Group(Optional(ON + expr | USING + LPAR + Group(delimitedList(column_name)) + RPAR))
-
-join_op = COMMA | Group(Optional(NATURAL) + Optional(INNER | CROSS | LEFT + OUTER | LEFT | OUTER) + JOIN)
+ (oneOf("- + ~") | NOT, UNARY, opAssoc.RIGHT),
+ (ISNULL | NOTNULL | NOT + NULL, UNARY, opAssoc.LEFT),
+ ("||", BINARY, opAssoc.LEFT),
+ (oneOf("* / %"), BINARY, opAssoc.LEFT),
+ (oneOf("+ -"), BINARY, opAssoc.LEFT),
+ (oneOf("<< >> & |"), BINARY, opAssoc.LEFT),
+ (oneOf("< <= > >="), BINARY, opAssoc.LEFT),
+ (
+ oneOf("= == != <>") | IS | IN | LIKE | GLOB | MATCH | REGEXP,
+ BINARY,
+ opAssoc.LEFT,
+ ),
+ ((BETWEEN, AND), TERNARY, opAssoc.LEFT),
+ (
+ IN + LPAR + Group(select_stmt | delimitedList(expr)) + RPAR,
+ UNARY,
+ opAssoc.LEFT,
+ ),
+ (AND, BINARY, opAssoc.LEFT),
+ (OR, BINARY, opAssoc.LEFT),
+ ],
+)
+
+compound_operator = UNION + Optional(ALL) | INTERSECT | EXCEPT
+
+ordering_term = Group(
+ expr("order_key")
+ + Optional(COLLATE + collation_name("collate"))
+ + Optional(ASC | DESC)("direction")
+)
+
+join_constraint = Group(
+ Optional(ON + expr | USING + LPAR + Group(delimitedList(column_name)) + RPAR)
+)
+
+join_op = COMMA | Group(
+ Optional(NATURAL) + Optional(INNER | CROSS | LEFT + OUTER | LEFT | OUTER) + JOIN
+)
join_source = Forward()
single_source = (
- Group(database_name("database") + DOT + table_name("table*")
- | table_name("table*"))
- + Optional(Optional(AS) + table_alias("table_alias*"))
- + Optional(INDEXED + BY + index_name("name") | NOT + INDEXED)("index")
- | (LPAR + select_stmt + RPAR + Optional(Optional(AS) + table_alias))
- | (LPAR + join_source + RPAR)
- )
+ Group(database_name("database") + DOT + table_name("table*") | table_name("table*"))
+ + Optional(Optional(AS) + table_alias("table_alias*"))
+ + Optional(INDEXED + BY + index_name("name") | NOT + INDEXED)("index")
+ | (LPAR + select_stmt + RPAR + Optional(Optional(AS) + table_alias))
+ | (LPAR + join_source + RPAR)
+)
-join_source <<= (Group(single_source + OneOrMore(join_op + single_source + join_constraint))
- | single_source)
+join_source <<= (
+ Group(single_source + OneOrMore(join_op + single_source + join_constraint))
+ | single_source
+)
# result_column = "*" | table_name + "." + "*" | Group(expr + Optional(Optional(AS) + column_alias))
-result_column = Group(STAR('col')
- | table_name('col_table') + DOT + STAR('col')
- | expr('col') + Optional(Optional(AS) + column_alias('alias'))
- )
-
-select_core = (SELECT + Optional(DISTINCT | ALL) + Group(delimitedList(result_column))("columns")
- + Optional(FROM + join_source("from*"))
- + Optional(WHERE + expr("where_expr"))
- + Optional(GROUP + BY + Group(delimitedList(ordering_term))("group_by_terms")
- + Optional(HAVING + expr("having_expr"))))
-
-select_stmt << (select_core + ZeroOrMore(compound_operator + select_core)
- + Optional(ORDER + BY + Group(delimitedList(ordering_term))("order_by_terms"))
- + Optional(LIMIT + (Group(expr + OFFSET + expr) | Group(expr + COMMA + expr) | expr)("limit"))
- )
+result_column = Group(
+ STAR("col")
+ | table_name("col_table") + DOT + STAR("col")
+ | expr("col") + Optional(Optional(AS) + column_alias("alias"))
+)
+
+select_core = (
+ SELECT
+ + Optional(DISTINCT | ALL)
+ + Group(delimitedList(result_column))("columns")
+ + Optional(FROM + join_source("from*"))
+ + Optional(WHERE + expr("where_expr"))
+ + Optional(
+ GROUP
+ + BY
+ + Group(delimitedList(ordering_term))("group_by_terms")
+ + Optional(HAVING + expr("having_expr"))
+ )
+)
+
+select_stmt << (
+ select_core
+ + ZeroOrMore(compound_operator + select_core)
+ + Optional(ORDER + BY + Group(delimitedList(ordering_term))("order_by_terms"))
+ + Optional(
+ LIMIT
+ + (Group(expr + OFFSET + expr) | Group(expr + COMMA + expr) | expr)("limit")
+ )
+)
tests = """\
select * from xyzzy where z > 100