1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
|
# select_parser.py
# Copyright 2010,2019 Paul McGuire
#
# a simple SELECT statement parser, taken from SQLite's SELECT statement
# definition at https://www.sqlite.org/lang_select.html
#
from pyparsing import *
ParserElement.enablePackrat()
LPAR,RPAR,COMMA = map(Suppress,"(),")
DOT,STAR = map(Literal, ".*")
select_stmt = Forward().setName("select statement")
# keywords
(UNION, ALL, AND, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, USING, NATURAL, INNER,
CROSS, LEFT, OUTER, JOIN, AS, INDEXED, NOT, SELECT, DISTINCT, FROM, WHERE, GROUP, BY,
HAVING, ORDER, BY, LIMIT, OFFSET, OR) = map(CaselessKeyword, """UNION, ALL, AND, INTERSECT,
EXCEPT, COLLATE, ASC, DESC, ON, USING, NATURAL, INNER, CROSS, LEFT, OUTER, JOIN, AS, INDEXED, NOT, SELECT,
DISTINCT, FROM, WHERE, GROUP, BY, HAVING, ORDER, BY, LIMIT, OFFSET, OR""".replace(",","").split())
(CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END, CASE, WHEN, THEN, EXISTS, IN, LIKE, GLOB, REGEXP,
MATCH, ESCAPE, CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP) = map(CaselessKeyword,
"""CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END, CASE, WHEN, THEN, EXISTS, IN, LIKE, GLOB,
REGEXP, MATCH, ESCAPE, CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP""".replace(",","").split())
keyword = MatchFirst((UNION, ALL, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, USING, NATURAL, INNER,
CROSS, LEFT, OUTER, JOIN, AS, INDEXED, NOT, SELECT, DISTINCT, FROM, WHERE, GROUP, BY,
HAVING, ORDER, BY, LIMIT, OFFSET, CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END,
CASE, WHEN, THEN, EXISTS, COLLATE, IN, LIKE, GLOB, REGEXP, MATCH, ESCAPE,
CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP))
identifier = ~keyword + Word(alphas, alphanums+"_")
collation_name = identifier.copy()
column_name = identifier.copy()
column_alias = identifier.copy()
table_name = identifier.copy()
table_alias = identifier.copy()
index_name = identifier.copy()
function_name = identifier.copy()
parameter_name = identifier.copy()
database_name = identifier.copy()
# expression
expr = Forward().setName("expression")
integer = Regex(r"[+-]?\d+")
numeric_literal = Regex(r"\d+(\.\d*)?([eE][+-]?\d+)?")
string_literal = QuotedString("'")
blob_literal = Regex(r"[xX]'[0-9A-Fa-f]+'")
literal_value = (
numeric_literal
| string_literal
| blob_literal
| NULL
| CURRENT_TIME
| CURRENT_DATE
| CURRENT_TIMESTAMP
)
bind_parameter = (
Word("?",nums)
| Combine(oneOf(": @ $") + parameter_name)
)
type_name = oneOf("TEXT REAL INTEGER BLOB NULL")
expr_term = (
CAST + LPAR + expr + AS + type_name + RPAR
| EXISTS + LPAR + select_stmt + RPAR
| function_name.setName("function_name") + LPAR + Optional(STAR | delimitedList(expr)) + RPAR
| literal_value
| bind_parameter
| Group(identifier('col_db') + DOT + identifier('col_tab') + DOT + identifier('col'))
| Group(identifier('col_tab') + DOT + identifier('col'))
| Group(identifier('col'))
)
UNARY,BINARY,TERNARY=1,2,3
expr << infixNotation(expr_term,
[
(oneOf('- + ~') | NOT, UNARY, opAssoc.RIGHT),
(ISNULL | NOTNULL | NOT + NULL, UNARY, opAssoc.LEFT),
('||', BINARY, opAssoc.LEFT),
(oneOf('* / %'), BINARY, opAssoc.LEFT),
(oneOf('+ -'), BINARY, opAssoc.LEFT),
(oneOf('<< >> & |'), BINARY, opAssoc.LEFT),
(oneOf('< <= > >='), BINARY, opAssoc.LEFT),
(oneOf('= == != <>') | IS | IN | LIKE | GLOB | MATCH | REGEXP, BINARY, opAssoc.LEFT),
((BETWEEN,AND), TERNARY, opAssoc.LEFT),
(IN + LPAR + Group(select_stmt | delimitedList(expr)) + RPAR, UNARY, opAssoc.LEFT),
(AND, BINARY, opAssoc.LEFT),
(OR, BINARY, opAssoc.LEFT),
])
compound_operator = (UNION + Optional(ALL) | INTERSECT | EXCEPT)
ordering_term = Group(expr('order_key')
+ Optional(COLLATE + collation_name('collate'))
+ Optional(ASC | DESC)('direction'))
join_constraint = Group(Optional(ON + expr | USING + LPAR + Group(delimitedList(column_name)) + RPAR))
join_op = COMMA | Group(Optional(NATURAL) + Optional(INNER | CROSS | LEFT + OUTER | LEFT | OUTER) + JOIN)
join_source = Forward()
single_source = (
Group(database_name("database") + DOT + table_name("table*")
| table_name("table*"))
+ Optional(Optional(AS) + table_alias("table_alias*"))
+ Optional(INDEXED + BY + index_name("name") | NOT + INDEXED)("index")
| (LPAR + select_stmt + RPAR + Optional(Optional(AS) + table_alias))
| (LPAR + join_source + RPAR)
)
join_source <<= (Group(single_source + OneOrMore(join_op + single_source + join_constraint))
| single_source)
# result_column = "*" | table_name + "." + "*" | Group(expr + Optional(Optional(AS) + column_alias))
result_column = Group(STAR('col')
| table_name('col_table') + DOT + STAR('col')
| expr('col') + Optional(Optional(AS) + column_alias('alias'))
)
select_core = (SELECT + Optional(DISTINCT | ALL) + Group(delimitedList(result_column))("columns")
+ Optional(FROM + join_source("from*"))
+ Optional(WHERE + expr("where_expr"))
+ Optional(GROUP + BY + Group(delimitedList(ordering_term))("group_by_terms")
+ Optional(HAVING + expr("having_expr"))))
select_stmt << (select_core + ZeroOrMore(compound_operator + select_core)
+ Optional(ORDER + BY + Group(delimitedList(ordering_term))("order_by_terms"))
+ Optional(LIMIT + (Group(expr + OFFSET + expr) | Group(expr + COMMA + expr) | expr)("limit"))
)
tests = """\
select * from xyzzy where z > 100
select * from xyzzy where z > 100 order by zz
select * from xyzzy
select z.* from xyzzy
select a, b from test_table where 1=1 and b='yes'
select a, b from test_table where 1=1 and b in (select bb from foo)
select z.a, b from test_table where 1=1 and b in (select bb from foo)
select z.a, b from test_table where 1=1 and b in (select bb from foo) order by b,c desc,d
select z.a, b from test_table left join test2_table where 1=1 and b in (select bb from foo)
select a, db.table.b as BBB from db.table where 1=1 and BBB='yes'
select a, db.table.b as BBB from test_table,db.table where 1=1 and BBB='yes'
select a, db.table.b as BBB from test_table,db.table where 1=1 and BBB='yes' limit 50
select a, b from test_table where (1=1 or 2=3) and b='yes' group by zx having b=2 order by 1
SELECT emp.ename as e FROM scott.employee as emp
SELECT ename as e, fname as f FROM scott.employee as emp
SELECT emp.eid, fname,lname FROM scott.employee as emp
SELECT ename, lname, emp.eid FROM scott.employee as emp
select emp.salary * (1.0 + emp.bonus) as salary_plus_bonus from scott.employee as emp
"""
select_stmt.runTests(tests)
|