summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--examples/extract_table_names.py53
1 files changed, 53 insertions, 0 deletions
diff --git a/examples/extract_table_names.py b/examples/extract_table_names.py
new file mode 100644
index 0000000..b153850
--- /dev/null
+++ b/examples/extract_table_names.py
@@ -0,0 +1,53 @@
+# This example illustrates how to extract table names from nested
+# SELECT statements.
+
+# See:
+# http://groups.google.com/group/sqlparse/browse_thread/thread/b0bd9a022e9d4895
+
+sql = """
+select K.a from (select H.b from (select G.c from (select F.d from
+(select E.e from A, B, C, D, E), F), G), H), I, J, K;
+"""
+
+import sqlparse
+from sqlparse.sql import IdentifierList, Identifier
+from sqlparse.tokens import Keyword, DML
+
+def is_subselect(parsed):
+ if not parsed.is_group():
+ return False
+ for item in parsed.tokens:
+ if item.ttype is DML and item.value.upper() == 'SELECT':
+ return True
+ return False
+
+def extract_from_part(parsed):
+ from_seen = False
+ for item in parsed.tokens:
+ if from_seen:
+ if is_subselect(item):
+ for x in extract_from_part(item):
+ yield x
+ else:
+ yield item
+ elif item.ttype is Keyword and item.value.upper() == 'FROM':
+ from_seen = True
+
+def extract_table_identifiers(token_stream):
+ for item in token_stream:
+ if isinstance(item, IdentifierList):
+ for identifier in item.get_identifiers():
+ yield identifier.get_name()
+ elif isinstance(item, Identifier):
+ yield item.get_name()
+ # It's a bug to check for Keyword here, but in the example
+ # above some tables names are identified as keywords...
+ elif item.ttype is Keyword:
+ yield item.value
+
+def extract_tables():
+ stream = extract_from_part(sqlparse.parse(sql)[0])
+ return list(extract_table_identifiers(stream))
+
+if __name__ == '__main__':
+ print 'Tables: %s' % ', '.join(extract_tables())