From 361122eb22d5681c58dac731009e4814b3dd5fa5 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Fri, 3 Apr 2009 21:26:42 +0200 Subject: Initial import. --- AUTHORS | 5 + CHANGES | 3 + COPYING | 25 + MANIFEST.in | 3 + Makefile | 18 + README | 45 ++ TODO | 1 + bin/sqlformat | 103 ++++ docs/Makefile | 88 +++ docs/source/api.rst | 12 + docs/source/changes.rst | 7 + docs/source/cmdline.rst | 8 + docs/source/conf.py | 197 +++++++ docs/source/index.rst | 26 + docs/source/intro.rst | 89 ++++ docs/tango/static/bgfooter.png | Bin 0 -> 434 bytes docs/tango/static/bgtop.png | Bin 0 -> 430 bytes docs/tango/static/default.css_t | 90 ++++ docs/tango/theme.conf | 28 + extras/appengine/Makefile | 51 ++ extras/appengine/README | 22 + extras/appengine/__init__.py | 0 extras/appengine/app.yaml | 27 + extras/appengine/examples/customers.sql | 1 + extras/appengine/examples/multiple_inserts.sql | 1 + extras/appengine/examples/pg_view.sql | 1 + extras/appengine/examples/subquery.sql | 1 + extras/appengine/examples/subquery2.sql | 1 + extras/appengine/index.yaml | 0 extras/appengine/main.py | 131 +++++ extras/appengine/make_release.sh | 49 ++ extras/appengine/settings.py | 37 ++ extras/appengine/sqlformat/__init__.py | 0 extras/appengine/sqlformat/urls.py | 11 + extras/appengine/sqlformat/views.py | 204 +++++++ extras/appengine/static/bg_options.png | Bin 0 -> 202 bytes extras/appengine/static/bgfieldset.png | Bin 0 -> 227 bytes extras/appengine/static/bgfooter.png | Bin 0 -> 434 bytes extras/appengine/static/bgtop.png | Bin 0 -> 430 bytes extras/appengine/static/blank.gif | Bin 0 -> 64 bytes extras/appengine/static/canvas.html | 114 ++++ extras/appengine/static/hotkeys.js | 1 + extras/appengine/static/img_loading.gif | Bin 0 -> 1348 bytes .../static/jquery.textarearesizer.compressed.js | 1 + extras/appengine/static/loading.gif | Bin 0 -> 4331 bytes extras/appengine/static/lynx_screenshot.png | Bin 0 -> 66017 bytes extras/appengine/static/pygments.css | 59 +++ extras/appengine/static/resize-grip.png | Bin 0 -> 167 bytes extras/appengine/static/robots.txt | 8 + extras/appengine/static/rpc_relay.html | 1 + extras/appengine/static/script.js | 103 ++++ .../appengine/static/sqlformat_client_example.py | 17 + extras/appengine/static/styles.css | 245 +++++++++ extras/appengine/templates/about.html | 44 ++ extras/appengine/templates/api.html | 50 ++ extras/appengine/templates/index.html | 107 ++++ extras/appengine/templates/master.html | 103 ++++ .../appengine/templates/python-client-example.html | 17 + extras/appengine/templates/source.html | 56 ++ extras/sqlformat.png | Bin 0 -> 5359 bytes extras/sqlformat.svg | 115 ++++ setup.py | 28 + sqlparse/__init__.py | 65 +++ sqlparse/dialects.py | 88 +++ sqlparse/engine/__init__.py | 81 +++ sqlparse/engine/_grouping.py | 499 +++++++++++++++++ sqlparse/engine/filter.py | 98 ++++ sqlparse/engine/grouping.py | 537 +++++++++++++++++++ sqlparse/filters.py | 432 +++++++++++++++ sqlparse/formatter.py | 163 ++++++ sqlparse/keywords.py | 589 +++++++++++++++++++++ sqlparse/lexer.py | 310 +++++++++++ sqlparse/tokens.py | 131 +++++ tests/__init__.py | 0 tests/files/begintag.sql | 4 + tests/files/dashcomment.sql | 5 + tests/files/function.sql | 13 + tests/files/function_psql.sql | 72 +++ tests/files/function_psql2.sql | 7 + tests/files/function_psql3.sql | 8 + tests/run_tests.py | 31 ++ tests/test_format.py | 146 +++++ tests/test_grouping.py | 86 +++ tests/test_parse.py | 39 ++ tests/test_split.py | 88 +++ tests/test_tokenize.py | 21 + tests/utils.py | 38 ++ 87 files changed, 5905 
insertions(+) create mode 100644 AUTHORS create mode 100644 CHANGES create mode 100644 COPYING create mode 100644 MANIFEST.in create mode 100644 Makefile create mode 100644 README create mode 100644 TODO create mode 100755 bin/sqlformat create mode 100644 docs/Makefile create mode 100644 docs/source/api.rst create mode 100644 docs/source/changes.rst create mode 100644 docs/source/cmdline.rst create mode 100644 docs/source/conf.py create mode 100644 docs/source/index.rst create mode 100644 docs/source/intro.rst create mode 100644 docs/tango/static/bgfooter.png create mode 100644 docs/tango/static/bgtop.png create mode 100644 docs/tango/static/default.css_t create mode 100644 docs/tango/theme.conf create mode 100644 extras/appengine/Makefile create mode 100644 extras/appengine/README create mode 100644 extras/appengine/__init__.py create mode 100644 extras/appengine/app.yaml create mode 100644 extras/appengine/examples/customers.sql create mode 100644 extras/appengine/examples/multiple_inserts.sql create mode 100644 extras/appengine/examples/pg_view.sql create mode 100644 extras/appengine/examples/subquery.sql create mode 100644 extras/appengine/examples/subquery2.sql create mode 100644 extras/appengine/index.yaml create mode 100644 extras/appengine/main.py create mode 100755 extras/appengine/make_release.sh create mode 100644 extras/appengine/settings.py create mode 100644 extras/appengine/sqlformat/__init__.py create mode 100644 extras/appengine/sqlformat/urls.py create mode 100644 extras/appengine/sqlformat/views.py create mode 100644 extras/appengine/static/bg_options.png create mode 100644 extras/appengine/static/bgfieldset.png create mode 100644 extras/appengine/static/bgfooter.png create mode 100644 extras/appengine/static/bgtop.png create mode 100644 extras/appengine/static/blank.gif create mode 100644 extras/appengine/static/canvas.html create mode 100644 extras/appengine/static/hotkeys.js create mode 100644 extras/appengine/static/img_loading.gif create mode 100644 extras/appengine/static/jquery.textarearesizer.compressed.js create mode 100644 extras/appengine/static/loading.gif create mode 100644 extras/appengine/static/lynx_screenshot.png create mode 100644 extras/appengine/static/pygments.css create mode 100644 extras/appengine/static/resize-grip.png create mode 100644 extras/appengine/static/robots.txt create mode 100644 extras/appengine/static/rpc_relay.html create mode 100644 extras/appengine/static/script.js create mode 100644 extras/appengine/static/sqlformat_client_example.py create mode 100644 extras/appengine/static/styles.css create mode 100644 extras/appengine/templates/about.html create mode 100644 extras/appengine/templates/api.html create mode 100644 extras/appengine/templates/index.html create mode 100644 extras/appengine/templates/master.html create mode 100644 extras/appengine/templates/python-client-example.html create mode 100644 extras/appengine/templates/source.html create mode 100644 extras/sqlformat.png create mode 100644 extras/sqlformat.svg create mode 100755 setup.py create mode 100644 sqlparse/__init__.py create mode 100644 sqlparse/dialects.py create mode 100644 sqlparse/engine/__init__.py create mode 100644 sqlparse/engine/_grouping.py create mode 100644 sqlparse/engine/filter.py create mode 100644 sqlparse/engine/grouping.py create mode 100644 sqlparse/filters.py create mode 100644 sqlparse/formatter.py create mode 100644 sqlparse/keywords.py create mode 100644 sqlparse/lexer.py create mode 100644 sqlparse/tokens.py create mode 100644 
tests/__init__.py create mode 100644 tests/files/begintag.sql create mode 100644 tests/files/dashcomment.sql create mode 100644 tests/files/function.sql create mode 100644 tests/files/function_psql.sql create mode 100644 tests/files/function_psql2.sql create mode 100644 tests/files/function_psql3.sql create mode 100755 tests/run_tests.py create mode 100644 tests/test_format.py create mode 100644 tests/test_grouping.py create mode 100644 tests/test_parse.py create mode 100644 tests/test_split.py create mode 100644 tests/test_tokenize.py create mode 100644 tests/utils.py diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..4aec012 --- /dev/null +++ b/AUTHORS @@ -0,0 +1,5 @@ +python-sqlparse is written and maintained by Andi Albrecht . + +This module contains code (namely the lexer and filter mechanism) from +the pygments project that was written by Georg Brandl. + diff --git a/CHANGES b/CHANGES new file mode 100644 index 0000000..4dfa2e5 --- /dev/null +++ b/CHANGES @@ -0,0 +1,3 @@ +Release 0.1.0 (In Development) +------------------------------ + * Initial. \ No newline at end of file diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..0334d9f --- /dev/null +++ b/COPYING @@ -0,0 +1,25 @@ +Copyright (c) 2008, Andi Albrecht +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of the authors nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..60ab652 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,3 @@ +include COPYING +include test.py +recursive-include docs *.rst \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..868043f --- /dev/null +++ b/Makefile @@ -0,0 +1,18 @@ +# Makefile to simplify some common development tasks. +# Run 'make help' for a list of commands. 
+
+PYTHON=`which python`
+
+default: help
+
+help:
+	@echo "Available commands:"
+	@sed -n '/^[a-zA-Z0-9_.]*:/s/:.*//p' <Makefile

[...]

+def _error(msg, exit_=None):
+    """Print an error message to stderr and optionally exit."""
+    print >>sys.stderr, '[ERROR] %s' % msg
+    if exit_ is not None:
+        sys.exit(exit_)
+
+
+def _build_formatter_opts(options):
+    """Convert command line options to dictionary."""
+    d = {}
+    for option in _FORMATTING_GROUP.option_list:
+        d[option.dest] = getattr(options, option.dest)
+    return d
+
+
+def main():
+    options, args = parser.parse_args()
+    if options.verbose:
+        print >>sys.stderr, 'Verbose mode'
+
+    if len(args) != 1:
+        _error('No input data.')
+        parser.print_usage()
+        sys.exit(1)
+
+    if '-' in args:  # read from stdin
+        data = sys.stdin.read()
+    else:
+        try:
+            data = ''.join(open(args[0]).readlines())
+        except IOError, err:
+            _error('Failed to read %s: %s' % (args[0], err), exit_=1)
+
+    if options.outfile:
+        try:
+            stream = open(options.outfile, 'w')
+        except IOError, err:
+            _error('Failed to open %s: %s' % (options.outfile, err), exit_=1)
+    else:
+        stream = sys.stdout
+
+    formatter_opts = _build_formatter_opts(options)
+    try:
+        formatter_opts = sqlparse.formatter.validate_options(formatter_opts)
+    except sqlparse.SQLParseError, err:
+        _error('Invalid options: %s' % err, exit_=1)
+
+    stream.write(sqlparse.format(data, **formatter_opts).encode('utf-8',
+                                                                'replace'))
+    stream.flush()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..cb23107
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,88 @@
+# Makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+SPHINXOPTS    =
+SPHINXBUILD   = sphinx-build
+PAPER         =
+
+# Internal variables.
+PAPEROPT_a4     = -D latex_paper_size=a4
+PAPEROPT_letter = -D latex_paper_size=letter
+ALLSPHINXOPTS   = -d build/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
+
+.PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest
+
+help:
+	@echo "Please use \`make <target>' where <target> is one of"
+	@echo "  html      to make standalone HTML files"
+	@echo "  dirhtml   to make HTML files named index.html in directories"
+	@echo "  pickle    to make pickle files"
+	@echo "  json      to make JSON files"
+	@echo "  htmlhelp  to make HTML files and a HTML help project"
+	@echo "  qthelp    to make HTML files and a qthelp project"
+	@echo "  latex     to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
+	@echo "  changes   to make an overview of all changed/added/deprecated items"
+	@echo "  linkcheck to check all external links for integrity"
+	@echo "  doctest   to run all doctests embedded in the documentation (if enabled)"
+
+clean:
+	-rm -rf build/*
+
+html:
+	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) build/html
+	@echo
+	@echo "Build finished. The HTML pages are in build/html."
+
+dirhtml:
+	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) build/dirhtml
+	@echo
+	@echo "Build finished. The HTML pages are in build/dirhtml."
+
+pickle:
+	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) build/pickle
+	@echo
+	@echo "Build finished; now you can process the pickle files."
+
+json:
+	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) build/json
+	@echo
+	@echo "Build finished; now you can process the JSON files."
+
+htmlhelp:
+	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) build/htmlhelp
+	@echo
+	@echo "Build finished; now you can run HTML Help Workshop with the" \
+	      ".hhp project file in build/htmlhelp."
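Stripped of option parsing and I/O, the bin/sqlformat script above boils down to validating the collected options and handing the input to sqlparse.format(). A minimal sketch of that core step, assuming only the sqlparse package added by this patch is importable (the sample statement is the INITIAL_SQL used by the App Engine front-end later in this patch; Python 2, as throughout)::

    # Core of what bin/sqlformat does with its input: format it with the
    # options gathered from the command line.
    import sqlparse

    sql = 'select * from foo join bar on val1 = val2 where id = 123;'
    print sqlparse.format(sql, reindent=True, keyword_case='upper')

Running this prints the reindented statement with upper-cased keywords, exactly as the script would write it to stdout.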
+ +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) build/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in build/qthelp, like this:" + @echo "# qcollectiongenerator build/qthelp/python-sqlparse.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile build/qthelp/python-sqlparse.qhc" + +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) build/latex + @echo + @echo "Build finished; the LaTeX files are in build/latex." + @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \ + "run these through (pdf)latex." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) build/changes + @echo + @echo "The overview file is in build/changes." + +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) build/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in build/linkcheck/output.txt." + +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) build/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in build/doctest/output.txt." diff --git a/docs/source/api.rst b/docs/source/api.rst new file mode 100644 index 0000000..3bce389 --- /dev/null +++ b/docs/source/api.rst @@ -0,0 +1,12 @@ +:mod:`sqlparse` -- Parse SQL statements +======================================= + +The :mod:`sqlparse` module provides the following functions on module-level. + +.. autofunction:: sqlparse.split + +.. autofunction:: sqlparse.format + +.. autofunction:: sqlparse.parse + + diff --git a/docs/source/changes.rst b/docs/source/changes.rst new file mode 100644 index 0000000..5811c57 --- /dev/null +++ b/docs/source/changes.rst @@ -0,0 +1,7 @@ +.. _changes: + +Changes in python-sqlparse +========================== + +.. include:: ../../CHANGES + diff --git a/docs/source/cmdline.rst b/docs/source/cmdline.rst new file mode 100644 index 0000000..93a35a7 --- /dev/null +++ b/docs/source/cmdline.rst @@ -0,0 +1,8 @@ +``sqlformat`` -- Command Line Script +=========================================== + +The :mod:`sqlparse` module is shipped with the script +:program:`sqlformat` which provides a command line interface to the formatting +functions. + +.. todo:: Describe options and example usage. diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..b84245a --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,197 @@ +# -*- coding: utf-8 -*- +# +# python-sqlparse documentation build configuration file, created by +# sphinx-quickstart on Thu Feb 26 08:19:28 2009. +# +# This file is execfile()d with the current directory set to its containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys, os + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +#sys.path.append(os.path.abspath('.')) +sys.path.append(os.path.abspath('../../')) + +# -- General configuration ----------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be extensions +# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 
+extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo', 'sphinx.ext.coverage', + 'sphinx.ext.autosummary'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'python-sqlparse' +copyright = u'2009, Andi Albrecht' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '0.1.0' +# The full version, including alpha/beta/rc tags. +release = '0.1.0' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of documents that shouldn't be included in the build. +#unused_docs = [] + +# List of directories, relative to source directory, that shouldn't be searched +# for source files. +exclude_trees = [] + +# The reST default role (used for this markup: `text`) to use for all documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + + +# -- Options for HTML output --------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. Major themes that come with +# Sphinx are currently 'default' and 'sphinxdoc'. +html_theme = 'sphinxdoc' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +html_theme_path = [os.path.abspath('../')] + +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. 
+#html_last_updated_fmt = '%b %d, %Y'
+
+# If true, SmartyPants will be used to convert quotes and dashes to
+# typographically correct entities.
+#html_use_smartypants = True
+
+# Custom sidebar templates, maps document names to template names.
+#html_sidebars = {}
+
+# Additional templates that should be rendered to pages, maps page names to
+# template names.
+#html_additional_pages = {}
+
+# If false, no module index is generated.
+#html_use_modindex = True
+
+# If false, no index is generated.
+#html_use_index = True
+
+# If true, the index is split into individual pages for each letter.
+#html_split_index = False
+
+# If true, links to the reST sources are added to the pages.
+#html_show_sourcelink = True
+
+# If true, an OpenSearch description file will be output, and all pages will
+# contain a <link> tag referring to it. The value of this option must be the
+# base URL from which the finished HTML is served.
+#html_use_opensearch = ''
+
+# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml").
+#html_file_suffix = ''
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'python-sqlparsedoc'
+
+
+# -- Options for LaTeX output --------------------------------------------------
+
+# The paper size ('letter' or 'a4').
+#latex_paper_size = 'letter'
+
+# The font size ('10pt', '11pt' or '12pt').
+#latex_font_size = '10pt'
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title, author, documentclass [howto/manual]).
+latex_documents = [
+  ('index', 'python-sqlparse.tex', ur'python-sqlparse Documentation',
+   ur'Andi Albrecht', 'manual'),
+]
+
+# The name of an image file (relative to this directory) to place at the top of
+# the title page.
+#latex_logo = None
+
+# For "manual" documents, if this is true, then toplevel headings are parts,
+# not chapters.
+#latex_use_parts = False
+
+# Additional stuff for the LaTeX preamble.
+#latex_preamble = ''
+
+# Documents to append as an appendix to all manuals.
+#latex_appendices = []
+
+# If false, no module index is generated.
+#latex_use_modindex = True
+todo_include_todos = True
diff --git a/docs/source/index.rst b/docs/source/index.rst
new file mode 100644
index 0000000..40e99e0
--- /dev/null
+++ b/docs/source/index.rst
@@ -0,0 +1,26 @@
+.. python-sqlparse documentation master file, created by
+   sphinx-quickstart on Thu Feb 26 08:19:28 2009.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+Welcome to python-sqlparse's documentation!
+===========================================
+
+Contents:
+
+.. toctree::
+   :maxdepth: 2
+
+   intro
+   api
+   cmdline
+   changes
+
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
+
diff --git a/docs/source/intro.rst b/docs/source/intro.rst
new file mode 100644
index 0000000..3d7a888
--- /dev/null
+++ b/docs/source/intro.rst
@@ -0,0 +1,89 @@
+Introduction
+============
+
+:mod:`sqlparse` is a non-validating SQL parser for Python.
+
+It provides support for parsing, splitting and formatting SQL statements.
+
+:mod:`sqlparse` is released under the terms of the
+`New BSD license `_.
+
+Visit http://sqlformat.appspot.com to try its formatting features.
+
+
+Download & Installation
+-----------------------
+
+To download and install :mod:`sqlparse` on your system run the following
+commands:
+
+.. code-block:: bash
+
+   $ git clone git://github.com/andialbrecht/python-sqlparse.git
+   $ cd python-sqlparse/
+   $ sudo python setup.py install
+
+A tarball of the current sources is available under the following URL:
+http://github.com/andialbrecht/python-sqlparse/tarball/master
+
+
+Example Usage
+-------------
+
+Here are some usage examples of this module.
+
+Splitting statements::
+
+   >>> import sqlparse
+   >>> sql = 'select * from foo; select * from bar;'
+   >>> sqlparse.split(sql)
+   <<< [u'select * from foo; ', u'select * from bar;']
+
+Formatting statements::
+
+   >>> sql = 'select * from foo where id in (select id from bar);'
+   >>> print sqlparse.format(sql, reindent=True, keyword_case='upper')
+   SELECT *
+   FROM foo
+   WHERE id IN
+     (SELECT id
+      FROM bar);
+
+Now, let's have a deeper look at the internals::
+
+   >>> sql = 'select * from "someschema"."mytable" where id = 1'
+   >>> parsed = sqlparse.parse(sql)
+   >>> parsed
+   <<< (<Statement 'select...' at 0x...>,)
+   >>> stmt = parsed[0]
+   >>> stmt.to_unicode()  # converting it back to unicode
+   <<< u'select * from "someschema"."mytable" where id = 1'
+   >>> # This is what the internal representation looks like:
+   >>> stmt.tokens
+   <<< (<DML 'select' at 0x...>,
+    <Whitespace ' ' at 0x...>,
+    <Wildcard '*' at 0x...>,
+    <Whitespace ' ' at 0x...>,
+    <Keyword 'from' at 0x...>,
+    <Whitespace ' ' at 0x...>,
+    <Identifier '"someschema"."mytable"' at 0x...>,
+    <Whitespace ' ' at 0x...>,
+    <Where 'where ...' at 0x...>)
+   >>>
+
+
+.. todo:: Describe general concepts
+   Why non-validating? Processing stages (tokens, groups,...), filter,
+
+
+Development & Contributing
+--------------------------
+
+Please file bug reports and feature requests on the project site at
+http://code.google.com/p/python-sqlparse/issues/entry or if you have
+code to contribute upload it to http://codereview.appspot.com and
+add albrecht.andi@googlemail.com as reviewer.
+
+For more information about the review tool and how to use it visit
+its project page: http://code.google.com/p/rietveld.
diff --git a/docs/tango/static/bgfooter.png b/docs/tango/static/bgfooter.png
new file mode 100644
index 0000000..9ce5bdd
Binary files /dev/null and b/docs/tango/static/bgfooter.png differ
diff --git a/docs/tango/static/bgtop.png b/docs/tango/static/bgtop.png
new file mode 100644
index 0000000..a0d4709
Binary files /dev/null and b/docs/tango/static/bgtop.png differ
diff --git a/docs/tango/static/default.css_t b/docs/tango/static/default.css_t
new file mode 100644
index 0000000..c86759a
--- /dev/null
+++ b/docs/tango/static/default.css_t
@@ -0,0 +1,90 @@
+/**
+ * Sphinx stylesheet -- default theme
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+@import url("basic.css");
+
+body {
+    color: #000000;
+    font-family: "Free Sans", Arial, Verdana, sans;
+    font-size: 11pt;
+    width: 70em;
+    margin: 0;
+    margin-left: auto;
+    margin-right: auto;
+}
+
+
+div.related {
+    background: #eeeeec; /* #d3d7cf; */
+    color: white;
+    padding-bottom: .6em;
+    padding-top: .6em;
+}
+
+div.related a {
+    color: #2e3436;
+    text-decoration: none;
+}
+
+div.document {
+    background: #eeeeec;
+}
+
+div.body {
+    background: white;
+    padding: .5em;
+    -moz-border-radius: 0px 10px 10px 10px;
+    border-right: 1px solid #eeeeec;
+}
+
+div.document h1, h2, h3, h4 {
+    color: #204a87;
+    font-weight: normal;
+    letter-spacing: .05em;
+}
+
+div.document p, dl, div.highlight {
+    margin-left: 25px;
+}
+
+div.document dd p, dd {
+    margin-left: .5em;
+}
+
+div.document a {
+    color: #f57900;
+}
+
+div.sphinxsidebarwrapper {
+    background: url(bgfooter.png) top left repeat-x #eeeeec;
+    padding-right: .5em;
+}
+
+div.sphinxsidebar ul {
+    margin: 0;
+    padding: 0;
+}
+
+div.sphinxsidebar a {
+    color: #888a85;
+    text-decoration: none;
+}
+
+div.footer {
+    font-size: .8em;
+    color: #888a85;
+    text-align: right;
+
padding: 10px; +} + +div.footer a { + color: #888a85; +} + +div.highlight pre { + background-color: #eeeeec; + border: 1px solid #babdb6; + padding: 7px; +} \ No newline at end of file diff --git a/docs/tango/theme.conf b/docs/tango/theme.conf new file mode 100644 index 0000000..812330f --- /dev/null +++ b/docs/tango/theme.conf @@ -0,0 +1,28 @@ +[theme] +inherit = basic +stylesheet = default.css +pygments_style = sphinx + +[options] +rightsidebar = false +stickysidebar = false + +footerbgcolor = #11303d +footertextcolor = #ffffff +sidebarbgcolor = #1c4e63 +sidebartextcolor = #ffffff +sidebarlinkcolor = #98dbcc +relbarbgcolor = #133f52 +relbartextcolor = #ffffff +relbarlinkcolor = #ffffff +bgcolor = #ffffff +textcolor = #000000 +headbgcolor = #f2f2f2 +headtextcolor = #20435c +headlinkcolor = #c60f0f +linkcolor = #355f7c +codebgcolor = #eeffcc +codetextcolor = #333333 + +bodyfont = sans-serif +headfont = 'Trebuchet MS', sans-serif diff --git a/extras/appengine/Makefile b/extras/appengine/Makefile new file mode 100644 index 0000000..28380cf --- /dev/null +++ b/extras/appengine/Makefile @@ -0,0 +1,51 @@ +# Makefile to simplify some common AppEngine actions. +# Use 'make help' for a list of commands. + +PYTHON=`which python2.5` +DEV_APPSERVER=$(PYTHON) `which dev_appserver.py` +APPCFG=$(PYTHON) `which appcfg.py` +PORT=8080 + + +default: help + +help: + @echo "Available commands:" + @sed -n '/^[a-zA-Z0-9_.]*:/s/:.*//p' dt.refobjid)) AND (dt.classid = ('pg_rewrite'::regclass)::oid)) AND (dt.refclassid = ('pg_class'::regclass)::oid)) AND (dt.refobjid = t.oid)) AND (t.relnamespace = nt.oid)) AND (t.relkind = ANY (ARRAY['r'::"char", 'v'::"char"]))) AND pg_has_role(t.relowner, 'USAGE'::text)) ORDER BY (current_database())::information_schema.sql_identifier, (nv.nspname)::information_schema.sql_identifier, (v.relname)::information_schema.sql_identifier, (current_database())::information_schema.sql_identifier, (nt.nspname)::information_schema.sql_identifier, (t.relname)::information_schema.sql_identifier; diff --git a/extras/appengine/examples/subquery.sql b/extras/appengine/examples/subquery.sql new file mode 100644 index 0000000..dd4bbc1 --- /dev/null +++ b/extras/appengine/examples/subquery.sql @@ -0,0 +1 @@ +select sum(a1.Sales) from Store_Information a1 where a1.Store_name in (select store_name from Geography a2 where a2.store_name = a1.store_name); \ No newline at end of file diff --git a/extras/appengine/examples/subquery2.sql b/extras/appengine/examples/subquery2.sql new file mode 100644 index 0000000..6c00a87 --- /dev/null +++ b/extras/appengine/examples/subquery2.sql @@ -0,0 +1 @@ +select user_id, count(*) as how_many from bboard where not exists (select 1 from bboard_authorized_maintainers bam where bam.user_id = bboard.user_id) and posting_time + 60 > sysdate group by user_id order by how_many desc; \ No newline at end of file diff --git a/extras/appengine/index.yaml b/extras/appengine/index.yaml new file mode 100644 index 0000000..e69de29 diff --git a/extras/appengine/main.py b/extras/appengine/main.py new file mode 100644 index 0000000..d0a8418 --- /dev/null +++ b/extras/appengine/main.py @@ -0,0 +1,131 @@ +# Copyright 2008 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Main program for Rietveld. + +This is also a template for running a Django app under Google App +Engine, especially when using a newer version of Django than provided +in the App Engine standard library. + +The site-specific code is all in other files: urls.py, models.py, +views.py, settings.py. +""" + +# Standard Python imports. +import os +import sys +import logging + +# Log a message each time this module get loaded. +logging.info('Loading %s, app version = %s', + __name__, os.getenv('CURRENT_VERSION_ID')) + +# Delete the preloaded copy of Django. +for key in [key for key in sys.modules if key.startswith('django')]: + del sys.modules[key] + +os.environ['DJANGO_SETTINGS_MODULE'] = 'settings' + +# Force sys.path to have our own directory first, so we can import from it. +sys.path.insert(0, os.path.abspath(os.path.dirname(__file__))) + +# Import Django from a zipfile. +sys.path.insert(0, os.path.abspath('django.zip')) + +# Fail early if we can't import Django. Log identifying information. +import django +logging.info('django.__file__ = %r, django.VERSION = %r', + django.__file__, django.VERSION) +assert django.VERSION[0] >= 1, "This Django version is too old" + +# AppEngine imports. +from google.appengine.ext.webapp import util + + +# Helper to enter the debugger. This passes in __stdin__ and +# __stdout__, because stdin and stdout are connected to the request +# and response streams. You must import this from __main__ to use it. +# (I tried to make it universally available via __builtin__, but that +# doesn't seem to work for some reason.) +def BREAKPOINT(): + import pdb + p = pdb.Pdb(None, sys.__stdin__, sys.__stdout__) + p.set_trace() + + +# Custom Django configuration. +from django.conf import settings +settings._target = None + +# Import various parts of Django. +import django.core.handlers.wsgi +import django.core.signals +import django.db +import django.dispatch.dispatcher +import django.forms + +# Work-around to avoid warning about django.newforms in djangoforms. +django.newforms = django.forms + + +def log_exception(*args, **kwds): + """Django signal handler to log an exception.""" + cls, err = sys.exc_info()[:2] + logging.exception('Exception in request: %s: %s', cls.__name__, err) + + +# Log all exceptions detected by Django. +django.core.signals.got_request_exception.connect(log_exception) + +# Unregister Django's default rollback event handler. +django.core.signals.got_request_exception.disconnect( + django.db._rollback_on_exception) + + +def real_main(): + """Main program.""" + # Create a Django application for WSGI. + application = django.core.handlers.wsgi.WSGIHandler() + # Run the WSGI CGI handler with that application. + util.run_wsgi_app(application) + + +def profile_main(): + """Main program for profiling.""" + import cProfile + import pstats + import StringIO + + prof = cProfile.Profile() + prof = prof.runctx('real_main()', globals(), locals()) + stream = StringIO.StringIO() + stats = pstats.Stats(prof, stream=stream) + # stats.strip_dirs() # Don't; too many modules are named __init__.py. 
+  stats.sort_stats('time')  # 'time', 'cumulative' or 'calls'
+  stats.print_stats()  # Optional arg: how many to print
+  # The rest is optional.
+  # stats.print_callees()
+  # stats.print_callers()
+  print '\n<hr>'
+  print '<h1>Profile</h1>'
+  print '<pre>'
+  print stream.getvalue()[:1000000]
+  print '</pre>'
+
+
+# Set this to profile_main to enable profiling.
+main = real_main
+
+
+if __name__ == '__main__':
+  main()
diff --git a/extras/appengine/make_release.sh b/extras/appengine/make_release.sh
new file mode 100755
index 0000000..adb8a5c
--- /dev/null
+++ b/extras/appengine/make_release.sh
@@ -0,0 +1,49 @@
+#!/bin/sh
+
+# Script to create a "release" subdirectory.  This is a subdirectory
+# containing a bunch of symlinks, from which the app can be updated.
+# The main reason for this is to import Django from a zipfile, which
+# saves dramatically in upload time: statting and computing the SHA1
+# for 1000s of files is slow.  Even if most of those files don't
+# actually need to be uploaded, they still add to the work done for
+# each update.
+
+ZIPFILE=django.zip
+RELEASE=release
+FILES="app.yaml index.yaml __init__.py main.py settings.py"
+DIRS="static templates sqlparse pygments sqlformat examples"
+
+# Remove old $ZIPFILE file.
+rm -rf $ZIPFILE
+
+# Create new $ZIPFILE file.
+# We prune:
+# - .svn subdirectories for obvious reasons.
+# - contrib/gis/ and related files because it's huge and unneeded.
+# - *.po and *.mo files because they are bulky and unneeded.
+# - *.pyc and *.pyo because they aren't used by App Engine anyway.
+zip -q $ZIPFILE `find django/ \
+    -name .svn -prune -o \
+    -name gis -prune -o \
+    -name admin -prune -o \
+    -name localflavor -prune -o \
+    -name mysql -prune -o \
+    -name mysql_old -prune -o \
+    -name oracle -prune -o \
+    -name postgresql -prune -o \
+    -name postgresql_psycopg2 -prune -o \
+    -name sqlite3 -prune -o \
+    -name test -prune -o \
+    -type f ! -name \*.py[co] ! -name \*.[pm]o -print`
+
+# Remove old $RELEASE directory.
+rm -rf $RELEASE
+
+# Create new $RELEASE directory.
+mkdir $RELEASE
+
+# Create symbolic links.
+for x in $FILES $DIRS $ZIPFILE
+do
+    ln -s ../$x $RELEASE/$x
+done
diff --git a/extras/appengine/settings.py b/extras/appengine/settings.py
new file mode 100644
index 0000000..fad6bb5
--- /dev/null
+++ b/extras/appengine/settings.py
@@ -0,0 +1,37 @@
+# Copyright 2008 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +"""Minimal Django settings.""" + +import os + +APPEND_SLASH = False +DEBUG = os.environ['SERVER_SOFTWARE'].startswith('Dev') +INSTALLED_APPS = ( + 'sqlformat', +) +MIDDLEWARE_CLASSES = ( + 'django.middleware.common.CommonMiddleware', + 'django.middleware.http.ConditionalGetMiddleware', +# 'codereview.middleware.AddUserToRequestMiddleware', +) +ROOT_URLCONF = 'sqlformat.urls' +TEMPLATE_CONTEXT_PROCESSORS = () +TEMPLATE_DEBUG = DEBUG +TEMPLATE_DIRS = ( + os.path.join(os.path.dirname(__file__), 'templates'), + ) +TEMPLATE_LOADERS = ( + 'django.template.loaders.filesystem.load_template_source', + ) diff --git a/extras/appengine/sqlformat/__init__.py b/extras/appengine/sqlformat/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/extras/appengine/sqlformat/urls.py b/extras/appengine/sqlformat/urls.py new file mode 100644 index 0000000..c83290e --- /dev/null +++ b/extras/appengine/sqlformat/urls.py @@ -0,0 +1,11 @@ +from django.conf.urls.defaults import * + +urlpatterns = patterns( + 'sqlformat.views', + (r'^$', 'index'), + (r'^source/$', 'source'), + (r'^about/$', 'about'), + (r'^api/$', 'api'), + (r'^format/$', 'format'), + (r'^load_example', 'load_example'), +) diff --git a/extras/appengine/sqlformat/views.py b/extras/appengine/sqlformat/views.py new file mode 100644 index 0000000..d135c44 --- /dev/null +++ b/extras/appengine/sqlformat/views.py @@ -0,0 +1,204 @@ +# -*- coding: utf-8 -*- + +import logging +import md5 +import os +import time + +from django import forms +from django.http import HttpResponse +from django.shortcuts import render_to_response +from django.utils import simplejson as json + +from google.appengine.api import users + +from pygments import highlight +from pygments.formatters import HtmlFormatter +from pygments.lexers import SqlLexer, PythonLexer, PhpLexer + +import sqlparse + + +INITIAL_SQL = "select * from foo join bar on val1 = val2 where id = 123;" +EXAMPLES_DIR = os.path.join(os.path.dirname(__file__), '../examples') + +def _get_user_image(user): + if user is None: + return None + digest = md5.new(user.email().lower()).hexdigest() + if os.environ['SERVER_SOFTWARE'].startswith('Dev'): + host = 'localhost%3A8080' + else: + host = 'sqlformat.appspot.com' + default = 'http%3A%2F%2F'+host+'%2Fstatic%2Fblank.gif' + return 'http://gravatar.com/avatar/%s?s=32&d=%s' % (digest, default) + +def _get_examples(): + fnames = os.listdir(EXAMPLES_DIR) + fnames.sort() + return fnames + + +class FormOptions(forms.Form): + data = forms.CharField(widget=forms.Textarea({'class': 'resizable'}), + initial=INITIAL_SQL, required=False) + datafile = forms.FileField(required=False) + highlight = forms.BooleanField(initial=True, required=False, + widget=forms.CheckboxInput(), + label='Enable syntax highlighting') + remove_comments = forms.BooleanField(initial=False, required=False, + widget=forms.CheckboxInput(), + label='Remove comments') + keyword_case = forms.CharField( + widget=forms.Select(choices=(('', 'Unchanged'), + ('lower', 'Lower case'), + ('upper', 'Upper case'), + ('capitalize', 'Capitalize'))), + required=False, initial='upper', label='Keywords') + identifier_case = forms.CharField( + widget=forms.Select(choices=(('', 'Unchanged'), + ('lower', 'Lower case'), + ('upper', 'Upper case'), + ('capitalize', 'Capitalize'))), + required=False, initial='', label='Identifiers') + n_indents = forms.IntegerField(min_value=0, max_value=30, + initial=2, required=False, + label='spaces', + widget=forms.TextInput({'size': 2, + 'maxlength': 2})) +# right_margin = 
forms.IntegerField(min_value=10, max_value=500, +# initial=60, required=False, +# label='characters', +# widget=forms.TextInput({'size': 3, +# 'maxlength': 3})) + output_format = forms.CharField( + widget=forms.Select(choices=(('sql', 'SQL'), + ('python', 'Python'), + ('php', 'PHP'), + )), + required=False, initial='sql', label='Language') + + def clean(self): + super(FormOptions, self).clean() + data = self.cleaned_data.get('data') + logging.info(self.files) + if 'datafile' in self.files: + self._datafile = self.files['datafile'].read() + else: + self._datafile = None + if not data and not self._datafile: + raise forms.ValidationError('Whoops, I need a file or text!') + elif data and self._datafile: + raise forms.ValidationError('Whoops, I need a file OR text!') + return self.cleaned_data + + def get_data(self): + data = self.cleaned_data.get('data') + if self._datafile: + return self._datafile + else: + return data + + +def format_sql(form, format='html'): + data = form.cleaned_data + popts = {} + sql = form.get_data() + if data.get('remove_comments'): + popts['strip_comments'] = True + if data.get('keyword_case'): + popts['keyword_case'] = data.get('keyword_case') + if data.get('identifier_case'): + popts['identifier_case'] = data.get('identifier_case') + if data.get('n_indents', None) is not None: + popts['reindent'] = True + popts['indent_width'] = data.get('n_indents') + if data.get('right_margin', None) is not None: + popts['right_margin'] = data.get('right_margin') + if data.get('output_format', None) is not None: + popts['output_format'] = data.get('output_format') + sql = sqlparse.format(sql, **popts) + if format in ('html', 'json'): + if data.get('highlight', False): + if popts['output_format'] == 'python': + lexer = PythonLexer() + elif popts['output_format'] == 'php': + lexer = PhpLexer() + else: + lexer = SqlLexer() + sql = highlight(sql, lexer, HtmlFormatter()) + else: + sql = ('' + % sql) + return sql + + +def index(request): + output = None + data = {} + proc_time = None + if request.method == 'POST': + logging.debug(request.POST) + form = FormOptions(request.POST, request.FILES) + if form.is_valid(): + start = time.time() + output = format_sql(form, + format=request.POST.get('format', 'html')) + proc_time = time.time()-start + else: + form = FormOptions() + if request.POST.get('format', None) == 'json': + logging.warning(form.errors) + data['errors'] = str(form.errors) + data['output'] = output + logging.info('%r', proc_time) + data['proc_time'] = '%.3f' % proc_time or 0.0 + data = json.dumps(data) + return HttpResponse(data, content_type='text/x-json') + elif request.POST.get('format', None) == 'text': + if not form.is_valid(): + data = str(form.errors) # XXX convert to plain text + else: + data = output + return HttpResponse(data, content_type='text/plain') + return render_to_response('index.html', + {'form': form, 'output': output, + 'proc_time': proc_time and '%.3f' % proc_time or None, + 'user': users.get_current_user(), + 'login_url': users.create_login_url('/'), + 'logout_url': users.create_logout_url('/'), + 'userimg': _get_user_image(users.get_current_user()), + 'examples': _get_examples()}) + + +def format(request): + if request.method == 'POST': + form = FormOptions(request.POST) + if form.is_valid(): + response = format_sql(form, format='text') + else: + response = 'ERROR: %s' % str(form.errors) + else: + response = 'POST request required' + return HttpResponse(response, content_type='text/plain') + +def source(request): + return 
render_to_response('source.html') + +def about(request): + return render_to_response('about.html') + +def api(request): + return render_to_response('api.html') + +def load_example(request): + fname = request.POST.get('fname') + if fname is None: + answer = 'Uups, I\'ve got no filename...' + elif fname not in _get_examples(): + answer = 'Hmm, I think you don\'t want to do that.' + else: + answer = open(os.path.join(EXAMPLES_DIR, fname)).read() + data = json.dumps({'answer': answer}) + return HttpResponse(data, content_type='text/x-json') diff --git a/extras/appengine/static/bg_options.png b/extras/appengine/static/bg_options.png new file mode 100644 index 0000000..bc1a6ed Binary files /dev/null and b/extras/appengine/static/bg_options.png differ diff --git a/extras/appengine/static/bgfieldset.png b/extras/appengine/static/bgfieldset.png new file mode 100644 index 0000000..4d55f4a Binary files /dev/null and b/extras/appengine/static/bgfieldset.png differ diff --git a/extras/appengine/static/bgfooter.png b/extras/appengine/static/bgfooter.png new file mode 100644 index 0000000..9ce5bdd Binary files /dev/null and b/extras/appengine/static/bgfooter.png differ diff --git a/extras/appengine/static/bgtop.png b/extras/appengine/static/bgtop.png new file mode 100644 index 0000000..a0d4709 Binary files /dev/null and b/extras/appengine/static/bgtop.png differ diff --git a/extras/appengine/static/blank.gif b/extras/appengine/static/blank.gif new file mode 100644 index 0000000..3be2119 Binary files /dev/null and b/extras/appengine/static/blank.gif differ diff --git a/extras/appengine/static/canvas.html b/extras/appengine/static/canvas.html new file mode 100644 index 0000000..ab642d0 --- /dev/null +++ b/extras/appengine/static/canvas.html @@ -0,0 +1,114 @@ + + + + + + + +
+ + \ No newline at end of file diff --git a/extras/appengine/static/hotkeys.js b/extras/appengine/static/hotkeys.js new file mode 100644 index 0000000..0e62a92 --- /dev/null +++ b/extras/appengine/static/hotkeys.js @@ -0,0 +1 @@ +(function(B){B.fn.__bind__=B.fn.bind;B.fn.__unbind__=B.fn.unbind;B.fn.__find__=B.fn.find;var A={version:"0.7.8",override:/keydown|keypress|keyup/g,triggersMap:{},specialKeys:{27:"esc",9:"tab",32:"space",13:"return",8:"backspace",145:"scroll",20:"capslock",144:"numlock",19:"pause",45:"insert",36:"home",46:"del",35:"end",33:"pageup",34:"pagedown",37:"left",38:"up",39:"right",40:"down",112:"f1",113:"f2",114:"f3",115:"f4",116:"f5",117:"f6",118:"f7",119:"f8",120:"f9",121:"f10",122:"f11",123:"f12"},shiftNums:{"`":"~","1":"!","2":"@","3":"#","4":"$","5":"%","6":"^","7":"&","8":"*","9":"(","0":")","-":"_","=":"+",";":":","'":'"',",":"<",".":">","/":"?","\\":"|"},newTrigger:function(E,D,F){var C={};C[E]={};C[E][D]={cb:F,disableInInput:false};return C}};if(B.browser.mozilla){A.specialKeys=B.extend(A.specialKeys,{96:"0",97:"1",98:"2",99:"3",100:"4",101:"5",102:"6",103:"7",104:"8",105:"9"})}B.fn.find=function(C){this.query=C;return B.fn.__find__.apply(this,arguments)};B.fn.unbind=function(H,E,G){if(B.isFunction(E)){G=E;E=null}if(E&&typeof E==="string"){var F=((this.prevObject&&this.prevObject.query)||(this[0].id&&this[0].id)||this[0]).toString();var D=H.split(" ");for(var C=0;C').parent().append($('
').bind("mousedown",{el:this},startDrag));var grippie=$('div.grippie',$(this).parent())[0];grippie.style.marginRight=(grippie.offsetWidth-$(this)[0].offsetWidth)+'px'})};function startDrag(e){textarea=$(e.data.el);textarea.blur();iLastMousePos=mousePosition(e).y;staticOffset=textarea.height()-iLastMousePos;textarea.css('opacity',0.25);$(document).mousemove(performDrag).mouseup(endDrag);return false}function performDrag(e){var iThisMousePos=mousePosition(e).y;var iMousePos=staticOffset+iThisMousePos;if(iLastMousePos>=(iThisMousePos)){iMousePos-=5}iLastMousePos=iThisMousePos;iMousePos=Math.max(iMin,iMousePos);textarea.height(iMousePos+'px');if(iMousePos \ No newline at end of file diff --git a/extras/appengine/static/script.js b/extras/appengine/static/script.js new file mode 100644 index 0000000..71bbabb --- /dev/null +++ b/extras/appengine/static/script.js @@ -0,0 +1,103 @@ +var initialized = false; + +function update_output() { + data = {} + data.data = $('#id_data').val(); + data.format = 'json'; + if ( $('#id_remove_comments').attr('checked') ) { + data.remove_comments = 1 + } + if ( $('#id_highlight').attr('checked') ) { data.highlight = 1 } + data.keyword_case = $('#id_keyword_case').val(); + data.identifier_case = $('#id_identifier_case').val(); + data.n_indents = $('#id_n_indents').val(); + data.right_margin = $('#id_right_margin').val(); + data.output_format = $('#id_output_format').val(); + form = document.getElementById('form_options'); + $(form.elements).attr('disabled', 'disabled'); + $('#response').addClass('loading'); + $.post('/', data, + function(data) { + if ( data.output ) { + $('#response').html(data.output); + proc_time = 'Processed in '+data.proc_time+' seconds.'; + } else { + $('#response').html('An error occured: '+data.errors); + proc_time = ''; + } + $('#proc_time').html(proc_time); + $(form.elements).each( function(idx) { + obj = $(this); + if ( ! 
obj.is('.keep-disabled') ) { + obj.removeAttr('disabled'); + } + }); + $('#response').removeClass('loading'); + }, 'json'); + return false; +} + +function toggle_fieldset(event) { + id = $(this).attr('id'); + $('#'+id+'_content').slideDown(); + $('legend').each(function(idx) { + obj = $('#'+this.id+'_content'); + if ( this.id != id ) { + obj.slideUp(); + } + }); +} + + +function textarea_grab_focus(evt) { + evt.stopPropagation(); + evt.preventDefault(); + $('#id_data').focus(); + return false; +} + + +function show_help() { + $('#help').toggle(); + return false; +} + + +function hide_help() { + $('#help').hide(); + return false; +} + +function load_example() { + fname = $('#sel_example').val(); + data = {fname: fname}; + $.post('/load_example', data, + function(data) { + $('#id_data').val(data.answer); + }, 'json'); +} + + +function init() { + if (initialized) { return } + //$('legend').bind('click', toggle_fieldset); + // $('legend').each(function(idx) { + // obj = $('#'+this.id+'_content'); + // if ( this.id != 'general' ) { + // obj.hide(); + // } + // }); + $(document).bind('keydown', {combi:'Ctrl+f'}, + update_output); + $('#btn_format').val('Format SQL [Ctrl+F]'); + $(document).bind('keydown', {combi: 'h', disableInInput: true}, + show_help); + $(document).bind('keydown', 'Esc', hide_help); + $(document).bind('keydown', {combi: 't', disableInInput: true}, + textarea_grab_focus); + initialized = true; + /* jQuery textarea resizer plugin usage */ + $(document).ready(function() { + $('textarea.resizable:not(.processed)').TextAreaResizer(); + }); +} \ No newline at end of file diff --git a/extras/appengine/static/sqlformat_client_example.py b/extras/appengine/static/sqlformat_client_example.py new file mode 100644 index 0000000..3b3bf0f --- /dev/null +++ b/extras/appengine/static/sqlformat_client_example.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python + +import urllib +import urllib2 + +payload = ( + ('data', 'select * from foo join bar on val1 = val2 where id = 123;'), + ('format', 'text'), + ('keyword_case', 'upper'), + ('reindent', True), + ('n_indents', 2), + ) + +response = urllib2.urlopen('http://sqlformat.appspot.com/format/', + urllib.urlencode(payload)) +print response.read() + diff --git a/extras/appengine/static/styles.css b/extras/appengine/static/styles.css new file mode 100644 index 0000000..41a540a --- /dev/null +++ b/extras/appengine/static/styles.css @@ -0,0 +1,245 @@ +body { + color: #000000; + background: #eeeeec; + font-family: "Free Sans", Arial, Verdana, sans; + font-size: 10pt; + margin: 0; + padding: 0; +} + +#header { + background: url(/static/bgtop.png) top left repeat-x; + border-bottom: 3px solid #2e3436; +} + +#header-inner, #main-inner, #footer-inner { + width: 70em; + margin-left: auto; + margin-right: auto; +} + + +#header-inner h1 { + margin: 0; + padding: 0; + margin-bottom: .2em; + font-weight: normal; + float: left; + font-size: 2em; + letter-spacing: .07em; +} + +#header-inner .q { + color: #f57900; + padding-right: 3px; +} + +#header-inner .q2 { + font-family: Georgia, "Times New Roman", serif; +} + +#header-inner h1 a { + text-decoration: none; + color: #eeeeec; +} + +#header-inner #slogan { + float: left; + color: #babdb6; + font-size: 1.4em; + margin-left: 1em; + letter-spacing: .18em; + padding-top: .2em; +} + + +#topmenu { + color: #729fcf; + clear: left; + padding-top: .5em; + padding-bottom: .5em; + font-size: 1.1em; +} + +#topmenu a { + color: #eeeeec; + text-decoration: none; +} + +#topmenu a:hover { + color: #ce5c00; +} + + +#main { + padding: 
10px; + background: white; + line-height: 1.5em; + text-align: justify; +} + +#main form ul { + margin: 0; + padding: 0; + list-style-type: none; +} + +#main p, #main ol, #main .example, #main dl { + font-size: 12pt; + margin-left: 2em; +} + +#main dt { + font-weight: bold; +} + +#main li { + margin-bottom: .7em; +} + +#main a { + color: #f57900; +} + +#main h1, h2, h3, h4 { + color: #204a87; + font-weight: normal; + letter-spacing: .05em; +} + +#main pre, #main code.pre { + font-size: 10pt; + line-height: 1em; + padding: 4px; + background-color: #eeeeec; + border: 1px solid #babdb6; +} + +#input { + width: 50em; + float: right; + margin-left: 2em; +} + +#options { + width: 18em; + float: left; + color: #2e3436; + margin-top: .75em; + text-align: left; +} + +#options fieldset { + border: 1px solid #dddddd; + margin-bottom: .6em; + background: url(/static/bgfieldset.png) bottom left repeat-x; + -moz-border-radius: 3px; +} + + +#options input, select { + border: 1px solid #dddddd; +} + +#options .help { + font-size: .9em; + color: #888a85; + margin-bottom: .6em; +} + + +#footer { + background: url(/static/bgfooter.png) top left repeat-x; + padding: 10px; + min-height: 80px; + border-top: 4px solid #babdb6; +} + +#footer-inner { + width: 70em; + margin-left: auto; + margin-right: auto; + color: #888a85; +} + +#footer-inner a { + color: #888a85; +} + +#footer-inner a:hover { + color: #555753; +} + +.clearfix { + clear: both; +} + +.skip { + display: none; +} + +textarea { + border: 1px solid #cccccc; + border-bottom: none; + padding: 4px; + font-size: 12pt; + width: 100%; +} + +textarea:focus { + background-color: #eeeeec; +} + +div.grippie { + background: url(/static/resize-grip.png) bottom right no-repeat #eeeeec; + border-color: #cccccc; + border-style: solid; + border-width: 0pt 1px 1px; + cursor: se-resize; + height: 14px; + overflow: hidden; +} + +#help { + display: none; + position: fixed; + right: 10%; + left: 10%; + top: 0; + opacity: 0.85; + -moz-opacity: 0.85; + -khtml-opacity: 0.85; + filter: alpha(opacity=85); + -moz-border-radius: 0px 0px 10px 10px; + + background: #2e3436; + color: white; + font-weight: bold; + + padding: 1em; + z-index: 1; + overflow-x: hidden; + overflow-y: auto; +} + +#help .shortcut { + color: #f57900; + font-weight: bold; + width: 20px; + display: inline; +} + +.loading { + background: url(/static/loading.gif) top left no-repeat; +} + +.dev { + color: #cc0000; + font-size: .9em; + letter-spacing: 1; +} + +#proc_time { + color: #888a85; + font-size: .85em; +} \ No newline at end of file diff --git a/extras/appengine/templates/about.html b/extras/appengine/templates/about.html new file mode 100644 index 0000000..3222b97 --- /dev/null +++ b/extras/appengine/templates/about.html @@ -0,0 +1,44 @@ +{% extends "master.html" %} + +{% block main %} +

About this Application

+

+ This application is a online SQL formatting tool. +

+

+ Basically it's a playground for a Python module to parse and format + SQL statements. Sometimes it's easier to combine the available + options and to see the resulting output using a web front-end than + on the command line ;-) +

+

+ To get started, enter a SQL statement in the text box on the top, + choose some options and click on "Format SQL" (Ctrl+F) + to see the result. +

+

+ Note: The SQL formatter and parser is in an early stage + of development. If you're looking for a mature tool, try one of + these. +

+

Using it from the Command Line

+

+ There are three ways to use this SQL formatter from the command line: +

+
+  1. Grab the sources and use the module in your Python scripts.
+  2. Write a little script in your favorite language that sends a POST
+     request to this application. Read the API Documentation for more
+     information.
+  3. Use Lynx (see the example below).
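+
+  For example, assuming Lynx is installed locally, the service can be
+  browsed right from a terminal:
+
+      lynx http://sqlformat.appspot.com/
+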
+ +{% endblock %} diff --git a/extras/appengine/templates/api.html b/extras/appengine/templates/api.html new file mode 100644 index 0000000..b9aaae7 --- /dev/null +++ b/extras/appengine/templates/api.html @@ -0,0 +1,50 @@ +{% extends "master.html" %} + +{% block main %} +

API Documentation

+ +

+ Using the API for this application is pretty simple. Just send a + POST request to +

+

+ http://sqlformat.appspot.com/format/ +

+ +
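+
+ For instance, from a shell (assuming curl is available on your system):
+
+     curl --data 'data=select * from foo;' http://sqlformat.appspot.com/format/
+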

Options

+

+ The POST request accepts various options to control + formatting. Only the data option is required; all others + are optional. +

+ +
+
data
+
The SQL statement to format.
+
remove_comments
+
Set to 1 to remove comments.
+
keyword_case
+
How to convert keywords. Allowed values are 'lower', 'upper', + 'capitalize'.
+
identifier_case
+
How to convert identifiers. Allowed values are 'lower', 'upper', + 'capitalize'.
+
n_indents
+
An integer indicating the indentation depth.
+
right_margin
+
An integer indicating the maximum line length.
+
output_format
+
Translate the statement into another programming language. + Allowed values are 'python' and 'php'.
+
+ +

Example

+

+ Here's an example in Python: +

+{% include "python-client-example.html" %} +

+ Download sqlformat_client_example.py +
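+
+ If you only need default formatting, a request carrying nothing but the
+ data option is enough. A minimal sketch in Python (same urllib/urllib2
+ approach as above; the statement is just an illustration):
+
+     import urllib
+     import urllib2
+
+     data = urllib.urlencode({'data': 'select * from foo;'})
+     print urllib2.urlopen('http://sqlformat.appspot.com/format/',
+                           data).read()
+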

+ +{% endblock %} diff --git a/extras/appengine/templates/index.html b/extras/appengine/templates/index.html new file mode 100644 index 0000000..57e7ed3 --- /dev/null +++ b/extras/appengine/templates/index.html @@ -0,0 +1,107 @@ +{% extends "master.html" %} + +{% block main %} + +{% if output %} + +{% endif %} + +
+
+ {% if form.non_field_errors %}{{form.non_field_errors}}{% endif %} +
+ Type your SQL here:
+ {{form.data}} + {% if form.data.errors %}{{form.data.errors}}{% endif %} +
+
+ ...or upload a file: + {{form.datafile}} +
+
+
+ +
+ {% if output %} +
{{output|safe}}
+ {% else %} +
+ {% endif %} +
+ {% if proc_time %}Processed in {{proc_time}} seconds.{% endif %} +
+
+ + +
+ +
+
+

Options

+
General Options +
+ {{form.remove_comments}} + +
+ {{form.highlight}} + + {% if form.highlight.errors %} +
    {{form.highlight.errors}}
+ {% endif %} +
+
+
+ Keywords & Identifiers +
+ {{form.keyword_case.label}}: {{form.keyword_case}} +
+
+ {{form.identifier_case.label}}: {{form.identifier_case}} +
+
+
Indentation & Margins + +
+ + {{form.n_indents}} {{form.n_indents.label}} +
Leave the field empty to keep the indentation unchanged.
+ +
+
+
Output Format + + {{form.output_format}} +
+ +
This software is in development.
+ +
+
+
+ + +{% endblock %} + diff --git a/extras/appengine/templates/master.html b/extras/appengine/templates/master.html new file mode 100644 index 0000000..4294cf4 --- /dev/null +++ b/extras/appengine/templates/master.html @@ -0,0 +1,103 @@ + + + SQLFormat - Online SQL Formatting Service + + + + + + + + + + + +
+

Keyboard Shortcuts

+

+ H - Show / hide this help window
+ Ctrl+F - Format SQL and display result
+ O - Show / hide options
+ T - Set focus on SQL input
+

+
+ + + +
+
+ {% block main %}MAIN CONTENT GOES HERE{% endblock %} +
+
+ + + + + + + + + diff --git a/extras/appengine/templates/python-client-example.html b/extras/appengine/templates/python-client-example.html new file mode 100644 index 0000000..68bf820 --- /dev/null +++ b/extras/appengine/templates/python-client-example.html @@ -0,0 +1,17 @@ +
#!/usr/bin/env python
+
+import urllib
+import urllib2
+
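+# 'data' holds the SQL to format and is the only required field; the
+# remaining entries are optional formatting switches (see the API page).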
+payload = (
+    ('data', 'select * from foo join bar on val1 = val2 where id = 123;'),
+    ('format', 'text'),
+    ('keyword_case', 'upper'),
+    ('reindent', True),
+    ('n_indents', 2),
+    )
+
+response = urllib2.urlopen('http://sqlformat.appspot.com/format/',
+                           urllib.urlencode(payload))
+print response.read()
+
diff --git a/extras/appengine/templates/source.html b/extras/appengine/templates/source.html new file mode 100644 index 0000000..4988f2f --- /dev/null +++ b/extras/appengine/templates/source.html @@ -0,0 +1,56 @@ +{% extends "master.html" %} + +{% block main %} +
+

Source Code

+ +

Python Module

+

+ The sources for the SQL parser and formatter module are currently + hosted on GitHub. + To clone the repository, run: +

+ git clone git://github.com/andialbrecht/python-sqlparse.git +

+

+ Visit the project page + | + Browse the sources online +

+

+ Some relevant parts of the Python module contain code from the + pygments syntax highlighter. + The underlying Python module uses a non-validating SQL parser. + This approach makes it possible to parse even syntactically incorrect + SQL statements. +

+ +
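+
+ For illustration, a minimal sketch using the module's own parse()
+ entry point (the statement is deliberately incomplete):
+
+     import sqlparse
+
+     # A non-validating parser tokenizes this without raising an error:
+     statements = sqlparse.parse('select from where')
+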

+ Currently the parser module is used by + CrunchyFrog, a + database front-end for GNOME. +

+ +

+ The sqlparse module is released under the terms of the + New BSD License. +

+ +

App Engine Application

+

+ The source code for this App Engine application is available in the + extras directory of the Python module + (but it's really nothing special ;-). +

+ +

Contributing

+

+ Please file bug reports and feature requests on the project site at + http://code.google.com/p/python-sqlparse/issues/entry + or, if you have code to contribute, upload it to + http://codereview.appspot.com + and add albrecht.andi@googlemail.com as a reviewer. +

+ +
+{% endblock %} diff --git a/extras/sqlformat.png b/extras/sqlformat.png new file mode 100644 index 0000000..4189bc4 Binary files /dev/null and b/extras/sqlformat.png differ diff --git a/extras/sqlformat.svg b/extras/sqlformat.svg new file mode 100644 index 0000000..59e1183 --- /dev/null +++ b/extras/sqlformat.svg @@ -0,0 +1,115 @@ + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + SQL + Format + + diff --git a/setup.py b/setup.py new file mode 100755 index 0000000..6ed5178 --- /dev/null +++ b/setup.py @@ -0,0 +1,28 @@ +# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com +# +# This setup script is part of python-sqlparse and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php. + +from distutils.core import setup + +setup( + name='sqlparse', + version='0.1.0', + py_modules=['sqlparse'], + description='Provides simple SQL parsing', + author='Andi Albrecht', + author_email='albrecht.andi@gmail.com', + #long_description=release.long_description, + license='BSD', + url='http://python-sqlparse.googlecode.com/', + classifiers = [ + 'Development Status :: 3 - Alpha', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: BSD License', + 'Operating System :: OS Independent', + 'Programming Language :: Python', + 'Topic :: Database', + 'Topic :: Software Development' + ], + scripts=['bin/sqlformat'], +) diff --git a/sqlparse/__init__.py b/sqlparse/__init__.py new file mode 100644 index 0000000..01b3bd8 --- /dev/null +++ b/sqlparse/__init__.py @@ -0,0 +1,65 @@ +# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com +# +# This module is part of python-sqlparse and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php. + +"""Parse SQL statements.""" + +__version__ = '0.1.0' + +import logging +import os + + +if 'SQLPARSE_DEBUG' in os.environ: + logging.basicConfig(level=logging.DEBUG) + + +class SQLParseError(Exception): + """Base class for exceptions in this module.""" + + +# Setup namespace +from sqlparse import engine +from sqlparse import filters +from sqlparse import formatter + + +def parse(sql): + """Parse sql and return a list of statements. + + *sql* is a single string containting one or more SQL statements. + + The returned :class:`~sqlparse.parser.Statement` are fully analyzed. + + Returns a list of :class:`~sqlparse.parser.Statement` instances. + """ + stack = engine.FilterStack() + stack.full_analyze() + return tuple(stack.run(sql)) + + +def format(sql, **options): + """Format *sql* according to *options*. + + Returns a list of :class:`~sqlparse.parse.Statement` instances like + :meth:`parse`, but the statements are formatted according to *options*. + + Available options are documented in the :mod:`~sqlparse.format` module. + """ + stack = engine.FilterStack() + options = formatter.validate_options(options) + stack = formatter.build_filter_stack(stack, options) + stack.postprocess.append(filters.SerializerUnicode()) + return ''.join(stack.run(sql)) + + +def split(sql): + """Split *sql* into separate statements. + + Returns a list of strings. 
+ """ + stack = engine.FilterStack() + stack.split_statements = True + return [unicode(stmt) for stmt in stack.run(sql)] + diff --git a/sqlparse/dialects.py b/sqlparse/dialects.py new file mode 100644 index 0000000..cabe503 --- /dev/null +++ b/sqlparse/dialects.py @@ -0,0 +1,88 @@ +# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com +# +# This module is part of python-sqlparse and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php. + +"""This module contains classes that represent SQL dialects.""" + +from tokens import * + + +class Dialect(object): + """Base class for SQL dialect implementations.""" + + def handle_token(self, tokentype, text): + """Handle a token. + + Arguments: + tokentype: A token type. + text: Text representation of the token. + + Returns: + A tuple of three items: tokentype, text, splitlevel. + splitlevel is either -1, 0 or 1 and describes an identation level. + """ + raise NotImplementedError + + def reset(self): + """Reset Dialect state.""" + pass + + +class DefaultDialect(Dialect): + + def __init__(self): + self._in_declare = False + self._stmt_type = None + + def get_statement_type(self): + return self._stmt_type + + def set_statement_type(self, type_): + self._stmt_type = type_ + + def handle_token(self, tokentype, text): + if not tokentype == Keyword: + return tokentype, text, 0 + unified = text.upper() + if unified == 'DECLARE': + self._in_declare = True + return tokentype, text, 1 + if unified == 'BEGIN': + if self._in_declare: + return tokentype, text, 0 + return tokentype, text, 0 + if unified == 'END': + return tokentype, text, -1 + # TODO: Use a constant here + if unified in ('IF', 'FOR') and self._stmt_type == 6: + return tokentype, text, 1 + return tokentype, text, 0 + + def reset(self): + self._in_declare = False + + +class PSQLDialect(DefaultDialect): + + def __init__(self): + super(PSQLDialect, self).__init__() + self._in_dbldollar = False + + def handle_token(self, tokentype, text): + if (tokentype == Name.Builtin + and text.startswith('$') and text.endswith('$')): + if self._in_dbldollar: + self._in_dbldollar = False + return tokentype, text, -1 + else: + self._in_dbldollar = True + return tokentype, text, 1 + elif self._in_dbldollar: + return tokentype, text, 0 + else: + return super(PSQLDialect, self).handle_token(tokentype, text) + + def reset(self): + self._dollar_started = False + self._in_dbldollar = False diff --git a/sqlparse/engine/__init__.py b/sqlparse/engine/__init__.py new file mode 100644 index 0000000..5cac528 --- /dev/null +++ b/sqlparse/engine/__init__.py @@ -0,0 +1,81 @@ +# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com +# +# This module is part of python-sqlparse and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php. 
+ +"""filter""" + +import logging +import re + +from sqlparse import lexer, SQLParseError +from sqlparse.engine import grouping +from sqlparse.engine.filter import StatementFilter + +# XXX remove this when cleanup is complete +Filter = object + + +class FilterStack(object): + + def __init__(self): + self.preprocess = [] + self.stmtprocess = [] + self.postprocess = [] + self.split_statements = False + self._grouping = False + + def _flatten(self, stream): + for token in stream: + if token.is_group(): + for t in self._flatten(token.tokens): + yield t + else: + yield token + + def enable_grouping(self): + self._grouping = True + + def full_analyze(self): + self.enable_grouping() + + def run(self, sql): + stream = lexer.tokenize(sql) + # Process token stream + if self.preprocess: + for filter_ in self.preprocess: + stream = filter_.process(self, stream) + + if (self.stmtprocess or self.postprocess or self.split_statements + or self._grouping): + splitter = StatementFilter() + stream = splitter.process(self, stream) + + if self._grouping: + def _group(stream): + for stmt in stream: + grouping.group(stmt) + yield stmt + stream = _group(stream) + + if self.stmtprocess: + def _run(stream): + ret = [] + for stmt in stream: + for filter_ in self.stmtprocess: + filter_.process(self, stmt) + ret.append(stmt) + return ret + stream = _run(stream) + + if self.postprocess: + def _run(stream): + for stmt in stream: + stmt.tokens = list(self._flatten(stmt.tokens)) + for filter_ in self.postprocess: + stmt = filter_.process(self, stmt) + yield stmt + stream = _run(stream) + + return stream + diff --git a/sqlparse/engine/_grouping.py b/sqlparse/engine/_grouping.py new file mode 100644 index 0000000..512c590 --- /dev/null +++ b/sqlparse/engine/_grouping.py @@ -0,0 +1,499 @@ +# -*- coding: utf-8 -*- + +import re + +from sqlparse.engine.filter import TokenFilter +from sqlparse import tokens as T + +class _Base(object): + + __slots__ = ('to_unicode', 'to_str', '_get_repr_name') + + def __unicode__(self): + return 'Unkown _Base object' + + def __str__(self): + return unicode(self).encode('latin-1') + + def __repr__(self): + raw = unicode(self) + if len(raw) > 7: + short = raw[:6]+u'...' 
+ else: + short = raw + short = re.sub('\s+', ' ', short) + return '<%s \'%s\' at 0x%07x>' % (self._get_repr_name(), + short, id(self)) + + def _get_repr_name(self): + return self.__class__.__name__ + + def to_unicode(self): + return unicode(self) + + def to_str(self): + return str(self) + + +class Token(_Base): + + __slots__ = ('value', 'ttype') + + def __init__(self, ttype, value): + self.value = value + self.ttype = ttype + + def __unicode__(self): + return self.value + + def _get_repr_name(self): + return str(self.ttype).split('.')[-1] + + def match(self, ttype, values): + if self.ttype is not ttype: + return False + if isinstance(values, basestring): + values = [values] + if self.ttype is T.Keyword: + return self.value.upper() in [v.upper() for v in values] + else: + return self.value in values + + def is_group(self): + return False + + def is_whitespace(self): + return self.ttype and self.ttype is T.Whitespace + + +class _Group(Token): + + __slots__ = ('value', 'ttype', 'tokens') + + def __init__(self, tokens=None): + super(_Group, self).__init__(None, None) + if tokens is None: + tokens = [] + self._tokens = tokens + + def _set_tokens(self, tokens): + self._tokens = tokens + def _get_tokens(self): + if type(self._tokens) is not types.TupleType: + self._tokens = tuple(self._tokens) + return self._tokens + tokens = property(fget=_get_tokens, fset=_set_tokens) + + def _get_repr_name(self): + return self.__class__.__name__ + + def _pprint_tree(self, depth=0): + """Pretty-print the object tree.""" + indent = ' '*(depth*2) + for token in self.tokens: + print '%s%r' % (indent, token) + if token.is_group(): + token._pprint_tree(depth+1) + + def __unicode__(self): + return u''.join(unicode(t) for t in self.tokens) + + @property + def subgroups(self): + #return [x for x in self.tokens if isinstance(x, _Group)] + for item in self.tokens: + if item.is_group(): + yield item + + def is_group(self): + return True + + +class Statement(_Group): + __slots__ = ('value', 'ttype', '_tokens') + + +class Parenthesis(_Group): + __slots__ = ('value', 'ttype', '_tokens') + + +class Where(_Group): + __slots__ = ('value', 'ttype', '_tokens') + + +class CommentMulti(_Group): + __slots__ = ('value', 'ttype', '_tokens') + + +class Identifier(_Group): + __slots__ = ('value', 'ttype', '_tokens') + + +class TypeCast(_Group): + __slots__ = ('value', 'ttype', '_tokens') + + @property + def casted_object(self): + return self.tokens[0] + + @property + def casted_type(self): + return self.tokens[-1] + + +class Alias(_Group): + __slots__ = ('value', 'ttype', '_tokens') + + @property + def aliased_object(self): + return self.tokens[0] + + @property + def alias(self): + return self.tokens[-1] + + + + +# - Filter + +class StatementFilter(TokenFilter): + + def __init__(self): + self._in_declare = False + self._in_dbldollar = False + self._is_create = False + + def _reset(self): + self._in_declare = False + self._in_dbldollar = False + self._is_create = False + + def _change_splitlevel(self, ttype, value): + # PostgreSQL + if (ttype == T.Name.Builtin + and value.startswith('$') and value.endswith('$')): + if self._in_dbldollar: + self._in_dbldollar = False + return -1 + else: + self._in_dbldollar = True + return 1 + elif self._in_dbldollar: + return 0 + + # ANSI + if ttype is not T.Keyword: + return 0 + + unified = value.upper() + + if unified == 'DECLARE': + self._in_declare = True + return 1 + + if unified == 'BEGIN': + if self._in_declare: + return 0 + return 0 + + if unified == 'END': + return -1 + + if ttype is 
T.Keyword.DDL and unified.startswith('CREATE'): + self._is_create = True + + if unified in ('IF', 'FOR') and self._is_create: + return 1 + + # Default + return 0 + + def process(self, stack, stream): + splitlevel = 0 + stmt = None + consume_ws = False + stmt_tokens = [] + for ttype, value in stream: + # Before appending the token + if (consume_ws and ttype is not T.Whitespace + and ttype is not T.Comment.Single): + consume_ws = False + stmt.tokens = stmt_tokens + yield stmt + self._reset() + stmt = None + splitlevel = 0 + if stmt is None: + stmt = Statement() + stmt_tokens = [] + splitlevel += self._change_splitlevel(ttype, value) + # Append the token + stmt_tokens.append(Token(ttype, value)) + # After appending the token + if (not splitlevel and ttype is T.Punctuation + and value == ';'): + consume_ws = True + if stmt is not None: + stmt.tokens = stmt_tokens + yield stmt + + +class GroupFilter(object): + + def process(self, stream): + pass + + +class GroupParenthesis(GroupFilter): + """Group parenthesis groups.""" + + def _finish_group(self, group): + start = group[0] + end = group[-1] + tokens = list(self._process(group[1:-1])) + return [start]+tokens+[end] + + def _process(self, stream): + group = None + depth = 0 + for token in stream: + if token.is_group(): + token.tokens = self._process(token.tokens) + if token.match(T.Punctuation, '('): + if depth == 0: + group = [] + depth += 1 + if group is not None: + group.append(token) + if token.match(T.Punctuation, ')'): + depth -= 1 + if depth == 0: + yield Parenthesis(self._finish_group(group)) + group = None + continue + if group is None: + yield token + + def process(self, group): + if not isinstance(group, Parenthesis): + group.tokens = self._process(group.tokens) + + +class GroupWhere(GroupFilter): + + def _process(self, stream): + group = None + depth = 0 + for token in stream: + if token.is_group(): + token.tokens = self._process(token.tokens) + if token.match(T.Keyword, 'WHERE'): + if depth == 0: + group = [] + depth += 1 + # Process conditions here? E.g. "A =|!=|in|is|... B"... 
+ elif (token.ttype is T.Keyword + and token.value.upper() in ('ORDER', 'GROUP', + 'LIMIT', 'UNION')): + depth -= 1 + if depth == 0: + yield Where(group) + group = None + if depth < 0: + depth = 0 + if group is not None: + group.append(token) + else: + yield token + if group is not None: + yield Where(group) + + def process(self, group): + if not isinstance(group, Where): + group.tokens = self._process(group.tokens) + + +class GroupMultiComments(GroupFilter): + """Groups Comment.Multiline and adds trailing whitespace up to first lb.""" + + def _process(self, stream): + new_tokens = [] + grp = None + consume_ws = False + for token in stream: + if token.is_group(): + token.tokens = self._process(token.tokens) + if token.ttype is T.Comment.Multiline: + if grp is None: + grp = [] + consume_ws = True + grp.append(token) + elif consume_ws and token.ttype is not T.Whitespace: + yield CommentMulti(grp) + grp = None + consume_ws = False + yield token + elif consume_ws: + lines = token.value.splitlines(True) + grp.append(Token(T.Whitespace, lines[0])) + if lines[0].endswith('\n'): + yield CommentMulti(grp) + grp = None + consume_ws = False + if lines[1:]: + yield Token(T.Whitespace, ''.join(lines[1:])) + else: + yield token + + def process(self, group): + if not isinstance(group, CommentMulti): + group.tokens = self._process(group.tokens) + + +## class GroupIdentifier(GroupFilter): + +## def _process(self, stream): +## buff = [] +## expect_dot = False +## for token in stream: +## if token.is_group(): +## token.tokens = self._process(token.tokens) +## if (token.ttype is T.String.Symbol or token.ttype is T.Name +## and not expect_dot): +## buff.append(token) +## expect_dot = True +## elif expect_dot and token.match(T.Punctuation, '.'): +## buff.append(token) +## expect_dot = False +## else: +## if expect_dot == False: +## # something's wrong, it ends with a dot... 
+## while buff: +## yield buff.pop(0) +## expect_dot = False +## elif buff: +## idt = Identifier() +## idt.tokens = buff +## yield idt +## buff = [] +## yield token +## if buff and expect_dot: +## idt = Identifier() +## idt.tokens = buff +## yield idt +## buff = [] +## while buff: +## yield buff.pop(0) + +## def process(self, group): +## if not isinstance(group, Identifier): +## group.tokens = self._process(group.tokens) + + +class AddTypeCastFilter(GroupFilter): + + def _process(self, stream): + buff = [] + expect_colon = False + has_colons = False + for token in stream: + if token.is_group(): + token.tokens = self._process(token.tokens) + if ((isinstance(token, Parenthesis) + or isinstance(token, Identifier)) + and not expect_colon): + buff.append(token) + expect_colon = True + elif expect_colon and token.match(T.Punctuation, ':'): + buff.append(token) + has_colons = True + elif (expect_colon + and (token.ttype in T.Name + or isinstance(token, Identifier)) + ): + if not has_colons: + while buff: + yield buff.pop(0) + yield token + else: + buff.append(token) + grp = TypeCast() + grp.tokens = buff + buff = [] + yield grp + expect_colons = has_colons = False + else: + while buff: + yield buff.pop(0) + yield token + while buff: + yield buff.pop(0) + + def process(self, group): + if not isinstance(group, TypeCast): + group.tokens = self._process(group.tokens) + + +class AddAliasFilter(GroupFilter): + + def _process(self, stream): + buff = [] + search_alias = False + lazy = False + for token in stream: + if token.is_group(): + token.tokens = self._process(token.tokens) + if search_alias and (isinstance(token, Identifier) + or token.ttype in (T.Name, + T.String.Symbol) + or (lazy and not token.is_whitespace())): + buff.append(token) + search_alias = lazy = False + grp = Alias() + grp.tokens = buff + buff = [] + yield grp + elif (isinstance(token, (Identifier, TypeCast)) + or token.ttype in (T.Name, T.String.Symbol)): + buff.append(token) + search_alias = True + elif search_alias and (token.is_whitespace() + or token.match(T.Keyword, 'as')): + buff.append(token) + if token.match(T.Keyword, 'as'): + lazy = True + else: + while buff: + yield buff.pop(0) + yield token + search_alias = False + while buff: + yield buff.pop(0) + + def process(self, group): + if not isinstance(group, Alias): + group.tokens = self._process(group.tokens) + + +GROUP_FILTER = (GroupParenthesis(), + GroupMultiComments(), + GroupWhere(), + GroupIdentifier(), + AddTypeCastFilter(), + AddAliasFilter(), + ) + +import types +def group_tokens(group): + def _materialize(g): + if type(g.tokens) is not types.TupleType: + g.tokens = tuple(g.tokens) + for sg in g.subgroups: + _materialize(sg) + for groupfilter in GROUP_FILTER: + groupfilter.process(group) +# _materialize(group) +# group.tokens = tuple(group.tokens) +# for subgroup in group.subgroups: +# group_tokens(subgroup) diff --git a/sqlparse/engine/filter.py b/sqlparse/engine/filter.py new file mode 100644 index 0000000..146690c --- /dev/null +++ b/sqlparse/engine/filter.py @@ -0,0 +1,98 @@ +# -*- coding: utf-8 -*- + +from sqlparse import tokens as T +from sqlparse.engine.grouping import Statement, Token + + +class TokenFilter(object): + + def __init__(self, **options): + self.options = options + + def process(self, stack, stream): + """Process token stream.""" + raise NotImplementedError + + +class StatementFilter(TokenFilter): + + def __init__(self): + self._in_declare = False + self._in_dbldollar = False + self._is_create = False + + def _reset(self): + self._in_declare = 
False + self._in_dbldollar = False + self._is_create = False + + def _change_splitlevel(self, ttype, value): + # PostgreSQL + if (ttype == T.Name.Builtin + and value.startswith('$') and value.endswith('$')): + if self._in_dbldollar: + self._in_dbldollar = False + return -1 + else: + self._in_dbldollar = True + return 1 + elif self._in_dbldollar: + return 0 + + # ANSI + if ttype is not T.Keyword: + return 0 + + unified = value.upper() + + if unified == 'DECLARE': + self._in_declare = True + return 1 + + if unified == 'BEGIN': + if self._in_declare: + return 0 + return 0 + + if unified == 'END': + # Should this respect a preceeding BEGIN? + # In CASE ... WHEN ... END this results in a split level -1. + return -1 + + if ttype is T.Keyword.DDL and unified.startswith('CREATE'): + self._is_create = True + + if unified in ('IF', 'FOR') and self._is_create: + return 1 + + # Default + return 0 + + def process(self, stack, stream): + splitlevel = 0 + stmt = None + consume_ws = False + stmt_tokens = [] + for ttype, value in stream: + # Before appending the token + if (consume_ws and ttype is not T.Whitespace + and ttype is not T.Comment.Single): + consume_ws = False + stmt.tokens = stmt_tokens + yield stmt + self._reset() + stmt = None + splitlevel = 0 + if stmt is None: + stmt = Statement() + stmt_tokens = [] + splitlevel += self._change_splitlevel(ttype, value) + # Append the token + stmt_tokens.append(Token(ttype, value)) + # After appending the token + if (splitlevel <= 0 and ttype is T.Punctuation + and value == ';'): + consume_ws = True + if stmt is not None: + stmt.tokens = stmt_tokens + yield stmt diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py new file mode 100644 index 0000000..433f539 --- /dev/null +++ b/sqlparse/engine/grouping.py @@ -0,0 +1,537 @@ +# -*- coding: utf-8 -*- + +import itertools +import re +import types + +from sqlparse import tokens as T + + +class Token(object): + + __slots__ = ('value', 'ttype') + + def __init__(self, ttype, value): + self.value = value + self.ttype = ttype + + def __str__(self): + return unicode(self).encode('latin-1') + + def __repr__(self): + short = self._get_repr_value() + return '<%s \'%s\' at 0x%07x>' % (self._get_repr_name(), + short, id(self)) + + def __unicode__(self): + return self.value + + def to_unicode(self): + return unicode(self) + + def _get_repr_name(self): + return str(self.ttype).split('.')[-1] + + def _get_repr_value(self): + raw = unicode(self) + if len(raw) > 7: + short = raw[:6]+u'...' 
+ else: + short = raw + return re.sub('\s+', ' ', short) + + def match(self, ttype, values, regex=False): + if self.ttype is not ttype: + return False + if values is None: + return self.ttype is ttype + if isinstance(values, basestring): + values = [values] + if regex: + if self.ttype is T.Keyword: + values = [re.compile(v, re.IGNORECASE) for v in values] + else: + values = [re.compile(v) for v in values] + for pattern in values: + if pattern.search(self.value): + return True + return False + else: + if self.ttype is T.Keyword: + return self.value.upper() in [v.upper() for v in values] + else: + return self.value in values + + def is_group(self): + return False + + def is_whitespace(self): + return self.ttype and self.ttype in T.Whitespace + + +class TokenList(Token): + + __slots__ = ('value', 'ttype', 'tokens') + + def __init__(self, tokens=None): + if tokens is None: + tokens = [] + self.tokens = tokens + Token.__init__(self, None, None) + + def __unicode__(self): + return ''.join(unicode(x) for x in self.flatten()) + + def __str__(self): + return unicode(self).encode('latin-1') + + def _get_repr_name(self): + return self.__class__.__name__ + + def _pprint_tree(self, max_depth=None, depth=0): + """Pretty-print the object tree.""" + indent = ' '*(depth*2) + for token in self.tokens: + if token.is_group(): + pre = ' | ' + else: + pre = ' | ' + print '%s%s%s \'%s\'' % (indent, pre, token._get_repr_name(), + token._get_repr_value()) + if (token.is_group() and max_depth is not None + and depth < max_depth): + token._pprint_tree(max_depth, depth+1) + + def flatten(self): + for token in self.tokens: + if isinstance(token, TokenList): + for item in token.flatten(): + yield item + else: + yield token + + def is_group(self): + return True + + def get_sublists(self): + return [x for x in self.tokens if isinstance(x, TokenList)] + + def token_first(self, ignore_whitespace=True): + for token in self.tokens: + if ignore_whitespace and token.is_whitespace(): + continue + return token + return None + + def token_next_by_instance(self, idx, clss): + if type(clss) not in (types.ListType, types.TupleType): + clss = (clss,) + if type(clss) is not types.TupleType: + clss = tuple(clss) + for token in self.tokens[idx:]: + if isinstance(token, clss): + return token + return None + + def token_next_by_type(self, idx, ttypes): + if not isinstance(ttypes, (types.TupleType, types.ListType)): + ttypes = [ttypes] + for token in self.tokens[idx:]: + if token.ttype in ttypes: + return token + return None + + def token_next_match(self, idx, ttype, value, regex=False): + if type(idx) != types.IntType: + idx = self.token_index(idx) + for token in self.tokens[idx:]: + if token.match(ttype, value, regex): + return token + return None + + def token_not_matching(self, idx, funcs): + for token in self.tokens[idx:]: + passed = False + for func in funcs: + if func(token): + passed = True + break + if not passed: + return token + return None + + def token_prev(self, idx, skip_ws=True): + while idx != 0: + idx -= 1 + if self.tokens[idx].is_whitespace() and skip_ws: + continue + return self.tokens[idx] + + def token_next(self, idx, skip_ws=True): + while idx < len(self.tokens)-1: + idx += 1 + if self.tokens[idx].is_whitespace() and skip_ws: + continue + return self.tokens[idx] + + def token_index(self, token): + """Return list index of token.""" + return self.tokens.index(token) + + def tokens_between(self, start, end, exclude_end=False): + """Return all tokens between (and including) start and end.""" + if exclude_end: + offset = 
0 + else: + offset = 1 + return self.tokens[self.token_index(start):self.token_index(end)+offset] + + def group_tokens(self, grp_cls, tokens): + """Replace tokens by instance of grp_cls.""" + idx = self.token_index(tokens[0]) + for t in tokens: + self.tokens.remove(t) + grp = grp_cls(tokens) + self.tokens.insert(idx, grp) + return grp + + def insert_before(self, where, token): + self.tokens.insert(self.token_index(where), token) + + +class Statement(TokenList): + + __slots__ = ('value', 'ttype', 'tokens') + + def get_type(self): + first_token = self.token_first() + if first_token.ttype in (T.Keyword.DML, T.Keyword.DDL): + return first_token.value.upper() + else: + return 'UNKNOWN' + + +class Identifier(TokenList): + + __slots__ = ('value', 'ttype', 'tokens') + + def has_alias(self): + return self.get_alias() is not None + + def get_alias(self): + kw = self.token_next_match(0, T.Keyword, 'AS') + if kw is not None: + alias = self.token_next(self.token_index(kw)) + if alias is None: + return None + else: + next_ = self.token_next(0) + if next_ is None or not isinstance(next_, Identifier): + return None + alias = next_ + if isinstance(alias, Identifier): + return alias.get_name() + else: + return alias.to_unicode() + + def get_name(self): + alias = self.get_alias() + if alias is not None: + return alias + return self.get_real_name() + + def get_real_name(self): + return self.token_next_by_type(0, T.Name).value + + def get_typecast(self): + marker = self.token_next_match(0, T.Punctuation, '::') + if marker is None: + return None + next_ = self.token_next(self.token_index(marker), False) + if next_ is None: + return None + return next_.to_unicode() + + +class IdentifierList(TokenList): + + __slots__ = ('value', 'ttype', 'tokens') + + def get_identifiers(self): + return [x for x in self.tokens if isinstance(x, Identifier)] + + +class Parenthesis(TokenList): + __slots__ = ('value', 'ttype', 'tokens') + + +class Assignment(TokenList): + __slots__ = ('value', 'ttype', 'tokens') + +class If(TokenList): + __slots__ = ('value', 'ttype', 'tokens') + +class For(TokenList): + __slots__ = ('value', 'ttype', 'tokens') + +class Comparsion(TokenList): + __slots__ = ('value', 'ttype', 'tokens') + +class Comment(TokenList): + __slots__ = ('value', 'ttype', 'tokens') + +class Where(TokenList): + __slots__ = ('value', 'ttype', 'tokens') + + +class Case(TokenList): + + __slots__ = ('value', 'ttype', 'tokens') + + def get_cases(self): + """Returns a list of 2-tuples (condition, value). + + If an ELSE exists condition is None. 
+ """ + ret = [] + in_condition = in_value = False + for token in self.tokens: + if token.match(T.Keyword, 'WHEN'): + ret.append(([], [])) + in_condition = True + in_value = False + elif token.match(T.Keyword, 'ELSE'): + ret.append((None, [])) + in_condition = False + in_value = True + elif token.match(T.Keyword, 'THEN'): + in_condition = False + in_value = True + elif token.match(T.Keyword, 'END'): + in_condition = False + in_value = False + if in_condition: + ret[-1][0].append(token) + elif in_value: + ret[-1][1].append(token) + return ret + +def _group_left_right(tlist, ttype, value, cls, + check_right=lambda t: True, + include_semicolon=False): +# [_group_left_right(sgroup, ttype, value, cls, check_right, +# include_semicolon) for sgroup in tlist.get_sublists() +# if not isinstance(sgroup, cls)] + idx = 0 + token = tlist.token_next_match(idx, ttype, value) + while token: + right = tlist.token_next(tlist.token_index(token)) + left = tlist.token_prev(tlist.token_index(token)) + if (right is None or not check_right(right) + or left is None): + token = tlist.token_next_match(tlist.token_index(token)+1, + ttype, value) + else: + if include_semicolon: + right = tlist.token_next_match(tlist.token_index(right), + T.Punctuation, ';') + tokens = tlist.tokens_between(left, right)[1:] + if not isinstance(left, cls): + new = cls([left]) + new_idx = tlist.token_index(left) + tlist.tokens.remove(left) + tlist.tokens.insert(new_idx, new) + left = new + left.tokens.extend(tokens) + for t in tokens: + tlist.tokens.remove(t) + token = tlist.token_next_match(tlist.token_index(left)+1, + ttype, value) + +def _group_matching(tlist, start_ttype, start_value, end_ttype, end_value, + cls, include_semicolon=False, recurse=False): + def _find_matching(i, tl, stt, sva, ett, eva): + depth = 1 + for t in tl.tokens[i:]: + if t.match(stt, sva): + depth += 1 + elif t.match(ett, eva): + depth -= 1 + if depth == 1: + return t + return None + [_group_matching(sgroup, start_ttype, start_value, end_ttype, end_value, + cls, include_semicolon) for sgroup in tlist.get_sublists() + if recurse] + if isinstance(tlist, cls): + idx = 1 + else: + idx = 0 + token = tlist.token_next_match(idx, start_ttype, start_value) + while token: + tidx = tlist.token_index(token) + end = _find_matching(tidx, tlist, start_ttype, start_value, + end_ttype, end_value) + if end is None: + idx = tidx+1 + else: + if include_semicolon: + next_ = tlist.token_next(tlist.token_index(end)) + if next_ and next_.match(T.Punctuation, ';'): + end = next_ + group = tlist.group_tokens(cls, tlist.tokens_between(token, end)) + _group_matching(group, start_ttype, start_value, + end_ttype, end_value, cls, include_semicolon) + idx = tlist.token_index(group)+1 + token = tlist.token_next_match(idx, start_ttype, start_value) + +def group_if(tlist): + _group_matching(tlist, T.Keyword, 'IF', T.Keyword, 'END IF', If, True) + +def group_for(tlist): + _group_matching(tlist, T.Keyword, 'FOR', T.Keyword, 'END LOOP', For, True) + +def group_as(tlist): + _group_left_right(tlist, T.Keyword, 'AS', Identifier) + +def group_assignment(tlist): + _group_left_right(tlist, T.Assignment, ':=', Assignment, + include_semicolon=True) + +def group_comparsion(tlist): + _group_left_right(tlist, T.Operator, None, Comparsion) + + +def group_case(tlist): + _group_matching(tlist, T.Keyword, 'CASE', T.Keyword, 'END', Case, True) + + +def group_identifier(tlist): + def _consume_cycle(tl, i): + x = itertools.cycle((lambda y: y.match(T.Punctuation, '.'), + lambda y: y.ttype in (T.String.Symbol, 
T.Name))) + for t in tl.tokens[i:]: + if x.next()(t): + yield t + else: + raise StopIteration + + # bottom up approach: group subgroups first + [group_identifier(sgroup) for sgroup in tlist.get_sublists() + if not isinstance(sgroup, Identifier)] + + # real processing + idx = 0 + token = tlist.token_next_by_type(idx, (T.String.Symbol, T.Name)) + while token: + identifier_tokens = [token]+list( + _consume_cycle(tlist, + tlist.token_index(token)+1)) + group = tlist.group_tokens(Identifier, identifier_tokens) + idx = tlist.token_index(group)+1 + token = tlist.token_next_by_type(idx, (T.String.Symbol, T.Name)) + + +def group_identifier_list(tlist): + [group_identifier_list(sgroup) for sgroup in tlist.get_sublists() + if not isinstance(sgroup, IdentifierList)] + idx = 0 + token = tlist.token_next_by_instance(idx, Identifier) + while token: + tidx = tlist.token_index(token) + end = tlist.token_not_matching(tidx+1, + [lambda t: isinstance(t, Identifier), + lambda t: t.is_whitespace(), + lambda t: t.match(T.Punctuation, + ',') + ]) + if end is None: + idx = tidx + 1 + else: + grp_tokens = tlist.tokens_between(token, end, exclude_end=True) + while grp_tokens and (grp_tokens[-1].is_whitespace() + or grp_tokens[-1].match(T.Punctuation, ',')): + grp_tokens.pop() + if len(grp_tokens) <= 1: + idx = tidx + 1 + else: + group = tlist.group_tokens(IdentifierList, grp_tokens) + idx = tlist.token_index(group) + token = tlist.token_next_by_instance(idx, Identifier) + + +def group_parenthesis(tlist): + _group_matching(tlist, T.Punctuation, '(', T.Punctuation, ')', Parenthesis) + +def group_comments(tlist): + [group_comments(sgroup) for sgroup in tlist.get_sublists() + if not isinstance(sgroup, Comment)] + idx = 0 + token = tlist.token_next_by_type(idx, T.Comment) + while token: + tidx = tlist.token_index(token) + end = tlist.token_not_matching(tidx+1, + [lambda t: t.ttype in T.Comment, + lambda t: t.is_whitespace()]) + if end is None: + idx = tidx + 1 + else: + eidx = tlist.token_index(end) + grp_tokens = tlist.tokens_between(token, + tlist.token_prev(eidx, False)) + group = tlist.group_tokens(Comment, grp_tokens) + idx = tlist.token_index(group) + token = tlist.token_next_by_type(idx, T.Comment) + +def group_where(tlist): + [group_where(sgroup) for sgroup in tlist.get_sublists() + if not isinstance(sgroup, Where)] + idx = 0 + token = tlist.token_next_match(idx, T.Keyword, 'WHERE') + stopwords = ('ORDER', 'GROUP', 'LIMIT', 'UNION') + while token: + tidx = tlist.token_index(token) + end = tlist.token_next_match(tidx+1, T.Keyword, stopwords) + if end is None: + end = tlist.tokens[-1] + else: + end = tlist.tokens[tlist.token_index(end)-1] + group = tlist.group_tokens(Where, tlist.tokens_between(token, end)) + idx = tlist.token_index(group) + token = tlist.token_next_match(idx, T.Keyword, 'WHERE') + +def group_aliased(tlist): + [group_aliased(sgroup) for sgroup in tlist.get_sublists() + if not isinstance(sgroup, Identifier)] + idx = 0 + token = tlist.token_next_by_instance(idx, Identifier) + while token: + next_ = tlist.token_next(tlist.token_index(token)) + if next_ is not None and isinstance(next_, Identifier): + grp = tlist.tokens_between(token, next_)[1:] + token.tokens.extend(grp) + for t in grp: + tlist.tokens.remove(t) + idx = tlist.token_index(token)+1 + token = tlist.token_next_by_instance(idx, Identifier) + + +def group_typecasts(tlist): + _group_left_right(tlist, T.Punctuation, '::', Identifier) + + +def group(tlist): + for func in [group_parenthesis, + group_comments, + group_where, + group_case, + 
group_identifier, + group_typecasts, + group_as, + group_aliased, + group_assignment, + group_comparsion, + group_identifier_list, + group_if, + group_for,]: + func(tlist) diff --git a/sqlparse/filters.py b/sqlparse/filters.py new file mode 100644 index 0000000..695b298 --- /dev/null +++ b/sqlparse/filters.py @@ -0,0 +1,432 @@ +# -*- coding: utf-8 -*- + +import re + +from sqlparse.engine import grouping +from sqlparse import tokens as T + + +class Filter(object): + + def process(self, *args): + raise NotImplementedError + + +class TokenFilter(Filter): + + def process(self, stack, stream): + raise NotImplementedError + + +# FIXME: Should be removed +def rstrip(stream): + buff = [] + for token in stream: + if token.is_whitespace() and '\n' in token.value: + # assuming there's only one \n in value + before, rest = token.value.split('\n', 1) + token.value = '\n%s' % rest + buff = [] + yield token + elif token.is_whitespace(): + buff.append(token) + elif token.is_group(): + token.tokens = list(rstrip(token.tokens)) + # process group and look if it starts with a nl + if token.tokens and token.tokens[0].is_whitespace(): + before, rest = token.tokens[0].value.split('\n', 1) + token.tokens[0].value = '\n%s' % rest + buff = [] + while buff: + yield buff.pop(0) + yield token + else: + while buff: + yield buff.pop(0) + yield token + + +# -------------------------- +# token process + +class _CaseFilter(TokenFilter): + + ttype = None + + def __init__(self, case=None): + if case is None: + case = 'upper' + assert case in ['lower', 'upper', 'capitalize'] + self.convert = getattr(unicode, case) + + def process(self, stack, stream): + for ttype, value in stream: + if ttype in self.ttype: + value = self.convert(value) + yield ttype, value + + +class KeywordCaseFilter(_CaseFilter): + ttype = T.Keyword + + +class IdentifierCaseFilter(_CaseFilter): + ttype = (T.Name, T.String.Symbol) + + +# ---------------------- +# statement process + +class StripCommentsFilter(Filter): + + def _process(self, tlist): + idx = 0 + clss = set([x.__class__ for x in tlist.tokens]) + while grouping.Comment in clss: + token = tlist.token_next_by_instance(0, grouping.Comment) + tidx = tlist.token_index(token) + prev = tlist.token_prev(tidx, False) + next_ = tlist.token_next(tidx, False) + # Replace by whitespace if prev and next exist and if they're not + # whitespaces. This doesn't apply if prev or next is a paranthesis. 
+ if (prev is not None and next_ is not None + and not prev.is_whitespace() and not next_.is_whitespace() + and not (prev.match(T.Punctuation, '(') + or next_.match(T.Punctuation, ')'))): + tlist.tokens[tidx] = grouping.Token(T.Whitespace, ' ') + else: + tlist.tokens.pop(tidx) + clss = set([x.__class__ for x in tlist.tokens]) + + def process(self, stack, stmt): + [self.process(stack, sgroup) for sgroup in stmt.get_sublists()] + self._process(stmt) + + +class StripWhitespaceFilter(Filter): + + def _stripws(self, tlist): + func_name = '_stripws_%s' % tlist.__class__.__name__.lower() + func = getattr(self, func_name, self._stripws_default) + func(tlist) + + def _stripws_default(self, tlist): + last_was_ws = False + for token in tlist.tokens: + if token.is_whitespace(): + if last_was_ws: + token.value = '' + else: + token.value = ' ' + last_was_ws = token.is_whitespace() + + def _stripws_parenthesis(self, tlist): + if tlist.tokens[1].is_whitespace(): + tlist.tokens.pop(1) + if tlist.tokens[-2].is_whitespace(): + tlist.tokens.pop(-2) + self._stripws_default(tlist) + + def process(self, stack, stmt): + [self.process(stack, sgroup) for sgroup in stmt.get_sublists()] + self._stripws(stmt) + if stmt.tokens[-1].is_whitespace(): + stmt.tokens.pop(-1) + + +class ReindentFilter(Filter): + + def __init__(self, width=2, char=' ', line_width=None): + self.width = width + self.char = char + self.indent = 0 + self.offset = 0 + self.line_width = line_width + self._curr_stmt = None + self._last_stmt = None + + def _get_offset(self, token): + all_ = list(self._curr_stmt.flatten()) + idx = all_.index(token) + raw = ''.join(unicode(x) for x in all_[:idx+1]) + line = raw.splitlines()[-1] + # Now take current offset into account and return relative offset. + full_offset = len(line)-(len(self.char*(self.width*self.indent))) + return full_offset - self.offset + + def nl(self): + # TODO: newline character should be configurable + ws = '\n'+(self.char*((self.indent*self.width)+self.offset)) + return grouping.Token(T.Whitespace, ws) + + def _split_kwds(self, tlist): + split_words = ('FROM', 'JOIN$', 'AND', 'OR', + 'GROUP', 'ORDER', 'UNION', 'VALUES') + idx = 0 + token = tlist.token_next_match(idx, T.Keyword, split_words, + regex=True) + while token: + prev = tlist.token_prev(tlist.token_index(token), False) + offset = 1 + if prev and prev.is_whitespace(): + tlist.tokens.pop(tlist.token_index(prev)) + offset += 1 + nl = self.nl() + tlist.insert_before(token, nl) + token = tlist.token_next_match(tlist.token_index(nl)+offset, + T.Keyword, split_words, regex=True) + + def _split_statements(self, tlist): + idx = 0 + token = tlist.token_next_by_type(idx, (T.Keyword.DDL, T.Keyword.DML)) + while token: + prev = tlist.token_prev(tlist.token_index(token), False) + if prev and prev.is_whitespace(): + tlist.tokens.pop(tlist.token_index(prev)) + # only break if it's not the first token + if prev: + nl = self.nl() + tlist.insert_before(token, nl) + token = tlist.token_next_by_type(tlist.token_index(token)+1, + (T.Keyword.DDL, T.Keyword.DML)) + + def _process(self, tlist): + func_name = '_process_%s' % tlist.__class__.__name__.lower() + func = getattr(self, func_name, self._process_default) + func(tlist) + + def _process_where(self, tlist): + token = tlist.token_next_match(0, T.Keyword, 'WHERE') + tlist.insert_before(token, self.nl()) + self.indent += 1 + self._process_default(tlist) + self.indent -= 1 + + def _process_parenthesis(self, tlist): + first = tlist.token_next(0) + indented = False + if first and first.ttype in 
(T.Keyword.DML, T.Keyword.DDL): + self.indent += 1 + tlist.tokens.insert(0, self.nl()) + indented = True + num_offset = self._get_offset(tlist.token_next_match(0, + T.Punctuation, '(')) + self.offset += num_offset + self._process_default(tlist, stmts=not indented) + if indented: + self.indent -= 1 + self.offset -= num_offset + + def _process_identifierlist(self, tlist): + identifiers = tlist.get_identifiers() + if len(identifiers) > 1: + first = list(identifiers[0].flatten())[0] + num_offset = self._get_offset(first)-len(first.value) + self.offset += num_offset + for token in identifiers[1:]: + tlist.insert_before(token, self.nl()) + self.offset -= num_offset + self._process_default(tlist) + + def _process_case(self, tlist): + cases = tlist.get_cases() + is_first = True + num_offset = None + case = tlist.tokens[0] + outer_offset = self._get_offset(case)-len(case.value) + self.offset += outer_offset + for cond, value in tlist.get_cases(): + if is_first: + is_first = False + num_offset = self._get_offset(cond[0])-len(cond[0].value) + self.offset += num_offset + continue + if cond is None: + token = value[0] + else: + token = cond[0] + tlist.insert_before(token, self.nl()) + # Line breaks on group level are done. Now let's add an offset of + # 5 (=length of "when", "then", "else") and process subgroups. + self.offset += 5 + self._process_default(tlist) + self.offset -= 5 + if num_offset is not None: + self.offset -= num_offset + end = tlist.token_next_match(0, T.Keyword, 'END') + tlist.insert_before(end, self.nl()) + self.offset -= outer_offset + + def _process_default(self, tlist, stmts=True, kwds=True): + if stmts: + self._split_statements(tlist) + if kwds: + self._split_kwds(tlist) + [self._process(sgroup) for sgroup in tlist.get_sublists()] + + def process(self, stack, stmt): + if isinstance(stmt, grouping.Statement): + self._curr_stmt = stmt + self._process(stmt) + if isinstance(stmt, grouping.Statement): + if self._last_stmt is not None: + if self._last_stmt.to_unicode().endswith('\n'): + nl = '\n' + else: + nl = '\n\n' + stmt.tokens.insert(0, + grouping.Token(T.Whitespace, nl)) + if self._last_stmt != stmt: + self._last_stmt = stmt + + +# FIXME: Doesn't work ;) +class RightMarginFilter(Filter): + + keep_together = ( +# grouping.TypeCast, grouping.Identifier, grouping.Alias, + ) + + def __init__(self, width=79): + self.width = width + self.line = '' + + def _process(self, stack, group, stream): + for token in stream: + if token.is_whitespace() and '\n' in token.value: + if token.value.endswith('\n'): + self.line = '' + else: + self.line = token.value.splitlines()[-1] + elif (token.is_group() + and not token.__class__ in self.keep_together): + token.tokens = self._process(stack, token, token.tokens) + else: + val = token.to_unicode() + if len(self.line) + len(val) > self.width: + match = re.search('^ +', self.line) + if match is not None: + indent = match.group() + else: + indent = '' + yield grouping.Token(T.Whitespace, '\n%s' % indent) + self.line = indent + self.line += val + yield token + + def process(self, stack, group): + return + group.tokens = self._process(stack, group, group.tokens) + + +# --------------------------- +# postprocess + +class SerializerUnicode(Filter): + + def process(self, stack, stmt): + raw = stmt.to_unicode() + add_nl = raw.endswith('\n') + res = '\n'.join(line.rstrip() for line in raw.splitlines()) + if add_nl: + res += '\n' + return res + + +class OutputPythonFilter(Filter): + + def __init__(self, varname='sql'): + self.varname = varname + self.cnt = 0 + + 
def _process(self, stream, varname, count, has_nl): + if count > 1: + yield grouping.Token(T.Whitespace, '\n') + yield grouping.Token(T.Name, varname) + yield grouping.Token(T.Whitespace, ' ') + yield grouping.Token(T.Operator, '=') + yield grouping.Token(T.Whitespace, ' ') + if has_nl: + yield grouping.Token(T.Operator, '(') + yield grouping.Token(T.Text, "'") + cnt = 0 + for token in stream: + cnt += 1 + if token.is_whitespace() and '\n' in token.value: + if cnt == 1: + continue + after_lb = token.value.split('\n', 1)[1] + yield grouping.Token(T.Text, "'") + yield grouping.Token(T.Whitespace, '\n') + for i in range(len(varname)+4): + yield grouping.Token(T.Whitespace, ' ') + yield grouping.Token(T.Text, "'") + if after_lb: # it's the indendation + yield grouping.Token(T.Whitespace, after_lb) + continue + elif token.value and "'" in token.value: + token.value = token.value.replace("'", "\\'") + yield grouping.Token(T.Text, token.value or '') + yield grouping.Token(T.Text, "'") + if has_nl: + yield grouping.Token(T.Operator, ')') + + def process(self, stack, stmt): + self.cnt += 1 + if self.cnt > 1: + varname = '%s%d' % (self.varname, self.cnt) + else: + varname = self.varname + has_nl = len(stmt.to_unicode().strip().splitlines()) > 1 + stmt.tokens = self._process(stmt.tokens, varname, self.cnt, has_nl) + return stmt + + +class OutputPHPFilter(Filter): + + def __init__(self, varname='sql'): + self.varname = '$%s' % varname + self.count = 0 + + def _process(self, stream, varname): + if self.count > 1: + yield grouping.Token(T.Whitespace, '\n') + yield grouping.Token(T.Name, varname) + yield grouping.Token(T.Whitespace, ' ') + yield grouping.Token(T.Operator, '=') + yield grouping.Token(T.Whitespace, ' ') + yield grouping.Token(T.Text, '"') + cnt = 0 + for token in stream: + if token.is_whitespace() and '\n' in token.value: + cnt += 1 + if cnt == 1: + continue + after_lb = token.value.split('\n', 1)[1] + yield grouping.Token(T.Text, '"') + yield grouping.Token(T.Operator, ';') + yield grouping.Token(T.Whitespace, '\n') + yield grouping.Token(T.Name, varname) + yield grouping.Token(T.Whitespace, ' ') + yield grouping.Token(T.Punctuation, '.') + yield grouping.Token(T.Operator, '=') + yield grouping.Token(T.Whitespace, ' ') + yield grouping.Token(T.Text, '"') + if after_lb: + yield grouping.Token(T.Text, after_lb) + continue + elif '"' in token.value: + token.value = token.value.replace('"', '\\"') + yield grouping.Token(T.Text, token.value) + yield grouping.Token(T.Text, '"') + yield grouping.Token(T.Punctuation, ';') + + def process(self, stack, stmt): + self.count += 1 + if self.count > 1: + varname = '%s%d' % (self.varname, self.count) + else: + varname = self.varname + stmt.tokens = tuple(self._process(stmt.tokens, varname)) + return stmt + diff --git a/sqlparse/formatter.py b/sqlparse/formatter.py new file mode 100644 index 0000000..9d443ca --- /dev/null +++ b/sqlparse/formatter.py @@ -0,0 +1,163 @@ +# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com +# +# This module is part of python-sqlparse and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php. 
+ +"""SQL formatter""" + +import logging + +from sqlparse import SQLParseError +from sqlparse import filters + + +def validate_options(options): + """Validates options.""" + kwcase = options.get('keyword_case', None) + if kwcase not in [None, 'upper', 'lower', 'capitalize']: + raise SQLParseError('Invalid value for keyword_case: %r' % kwcase) + + idcase = options.get('identifier_case', None) + if idcase not in [None, 'upper', 'lower', 'capitalize']: + raise SQLParseError('Invalid value for identifier_case: %r' % idcase) + + ofrmt = options.get('output_format', None) + if ofrmt not in [None, 'sql', 'python', 'php']: + raise SQLParseError('Unknown output format: %r' % ofrmt) + + strip_comments = options.get('strip_comments', False) + if strip_comments not in [True, False]: + raise SQLParseError('Invalid value for strip_comments: %r' + % strip_comments) + + strip_ws = options.get('strip_whitespace', False) + if strip_ws not in [True, False]: + raise SQLParseError('Invalid value for strip_whitespace: %r' + % strip_ws) + + reindent = options.get('reindent', False) + if reindent not in [True, False]: + raise SQLParseError('Invalid value for reindent: %r' + % reindent) + elif reindent: + options['strip_whitespace'] = True + indent_tabs = options.get('indent_tabs', False) + if indent_tabs not in [True, False]: + raise SQLParserError('Invalid value for indent_tabs: %r' % indent_tabs) + elif indent_tabs: + options['indent_char'] = '\t' + else: + options['indent_char'] = ' ' + indent_width = options.get('indent_width', 2) + try: + indent_width = int(indent_width) + except (TypeError, ValueError): + raise SQLParseError('indent_width requires an integer') + if indent_width < 1: + raise SQLParseError('indent_width requires an positive integer') + options['indent_width'] = indent_width + + right_margin = options.get('right_margin', None) + if right_margin is not None: + try: + right_margin = int(right_margin) + except (TypeError, ValueError): + raise SQLParseError('right_margin requires an integer') + if right_margin < 10: + raise SQLParseError('right_margin requires an integer > 10') + options['right_margin'] = right_margin + + return options + + +def build_filter_stack(stack, options): + """Setup and return a filter stack. + + Args: + stack: :class:`~sqlparse.filters.FilterStack` instance + options: Dictionary with options validated by validate_options. 
+ """ + # Token filter + if 'keyword_case' in options: + stack.preprocess.append( + filters.KeywordCaseFilter(options['keyword_case'])) + + if 'identifier_case' in options: + stack.preprocess.append( + filters.IdentifierCaseFilter(options['identifier_case'])) + + # After grouping + if options.get('strip_comments', False): + stack.enable_grouping() + stack.stmtprocess.append(filters.StripCommentsFilter()) + + if (options.get('strip_whitespace', False) + or options.get('reindent', False)): + stack.enable_grouping() + stack.stmtprocess.append(filters.StripWhitespaceFilter()) + + if options.get('reindent', False): + stack.enable_grouping() + stack.stmtprocess.append( + filters.ReindentFilter(char=options['indent_char'], + width=options['indent_width'])) + + if options.get('right_margin', False): + stack.enable_grouping() + stack.stmtprocess.append( + filters.RightMarginFilter(width=options['right_margin'])) + + # Serializer + if options.get('output_format'): + frmt = options['output_format'] + if frmt.lower() == 'php': + fltr = filters.OutputPHPFilter() + elif frmt.lower() == 'python': + fltr = filters.OutputPythonFilter() + else: + fltr = None + if fltr is not None: + stack.postprocess.append(fltr) + + return stack + + +def format(statement, **options): + import filters + logging.info('OPTIONS %r', options) + lexer = Lexer() +# lexer.add_filter('whitespace') + lexer.add_filter(filters.GroupFilter()) + if options.get('reindent', False): + lexer.add_filter(filters.StripWhitespaceFilter()) + lexer.add_filter(filters.IndentFilter( + n_indents=options.get('n_indents', 2))) + if options.get('ltrim', False): + lexer.add_filter(filters.LTrimFilter()) + keyword_case = options.get('keyword_case', None) + if keyword_case is not None: + assert keyword_case in ('lower', 'upper', 'capitalize') + lexer.add_filter(filters.KeywordCaseFilter(case=keyword_case)) + identifier_case = options.get('identifier_case', None) + if identifier_case is not None: + assert identifier_case in ('lower', 'upper', 'capitalize') + lexer.add_filter(filters.IdentifierCaseFilter(case=identifier_case)) + if options.get('strip_comments', False): + lexer.add_filter(filters.StripCommentsFilter()) + right_margin = options.get('right_margin', None) + if right_margin is not None: + right_margin = int(right_margin) + assert right_margin > 0 + lexer.add_filter(filters.RightMarginFilter(margin=right_margin)) + lexer.add_filter(filters.UngroupFilter()) + if options.get('output_format', None): + ofrmt = options['output_format'] + assert ofrmt in ('sql', 'python', 'php') + if ofrmt == 'python': + lexer.add_filter(filters.OutputPythonFilter()) + elif ofrmt == 'php': + lexer.add_filter(filters.OutputPHPFilter()) + tokens = [] + for ttype, value in lexer.get_tokens(unicode(statement)): + tokens.append((ttype, value)) + return statement.__class__(tokens) diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py new file mode 100644 index 0000000..3f0632e --- /dev/null +++ b/sqlparse/keywords.py @@ -0,0 +1,589 @@ +from sqlparse.tokens import * + +KEYWORDS = { + 'ABORT': Keyword, + 'ABS': Keyword, + 'ABSOLUTE': Keyword, + 'ACCESS': Keyword, + 'ADA': Keyword, + 'ADD': Keyword, + 'ADMIN': Keyword, + 'AFTER': Keyword, + 'AGGREGATE': Keyword, + 'ALIAS': Keyword, + 'ALL': Keyword, + 'ALLOCATE': Keyword, + 'ANALYSE': Keyword, + 'ANALYZE': Keyword, + 'AND': Keyword, + 'ANY': Keyword, + 'ARE': Keyword, + 'AS': Keyword, + 'ASC': Keyword, + 'ASENSITIVE': Keyword, + 'ASSERTION': Keyword, + 'ASSIGNMENT': Keyword, + 'ASYMMETRIC': Keyword, + 'AT': Keyword, + 
+    'ATOMIC': Keyword,
+    'AUTHORIZATION': Keyword,
+    'AVG': Keyword,
+
+    'BACKWARD': Keyword,
+    'BEFORE': Keyword,
+    'BEGIN': Keyword,
+    'BETWEEN': Keyword,
+    'BITVAR': Keyword,
+    'BIT_LENGTH': Keyword,
+    'BOTH': Keyword,
+    'BREADTH': Keyword,
+    'BY': Keyword,
+
+    'C': Keyword,
+    'CACHE': Keyword,
+    'CALL': Keyword,
+    'CALLED': Keyword,
+    'CARDINALITY': Keyword,
+    'CASCADE': Keyword,
+    'CASCADED': Keyword,
+    'CASE': Keyword,
+    'CAST': Keyword,
+    'CATALOG': Keyword,
+    'CATALOG_NAME': Keyword,
+    'CHAIN': Keyword,
+    'CHARACTERISTICS': Keyword,
+    'CHARACTER_LENGTH': Keyword,
+    'CHARACTER_SET_CATALOG': Keyword,
+    'CHARACTER_SET_NAME': Keyword,
+    'CHARACTER_SET_SCHEMA': Keyword,
+    'CHAR_LENGTH': Keyword,
+    'CHECK': Keyword,
+    'CHECKED': Keyword,
+    'CHECKPOINT': Keyword,
+    'CLASS': Keyword,
+    'CLASS_ORIGIN': Keyword,
+    'CLOB': Keyword,
+    'CLOSE': Keyword,
+    'CLUSTER': Keyword,
+    'COALESCE': Keyword,
+    'COBOL': Keyword,
+    'COLLATE': Keyword,
+    'COLLATION': Keyword,
+    'COLLATION_CATALOG': Keyword,
+    'COLLATION_NAME': Keyword,
+    'COLLATION_SCHEMA': Keyword,
+    'COLUMN': Keyword,
+    'COLUMN_NAME': Keyword,
+    'COMMAND_FUNCTION': Keyword,
+    'COMMAND_FUNCTION_CODE': Keyword,
+    'COMMENT': Keyword,
+    'COMMIT': Keyword,
+    'COMMITTED': Keyword,
+    'COMPLETION': Keyword,
+    'CONDITION_NUMBER': Keyword,
+    'CONNECT': Keyword,
+    'CONNECTION': Keyword,
+    'CONNECTION_NAME': Keyword,
+    'CONSTRAINT': Keyword,
+    'CONSTRAINTS': Keyword,
+    'CONSTRAINT_CATALOG': Keyword,
+    'CONSTRAINT_NAME': Keyword,
+    'CONSTRAINT_SCHEMA': Keyword,
+    'CONSTRUCTOR': Keyword,
+    'CONTAINS': Keyword,
+    'CONTINUE': Keyword,
+    'CONVERSION': Keyword,
+    'CONVERT': Keyword,
+    'COPY': Keyword,
+    'CORRESPONDING': Keyword,
+    'COUNT': Keyword,
+    'CREATEDB': Keyword,
+    'CREATEUSER': Keyword,
+    'CROSS': Keyword,
+    'CUBE': Keyword,
+    'CURRENT': Keyword,
+    'CURRENT_DATE': Keyword,
+    'CURRENT_PATH': Keyword,
+    'CURRENT_ROLE': Keyword,
+    'CURRENT_TIME': Keyword,
+    'CURRENT_TIMESTAMP': Keyword,
+    'CURRENT_USER': Keyword,
+    'CURSOR': Keyword,
+    'CURSOR_NAME': Keyword,
+    'CYCLE': Keyword,
+
+    'DATA': Keyword,
+    'DATABASE': Keyword,
+    'DATETIME_INTERVAL_CODE': Keyword,
+    'DATETIME_INTERVAL_PRECISION': Keyword,
+    'DAY': Keyword,
+    'DEALLOCATE': Keyword,
+    'DECLARE': Keyword,
+    'DEFAULT': Keyword,
+    'DEFAULTS': Keyword,
+    'DEFERRABLE': Keyword,
+    'DEFERRED': Keyword,
+    'DEFINED': Keyword,
+    'DEFINER': Keyword,
+    'DELIMITER': Keyword,
+    'DELIMITERS': Keyword,
+    'DEREF': Keyword,
+    'DESC': Keyword,
+    'DESCRIBE': Keyword,
+    'DESCRIPTOR': Keyword,
+    'DESTROY': Keyword,
+    'DESTRUCTOR': Keyword,
+    'DETERMINISTIC': Keyword,
+    'DIAGNOSTICS': Keyword,
+    'DICTIONARY': Keyword,
+    'DISCONNECT': Keyword,
+    'DISPATCH': Keyword,
+    'DISTINCT': Keyword,
+    'DO': Keyword,
+    'DOMAIN': Keyword,
+    'DYNAMIC': Keyword,
+    'DYNAMIC_FUNCTION': Keyword,
+    'DYNAMIC_FUNCTION_CODE': Keyword,
+
+    'EACH': Keyword,
+    'ELSE': Keyword,
+    'ENCODING': Keyword,
+    'ENCRYPTED': Keyword,
+    'END': Keyword,
+    'END-EXEC': Keyword,
+    'EQUALS': Keyword,
+    'ESCAPE': Keyword,
+    'EVERY': Keyword,
+    'EXCEPT': Keyword,
+    'EXCEPTION': Keyword,
+    'EXCLUDING': Keyword,
+    'EXCLUSIVE': Keyword,
+    'EXEC': Keyword,
+    'EXECUTE': Keyword,
+    'EXISTING': Keyword,
+    'EXISTS': Keyword,
+    'EXTERNAL': Keyword,
+    'EXTRACT': Keyword,
+
+    'FALSE': Keyword,
+    'FETCH': Keyword,
+    'FINAL': Keyword,
+    'FIRST': Keyword,
+    'FOR': Keyword,
+    'FORCE': Keyword,
+    'FOREIGN': Keyword,
+    'FORTRAN': Keyword,
+    'FORWARD': Keyword,
+    'FOUND': Keyword,
+    'FREE': Keyword,
+    'FREEZE': Keyword,
+    'FROM': Keyword,
+    'FULL': Keyword,
+    'FUNCTION': Keyword,
+
+    'G': Keyword,
+    'GENERAL': Keyword,
+    'GENERATED': Keyword,
+    'GET': Keyword,
+    'GLOBAL': Keyword,
+    'GO': Keyword,
+    'GOTO': Keyword,
+    'GRANT': Keyword,
+    'GRANTED': Keyword,
+    'GROUP': Keyword,
+    'GROUPING': Keyword,
+
+    'HANDLER': Keyword,
+    'HAVING': Keyword,
+    'HIERARCHY': Keyword,
+    'HOLD': Keyword,
+    'HOST': Keyword,
+
+    'IDENTITY': Keyword,
+    'IF': Keyword,
+    'IGNORE': Keyword,
+    'ILIKE': Keyword,
+    'IMMEDIATE': Keyword,
+    'IMMUTABLE': Keyword,
+
+    'IMPLEMENTATION': Keyword,
+    'IMPLICIT': Keyword,
+    'IN': Keyword,
+    'INCLUDING': Keyword,
+    'INCREMENT': Keyword,
+    'INDEX': Keyword,
+
+    'INDICATOR': Keyword,
+    'INFIX': Keyword,
+    'INHERITS': Keyword,
+    'INITIALIZE': Keyword,
+    'INITIALLY': Keyword,
+    'INNER': Keyword,
+    'INOUT': Keyword,
+    'INPUT': Keyword,
+    'INSENSITIVE': Keyword,
+    'INSTANTIABLE': Keyword,
+    'INSTEAD': Keyword,
+    'INTERSECT': Keyword,
+    'INTO': Keyword,
+    'INVOKER': Keyword,
+    'IS': Keyword,
+    'ISNULL': Keyword,
+    'ISOLATION': Keyword,
+    'ITERATE': Keyword,
+
+    'JOIN': Keyword,
+
+    'K': Keyword,
+    'KEY': Keyword,
+    'KEY_MEMBER': Keyword,
+    'KEY_TYPE': Keyword,
+
+    'LANCOMPILER': Keyword,
+    'LANGUAGE': Keyword,
+    'LARGE': Keyword,
+    'LAST': Keyword,
+    'LATERAL': Keyword,
+    'LEADING': Keyword,
+    'LEFT': Keyword,
+    'LENGTH': Keyword,
+    'LESS': Keyword,
+    'LEVEL': Keyword,
+    'LIKE': Keyword,
+    'LIMIT': Keyword,
+    'LISTEN': Keyword,
+    'LOAD': Keyword,
+    'LOCAL': Keyword,
+    'LOCALTIME': Keyword,
+    'LOCALTIMESTAMP': Keyword,
+    'LOCATION': Keyword,
+    'LOCATOR': Keyword,
+    'LOCK': Keyword,
+    'LOWER': Keyword,
+
+    'M': Keyword,
+    'MAP': Keyword,
+    'MATCH': Keyword,
+    'MAX': Keyword,
+    'MAXVALUE': Keyword,
+    'MESSAGE_LENGTH': Keyword,
+    'MESSAGE_OCTET_LENGTH': Keyword,
+    'MESSAGE_TEXT': Keyword,
+    'METHOD': Keyword,
+    'MIN': Keyword,
+    'MINUTE': Keyword,
+    'MINVALUE': Keyword,
+    'MOD': Keyword,
+    'MODE': Keyword,
+    'MODIFIES': Keyword,
+    'MODIFY': Keyword,
+    'MONTH': Keyword,
+    'MORE': Keyword,
+    'MOVE': Keyword,
+    'MUMPS': Keyword,
+
+    'NAMES': Keyword,
+    'NATIONAL': Keyword,
+    'NATURAL': Keyword,
+    'NCHAR': Keyword,
+    'NCLOB': Keyword,
+    'NEW': Keyword,
+    'NEXT': Keyword,
+    'NO': Keyword,
+    'NOCREATEDB': Keyword,
+    'NOCREATEUSER': Keyword,
+    'NONE': Keyword,
+    'NOT': Keyword,
+    'NOTHING': Keyword,
+    'NOTIFY': Keyword,
+    'NOTNULL': Keyword,
+    'NULL': Keyword,
+    'NULLABLE': Keyword,
+    'NULLIF': Keyword,
+
+    'OBJECT': Keyword,
+    'OCTET_LENGTH': Keyword,
+    'OF': Keyword,
+    'OFF': Keyword,
+    'OFFSET': Keyword,
+    'OIDS': Keyword,
+    'OLD': Keyword,
+    'ON': Keyword,
+    'ONLY': Keyword,
+    'OPEN': Keyword,
+    'OPERATION': Keyword,
+    'OPERATOR': Keyword,
+    'OPTION': Keyword,
+    'OPTIONS': Keyword,
+    'OR': Keyword,
+    'ORDER': Keyword,
+    'ORDINALITY': Keyword,
+    'OUT': Keyword,
+    'OUTER': Keyword,
+    'OUTPUT': Keyword,
+    'OVERLAPS': Keyword,
+    'OVERLAY': Keyword,
+    'OVERRIDING': Keyword,
+    'OWNER': Keyword,
+
+    'PAD': Keyword,
+    'PARAMETER': Keyword,
+    'PARAMETERS': Keyword,
+    'PARAMETER_MODE': Keyword,
+    'PARAMETER_NAME': Keyword,
+    'PARAMETER_ORDINAL_POSITION': Keyword,
+    'PARAMETER_SPECIFIC_CATALOG': Keyword,
+    'PARAMETER_SPECIFIC_NAME': Keyword,
+    'PARAMETER_SPECIFIC_SCHEMA': Keyword,
+    'PARTIAL': Keyword,
+    'PASCAL': Keyword,
+    'PENDANT': Keyword,
+    'PLACING': Keyword,
+    'PLI': Keyword,
+    'POSITION': Keyword,
+    'POSTFIX': Keyword,
+    'PRECISION': Keyword,
+    'PREFIX': Keyword,
+    'PREORDER': Keyword,
+    'PREPARE': Keyword,
+    'PRESERVE': Keyword,
+    'PRIMARY': Keyword,
+    'PRIOR': Keyword,
+    'PRIVILEGES': Keyword,
+    'PROCEDURAL': Keyword,
+    'PROCEDURE': Keyword,
+    'PUBLIC': Keyword,
+
+    'RAISE': Keyword,
+    'READ': Keyword,
+    'READS': Keyword,
+    'RECHECK': Keyword,
+    'RECURSIVE': Keyword,
+    'REF': Keyword,
+    'REFERENCES': Keyword,
+    'REFERENCING': Keyword,
+    'REINDEX': Keyword,
+    'RELATIVE': Keyword,
+    'RENAME': Keyword,
+    'REPEATABLE': Keyword,
+    'REPLACE': Keyword,
+    'RESET': Keyword,
+    'RESTART': Keyword,
+    'RESTRICT': Keyword,
+    'RESULT': Keyword,
+    'RETURN': Keyword,
+    'RETURNED_LENGTH': Keyword,
+    'RETURNED_OCTET_LENGTH': Keyword,
+    'RETURNED_SQLSTATE': Keyword,
+    'RETURNS': Keyword,
+    'REVOKE': Keyword,
+    'RIGHT': Keyword,
+    'ROLE': Keyword,
+    'ROLLBACK': Keyword,
+    'ROLLUP': Keyword,
+    'ROUTINE': Keyword,
+    'ROUTINE_CATALOG': Keyword,
+    'ROUTINE_NAME': Keyword,
+    'ROUTINE_SCHEMA': Keyword,
+    'ROW': Keyword,
+    'ROWS': Keyword,
+    'ROW_COUNT': Keyword,
+    'RULE': Keyword,
+
+    'SAVEPOINT': Keyword,
+    'SCALE': Keyword,
+    'SCHEMA': Keyword,
+    'SCHEMA_NAME': Keyword,
+    'SCOPE': Keyword,
+    'SCROLL': Keyword,
+    'SEARCH': Keyword,
+    'SECOND': Keyword,
+    'SECURITY': Keyword,
+    'SELF': Keyword,
+    'SENSITIVE': Keyword,
+    'SERIALIZABLE': Keyword,
+    'SERVER_NAME': Keyword,
+    'SESSION': Keyword,
+    'SESSION_USER': Keyword,
+    'SETOF': Keyword,
+    'SETS': Keyword,
+    'SHARE': Keyword,
+    'SHOW': Keyword,
+    'SIMILAR': Keyword,
+    'SIMPLE': Keyword,
+    'SIZE': Keyword,
+    'SOME': Keyword,
+    'SOURCE': Keyword,
+    'SPACE': Keyword,
+    'SPECIFIC': Keyword,
+    'SPECIFICTYPE': Keyword,
+    'SPECIFIC_NAME': Keyword,
+    'SQL': Keyword,
+    'SQLCODE': Keyword,
+    'SQLERROR': Keyword,
+    'SQLEXCEPTION': Keyword,
+    'SQLSTATE': Keyword,
+    'SQLWARNING': Keyword,
+    'STABLE': Keyword,
+    'START': Keyword,
+    'STATE': Keyword,
+    'STATEMENT': Keyword,
+    'STATIC': Keyword,
+    'STATISTICS': Keyword,
+    'STDIN': Keyword,
+    'STDOUT': Keyword,
+    'STORAGE': Keyword,
+    'STRICT': Keyword,
+    'STRUCTURE': Keyword,
+    'STYPE': Keyword,
+    'SUBCLASS_ORIGIN': Keyword,
+    'SUBLIST': Keyword,
+    'SUBSTRING': Keyword,
+    'SUM': Keyword,
+    'SYMMETRIC': Keyword,
+    'SYSID': Keyword,
+    'SYSTEM': Keyword,
+    'SYSTEM_USER': Keyword,
+
+    'TABLE': Keyword,
+    'TABLE_NAME': Keyword,
+    'TEMP': Keyword,
+    'TEMPLATE': Keyword,
+    'TEMPORARY': Keyword,
+    'TERMINATE': Keyword,
+    'THAN': Keyword,
+    'THEN': Keyword,
+    'TIMESTAMP': Keyword,
+    'TIMEZONE_HOUR': Keyword,
+    'TIMEZONE_MINUTE': Keyword,
+    'TO': Keyword,
+    'TOAST': Keyword,
+    'TRAILING': Keyword,
+    'TRANSACTION': Keyword,
+    'TRANSACTIONS_COMMITTED': Keyword,
+    'TRANSACTIONS_ROLLED_BACK': Keyword,
+    'TRANSACTION_ACTIVE': Keyword,
+    'TRANSFORM': Keyword,
+    'TRANSFORMS': Keyword,
+    'TRANSLATE': Keyword,
+    'TRANSLATION': Keyword,
+    'TREAT': Keyword,
+    'TRIGGER': Keyword,
+    'TRIGGER_CATALOG': Keyword,
+    'TRIGGER_NAME': Keyword,
+    'TRIGGER_SCHEMA': Keyword,
+    'TRIM': Keyword,
+    'TRUE': Keyword,
+    'TRUNCATE': Keyword,
+    'TRUSTED': Keyword,
+    'TYPE': Keyword,
+
+    'UNCOMMITTED': Keyword,
+    'UNDER': Keyword,
+    'UNENCRYPTED': Keyword,
+    'UNION': Keyword,
+    'UNIQUE': Keyword,
+    'UNKNOWN': Keyword,
+    'UNLISTEN': Keyword,
+    'UNNAMED': Keyword,
+    'UNNEST': Keyword,
+    'UNTIL': Keyword,
+    'UPPER': Keyword,
+    'USAGE': Keyword,
+    'USER': Keyword,
+    'USER_DEFINED_TYPE_CATALOG': Keyword,
+    'USER_DEFINED_TYPE_NAME': Keyword,
+    'USER_DEFINED_TYPE_SCHEMA': Keyword,
+    'USING': Keyword,
+
+    'VACUUM': Keyword,
+    'VALID': Keyword,
+    'VALIDATOR': Keyword,
+    'VALUES': Keyword,
+    'VARIABLE': Keyword,
+    'VERBOSE': Keyword,
+    'VERSION': Keyword,
+    'VIEW': Keyword,
+    'VOLATILE': Keyword,
+
+    'WHEN': Keyword,
+    'WHENEVER': Keyword,
+    'WHERE': Keyword,
+    'WITH': Keyword,
+    'WITHOUT': Keyword,
+    'WORK': Keyword,
+    'WRITE': Keyword,
+
+    'YEAR': Keyword,
+
+    'ZONE': Keyword,
+
+
+    'ARRAY': Name.Builtin,
+    'BIGINT': Name.Builtin,
+    'BINARY': Name.Builtin,
+    'BIT': Name.Builtin,
+    'BLOB': Name.Builtin,
+    'BOOLEAN': Name.Builtin,
+    'CHAR': Name.Builtin,
+    'CHARACTER': Name.Builtin,
+    'DATE': Name.Builtin,
+    'DEC': Name.Builtin,
+    'DECIMAL': Name.Builtin,
+    'FLOAT': Name.Builtin,
+    'INT': Name.Builtin,
+    'INTEGER': Name.Builtin,
+    'INTERVAL': Name.Builtin,
+    'NUMBER': Name.Builtin,
+    'NUMERIC': Name.Builtin,
+    'REAL': Name.Builtin,
+    'SERIAL': Name.Builtin,
+    'SMALLINT': Name.Builtin,
+    'VARCHAR': Name.Builtin,
+    'VARYING': Name.Builtin,
+    'INT8': Name.Builtin,
+    'SERIAL8': Name.Builtin,
+    'TEXT': Name.Builtin,
+    }
+
+
+KEYWORDS_COMMON = {
+    'SELECT': Keyword.DML,
+    'INSERT': Keyword.DML,
+    'DELETE': Keyword.DML,
+    'UPDATE': Keyword.DML,
+    'DROP': Keyword.DDL,
+    'CREATE': Keyword.DDL,
+    'ALTER': Keyword.DDL,
+
+    'WHERE': Keyword,
+    'FROM': Keyword,
+    'INNER': Keyword,
+    'JOIN': Keyword,
+    'AND': Keyword,
+    'OR': Keyword,
+    'LIKE': Keyword,
+    'ON': Keyword,
+    'IN': Keyword,
+
+    'BY': Keyword,
+    'GROUP': Keyword,
+    'ORDER': Keyword,
+    'LEFT': Keyword,
+    'OUTER': Keyword,
+
+    'IF': Keyword,
+    'END': Keyword,
+    'THEN': Keyword,
+    'LOOP': Keyword,
+    'AS': Keyword,
+    'ELSE': Keyword,
+    'FOR': Keyword,
+
+    'CASE': Keyword,
+    'WHEN': Keyword,
+    'MIN': Keyword,
+    'MAX': Keyword,
+    'DISTINCT': Keyword,
+
+    }
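These two tables are consulted by the is_keyword() helper in lexer.py (next hunk): KEYWORDS_COMMON takes precedence over KEYWORDS, and names found in neither stay plain identifiers. A short illustration, with the expected values shown as comments:

    from sqlparse.lexer import is_keyword

    print is_keyword('select')   # (Token.Keyword.DML, 'select')   -- KEYWORDS_COMMON wins
    print is_keyword('varchar')  # (Token.Name.Builtin, 'varchar') -- found in KEYWORDS
    print is_keyword('foo')      # (Token.Name, 'foo')             -- unknown: plain name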
+ """ + + def _process_state(cls, unprocessed, processed, state): + assert type(state) is str, "wrong state name %r" % state + assert state[0] != '#', "invalid state name %r" % state + if state in processed: + return processed[state] + tokens = processed[state] = [] + rflags = cls.flags + for tdef in unprocessed[state]: + if isinstance(tdef, include): + # it's a state reference + assert tdef != state, "circular state reference %r" % state + tokens.extend(cls._process_state(unprocessed, processed, str(tdef))) + continue + + assert type(tdef) is tuple, "wrong rule def %r" % tdef + + try: + rex = re.compile(tdef[0], rflags).match + except Exception, err: + raise ValueError("uncompilable regex %r in state %r of %r: %s" % + (tdef[0], state, cls, err)) + + assert type(tdef[1]) is _TokenType or callable(tdef[1]), \ + 'token type must be simple type or callable, not %r' % (tdef[1],) + + if len(tdef) == 2: + new_state = None + else: + tdef2 = tdef[2] + if isinstance(tdef2, str): + # an existing state + if tdef2 == '#pop': + new_state = -1 + elif tdef2 in unprocessed: + new_state = (tdef2,) + elif tdef2 == '#push': + new_state = tdef2 + elif tdef2[:5] == '#pop:': + new_state = -int(tdef2[5:]) + else: + assert False, 'unknown new state %r' % tdef2 + elif isinstance(tdef2, combined): + # combine a new state from existing ones + new_state = '_tmp_%d' % cls._tmpname + cls._tmpname += 1 + itokens = [] + for istate in tdef2: + assert istate != state, 'circular state ref %r' % istate + itokens.extend(cls._process_state(unprocessed, + processed, istate)) + processed[new_state] = itokens + new_state = (new_state,) + elif isinstance(tdef2, tuple): + # push more than one state + for state in tdef2: + assert (state in unprocessed or + state in ('#pop', '#push')), \ + 'unknown new state ' + state + new_state = tdef2 + else: + assert False, 'unknown new state def %r' % tdef2 + tokens.append((rex, tdef[1], new_state)) + return tokens + + def process_tokendef(cls): + cls._all_tokens = {} + cls._tmpname = 0 + processed = cls._all_tokens[cls.__name__] = {} + #tokendefs = tokendefs or cls.tokens[name] + for state in cls.tokens.keys(): + cls._process_state(cls.tokens, processed, state) + return processed + + def __call__(cls, *args, **kwds): + if not hasattr(cls, '_tokens'): + cls._all_tokens = {} + cls._tmpname = 0 + if hasattr(cls, 'token_variants') and cls.token_variants: + # don't process yet + pass + else: + cls._tokens = cls.process_tokendef() + + return type.__call__(cls, *args, **kwds) + + + + +class Lexer: + + __metaclass__ = LexerMeta + + encoding = 'utf-8' + stripall = False + stripnl = False + tabsize = 0 + flags = re.IGNORECASE + + tokens = { + 'root': [ + (r'--.*?(\r|\n|\r\n)', Comment.Single), + (r'(\r|\n|\r\n)', Newline), + (r'\s+', Whitespace), + (r'/\*', Comment.Multiline, 'multiline-comments'), + (r':=', Assignment), + (r'::', Punctuation), + (r'[*]', Wildcard), + (r'[+/<>=~!@#%^&|`?^-]', Operator), + (r'[0-9]+', Number.Integer), + # TODO: Backslash escapes? 
+ (r"'(''|[^'])*'", String.Single), + (r'"(""|[^"])*"', String.Symbol), # not a real string literal in ANSI SQL + (r'(LEFT |RIGHT )?(INNER |OUTER )?JOIN', Keyword), + (r'END( IF| LOOP)?', Keyword), + (r'CREATE( OR REPLACE)?', Keyword.DDL), + (r'[a-zA-Z_][a-zA-Z0-9_]*', is_keyword), + (r'\$([a-zA-Z_][a-zA-Z0-9_]*)?\$', Name.Builtin), + (r'[;:()\[\],\.]', Punctuation), + ], + 'multiline-comments': [ + (r'/\*', Comment.Multiline, 'multiline-comments'), + (r'\*/', Comment.Multiline, '#pop'), + (r'[^/\*]+', Comment.Multiline), + (r'[/*]', Comment.Multiline) + ] + } + + def __init__(self): + self.filters = [] + + def add_filter(self, filter_, **options): + from sqlparse.filters import Filter + if not isinstance(filter_, Filter): + filter_ = filter_(**options) + self.filters.append(filter_) + + def get_tokens(self, text, unfiltered=False): + """ + Return an iterable of (tokentype, value) pairs generated from + `text`. If `unfiltered` is set to `True`, the filtering mechanism + is bypassed even if filters are defined. + + Also preprocess the text, i.e. expand tabs and strip it if + wanted and applies registered filters. + """ + if not isinstance(text, unicode): + if self.encoding == 'guess': + try: + text = text.decode('utf-8') + if text.startswith(u'\ufeff'): + text = text[len(u'\ufeff'):] + except UnicodeDecodeError: + text = text.decode('latin1') + elif self.encoding == 'chardet': + try: + import chardet + except ImportError: + raise ImportError('To enable chardet encoding guessing, ' + 'please install the chardet library ' + 'from http://chardet.feedparser.org/') + enc = chardet.detect(text) + text = text.decode(enc['encoding']) + else: + text = text.decode(self.encoding) + if self.stripall: + text = text.strip() + elif self.stripnl: + text = text.strip('\n') + if self.tabsize > 0: + text = text.expandtabs(self.tabsize) +# if not text.endswith('\n'): +# text += '\n' + + def streamer(): + for i, t, v in self.get_tokens_unprocessed(text): + yield t, v + stream = streamer() + if not unfiltered: + stream = apply_filters(stream, self.filters, self) + return stream + + + def get_tokens_unprocessed(self, text, stack=('root',)): + """ + Split ``text`` into (tokentype, text) pairs. 
+
+
+    def get_tokens_unprocessed(self, text, stack=('root',)):
+        """
+        Split ``text`` into (tokentype, text) pairs.
+
+        ``stack`` is the initial stack (default: ``['root']``)
+        """
+        pos = 0
+        tokendefs = self._tokens
+        statestack = list(stack)
+        statetokens = tokendefs[statestack[-1]]
+        known_names = {}
+        while 1:
+            for rexmatch, action, new_state in statetokens:
+                m = rexmatch(text, pos)
+                if m:
+                    # print rex.pattern
+                    value = m.group()
+                    if value in known_names:
+                        yield pos, known_names[value], value
+                    elif type(action) is _TokenType:
+                        yield pos, action, value
+                    elif hasattr(action, '__call__'):
+                        ttype, value = action(value)
+                        known_names[value] = ttype
+                        yield pos, ttype, value
+                    else:
+                        for item in action(self, m):
+                            yield item
+                    pos = m.end()
+                    if new_state is not None:
+                        # state transition
+                        if isinstance(new_state, tuple):
+                            for state in new_state:
+                                if state == '#pop':
+                                    statestack.pop()
+                                elif state == '#push':
+                                    statestack.append(statestack[-1])
+                                else:
+                                    statestack.append(state)
+                        elif isinstance(new_state, int):
+                            # pop
+                            del statestack[new_state:]
+                        elif new_state == '#push':
+                            statestack.append(statestack[-1])
+                        else:
+                            assert False, "wrong state def: %r" % new_state
+                        statetokens = tokendefs[statestack[-1]]
+                    break
+            else:
+                try:
+                    if text[pos] == '\n':
+                        # at EOL, reset state to "root"
+                        pos += 1
+                        statestack = ['root']
+                        statetokens = tokendefs['root']
+                        yield pos, Text, u'\n'
+                        continue
+                    yield pos, Error, text[pos]
+                    pos += 1
+                except IndexError:
+                    break
+
+
+def tokenize(sql):
+    """Tokenize sql.
+
+    Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream
+    of ``(token type, value)`` items.
+    """
+    lexer = Lexer()
+    return lexer.get_tokens(sql)
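A quick sanity check of the tokenizer above, mirroring what tests/test_tokenize.py at the end of this patch asserts (the token stream is a generator; expected output shown as comments):

    from sqlparse import lexer

    for ttype, value in lexer.tokenize(u'select * from foo;'):
        print ttype, repr(value)
    # Token.Keyword.DML u'select'
    # Token.Text.Whitespace u' '
    # Token.Wildcard u'*'
    # Token.Text.Whitespace u' '
    # Token.Keyword u'from'
    # Token.Text.Whitespace u' '
    # Token.Name u'foo'
    # Token.Punctuation u';'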
diff --git a/sqlparse/tokens.py b/sqlparse/tokens.py
new file mode 100644
index 0000000..2c63c41
--- /dev/null
+++ b/sqlparse/tokens.py
@@ -0,0 +1,131 @@
+# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
+
+# The Token implementation is based on pygment's token system written
+# by Georg Brandl.
+# http://pygments.org/
+
+"""Tokens"""
+
+try:
+    set
+except NameError:
+    from sets import Set as set
+
+
+class _TokenType(tuple):
+    parent = None
+
+    def split(self):
+        buf = []
+        node = self
+        while node is not None:
+            buf.append(node)
+            node = node.parent
+        buf.reverse()
+        return buf
+
+    def __init__(self, *args):
+        # no need to call super.__init__
+        self.subtypes = set()
+
+    def __contains__(self, val):
+        return self is val or (
+            type(val) is self.__class__ and
+            val[:len(self)] == self
+        )
+
+    def __getattr__(self, val):
+        if not val or not val[0].isupper():
+            return tuple.__getattribute__(self, val)
+        new = _TokenType(self + (val,))
+        setattr(self, val, new)
+        self.subtypes.add(new)
+        new.parent = self
+        return new
+
+    def __hash__(self):
+        return hash(tuple(self))
+
+    def __repr__(self):
+        return 'Token' + (self and '.' or '') + '.'.join(self)
+
+
+Token = _TokenType()
+
+# Special token types
+Text = Token.Text
+Whitespace = Text.Whitespace
+Newline = Whitespace.Newline
+Error = Token.Error
+# Text that doesn't belong to this lexer (e.g. HTML in PHP)
+Other = Token.Other
+
+# Common token types for source code
+Keyword = Token.Keyword
+Name = Token.Name
+Literal = Token.Literal
+String = Literal.String
+Number = Literal.Number
+Punctuation = Token.Punctuation
+Operator = Token.Operator
+Wildcard = Token.Wildcard
+Comment = Token.Comment
+Assignment = Token.Assignment
+
+# Generic types for non-source code
+Generic = Token.Generic
+
+# String and some others are not direct children of Token.
+# alias them:
+Token.Token = Token
+Token.String = String
+Token.Number = Number
+
+# SQL specific tokens
+DML = Keyword.DML
+DDL = Keyword.DDL
+Command = Keyword.Command
+
+Group = Token.Group
+Group.Parenthesis = Token.Group.Parenthesis
+Group.Comment = Token.Group.Comment
+Group.Where = Token.Group.Where
+
+
+def is_token_subtype(ttype, other):
+    """
+    Return True if ``ttype`` is a subtype of ``other``.
+
+    Exists for backwards compatibility; use ``ttype in other`` now.
+    """
+    return ttype in other
+
+
+def string_to_tokentype(s):
+    """
+    Convert a string into a token type::
+
+        >>> string_to_tokentype('String.Double')
+        Token.Literal.String.Double
+        >>> string_to_tokentype('Token.Literal.Number')
+        Token.Literal.Number
+        >>> string_to_tokentype('')
+        Token
+
+    Tokens that are already tokens are returned unchanged:
+
+        >>> string_to_tokentype(String)
+        Token.Literal.String
+    """
+    if isinstance(s, _TokenType):
+        return s
+    if not s:
+        return Token
+    node = Token
+    for item in s.split('.'):
+        node = getattr(node, item)
+    return node
+
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/files/begintag.sql b/tests/files/begintag.sql
new file mode 100644
index 0000000..699b365
--- /dev/null
+++ b/tests/files/begintag.sql
@@ -0,0 +1,4 @@
+begin;
+update foo
+  set bar = 1;
+commit;
\ No newline at end of file
diff --git a/tests/files/dashcomment.sql b/tests/files/dashcomment.sql
new file mode 100644
index 0000000..0d5ac62
--- /dev/null
+++ b/tests/files/dashcomment.sql
@@ -0,0 +1,5 @@
+select * from user;
+--select * from host;
+select * from user;
+select * -- foo;
+from foo;
\ No newline at end of file
diff --git a/tests/files/function.sql b/tests/files/function.sql
new file mode 100644
index 0000000..d19227f
--- /dev/null
+++ b/tests/files/function.sql
@@ -0,0 +1,13 @@
+CREATE OR REPLACE FUNCTION foo(
+    p_in1 VARCHAR
+  , p_in2 INTEGER
+) RETURNS INTEGER AS
+
+DECLARE
+    v_foo INTEGER;
+BEGIN
+    SELECT *
+      FROM foo
+      INTO v_foo;
+    RETURN v_foo.id;
+END;
\ No newline at end of file
diff --git a/tests/files/function_psql.sql b/tests/files/function_psql.sql
new file mode 100644
index 0000000..e485f7a
--- /dev/null
+++ b/tests/files/function_psql.sql
@@ -0,0 +1,72 @@
+CREATE OR REPLACE FUNCTION public.delete_data (
+    p_tabelle VARCHAR
+  , p_key VARCHAR
+  , p_value INTEGER
+) RETURNS INTEGER AS
+$$
+DECLARE
+    p_retval INTEGER;
+    v_constraint RECORD;
+    v_count INTEGER;
+    v_data RECORD;
+    v_fieldname VARCHAR;
+    v_sql VARCHAR;
+    v_key VARCHAR;
+    v_value INTEGER;
+BEGIN
+    v_sql := 'SELECT COUNT(*) FROM ' || p_tabelle || ' WHERE ' || p_key || ' = ' || p_value;
+    --RAISE NOTICE '%', v_sql;
+    EXECUTE v_sql INTO v_count;
+    IF v_count::integer != 0 THEN
+        SELECT att.attname
+          INTO v_key
+          FROM pg_attribute att
+          LEFT JOIN pg_constraint con ON con.conrelid = att.attrelid
+              AND con.conkey[1] = att.attnum
+              AND con.contype = 'p', pg_type typ, pg_class rel, pg_namespace ns
+         WHERE att.attrelid = rel.oid
+           AND att.attnum > 0
+           AND typ.oid = att.atttypid
+           AND att.attisdropped = false
+           AND rel.relname =
p_tabelle + AND con.conkey[1] = 1 + AND ns.oid = rel.relnamespace + AND ns.nspname = 'public' + ORDER BY att.attnum; + v_sql := 'SELECT ' || v_key || ' AS id FROM ' || p_tabelle || ' WHERE ' || p_key || ' = ' || p_value; + FOR v_data IN EXECUTE v_sql + LOOP + --RAISE NOTICE ' -> % %', p_tabelle, v_data.id; + FOR v_constraint IN SELECT t.constraint_name + , t.constraint_type + , t.table_name + , c.column_name + FROM public.v_table_constraints t + , public.v_constraint_columns c + WHERE t.constraint_name = c.constraint_name + AND t.constraint_type = 'FOREIGN KEY' + AND c.table_name = p_tabelle + AND t.table_schema = 'public' + AND c.table_schema = 'public' + LOOP + v_fieldname := substring(v_constraint.constraint_name from 1 for length(v_constraint.constraint_name) - length(v_constraint.column_name) - 1); + IF (v_constraint.table_name = p_tabelle) AND (p_value = v_data.id) THEN + --RAISE NOTICE 'Skip (Selbstverweis)'; + CONTINUE; + ELSE + PERFORM delete_data(v_constraint.table_name::varchar, v_fieldname::varchar, v_data.id::integer); + END IF; + END LOOP; + END LOOP; + v_sql := 'DELETE FROM ' || p_tabelle || ' WHERE ' || p_key || ' = ' || p_value; + --RAISE NOTICE '%', v_sql; + EXECUTE v_sql; + p_retval := 1; + ELSE + --RAISE NOTICE ' -> Keine Sätze gefunden'; + p_retval := 0; + END IF; + RETURN p_retval; +END; +$$ +LANGUAGE plpgsql; \ No newline at end of file diff --git a/tests/files/function_psql2.sql b/tests/files/function_psql2.sql new file mode 100644 index 0000000..b5d494c --- /dev/null +++ b/tests/files/function_psql2.sql @@ -0,0 +1,7 @@ +CREATE OR REPLACE FUNCTION update_something() RETURNS void AS +$body$ +BEGIN + raise notice 'foo'; +END; +$body$ +LANGUAGE 'plpgsql' VOLATILE CALLED ON NULL INPUT SECURITY INVOKER; \ No newline at end of file diff --git a/tests/files/function_psql3.sql b/tests/files/function_psql3.sql new file mode 100644 index 0000000..b25d818 --- /dev/null +++ b/tests/files/function_psql3.sql @@ -0,0 +1,8 @@ +CREATE OR REPLACE FUNCTION foo() RETURNS integer AS +$body$ +DECLARE +BEGIN + select * from foo; +END; +$body$ +LANGUAGE 'plpgsql' VOLATILE CALLED ON NULL INPUT SECURITY INVOKER; \ No newline at end of file diff --git a/tests/run_tests.py b/tests/run_tests.py new file mode 100755 index 0000000..1c7960e --- /dev/null +++ b/tests/run_tests.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +"""Test runner for sqlparse.""" + +import os +import sys +import unittest + +sys.path.insert(1, os.path.join(os.path.dirname(__file__), '../')) + + +def main(): + """Create a TestSuite and run it.""" + loader = unittest.TestLoader() + suite = unittest.TestSuite() + fnames = [os.path.split(f)[-1] for f in sys.argv[1:]] + for fname in os.listdir(os.path.dirname(__file__)): + if (not fname.startswith('test_') or not fname.endswith('.py') + or (fnames and fname not in fnames)): + continue + modname = os.path.splitext(fname)[0] + mod = __import__(os.path.splitext(fname)[0]) + suite.addTests(loader.loadTestsFromModule(mod)) + unittest.TextTestRunner(verbosity=2).run(suite) + + + + +if __name__ == '__main__': + main() diff --git a/tests/test_format.py b/tests/test_format.py new file mode 100644 index 0000000..5748704 --- /dev/null +++ b/tests/test_format.py @@ -0,0 +1,146 @@ +# -*- coding: utf-8 -*- + +from tests.utils import TestCaseBase + +import sqlparse + + +class TestFormat(TestCaseBase): + + def test_keywordcase(self): + sql = 'select * from bar; -- select foo\n' + res = sqlparse.format(sql, keyword_case='upper') + self.ndiffAssertEqual(res, 'SELECT * FROM 
bar; -- select foo\n') + res = sqlparse.format(sql, keyword_case='capitalize') + self.ndiffAssertEqual(res, 'Select * From bar; -- select foo\n') + res = sqlparse.format(sql.upper(), keyword_case='lower') + self.ndiffAssertEqual(res, 'select * from BAR; -- SELECT FOO\n') + self.assertRaises(sqlparse.SQLParseError, sqlparse.format, sql, + keyword_case='foo') + + def test_identifiercase(self): + sql = 'select * from bar; -- select foo\n' + res = sqlparse.format(sql, identifier_case='upper') + self.ndiffAssertEqual(res, 'select * from BAR; -- select foo\n') + res = sqlparse.format(sql, identifier_case='capitalize') + self.ndiffAssertEqual(res, 'select * from Bar; -- select foo\n') + res = sqlparse.format(sql.upper(), identifier_case='lower') + self.ndiffAssertEqual(res, 'SELECT * FROM bar; -- SELECT FOO\n') + self.assertRaises(sqlparse.SQLParseError, sqlparse.format, sql, + identifier_case='foo') + sql = 'select * from "foo"."bar"' + res = sqlparse.format(sql, identifier_case="upper") + self.ndiffAssertEqual(res, 'select * from "FOO"."BAR"') + + def test_strip_comments_single(self): + sql = 'select *-- statement starts here\nfrom foo' + res = sqlparse.format(sql, strip_comments=True) + self.ndiffAssertEqual(res, 'select * from foo') + sql = 'select * -- statement starts here\nfrom foo' + res = sqlparse.format(sql, strip_comments=True) + self.ndiffAssertEqual(res, 'select * from foo') + sql = 'select-- foo\nfrom -- bar\nwhere' + res = sqlparse.format(sql, strip_comments=True) + self.ndiffAssertEqual(res, 'select from where') + + def test_strip_comments_multi(self): + sql = '/* sql starts here */\nselect' + res = sqlparse.format(sql, strip_comments=True) + self.ndiffAssertEqual(res, 'select') + sql = '/* sql starts here */ select' + res = sqlparse.format(sql, strip_comments=True) + self.ndiffAssertEqual(res, 'select') + sql = '/*\n * sql starts here\n */\nselect' + res = sqlparse.format(sql, strip_comments=True) + self.ndiffAssertEqual(res, 'select') + sql = 'select (/* sql starts here */ select 2)' + res = sqlparse.format(sql, strip_comments=True) + self.ndiffAssertEqual(res, 'select (select 2)') + + def test_strip_ws(self): + f = lambda sql: sqlparse.format(sql, strip_whitespace=True) + s = 'select\n* from foo\n\twhere ( 1 = 2 )\n' + self.ndiffAssertEqual(f(s), 'select * from foo where (1 = 2)') + s = 'select -- foo\nfrom bar\n' + self.ndiffAssertEqual(f(s), 'select -- foo\nfrom bar') + + +class TestFormatReindent(TestCaseBase): + + def test_stmts(self): + f = lambda sql: sqlparse.format(sql, reindent=True) + s = 'select foo; select bar' + self.ndiffAssertEqual(f(s), 'select foo;\n\nselect bar') + s = 'select foo' + self.ndiffAssertEqual(f(s), 'select foo') + s = 'select foo; -- test\n select bar' + self.ndiffAssertEqual(f(s), 'select foo; -- test\n\nselect bar') + + def test_keywords(self): + f = lambda sql: sqlparse.format(sql, reindent=True) + s = 'select * from foo union select * from bar;' + self.ndiffAssertEqual(f(s), '\n'.join(['select *', + 'from foo', + 'union', + 'select *', + 'from bar;'])) + + def test_parenthesis(self): + f = lambda sql: sqlparse.format(sql, reindent=True) + s = 'select count(*) from (select * from foo);' + self.ndiffAssertEqual(f(s), + '\n'.join(['select count(*)', + 'from', + ' (select *', + ' from foo);', + ]) + ) + + def test_where(self): + f = lambda sql: sqlparse.format(sql, reindent=True) + s = 'select * from foo where bar = 1 and baz = 2 or bzz = 3;' + self.ndiffAssertEqual(f(s), ('select *\nfrom foo\n' + 'where bar = 1\n' + ' and baz = 2\n' + ' or bzz = 
3;')) + s = 'select * from foo where bar = 1 and (baz = 2 or bzz = 3);' + self.ndiffAssertEqual(f(s), ('select *\nfrom foo\n' + 'where bar = 1\n' + ' and (baz = 2\n' + ' or bzz = 3);')) + + def test_join(self): + f = lambda sql: sqlparse.format(sql, reindent=True) + s = 'select * from foo join bar on 1 = 2' + self.ndiffAssertEqual(f(s), '\n'.join(['select *', + 'from foo', + 'join bar on 1 = 2'])) + s = 'select * from foo inner join bar on 1 = 2' + self.ndiffAssertEqual(f(s), '\n'.join(['select *', + 'from foo', + 'inner join bar on 1 = 2'])) + s = 'select * from foo left outer join bar on 1 = 2' + self.ndiffAssertEqual(f(s), '\n'.join(['select *', + 'from foo', + 'left outer join bar on 1 = 2'] + )) + + def test_identifier_list(self): + f = lambda sql: sqlparse.format(sql, reindent=True) + s = 'select foo, bar, baz from table1, table2 where 1 = 2' + self.ndiffAssertEqual(f(s), '\n'.join(['select foo,', + ' bar,', + ' baz', + 'from table1,', + ' table2', + 'where 1 = 2'])) + + def test_case(self): + f = lambda sql: sqlparse.format(sql, reindent=True) + s = 'case when foo = 1 then 2 when foo = 3 then 4 else 5 end' + self.ndiffAssertEqual(f(s), '\n'.join(['case when foo = 1 then 2', + ' when foo = 3 then 4', + ' else 5', + 'end'])) + + diff --git a/tests/test_grouping.py b/tests/test_grouping.py new file mode 100644 index 0000000..fc3bea5 --- /dev/null +++ b/tests/test_grouping.py @@ -0,0 +1,86 @@ +# -*- coding: utf-8 -*- + +import sqlparse +from sqlparse import tokens as T +from sqlparse.engine.grouping import * + +from tests.utils import TestCaseBase + + +class TestGrouping(TestCaseBase): + + def test_parenthesis(self): + s ='x1 (x2 (x3) x2) foo (y2) bar' + parsed = sqlparse.parse(s)[0] + self.ndiffAssertEqual(s, str(parsed)) + self.assertEqual(len(parsed.tokens), 9) + self.assert_(isinstance(parsed.tokens[2], Parenthesis)) + self.assert_(isinstance(parsed.tokens[-3], Parenthesis)) + self.assertEqual(len(parsed.tokens[2].tokens), 7) + self.assert_(isinstance(parsed.tokens[2].tokens[3], Parenthesis)) + self.assertEqual(len(parsed.tokens[2].tokens[3].tokens), 3) + + def test_comments(self): + s = '/*\n * foo\n */ \n bar' + parsed = sqlparse.parse(s)[0] + self.ndiffAssertEqual(s, unicode(parsed)) + self.assertEqual(len(parsed.tokens), 2) + + def test_identifiers(self): + s = 'select foo.bar from "myscheme"."table" where fail. 
order' + parsed = sqlparse.parse(s)[0] + self.ndiffAssertEqual(s, parsed.to_unicode()) + self.assert_(isinstance(parsed.tokens[2], Identifier)) + self.assert_(isinstance(parsed.tokens[6], Identifier)) + self.assert_(isinstance(parsed.tokens[8], Where)) + s = 'select * from foo where foo.id = 1' + parsed = sqlparse.parse(s)[0] + self.ndiffAssertEqual(s, parsed.to_unicode()) + self.assert_(isinstance(parsed.tokens[-1].tokens[2], Identifier)) + s = 'select * from (select "foo"."id" from foo)' + parsed = sqlparse.parse(s)[0] + self.ndiffAssertEqual(s, parsed.to_unicode()) + self.assert_(isinstance(parsed.tokens[-1].tokens[3], Identifier)) + + def test_where(self): + s = 'select * from foo where bar = 1 order by id desc' + p = sqlparse.parse(s)[0] + self.ndiffAssertEqual(s, p.to_unicode()) + self.assertTrue(len(p.tokens), 16) + s = 'select x from (select y from foo where bar = 1) z' + p = sqlparse.parse(s)[0] + self.ndiffAssertEqual(s, p.to_unicode()) + self.assertTrue(isinstance(p.tokens[-3].tokens[-1], Where)) + + def test_typecast(self): + s = 'select foo::integer from bar' + p = sqlparse.parse(s)[0] + self.ndiffAssertEqual(s, p.to_unicode()) + self.assertEqual(p.tokens[2].get_typecast(), 'integer') + self.assertEqual(p.tokens[2].get_name(), 'foo') + s = 'select (current_database())::information_schema.sql_identifier' + p = sqlparse.parse(s)[0] + self.ndiffAssertEqual(s, p.to_unicode()) + self.assertEqual(p.tokens[2].get_typecast(), + 'information_schema.sql_identifier') + + def test_alias(self): + s = 'select foo as bar from mytable' + p = sqlparse.parse(s)[0] + self.ndiffAssertEqual(s, p.to_unicode()) + self.assertEqual(p.tokens[2].get_real_name(), 'foo') + self.assertEqual(p.tokens[2].get_alias(), 'bar') + s = 'select foo from mytable t1' + p = sqlparse.parse(s)[0] + self.ndiffAssertEqual(s, p.to_unicode()) + self.assertEqual(p.tokens[6].get_real_name(), 'mytable') + self.assertEqual(p.tokens[6].get_alias(), 't1') + s = 'select foo::integer as bar from mytable' + p = sqlparse.parse(s)[0] + self.ndiffAssertEqual(s, p.to_unicode()) + self.assertEqual(p.tokens[2].get_alias(), 'bar') + s = ('SELECT DISTINCT ' + '(current_database())::information_schema.sql_identifier AS view') + p = sqlparse.parse(s)[0] + self.ndiffAssertEqual(s, p.to_unicode()) + self.assertEqual(p.tokens[4].get_alias(), 'view') diff --git a/tests/test_parse.py b/tests/test_parse.py new file mode 100644 index 0000000..536b6f6 --- /dev/null +++ b/tests/test_parse.py @@ -0,0 +1,39 @@ +# -*- coding: utf-8 -*- + +"""Tests sqlparse function.""" + +from utils import TestCaseBase + +import sqlparse + + +class SQLParseTest(TestCaseBase): + """Tests sqlparse.parse().""" + + def test_tokenize(self): + sql = 'select * from foo;' + stmts = sqlparse.parse(sql) + self.assertEqual(len(stmts), 1) + self.assertEqual(str(stmts[0]), sql) + + def test_multistatement(self): + sql1 = 'select * from foo;' + sql2 = 'select * from bar;' + stmts = sqlparse.parse(sql1+sql2) + self.assertEqual(len(stmts), 2) + self.assertEqual(str(stmts[0]), sql1) + self.assertEqual(str(stmts[1]), sql2) + + def test_newlines(self): + sql = u'select\n*from foo;' + p = sqlparse.parse(sql)[0] + self.assertEqual(unicode(p), sql) + sql = u'select\r\n*from foo' + p = sqlparse.parse(sql)[0] + self.assertEqual(unicode(p), sql) + sql = u'select\r*from foo' + p = sqlparse.parse(sql)[0] + self.assertEqual(unicode(p), sql) + sql = u'select\r\n*from foo\n' + p = sqlparse.parse(sql)[0] + self.assertEqual(unicode(p), sql) diff --git a/tests/test_split.py b/tests/test_split.py new 
file mode 100644 index 0000000..782b226 --- /dev/null +++ b/tests/test_split.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- + +# Tests splitting functions. + +import unittest + +from utils import load_file, TestCaseBase + +import sqlparse + + +class SQLSplitTest(TestCaseBase): + """Tests sqlparse.sqlsplit().""" + + _sql1 = 'select * from foo;' + _sql2 = 'select * from bar;' + + def test_split_semicolon(self): + sql2 = 'select * from foo where bar = \'foo;bar\';' + stmts = sqlparse.parse(''.join([self._sql1, sql2])) + self.assertEqual(len(stmts), 2) + self.ndiffAssertEqual(unicode(stmts[0]), self._sql1) + self.ndiffAssertEqual(unicode(stmts[1]), sql2) + + def test_create_function(self): + sql = load_file('function.sql') + stmts = sqlparse.parse(sql) + self.assertEqual(len(stmts), 1) + self.ndiffAssertEqual(unicode(stmts[0]), sql) + + def test_create_function_psql(self): + sql = load_file('function_psql.sql') + stmts = sqlparse.parse(sql) + self.assertEqual(len(stmts), 1) + self.ndiffAssertEqual(unicode(stmts[0]), sql) + + def test_create_function_psql3(self): + sql = load_file('function_psql3.sql') + stmts = sqlparse.parse(sql) + self.assertEqual(len(stmts), 1) + self.ndiffAssertEqual(unicode(stmts[0]), sql) + + def test_create_function_psql2(self): + sql = load_file('function_psql2.sql') + stmts = sqlparse.parse(sql) + self.assertEqual(len(stmts), 1) + self.ndiffAssertEqual(unicode(stmts[0]), sql) + + def test_dashcomments(self): + sql = load_file('dashcomment.sql') + stmts = sqlparse.parse(sql) + self.assertEqual(len(stmts), 3) + self.ndiffAssertEqual(''.join(unicode(q) for q in stmts), sql) + + def test_begintag(self): + sql = load_file('begintag.sql') + stmts = sqlparse.parse(sql) + self.assertEqual(len(stmts), 3) + self.ndiffAssertEqual(''.join(unicode(q) for q in stmts), sql) + + def test_dropif(self): + sql = 'DROP TABLE IF EXISTS FOO;\n\nSELECT * FROM BAR;' + stmts = sqlparse.parse(sql) + self.assertEqual(len(stmts), 2) + self.ndiffAssertEqual(''.join(unicode(q) for q in stmts), sql) + + def test_comment_with_umlaut(self): + sql = (u'select * from foo;\n' + u'-- Testing an umlaut: ä\n' + u'select * from bar;') + stmts = sqlparse.parse(sql) + self.assertEqual(len(stmts), 2) + self.ndiffAssertEqual(''.join(unicode(q) for q in stmts), sql) + + def test_comment_end_of_line(self): + sql = ('select * from foo; -- foo\n' + 'select * from bar;') + stmts = sqlparse.parse(sql) + self.assertEqual(len(stmts), 2) + self.ndiffAssertEqual(''.join(unicode(q) for q in stmts), sql) + # make sure the comment belongs to first query + self.ndiffAssertEqual(unicode(stmts[0]), 'select * from foo; -- foo\n') + + def test_casewhen(self): + sql = ('SELECT case when val = 1 then 2 else null end as foo;\n' + 'comment on table actor is \'The actor table.\';') + stmts = sqlparse.split(sql) + self.assertEqual(len(stmts), 2) diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py new file mode 100644 index 0000000..7106b3c --- /dev/null +++ b/tests/test_tokenize.py @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- + +import unittest +import types + +from sqlparse import lexer +from sqlparse.tokens import * + + +class TestTokenize(unittest.TestCase): + + def test_simple(self): + sql = 'select * from foo;' + stream = lexer.tokenize(sql) + self.assert_(type(stream) is types.GeneratorType) + tokens = list(stream) + self.assertEqual(len(tokens), 8) + self.assertEqual(len(tokens[0]), 2) + self.assertEqual(tokens[0], (Keyword.DML, u'select')) + self.assertEqual(tokens[-1], (Punctuation, u';')) + diff --git a/tests/utils.py 
b/tests/utils.py
new file mode 100644
index 0000000..a78b460
--- /dev/null
+++ b/tests/utils.py
@@ -0,0 +1,38 @@
+# -*- coding: utf-8 -*-
+
+"""Helpers for testing."""
+
+import codecs
+import difflib
+import os
+import unittest
+from StringIO import StringIO
+
+NL = '\n'
+DIR_PATH = os.path.abspath(os.path.dirname(__file__))
+PARENT_DIR = os.path.dirname(DIR_PATH)
+FILES_DIR = os.path.join(DIR_PATH, 'files')
+
+
+def load_file(filename, encoding='utf-8'):
+    """Opens filename with encoding and returns its contents."""
+    f = codecs.open(os.path.join(FILES_DIR, filename), 'r', encoding)
+    data = f.read()
+    f.close()
+    return data
+
+
+class TestCaseBase(unittest.TestCase):
+    """Base class for test cases with some additional checks."""
+
+    # Adapted from Python's tests.
+    def ndiffAssertEqual(self, first, second):
+        """Like failUnlessEqual except use ndiff for readable output."""
+        if first != second:
+            sfirst = unicode(first)
+            ssecond = unicode(second)
+            diff = difflib.ndiff(sfirst.splitlines(), ssecond.splitlines())
+            fp = StringIO()
+            print >> fp, NL, NL.join(diff)
+            print fp.getvalue()
+            raise self.failureException, fp.getvalue()
-- 
cgit v1.2.1
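For reference, the public entry points this commit wires together, as exercised by the test suite above (Python 2 syntax to match the code base; expected output shown as comments):

    import sqlparse

    # Statement splitting (see test_split.py):
    print sqlparse.split('select * from foo; select * from bar;')

    # Parsing; statements round-trip back to their source text (test_parse.py):
    stmt = sqlparse.parse('select * from foo;')[0]
    print str(stmt)          # select * from foo;

    # Formatting (test_format.py):
    print sqlparse.format('select * from bar; -- select foo\n',
                          keyword_case='upper')
    # SELECT * FROM bar; -- select foo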