- try to finish up the performance example for now

author: Mike Bayer <mike_mp@zzzcomputing.com> 2014-09-06 13:01:21 -0400
committer: Mike Bayer <mike_mp@zzzcomputing.com> 2014-09-06 13:01:21 -0400
commit: fa7c8f88113d2e769274dee4aa4247b9c9aadec8 (patch)
tree: f3364e1816eefbabbb77c437ca0e401af6cea250 /examples
parent: d39daa43fe79c40419adf4babc5ae04f814461b7 (diff)
download: sqlalchemy-fa7c8f88113d2e769274dee4aa4247b9c9aadec8.tar.gz
5 files changed, 284 insertions, 44 deletions
diff --git a/examples/performance/__init__.py b/examples/performance/__init__.py
index b57f25b94..6e2e1fc89 100644
--- a/examples/performance/__init__.py
+++ b/examples/performance/__init__.py
@@ -1,55 +1,232 @@
 """A performance profiling suite for a variety of SQLAlchemy use cases.
 
-The suites here each focus on some specific type of use case, one which
-has a particular performance profile:
+Each suite focuses on a specific use case with a particular performance
+profile and associated implications:
 
 * bulk inserts
 * individual inserts, with or without transactions
 * fetching large numbers of rows
-* running lots of small queries
+* running lots of small queries (TODO)
 
-All suites include a variety of use patterns with both the Core and
-ORM, and are sorted in order of performance from worst to greatest,
-inversely based on amount of functionality provided by SQLAlchemy,
+All suites include a variety of use patterns illustrating both Core
+and ORM use, and are generally sorted in order of performance from worst
+to greatest, inversely based on amount of functionality provided by SQLAlchemy,
 greatest to least (these two things generally correspond perfectly).
 
-Each suite is run as a module, and provides a consistent command line
-interface::
+A command line tool is presented at the package level which allows
+individual suites to be run::
 
-    $ python -m examples.performance.bulk_inserts --profile --num 1000
+    $ python -m examples.performance --help
+    usage: python -m examples.performance [-h] [--test TEST] [--dburl DBURL]
+                                          [--num NUM] [--profile] [--dump]
+                                          [--runsnake] [--echo]
 
-Using ``--help`` will allow all options::
+                                          {bulk_inserts,large_resultsets,single_inserts}
 
-    $ python -m examples.performance.bulk_inserts --help
-usage: bulk_inserts.py [-h] [--test TEST] [--dburl DBURL] [--num NUM]
-                       [--profile] [--dump] [--runsnake] [--echo]
+    positional arguments:
+      {bulk_inserts,large_resultsets,single_inserts}
+                            suite to run
 
-optional arguments:
-  -h, --help     show this help message and exit
-  --test TEST    run specific test name
-  --dburl DBURL  database URL, default sqlite:///profile.db
-  --num NUM      Number of iterations/items/etc for tests, default 100000
-  --profile      run profiling and dump call counts
-  --dump         dump full call profile (implies --profile)
-  --runsnake     invoke runsnakerun (implies --profile)
-  --echo         Echo SQL output
+    optional arguments:
+      -h, --help            show this help message and exit
+      --test TEST           run specific test name
+      --dburl DBURL         database URL, default sqlite:///profile.db
+      --num NUM             Number of iterations/items/etc for tests; default is 0
+                            module-specific
+      --profile             run profiling and dump call counts
+      --dump                dump full call profile (implies --profile)
+      --runsnake            invoke runsnakerun (implies --profile)
+      --echo                Echo SQL output
 
+An example run looks like::
+
+    $ python -m examples.performance bulk_inserts
+
+Or with options::
+
+    $ python -m examples.performance bulk_inserts \\
+        --dburl mysql+mysqldb://scott:tiger@localhost/test \\
+        --profile --num 1000
+
+
+Running all tests with time
+---------------------------
+
+This is the default form of run::
+
+    $ python -m examples.performance single_inserts
+    Tests to run: test_orm_commit, test_bulk_save,
+                  test_bulk_insert_dictionaries, test_core,
+                  test_core_query_caching, test_dbapi_raw_w_connect,
+                  test_dbapi_raw_w_pool
+
+    test_orm_commit : Individual INSERT/COMMIT pairs via the
+        ORM (10000 iterations); total time 13.690218 sec
+    test_bulk_save : Individual INSERT/COMMIT pairs using
+        the "bulk" API  (10000 iterations); total time 11.290371 sec
+    test_bulk_insert_dictionaries : Individual INSERT/COMMIT pairs using
+        the "bulk" API with dictionaries (10000 iterations);
+        total time 10.814626 sec
+    test_core : Individual INSERT/COMMIT pairs using Core.
+        (10000 iterations); total time 9.665620 sec
+    test_core_query_caching : Individual INSERT/COMMIT pairs using Core
+        with query caching (10000 iterations); total time 9.209010 sec
+    test_dbapi_raw_w_connect : Individual INSERT/COMMIT pairs w/ DBAPI +
+        connection each time (10000 iterations); total time 9.551103 sec
+    test_dbapi_raw_w_pool : Individual INSERT/COMMIT pairs w/ DBAPI +
+        connection pool (10000 iterations); total time 8.001813 sec
+
+Dumping Profiles for Individual Tests
+--------------------------------------
+
+A Python profile output can be dumped for all tests, or more commonly
+individual tests::
+
+    $ python -m examples.performance single_inserts --test test_core --num 1000 --dump
+    Tests to run: test_core
+    test_core : Individual INSERT/COMMIT pairs using Core. (1000 iterations); total fn calls 186109
+             186109 function calls (186102 primitive calls) in 1.089 seconds
+
+       Ordered by: internal time, call count
+
+       ncalls  tottime  percall  cumtime  percall filename:lineno(function)
+         1000    0.634    0.001    0.634    0.001 {method 'commit' of 'sqlite3.Connection' objects}
+         1000    0.154    0.000    0.154    0.000 {method 'execute' of 'sqlite3.Cursor' objects}
+         1000    0.021    0.000    0.074    0.000 /Users/classic/dev/sqlalchemy/lib/sqlalchemy/sql/compiler.py:1950(_get_colparams)
+         1000    0.015    0.000    0.034    0.000 /Users/classic/dev/sqlalchemy/lib/sqlalchemy/engine/default.py:503(_init_compiled)
+            1    0.012    0.012    1.091    1.091 examples/performance/single_inserts.py:79(test_core)
+
+        ...
+
+Using RunSnake
+--------------
+
+This option requires the `RunSnake <https://pypi.python.org/pypi/RunSnakeRun>`_
+command line tool be installed::
+
+    $ python -m examples.performance single_inserts --test test_core --num 1000 --runsnake
+
+A graphical RunSnake output will be displayed.
+
+.. _examples_profiling_writeyourown:
+
+Writing your Own Suites
+-----------------------
+
+The profiler suite system is extensible, and can be applied to your own set
+of tests.  This is a valuable technique to use in deciding upon the proper
+approach for some performance-critical set of routines.  For example,
+if we wanted to profile the difference between several kinds of loading,
+we can create a file ``test_loads.py``, with the following content::
+
+    from examples.performance import Profiler
+    from sqlalchemy import Integer, Column, create_engine, ForeignKey
+    from sqlalchemy.orm import relationship, joinedload, subqueryload, Session
+    from sqlalchemy.ext.declarative import declarative_base
+
+    Base = declarative_base()
+    engine = None
+    session = None
+
+
+    class Parent(Base):
+        __tablename__ = 'parent'
+        id = Column(Integer, primary_key=True)
+        children = relationship("Child")
+
+
+    class Child(Base):
+        __tablename__ = 'child'
+        id = Column(Integer, primary_key=True)
+        parent_id = Column(Integer, ForeignKey('parent.id'))
+
+
+    # Init with name of file, default number of items
+    Profiler.init("test_loads", 1000)
+
+
+    @Profiler.setup_once
+    def setup_once(dburl, echo, num):
+        "setup once.  create an engine, insert fixture data"
+        global engine
+        engine = create_engine(dburl, echo=echo)
+        Base.metadata.drop_all(engine)
+        Base.metadata.create_all(engine)
+        sess = Session(engine)
+        sess.add_all([
+            Parent(children=[Child() for j in range(100)])
+            for i in range(num)
+        ])
+        sess.commit()
+
+
+    @Profiler.setup
+    def setup(dburl, echo, num):
+        "setup per test.  create a new Session."
+        global session
+        session = Session(engine)
+        # pre-connect so this part isn't profiled (if we choose)
+        session.connection()
+
+
+    @Profiler.profile
+    def test_lazyload(n):
+        "load everything, no eager loading."
+
+        for parent in session.query(Parent):
+            parent.children
+
+
+    @Profiler.profile
+    def test_joinedload(n):
+        "load everything, joined eager loading."
+
+        for parent in session.query(Parent).options(joinedload("children")):
+            parent.children
+
+
+    @Profiler.profile
+    def test_subqueryload(n):
+        "load everything, subquery eager loading."
+
+        for parent in session.query(Parent).options(subqueryload("children")):
+            parent.children
+
+    if __name__ == '__main__':
+        Profiler.main()
+
+We can run our new script directly::
+
+    $ python test_loads.py  --dburl postgresql+psycopg2://scott:tiger@localhost/test
+    Running setup once...
+    Tests to run: test_lazyload, test_joinedload, test_subqueryload
+    test_lazyload : load everything, no eager loading. (1000 iterations); total time 11.971159 sec
+    test_joinedload : load everything, joined eager loading. (1000 iterations); total time 2.754592 sec
+    test_subqueryload : load everything, subquery eager loading. (1000 iterations); total time 2.977696 sec
+
+As well as see RunSnake output for an individual test::
+
+    $ python test_loads.py  --num 100 --runsnake --test test_joinedload
 
 """
 import argparse
 import cProfile
-import StringIO
 import pstats
 import os
 import time
+import re
+import sys
 
 
 class Profiler(object):
     tests = []
 
-    def __init__(self, options, setup=None, setup_once=None):
-        self.setup = setup
-        self.setup_once = setup_once
+    _setup = None
+    _setup_once = None
+    name = None
+    num = 0
+
+    def __init__(self, options):
         self.test = options.test
         self.dburl = options.dburl
         self.runsnake = options.runsnake
@@ -60,10 +237,33 @@ class Profiler(object):
         self.stats = []
 
     @classmethod
+    def init(cls, name, num):
+        cls.name = name
+        cls.num = num
+
+    @classmethod
     def profile(cls, fn):
+        if cls.name is None:
+            raise ValueError(
+                "Need to call Profile.init(<suitename>, <default_num>) first.")
         cls.tests.append(fn)
         return fn
 
+    @classmethod
+    def setup(cls, fn):
+        if cls._setup is not None:
+            raise ValueError("setup function already set to %s" % cls._setup)
+        cls._setup = staticmethod(fn)
+        return fn
+
+    @classmethod
+    def setup_once(cls, fn):
+        if cls._setup_once is not None:
+            raise ValueError(
+                "setup_once function already set to %s" % cls._setup_once)
+        cls._setup_once = staticmethod(fn)
+        return fn
+
     def run(self):
         if self.test:
             tests = [fn for fn in self.tests if fn.__name__ == self.test]
@@ -72,9 +272,9 @@ class Profiler(object):
         else:
             tests = self.tests
 
-        if self.setup_once:
+        if self._setup_once:
             print("Running setup once...")
-            self.setup_once(self.dburl, self.echo, self.num)
+            self._setup_once(self.dburl, self.echo, self.num)
         print("Tests to run: %s" % ", ".join([t.__name__ for t in tests]))
         for test in tests:
             self._run_test(test)
@@ -88,8 +288,7 @@ class Profiler(object):
         finally:
             pr.disable()
 
-        output = StringIO.StringIO()
-        stats = pstats.Stats(pr, stream=output).sort_stats('cumulative')
+        stats = pstats.Stats(pr).sort_stats('cumulative')
 
         self.stats.append(TestResult(self, fn, stats=stats))
         return result
@@ -103,29 +302,42 @@ class Profiler(object):
             self.stats.append(TestResult(self, fn, total_time=total))
 
     def _run_test(self, fn):
-        if self.setup:
-            self.setup(self.dburl, self.echo, self.num)
+        if self._setup:
+            self._setup(self.dburl, self.echo, self.num)
         if self.profile or self.runsnake or self.dump:
             self._run_with_profile(fn)
         else:
             self._run_with_time(fn)
 
     @classmethod
-    def main(cls, num, setup=None, setup_once=None):
-        parser = argparse.ArgumentParser()
+    def main(cls):
+
+        parser = argparse.ArgumentParser("python -m examples.performance")
+
+        if cls.name is None:
+            parser.add_argument(
+                "name", choices=cls._suite_names(), help="suite to run")
+
+            if len(sys.argv) > 1:
+                potential_name = sys.argv[1]
+                try:
+                    suite = __import__(__name__ + "." + potential_name)
+                except ImportError:
+                    pass
 
         parser.add_argument(
             "--test", type=str,
             help="run specific test name"
         )
+
         parser.add_argument(
             '--dburl', type=str, default="sqlite:///profile.db",
             help="database URL, default sqlite:///profile.db"
         )
         parser.add_argument(
-            '--num', type=int, default=num,
+            '--num', type=int, default=cls.num,
             help="Number of iterations/items/etc for tests; "
-                 "default is %d module-specific" % num
+                 "default is %d module-specific" % cls.num
         )
         parser.add_argument(
             '--profile', action='store_true',
@@ -143,7 +355,19 @@ class Profiler(object):
 
         args.profile = args.profile or args.dump or args.runsnake
 
-        Profiler(args, setup=setup, setup_once=setup_once).run()
+        if cls.name is None:
+            suite = __import__(__name__ + "." + args.name)
+
+        Profiler(args).run()
+
+    @classmethod
+    def _suite_names(cls):
+        suites = []
+        for file_ in os.listdir(os.path.dirname(__file__)):
+            match = re.match(r'^([a-z].*).py$', file_)
+            if match:
+                suites.append(match.group(1))
+        return suites
 
 
 class TestResult(object):
@@ -185,3 +409,4 @@ class TestResult(object):
         finally:
             os.remove(filename)
 
+
diff --git a/examples/performance/__main__.py b/examples/performance/__main__.py
new file mode 100644
index 000000000..957d6c699
--- /dev/null
+++ b/examples/performance/__main__.py
@@ -0,0 +1,5 @@
+from . import Profiler
+
+if __name__ == '__main__':
+    Profiler.main()
+
diff --git a/examples/performance/bulk_inserts.py b/examples/performance/bulk_inserts.py
index 531003aa6..9c3cff5b2 100644
--- a/examples/performance/bulk_inserts.py
+++ b/examples/performance/bulk_inserts.py
@@ -20,6 +20,10 @@ class Customer(Base):
     description = Column(String(255))
 
 
+Profiler.init("bulk_inserts", num=100000)
+
+
+@Profiler.setup
 def setup_database(dburl, echo, num):
     global engine
     engine = create_engine(dburl, echo=echo)
@@ -147,4 +151,4 @@ def test_dbapi_raw(n):
     conn.close()
 
 if __name__ == '__main__':
-    Profiler.main(setup=setup_database, num=100000)
+    Profiler.main()
diff --git a/examples/performance/large_resultsets.py b/examples/performance/large_resultsets.py
index 77c0246fc..0a5857b75 100644
--- a/examples/performance/large_resultsets.py
+++ b/examples/performance/large_resultsets.py
@@ -30,6 +30,10 @@ class Customer(Base):
     description = Column(String(255))
 
 
+Profiler.init("large_resultsets", num=500000)
+
+
+@Profiler.setup_once
 def setup_database(dburl, echo, num):
     global engine
     engine = create_engine(dburl, echo=echo)
@@ -168,4 +172,4 @@ def _test_dbapi_raw(n, make_objects):
     conn.close()
 
 if __name__ == '__main__':
-    Profiler.main(setup_once=setup_database, num=500000)
+    Profiler.main()
diff --git a/examples/performance/single_inserts.py b/examples/performance/single_inserts.py
index 4178ccea8..cfce90300 100644
--- a/examples/performance/single_inserts.py
+++ b/examples/performance/single_inserts.py
@@ -21,6 +21,10 @@ class Customer(Base):
     description = Column(String(255))
 
 
+Profiler.init("single_inserts", num=10000)
+
+
+@Profiler.setup
 def setup_database(dburl, echo, num):
     global engine
     engine = create_engine(dburl, echo=echo)
@@ -106,16 +110,14 @@ def test_core_query_caching(n):
 
 @Profiler.profile
 def test_dbapi_raw_w_connect(n):
-    """Individual INSERT/COMMIT pairs using a pure DBAPI connection,
-    connect each time."""
+    """Individual INSERT/COMMIT pairs w/ DBAPI + connection each time"""
 
     _test_dbapi_raw(n, True)
 
 
 @Profiler.profile
 def test_dbapi_raw_w_pool(n):
-    """Individual INSERT/COMMIT pairs using a pure DBAPI connection,
-    using a connection pool."""
+    """Individual INSERT/COMMIT pairs w/ DBAPI + connection pool"""
 
     _test_dbapi_raw(n, False)
 
@@ -161,4 +163,4 @@ def _test_dbapi_raw(n, connect):
 
 
 if __name__ == '__main__':
-    Profiler.main(setup=setup_database, num=10000)
+    Profiler.main()
author	Mike Bayer <mike_mp@zzzcomputing.com>	2014-09-06 13:01:21 -0400
committer	Mike Bayer <mike_mp@zzzcomputing.com>	2014-09-06 13:01:21 -0400
commit	fa7c8f88113d2e769274dee4aa4247b9c9aadec8 (patch)
tree	f3364e1816eefbabbb77c437ca0e401af6cea250 /examples
parent	d39daa43fe79c40419adf4babc5ae04f814461b7 (diff)
download	sqlalchemy-fa7c8f88113d2e769274dee4aa4247b9c9aadec8.tar.gz