From 4e1c89ec97ec90037583e85d0e9e71e9c845a19b Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 2 Jun 2010 16:13:32 +0200 Subject: Added performance testing foundation library, reworked existing performance tests to work on larger repositories --- test/git/performance/test_commit.py | 68 +++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 test/git/performance/test_commit.py (limited to 'test/git/performance/test_commit.py') diff --git a/test/git/performance/test_commit.py b/test/git/performance/test_commit.py new file mode 100644 index 00000000..c1f8ce59 --- /dev/null +++ b/test/git/performance/test_commit.py @@ -0,0 +1,68 @@ +# test_performance.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php + +from test.testlib import * +from git import * +from time import time +import sys + +class TestPerformance(TestBase): + + # ref with about 100 commits in its history + ref_100 = '0.1.6' + + def _query_commit_info(self, c): + c.author + c.authored_date + c.author_tz_offset + c.committer + c.committed_date + c.committer_tz_offset + c.message + c.parents + + def test_iteration(self): + no = 0 + nc = 0 + + # find the first commit containing the given path - always do a full + # iteration ( restricted to the path in question ), but in fact it should + # return quite a lot of commits, we just take one and hence abort the operation + + st = time() + for c in self.rorepo.iter_commits(self.ref_100): + nc += 1 + self._query_commit_info(c) + for obj in c.tree.traverse(): + obj.size + no += 1 + # END for each object + # END for each commit + elapsed_time = time() - st + print >> sys.stderr, "Traversed %i Trees and a total of %i unchached objects in %s [s] ( %f objs/s )" % (nc, no, elapsed_time, no/elapsed_time) + + def test_commit_traversal(self): + # bound to cat-file parsing 
performance + nc = 0 + st = time() + for c in self.rorepo.commit(self.ref_100).traverse(branch_first=False): + nc += 1 + self._query_commit_info(c) + # END for each traversed commit + elapsed_time = time() - st + print >> sys.stderr, "Traversed %i Commits in %s [s] ( %f commits/s )" % (nc, elapsed_time, nc/elapsed_time) + + def test_commit_iteration(self): + # bound to stream parsing performance + nc = 0 + st = time() + for c in Commit.iter_items(self.rorepo, self.ref_100): + nc += 1 + self._query_commit_info(c) + # END for each traversed commit + elapsed_time = time() - st + print >> sys.stderr, "Iterated %i Commits in %s [s] ( %f commits/s )" % (nc, elapsed_time, nc/elapsed_time) + -- cgit v1.2.1 From ae5a69f67822d81bbbd8f4af93be68703e730b37 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 2 Jun 2010 16:41:28 +0200 Subject: commit: redesigned revlist and commit parsing, commits are always retrieved from their object information directly. This is faster, and resolves issues with the rev-list format and empty commit messages Adjusted many tests to go with the changes, as they were still mocked. 
The mock was removed if necessary and replaced by code that actually executes --- test/git/performance/test_commit.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'test/git/performance/test_commit.py') diff --git a/test/git/performance/test_commit.py b/test/git/performance/test_commit.py index c1f8ce59..b4a9d868 100644 --- a/test/git/performance/test_commit.py +++ b/test/git/performance/test_commit.py @@ -4,12 +4,12 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -from test.testlib import * +from lib import * from git import * from time import time import sys -class TestPerformance(TestBase): +class TestPerformance(TestBigRepoReadOnly): # ref with about 100 commits in its history ref_100 = '0.1.6' @@ -48,7 +48,7 @@ class TestPerformance(TestBase): # bound to cat-file parsing performance nc = 0 st = time() - for c in self.rorepo.commit(self.ref_100).traverse(branch_first=False): + for c in self.gitrepo.commit(self.head_sha_2k).traverse(branch_first=False): nc += 1 self._query_commit_info(c) # END for each traversed commit @@ -59,7 +59,7 @@ class TestPerformance(TestBase): # bound to stream parsing performance nc = 0 st = time() - for c in Commit.iter_items(self.rorepo, self.ref_100): + for c in Commit.iter_items(self.gitrepo, self.head_sha_2k): nc += 1 self._query_commit_info(c) # END for each traversed commit -- cgit v1.2.1 From 1e2b46138ba58033738a24dadccc265748fce2ca Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 3 Jun 2010 23:20:34 +0200 Subject: commit.create_from_tree now uses pure python implementation, fixed message parsing which truncated newlines although it was illegitimate.
It's up to the reader to truncate these, nowhere in the git code I could find anyone adding newlines to commits where it is written. Added performance tests for serialization, it does about 5k commits per second if writing to tmpfs --- test/git/performance/test_commit.py | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) (limited to 'test/git/performance/test_commit.py') diff --git a/test/git/performance/test_commit.py b/test/git/performance/test_commit.py index b4a9d868..2398c93d 100644 --- a/test/git/performance/test_commit.py +++ b/test/git/performance/test_commit.py @@ -6,10 +6,12 @@ from lib import * from git import * +from test.git.test_commit import assert_commit_serialization +from cStringIO import StringIO from time import time import sys -class TestPerformance(TestBigRepoReadOnly): +class TestPerformance(TestBigRepoRW): # ref with about 100 commits in its history ref_100 = '0.1.6' @@ -48,7 +50,7 @@ class TestPerformance(TestBigRepoReadOnly): # bound to cat-file parsing performance nc = 0 st = time() - for c in self.gitrepo.commit(self.head_sha_2k).traverse(branch_first=False): + for c in self.gitrorepo.commit(self.head_sha_2k).traverse(branch_first=False): nc += 1 self._query_commit_info(c) # END for each traversed commit @@ -59,10 +61,38 @@ class TestPerformance(TestBigRepoReadOnly): # bound to stream parsing performance nc = 0 st = time() - for c in Commit.iter_items(self.gitrepo, self.head_sha_2k): + for c in Commit.iter_items(self.gitrorepo, self.head_sha_2k): nc += 1 self._query_commit_info(c) # END for each traversed commit elapsed_time = time() - st print >> sys.stderr, "Iterated %i Commits in %s [s] ( %f commits/s )" % (nc, elapsed_time, nc/elapsed_time) + def test_commit_serialization(self): + assert_commit_serialization(self.gitrwrepo, self.head_sha_2k, True) + + rwrepo = self.gitrwrepo + make_object = rwrepo.odb.to_object + # direct serialization - deserialization can be tested afterwards + # serialization
is probably limited on IO + hc = rwrepo.commit(self.head_sha_2k) + + commits = list() + nc = 5000 + st = time() + for i in xrange(nc): + cm = Commit( rwrepo, Commit.NULL_HEX_SHA, hc.tree, + hc.author, hc.authored_date, hc.author_tz_offset, + hc.committer, hc.committed_date, hc.committer_tz_offset, + str(i), parents=hc.parents, encoding=hc.encoding) + + stream = StringIO() + cm._serialize(stream) + slen = stream.tell() + stream.seek(0) + + cm.sha = make_object(Commit.type, slen, stream) + # END commit creation + elapsed = time() - st + + print >> sys.stderr, "Serialized %i commits to loose objects in %f s ( %f commits / s )" % (nc, elapsed, nc / elapsed) -- cgit v1.2.1 From a1e80445ad5cb6da4c0070d7cb8af89da3b0803b Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 4 Jun 2010 14:41:15 +0200 Subject: initial version of new odb design to facilitate a channel based multi-threading implementation of all odb functions --- test/git/performance/test_commit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test/git/performance/test_commit.py') diff --git a/test/git/performance/test_commit.py b/test/git/performance/test_commit.py index 2398c93d..bca3ad8b 100644 --- a/test/git/performance/test_commit.py +++ b/test/git/performance/test_commit.py @@ -72,7 +72,7 @@ class TestPerformance(TestBigRepoRW): assert_commit_serialization(self.gitrwrepo, self.head_sha_2k, True) rwrepo = self.gitrwrepo - make_object = rwrepo.odb.to_object + make_object = rwrepo.odb.store # direct serialization - deserialization can be tested afterwards # serialization is probably limited on IO hc = rwrepo.commit(self.head_sha_2k) -- cgit v1.2.1 From e746f96bcc29238b79118123028ca170adc4ff0f Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 4 Jun 2010 17:22:08 +0200 Subject: Fixed implementation after design change to deal with it - all tests run, but next there will have to be more thorough testing --- test/git/performance/test_commit.py | 2 +- 1 file changed, 1
insertion(+), 1 deletion(-) (limited to 'test/git/performance/test_commit.py') diff --git a/test/git/performance/test_commit.py b/test/git/performance/test_commit.py index bca3ad8b..0571d0d9 100644 --- a/test/git/performance/test_commit.py +++ b/test/git/performance/test_commit.py @@ -91,7 +91,7 @@ class TestPerformance(TestBigRepoRW): slen = stream.tell() stream.seek(0) - cm.sha = make_object(Commit.type, slen, stream) + cm.sha = make_object(IStream(Commit.type, slen, stream)).sha # END commit creation elapsed = time() - st -- cgit v1.2.1