From df0892351a394d768489b5647d47b73c24d3ef5f Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 2 Jun 2010 00:48:16 +0200 Subject: commit: initial version of commit_from_tree which could create commit objects if it could serialize itself --- test/git/test_utils.py | 233 +++++++++++++++++++++++++++---------------------- 1 file changed, 131 insertions(+), 102 deletions(-) (limited to 'test') diff --git a/test/git/test_utils.py b/test/git/test_utils.py index f843c12e..2c3c392b 100644 --- a/test/git/test_utils.py +++ b/test/git/test_utils.py @@ -9,112 +9,141 @@ import tempfile from test.testlib import * from git.utils import * +from git.objects.utils import * from git import * from git.cmd import dashify import time class TestUtils(TestCase): - def setup(self): - self.testdict = { - "string": "42", - "int": 42, - "array": [ 42 ], - } + def setup(self): + self.testdict = { + "string": "42", + "int": 42, + "array": [ 42 ], + } - def test_it_should_dashify(self): - assert_equal('this-is-my-argument', dashify('this_is_my_argument')) - assert_equal('foo', dashify('foo')) - - - def test_lock_file(self): - my_file = tempfile.mktemp() - lock_file = LockFile(my_file) - assert not lock_file._has_lock() - # release lock we don't have - fine - lock_file._release_lock() - - # get lock - lock_file._obtain_lock_or_raise() - assert lock_file._has_lock() - - # concurrent access - other_lock_file = LockFile(my_file) - assert not other_lock_file._has_lock() - self.failUnlessRaises(IOError, other_lock_file._obtain_lock_or_raise) - - lock_file._release_lock() - assert not lock_file._has_lock() - - other_lock_file._obtain_lock_or_raise() - self.failUnlessRaises(IOError, lock_file._obtain_lock_or_raise) - - # auto-release on destruction - del(other_lock_file) - lock_file._obtain_lock_or_raise() - lock_file._release_lock() - - def test_blocking_lock_file(self): - my_file = tempfile.mktemp() - lock_file = BlockingLockFile(my_file) - lock_file._obtain_lock() - - # next one waits for the lock - start = time.time() - wait_time = 0.1 - wait_lock = BlockingLockFile(my_file, 0.05, wait_time) - self.failUnlessRaises(IOError, wait_lock._obtain_lock) - elapsed = time.time() - start - assert elapsed <= wait_time + 0.02 # some extra time it may cost - - def _cmp_contents(self, file_path, data): - # raise if data from file at file_path - # does not match data string - fp = open(file_path, "rb") - try: - assert fp.read() == data - finally: - fp.close() - - def test_safe_operation(self): - my_file = tempfile.mktemp() - orig_data = "hello" - new_data = "world" - my_file_fp = open(my_file, "wb") - my_file_fp.write(orig_data) - my_file_fp.close() - - try: - cwrite = ConcurrentWriteOperation(my_file) - - # didn't start writing, doesnt matter - cwrite._end_writing(False) - cwrite._end_writing(True) - assert not cwrite._is_writing() - - # write data and fail - stream = cwrite._begin_writing() - assert cwrite._is_writing() - stream.write(new_data) - cwrite._end_writing(successful=False) - self._cmp_contents(my_file, orig_data) - assert not os.path.exists(stream.name) - - # write data - concurrently - ocwrite = ConcurrentWriteOperation(my_file) - stream = cwrite._begin_writing() - self.failUnlessRaises(IOError, ocwrite._begin_writing) - - stream.write("world") - cwrite._end_writing(successful=True) - self._cmp_contents(my_file, new_data) - assert not os.path.exists(stream.name) - - # could test automatic _end_writing on destruction - finally: - os.remove(my_file) - # END final cleanup - - - - + def test_it_should_dashify(self): + assert_equal('this-is-my-argument', dashify('this_is_my_argument')) + assert_equal('foo', dashify('foo')) + + + def test_lock_file(self): + my_file = tempfile.mktemp() + lock_file = LockFile(my_file) + assert not lock_file._has_lock() + # release lock we don't have - fine + lock_file._release_lock() + + # get lock + lock_file._obtain_lock_or_raise() + assert lock_file._has_lock() + + # concurrent access + other_lock_file = LockFile(my_file) + assert not other_lock_file._has_lock() + self.failUnlessRaises(IOError, other_lock_file._obtain_lock_or_raise) + + lock_file._release_lock() + assert not lock_file._has_lock() + + other_lock_file._obtain_lock_or_raise() + self.failUnlessRaises(IOError, lock_file._obtain_lock_or_raise) + + # auto-release on destruction + del(other_lock_file) + lock_file._obtain_lock_or_raise() + lock_file._release_lock() + + def test_blocking_lock_file(self): + my_file = tempfile.mktemp() + lock_file = BlockingLockFile(my_file) + lock_file._obtain_lock() + + # next one waits for the lock + start = time.time() + wait_time = 0.1 + wait_lock = BlockingLockFile(my_file, 0.05, wait_time) + self.failUnlessRaises(IOError, wait_lock._obtain_lock) + elapsed = time.time() - start + assert elapsed <= wait_time + 0.02 # some extra time it may cost + + def _cmp_contents(self, file_path, data): + # raise if data from file at file_path + # does not match data string + fp = open(file_path, "rb") + try: + assert fp.read() == data + finally: + fp.close() + + def test_safe_operation(self): + my_file = tempfile.mktemp() + orig_data = "hello" + new_data = "world" + my_file_fp = open(my_file, "wb") + my_file_fp.write(orig_data) + my_file_fp.close() + + try: + cwrite = ConcurrentWriteOperation(my_file) + + # didn't start writing, doesnt matter + cwrite._end_writing(False) + cwrite._end_writing(True) + assert not cwrite._is_writing() + + # write data and fail + stream = cwrite._begin_writing() + assert cwrite._is_writing() + stream.write(new_data) + cwrite._end_writing(successful=False) + self._cmp_contents(my_file, orig_data) + assert not os.path.exists(stream.name) + + # write data - concurrently + ocwrite = ConcurrentWriteOperation(my_file) + stream = cwrite._begin_writing() + self.failUnlessRaises(IOError, ocwrite._begin_writing) + + stream.write("world") + cwrite._end_writing(successful=True) + self._cmp_contents(my_file, new_data) + assert not os.path.exists(stream.name) + + # could test automatic _end_writing on destruction + finally: + os.remove(my_file) + # END final cleanup + + + + def test_user_id(self): + assert '@' in get_user_id() + + def test_parse_date(self): + # test all supported formats + def assert_rval(rval, veri_time, offset=0): + assert len(rval) == 2 + assert isinstance(rval[0], int) and isinstance(rval[1], int) + assert rval[0] == veri_time + assert rval[1] == offset + # END assert rval utility + + rfc = ("Thu, 07 Apr 2005 22:13:11 +0000", 0) + iso = ("2005-04-07T22:13:11 -0200", 7200) + iso2 = ("2005-04-07 22:13:11 +0400", -14400) + iso3 = ("2005.04.07 22:13:11 -0000", 0) + alt = ("04/07/2005 22:13:11", 0) + alt2 = ("07.04.2005 22:13:11", 0) + veri_time = 1112904791 # the time this represents + for date, offset in (rfc, iso, iso2, iso3, alt, alt2): + assert_rval(parse_date(date), veri_time, offset) + # END for each date type + + # and failure + self.failUnlessRaises(ValueError, parse_date, 'invalid format') + self.failUnlessRaises(ValueError, parse_date, '123456789 -02000') + self.failUnlessRaises(ValueError, parse_date, ' 123456789 -0200') + + -- cgit v1.2.1 From 8c1a87d11df666d308d14e4ae7ee0e9d614296b6 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 2 Jun 2010 12:30:33 +0200 Subject: commit: refactored existing code to decode commits from streams - performance is slightly better git.cmd: added method to provide access to the content stream directly. This is more efficient if large objects are handled, if it is actually used test.helpers: removed unnecessary code --- test/git/test_commit.py | 2 +- test/git/test_diff.py | 6 +++--- test/git/test_repo.py | 5 +++-- test/testlib/helper.py | 38 ++++++-------------------------------- 4 files changed, 13 insertions(+), 38 deletions(-) (limited to 'test') diff --git a/test/git/test_commit.py b/test/git/test_commit.py index 48937c93..28b407ac 100644 --- a/test/git/test_commit.py +++ b/test/git/test_commit.py @@ -129,7 +129,7 @@ class TestCommit(TestBase): bisect_all=True) assert_true(git.called) - commits = Commit._iter_from_process_or_stream(self.rorepo, ListProcessAdapter(revs), True) + commits = Commit._iter_from_process_or_stream(self.rorepo, StringProcessAdapter(revs), True) expected_ids = ( 'cf37099ea8d1d8c7fbf9b6d12d7ec0249d3acb8b', '33ebe7acec14b25c5f84f35a664803fcab2f7781', diff --git a/test/git/test_diff.py b/test/git/test_diff.py index 2f6a19bd..a113b992 100644 --- a/test/git/test_diff.py +++ b/test/git/test_diff.py @@ -20,7 +20,7 @@ class TestDiff(TestBase): return diffs def test_list_from_string_new_mode(self): - output = ListProcessAdapter(fixture('diff_new_mode')) + output = StringProcessAdapter(fixture('diff_new_mode')) diffs = Diff._index_from_patch_format(self.rorepo, output.stdout) self._assert_diff_format(diffs) @@ -28,7 +28,7 @@ class TestDiff(TestBase): assert_equal(10, len(diffs[0].diff.splitlines())) def test_diff_with_rename(self): - output = ListProcessAdapter(fixture('diff_rename')) + output = StringProcessAdapter(fixture('diff_rename')) diffs = Diff._index_from_patch_format(self.rorepo, output.stdout) self._assert_diff_format(diffs) @@ -47,7 +47,7 @@ class TestDiff(TestBase): "diff_tree_numstat_root" ) for fixture_name in fixtures: - diff_proc = ListProcessAdapter(fixture(fixture_name)) + diff_proc = StringProcessAdapter(fixture(fixture_name)) diffs = Diff._index_from_patch_format(self.rorepo, diff_proc.stdout) # END for each fixture diff --git a/test/git/test_repo.py b/test/git/test_repo.py index ce79402a..9316245b 100644 --- a/test/git/test_repo.py +++ b/test/git/test_repo.py @@ -48,6 +48,7 @@ class TestRepo(TestBase): def test_tree_from_revision(self): tree = self.rorepo.tree('0.1.6') + assert len(tree.sha) == 40 assert tree.type == "tree" assert self.rorepo.tree(tree) == tree @@ -56,9 +57,9 @@ class TestRepo(TestBase): @patch_object(Git, '_call_process') def test_commits(self, git): - git.return_value = ListProcessAdapter(fixture('rev_list')) + git.return_value = StringProcessAdapter(fixture('rev_list')) - commits = list( self.rorepo.iter_commits('master', max_count=10) ) + commits = list(self.rorepo.iter_commits('master', max_count=10)) c = commits[0] assert_equal('4c8124ffcf4039d292442eeccabdeca5af5c5017', c.sha) diff --git a/test/testlib/helper.py b/test/testlib/helper.py index 9c38ffd5..c9b4c2ac 100644 --- a/test/testlib/helper.py +++ b/test/testlib/helper.py @@ -9,6 +9,7 @@ from git import Repo, Remote, GitCommandError from unittest import TestCase import tempfile import shutil +import cStringIO GIT_REPO = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) @@ -23,40 +24,13 @@ def absolute_project_path(): return os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) -class ListProcessAdapter(object): - """Allows to use lists as Process object as returned by SubProcess.Popen. +class StringProcessAdapter(object): + """Allows to use strings as Process object as returned by SubProcess.Popen. Its tailored to work with the test system only""" - class Stream(object): - """Simple stream emulater meant to work only with tests""" - def __init__(self, data): - self.data = data - self.cur_iter = None - - def __iter__(self): - dat = self.data - if isinstance(dat, basestring): - dat = dat.splitlines() - if self.cur_iter is None: - self.cur_iter = iter(dat) - return self.cur_iter - - def read(self): - dat = self.data - if isinstance(dat, (tuple,list)): - dat = "\n".join(dat) - return dat - - def next(self): - if self.cur_iter is None: - self.cur_iter = iter(self) - return self.cur_iter.next() - - # END stream - - def __init__(self, input_list_or_string): - self.stdout = self.Stream(input_list_or_string) - self.stderr = self.Stream('') + def __init__(self, input_string): + self.stdout = cStringIO.StringIO(input_string) + self.stderr = cStringIO.StringIO() def wait(self): return 0 -- cgit v1.2.1 From 4a25347d7f4c371345da2348ac6cceec7a143da2 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 2 Jun 2010 13:01:27 +0200 Subject: Added commit-iteration test --- test/git/test_performance.py | 96 ++++++++++++++++++++++++++------------------ 1 file changed, 56 insertions(+), 40 deletions(-) (limited to 'test') diff --git a/test/git/test_performance.py b/test/git/test_performance.py index 72acfcac..c1f8ce59 100644 --- a/test/git/test_performance.py +++ b/test/git/test_performance.py @@ -7,46 +7,62 @@ from test.testlib import * from git import * from time import time +import sys class TestPerformance(TestBase): - def _query_commit_info(self, c): - c.author - c.authored_date - c.author_tz_offset - c.committer - c.committed_date - c.committer_tz_offset - c.message - c.parents - - def test_iteration(self): - num_objs = 0 - num_commits = 0 - - # find the first commit containing the given path - always do a full - # iteration ( restricted to the path in question ), but in fact it should - # return quite a lot of commits, we just take one and hence abort the operation - - st = time() - for c in self.rorepo.iter_commits('0.1.6'): - num_commits += 1 - self._query_commit_info(c) - for obj in c.tree.traverse(): - obj.size - num_objs += 1 - # END for each object - # END for each commit - elapsed_time = time() - st - print "Traversed %i Trees and a total of %i unchached objects in %s [s] ( %f objs/s )" % (num_commits, num_objs, elapsed_time, num_objs/elapsed_time) - - def test_commit_traversal(self): - num_commits = 0 - - st = time() - for c in self.rorepo.commit('0.1.6').traverse(branch_first=False): - num_commits += 1 - self._query_commit_info(c) - # END for each traversed commit - elapsed_time = time() - st - print "Traversed %i Commits in %s [s] ( %f commits/s )" % (num_commits, elapsed_time, num_commits/elapsed_time) + # ref with about 100 commits in its history + ref_100 = '0.1.6' + + def _query_commit_info(self, c): + c.author + c.authored_date + c.author_tz_offset + c.committer + c.committed_date + c.committer_tz_offset + c.message + c.parents + + def test_iteration(self): + no = 0 + nc = 0 + + # find the first commit containing the given path - always do a full + # iteration ( restricted to the path in question ), but in fact it should + # return quite a lot of commits, we just take one and hence abort the operation + + st = time() + for c in self.rorepo.iter_commits(self.ref_100): + nc += 1 + self._query_commit_info(c) + for obj in c.tree.traverse(): + obj.size + no += 1 + # END for each object + # END for each commit + elapsed_time = time() - st + print >> sys.stderr, "Traversed %i Trees and a total of %i unchached objects in %s [s] ( %f objs/s )" % (nc, no, elapsed_time, no/elapsed_time) + + def test_commit_traversal(self): + # bound to cat-file parsing performance + nc = 0 + st = time() + for c in self.rorepo.commit(self.ref_100).traverse(branch_first=False): + nc += 1 + self._query_commit_info(c) + # END for each traversed commit + elapsed_time = time() - st + print >> sys.stderr, "Traversed %i Commits in %s [s] ( %f commits/s )" % (nc, elapsed_time, nc/elapsed_time) + + def test_commit_iteration(self): + # bound to stream parsing performance + nc = 0 + st = time() + for c in Commit.iter_items(self.rorepo, self.ref_100): + nc += 1 + self._query_commit_info(c) + # END for each traversed commit + elapsed_time = time() - st + print >> sys.stderr, "Iterated %i Commits in %s [s] ( %f commits/s )" % (nc, elapsed_time, nc/elapsed_time) + -- cgit v1.2.1 From 4e1c89ec97ec90037583e85d0e9e71e9c845a19b Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 2 Jun 2010 16:13:32 +0200 Subject: Added performance testing foundation library, reworked existing performance tests to work on larger repositories --- test/git/performance/lib.py | 46 +++++++++++++++++++++++++ test/git/performance/test_commit.py | 68 +++++++++++++++++++++++++++++++++++++ test/git/test_performance.py | 68 ------------------------------------- 3 files changed, 114 insertions(+), 68 deletions(-) create mode 100644 test/git/performance/lib.py create mode 100644 test/git/performance/test_commit.py delete mode 100644 test/git/test_performance.py (limited to 'test') diff --git a/test/git/performance/lib.py b/test/git/performance/lib.py new file mode 100644 index 00000000..4b552b20 --- /dev/null +++ b/test/git/performance/lib.py @@ -0,0 +1,46 @@ +"""Contains library functions""" +import os +from test.testlib import * + +from git import ( + Repo + ) + +#{ Invvariants +k_env_git_repo = "GIT_PYTHON_TEST_GIT_REPO_BASE" +#} END invariants + + +#{ Utilities +def resolve_or_fail(env_var): + """:return: resolved environment variable or raise EnvironmentError""" + try: + return os.environ[env_var] + except KeyError: + raise EnvironmentError("Please set the %r envrionment variable and retry" % env_var) + # END exception handling + +#} END utilities + + +#{ Base Classes + +class TestBigRepoReadOnly(TestBase): + """TestCase providing access to readonly 'big' repositories using the following + member variables: + + * gitrepo + + * Read-Only git repository - actually the repo of git itself""" + + #{ Invariants + head_sha_2k = '235d521da60e4699e5bd59ac658b5b48bd76ddca' + head_sha_50 = '32347c375250fd470973a5d76185cac718955fd5' + #} END invariants + + @classmethod + def setUpAll(cls): + super(TestBigRepoReadOnly, cls).setUpAll() + cls.gitrepo = Repo(resolve_or_fail(k_env_git_repo)) + +#} END base classes diff --git a/test/git/performance/test_commit.py b/test/git/performance/test_commit.py new file mode 100644 index 00000000..c1f8ce59 --- /dev/null +++ b/test/git/performance/test_commit.py @@ -0,0 +1,68 @@ +# test_performance.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php + +from test.testlib import * +from git import * +from time import time +import sys + +class TestPerformance(TestBase): + + # ref with about 100 commits in its history + ref_100 = '0.1.6' + + def _query_commit_info(self, c): + c.author + c.authored_date + c.author_tz_offset + c.committer + c.committed_date + c.committer_tz_offset + c.message + c.parents + + def test_iteration(self): + no = 0 + nc = 0 + + # find the first commit containing the given path - always do a full + # iteration ( restricted to the path in question ), but in fact it should + # return quite a lot of commits, we just take one and hence abort the operation + + st = time() + for c in self.rorepo.iter_commits(self.ref_100): + nc += 1 + self._query_commit_info(c) + for obj in c.tree.traverse(): + obj.size + no += 1 + # END for each object + # END for each commit + elapsed_time = time() - st + print >> sys.stderr, "Traversed %i Trees and a total of %i unchached objects in %s [s] ( %f objs/s )" % (nc, no, elapsed_time, no/elapsed_time) + + def test_commit_traversal(self): + # bound to cat-file parsing performance + nc = 0 + st = time() + for c in self.rorepo.commit(self.ref_100).traverse(branch_first=False): + nc += 1 + self._query_commit_info(c) + # END for each traversed commit + elapsed_time = time() - st + print >> sys.stderr, "Traversed %i Commits in %s [s] ( %f commits/s )" % (nc, elapsed_time, nc/elapsed_time) + + def test_commit_iteration(self): + # bound to stream parsing performance + nc = 0 + st = time() + for c in Commit.iter_items(self.rorepo, self.ref_100): + nc += 1 + self._query_commit_info(c) + # END for each traversed commit + elapsed_time = time() - st + print >> sys.stderr, "Iterated %i Commits in %s [s] ( %f commits/s )" % (nc, elapsed_time, nc/elapsed_time) + diff --git a/test/git/test_performance.py b/test/git/test_performance.py deleted file mode 100644 index c1f8ce59..00000000 --- a/test/git/test_performance.py +++ /dev/null @@ -1,68 +0,0 @@ -# test_performance.py -# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors -# -# This module is part of GitPython and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php - -from test.testlib import * -from git import * -from time import time -import sys - -class TestPerformance(TestBase): - - # ref with about 100 commits in its history - ref_100 = '0.1.6' - - def _query_commit_info(self, c): - c.author - c.authored_date - c.author_tz_offset - c.committer - c.committed_date - c.committer_tz_offset - c.message - c.parents - - def test_iteration(self): - no = 0 - nc = 0 - - # find the first commit containing the given path - always do a full - # iteration ( restricted to the path in question ), but in fact it should - # return quite a lot of commits, we just take one and hence abort the operation - - st = time() - for c in self.rorepo.iter_commits(self.ref_100): - nc += 1 - self._query_commit_info(c) - for obj in c.tree.traverse(): - obj.size - no += 1 - # END for each object - # END for each commit - elapsed_time = time() - st - print >> sys.stderr, "Traversed %i Trees and a total of %i unchached objects in %s [s] ( %f objs/s )" % (nc, no, elapsed_time, no/elapsed_time) - - def test_commit_traversal(self): - # bound to cat-file parsing performance - nc = 0 - st = time() - for c in self.rorepo.commit(self.ref_100).traverse(branch_first=False): - nc += 1 - self._query_commit_info(c) - # END for each traversed commit - elapsed_time = time() - st - print >> sys.stderr, "Traversed %i Commits in %s [s] ( %f commits/s )" % (nc, elapsed_time, nc/elapsed_time) - - def test_commit_iteration(self): - # bound to stream parsing performance - nc = 0 - st = time() - for c in Commit.iter_items(self.rorepo, self.ref_100): - nc += 1 - self._query_commit_info(c) - # END for each traversed commit - elapsed_time = time() - st - print >> sys.stderr, "Iterated %i Commits in %s [s] ( %f commits/s )" % (nc, elapsed_time, nc/elapsed_time) - -- cgit v1.2.1 From ae5a69f67822d81bbbd8f4af93be68703e730b37 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 2 Jun 2010 16:41:28 +0200 Subject: commit: redesigned revlist and commit parsing, commits are always retrieved from their object information directly. This is faster, and resolves issues with the rev-list format and empty commit messages Adjusted many tests to go with the changes, as they were still mocked. The mock was removed if necessary and replaced by code that actually executes --- test/fixtures/rev_list | 27 +--- test/git/performance/test_commit.py | 8 +- test/git/test_commit.py | 310 ++++++++++++++++++------------------ test/git/test_repo.py | 39 ++--- 4 files changed, 177 insertions(+), 207 deletions(-) (limited to 'test') diff --git a/test/fixtures/rev_list b/test/fixtures/rev_list index 95a1ebff..1a576118 100644 --- a/test/fixtures/rev_list +++ b/test/fixtures/rev_list @@ -1,24 +1,3 @@ -commit 4c8124ffcf4039d292442eeccabdeca5af5c5017 -tree 672eca9b7f9e09c22dcb128c283e8c3c8d7697a4 -parent 634396b2f541a9f2d58b00be1a07f0c358b999b3 -author Tom Preston-Werner 1191999972 -0700 -committer Tom Preston-Werner 1191999972 -0700 - - implement Grit#heads - -commit 634396b2f541a9f2d58b00be1a07f0c358b999b3 -tree b35b4bf642d667fdd613eebcfe4e17efd420fb8a -author Tom Preston-Werner 1191997100 -0700 -committer Tom Preston-Werner 1191997100 -0700 - - initial grit setup - -commit ab25fd8483882c3bda8a458ad2965d2248654335 -tree c20b5ec543bde1e43a931449b196052c06ed8acc -parent 6e64c55896aabb9a7d8e9f8f296f426d21a78c2c -parent 7f874954efb9ba35210445be456c74e037ba6af2 -author Tom Preston-Werner 1182645538 -0700 -committer Tom Preston-Werner 1182645538 -0700 - - Merge branch 'site' - Some other stuff +4c8124ffcf4039d292442eeccabdeca5af5c5017 +634396b2f541a9f2d58b00be1a07f0c358b999b3 +ab25fd8483882c3bda8a458ad2965d2248654335 diff --git a/test/git/performance/test_commit.py b/test/git/performance/test_commit.py index c1f8ce59..b4a9d868 100644 --- a/test/git/performance/test_commit.py +++ b/test/git/performance/test_commit.py @@ -4,12 +4,12 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -from test.testlib import * +from lib import * from git import * from time import time import sys -class TestPerformance(TestBase): +class TestPerformance(TestBigRepoReadOnly): # ref with about 100 commits in its history ref_100 = '0.1.6' @@ -48,7 +48,7 @@ class TestPerformance(TestBase): # bound to cat-file parsing performance nc = 0 st = time() - for c in self.rorepo.commit(self.ref_100).traverse(branch_first=False): + for c in self.gitrepo.commit(self.head_sha_2k).traverse(branch_first=False): nc += 1 self._query_commit_info(c) # END for each traversed commit @@ -59,7 +59,7 @@ class TestPerformance(TestBase): # bound to stream parsing performance nc = 0 st = time() - for c in Commit.iter_items(self.rorepo, self.ref_100): + for c in Commit.iter_items(self.gitrepo, self.head_sha_2k): nc += 1 self._query_commit_info(c) # END for each traversed commit diff --git a/test/git/test_commit.py b/test/git/test_commit.py index 28b407ac..ad7a0082 100644 --- a/test/git/test_commit.py +++ b/test/git/test_commit.py @@ -9,169 +9,165 @@ from git import * class TestCommit(TestBase): - def test_bake(self): + def test_bake(self): - commit = Commit(self.rorepo, **{'sha': '2454ae89983a4496a445ce347d7a41c0bb0ea7ae'}) - commit.author # bake + commit = Commit(self.rorepo, '2454ae89983a4496a445ce347d7a41c0bb0ea7ae') + commit.author # bake - assert_equal("Sebastian Thiel", commit.author.name) - assert_equal("byronimo@gmail.com", commit.author.email) - assert commit.author == commit.committer - assert isinstance(commit.authored_date, int) and isinstance(commit.committed_date, int) - assert isinstance(commit.author_tz_offset, int) and isinstance(commit.committer_tz_offset, int) - assert commit.message == "Added missing information to docstrings of commit and stats module" + assert_equal("Sebastian Thiel", commit.author.name) + assert_equal("byronimo@gmail.com", commit.author.email) + assert commit.author == commit.committer + assert isinstance(commit.authored_date, int) and isinstance(commit.committed_date, int) + assert isinstance(commit.author_tz_offset, int) and isinstance(commit.committer_tz_offset, int) + assert commit.message == "Added missing information to docstrings of commit and stats module" - def test_stats(self): - commit = Commit(self.rorepo, '33ebe7acec14b25c5f84f35a664803fcab2f7781') - stats = commit.stats - - def check_entries(d): - assert isinstance(d, dict) - for key in ("insertions", "deletions", "lines"): - assert key in d - # END assertion helper - assert stats.files - assert stats.total - - check_entries(stats.total) - assert "files" in stats.total - - for filepath, d in stats.files.items(): - check_entries(d) - # END for each stated file - - # assure data is parsed properly - michael = Actor._from_string("Michael Trier ") - assert commit.author == michael - assert commit.committer == michael - assert commit.authored_date == 1210193388 - assert commit.committed_date == 1210193388 - assert commit.author_tz_offset == 14400, commit.author_tz_offset - assert commit.committer_tz_offset == 14400, commit.committer_tz_offset - assert commit.message == "initial project" - - def test_traversal(self): - start = self.rorepo.commit("a4d06724202afccd2b5c54f81bcf2bf26dea7fff") - first = self.rorepo.commit("33ebe7acec14b25c5f84f35a664803fcab2f7781") - p0 = start.parents[0] - p1 = start.parents[1] - p00 = p0.parents[0] - p10 = p1.parents[0] - - # basic branch first, depth first - dfirst = start.traverse(branch_first=False) - bfirst = start.traverse(branch_first=True) - assert dfirst.next() == p0 - assert dfirst.next() == p00 - - assert bfirst.next() == p0 - assert bfirst.next() == p1 - assert bfirst.next() == p00 - assert bfirst.next() == p10 - - # at some point, both iterations should stop - assert list(bfirst)[-1] == first - stoptraverse = self.rorepo.commit("254d04aa3180eb8b8daf7b7ff25f010cd69b4e7d").traverse(as_edge=True) - l = list(stoptraverse) - assert len(l[0]) == 2 - - # ignore self - assert start.traverse(ignore_self=False).next() == start - - # depth - assert len(list(start.traverse(ignore_self=False, depth=0))) == 1 - - # prune - assert start.traverse(branch_first=1, prune=lambda i,d: i==p0).next() == p1 - - # predicate - assert start.traverse(branch_first=1, predicate=lambda i,d: i==p1).next() == p1 - - # traversal should stop when the beginning is reached - self.failUnlessRaises(StopIteration, first.traverse().next) - - # parents of the first commit should be empty ( as the only parent has a null - # sha ) - assert len(first.parents) == 0 - - def test_iteration(self): - # we can iterate commits - all_commits = Commit.list_items(self.rorepo, self.rorepo.head) - assert all_commits - assert all_commits == list(self.rorepo.iter_commits()) - - # this includes merge commits - mcomit = Commit(self.rorepo, 'd884adc80c80300b4cc05321494713904ef1df2d') - assert mcomit in all_commits - - # we can limit the result to paths - ltd_commits = list(self.rorepo.iter_commits(paths='CHANGES')) - assert ltd_commits and len(ltd_commits) < len(all_commits) - - # show commits of multiple paths, resulting in a union of commits - less_ltd_commits = list(Commit.iter_items(self.rorepo, 'master', paths=('CHANGES', 'AUTHORS'))) - assert len(ltd_commits) < len(less_ltd_commits) - - - @patch_object(Git, '_call_process') - def test_rev_list_bisect_all(self, git): - """ - 'git rev-list --bisect-all' returns additional information - in the commit header. This test ensures that we properly parse it. - """ + def test_stats(self): + commit = Commit(self.rorepo, '33ebe7acec14b25c5f84f35a664803fcab2f7781') + stats = commit.stats + + def check_entries(d): + assert isinstance(d, dict) + for key in ("insertions", "deletions", "lines"): + assert key in d + # END assertion helper + assert stats.files + assert stats.total + + check_entries(stats.total) + assert "files" in stats.total + + for filepath, d in stats.files.items(): + check_entries(d) + # END for each stated file + + # assure data is parsed properly + michael = Actor._from_string("Michael Trier ") + assert commit.author == michael + assert commit.committer == michael + assert commit.authored_date == 1210193388 + assert commit.committed_date == 1210193388 + assert commit.author_tz_offset == 14400, commit.author_tz_offset + assert commit.committer_tz_offset == 14400, commit.committer_tz_offset + assert commit.message == "initial project" + + def test_traversal(self): + start = self.rorepo.commit("a4d06724202afccd2b5c54f81bcf2bf26dea7fff") + first = self.rorepo.commit("33ebe7acec14b25c5f84f35a664803fcab2f7781") + p0 = start.parents[0] + p1 = start.parents[1] + p00 = p0.parents[0] + p10 = p1.parents[0] + + # basic branch first, depth first + dfirst = start.traverse(branch_first=False) + bfirst = start.traverse(branch_first=True) + assert dfirst.next() == p0 + assert dfirst.next() == p00 + + assert bfirst.next() == p0 + assert bfirst.next() == p1 + assert bfirst.next() == p00 + assert bfirst.next() == p10 + + # at some point, both iterations should stop + assert list(bfirst)[-1] == first + stoptraverse = self.rorepo.commit("254d04aa3180eb8b8daf7b7ff25f010cd69b4e7d").traverse(as_edge=True) + l = list(stoptraverse) + assert len(l[0]) == 2 + + # ignore self + assert start.traverse(ignore_self=False).next() == start + + # depth + assert len(list(start.traverse(ignore_self=False, depth=0))) == 1 + + # prune + assert start.traverse(branch_first=1, prune=lambda i,d: i==p0).next() == p1 + + # predicate + assert start.traverse(branch_first=1, predicate=lambda i,d: i==p1).next() == p1 + + # traversal should stop when the beginning is reached + self.failUnlessRaises(StopIteration, first.traverse().next) + + # parents of the first commit should be empty ( as the only parent has a null + # sha ) + assert len(first.parents) == 0 + + def test_iteration(self): + # we can iterate commits + all_commits = Commit.list_items(self.rorepo, self.rorepo.head) + assert all_commits + assert all_commits == list(self.rorepo.iter_commits()) + + # this includes merge commits + mcomit = Commit(self.rorepo, 'd884adc80c80300b4cc05321494713904ef1df2d') + assert mcomit in all_commits + + # we can limit the result to paths + ltd_commits = list(self.rorepo.iter_commits(paths='CHANGES')) + assert ltd_commits and len(ltd_commits) < len(all_commits) + + # show commits of multiple paths, resulting in a union of commits + less_ltd_commits = list(Commit.iter_items(self.rorepo, 'master', paths=('CHANGES', 'AUTHORS'))) + assert len(ltd_commits) < len(less_ltd_commits) + + def test_iter_items(self): + # pretty not allowed + self.failUnlessRaises(ValueError, Commit.iter_items, self.rorepo, 'master', pretty="raw") + + def test_rev_list_bisect_all(self): + """ + 'git rev-list --bisect-all' returns additional information + in the commit header. This test ensures that we properly parse it. + """ + revs = self.rorepo.git.rev_list('933d23bf95a5bd1624fbcdf328d904e1fa173474', + first_parent=True, + bisect_all=True) - git.return_value = fixture('rev_list_bisect_all') + commits = Commit._iter_from_process_or_stream(self.rorepo, StringProcessAdapter(revs)) + expected_ids = ( + '7156cece3c49544abb6bf7a0c218eb36646fad6d', + '1f66cfbbce58b4b552b041707a12d437cc5f400a', + '33ebe7acec14b25c5f84f35a664803fcab2f7781', + '933d23bf95a5bd1624fbcdf328d904e1fa173474' + ) + for sha1, commit in zip(expected_ids, commits): + assert_equal(sha1, commit.sha) - revs = self.rorepo.git.rev_list('HEAD', - pretty='raw', - first_parent=True, - bisect_all=True) - assert_true(git.called) + def test_count(self): + assert self.rorepo.tag('refs/tags/0.1.5').commit.count( ) == 143 + + def test_list(self): + assert isinstance(Commit.list_items(self.rorepo, '0.1.5', max_count=5)['5117c9c8a4d3af19a9958677e45cda9269de1541'], Commit) - commits = Commit._iter_from_process_or_stream(self.rorepo, StringProcessAdapter(revs), True) - expected_ids = ( - 'cf37099ea8d1d8c7fbf9b6d12d7ec0249d3acb8b', - '33ebe7acec14b25c5f84f35a664803fcab2f7781', - 'a6604a00a652e754cb8b6b0b9f194f839fc38d7c', - '8df638c22c75ddc9a43ecdde90c0c9939f5009e7', - 'c231551328faa864848bde6ff8127f59c9566e90', - ) - for sha1, commit in zip(expected_ids, commits): - assert_equal(sha1, commit.sha) + def test_str(self): + commit = Commit(self.rorepo, 'abc') + assert_equal ("abc", str(commit)) - def test_count(self): - assert self.rorepo.tag('refs/tags/0.1.5').commit.count( ) == 143 - - def test_list(self): - assert isinstance(Commit.list_items(self.rorepo, '0.1.5', max_count=5)['5117c9c8a4d3af19a9958677e45cda9269de1541'], Commit) + def test_repr(self): + commit = Commit(self.rorepo, 'abc') + assert_equal('', repr(commit)) - def test_str(self): - commit = Commit(self.rorepo, 'abc') - assert_equal ("abc", str(commit)) - - def test_repr(self): - commit = Commit(self.rorepo, 'abc') - assert_equal('', repr(commit)) - - def test_equality(self): - commit1 = Commit(self.rorepo, 'abc') - commit2 = Commit(self.rorepo, 'abc') - commit3 = Commit(self.rorepo, 'zyx') - assert_equal(commit1, commit2) - assert_not_equal(commit2, commit3) - - def test_iter_parents(self): - # should return all but ourselves, even if skip is defined - c = self.rorepo.commit('0.1.5') - for skip in (0, 1): - piter = c.iter_parents(skip=skip) - first_parent = piter.next() - assert first_parent != c - assert first_parent == c.parents[0] - # END for each - - def test_base(self): - name_rev = self.rorepo.head.commit.name_rev - assert isinstance(name_rev, basestring) - + def test_equality(self): + commit1 = Commit(self.rorepo, 'abc') + commit2 = Commit(self.rorepo, 'abc') + commit3 = Commit(self.rorepo, 'zyx') + assert_equal(commit1, commit2) + assert_not_equal(commit2, commit3) + + def test_iter_parents(self): + # should return all but ourselves, even if skip is defined + c = self.rorepo.commit('0.1.5') + for skip in (0, 1): + piter = c.iter_parents(skip=skip) + first_parent = piter.next() + assert first_parent != c + assert first_parent == c.parents[0] + # END for each + + def test_base(self): + name_rev = self.rorepo.head.commit.name_rev + assert isinstance(name_rev, basestring) + diff --git a/test/git/test_repo.py b/test/git/test_repo.py index 9316245b..ddf2b3e1 100644 --- a/test/git/test_repo.py +++ b/test/git/test_repo.py @@ -55,32 +55,27 @@ class TestRepo(TestBase): # try from invalid revision that does not exist self.failUnlessRaises(ValueError, self.rorepo.tree, 'hello world') - @patch_object(Git, '_call_process') - def test_commits(self, git): - git.return_value = StringProcessAdapter(fixture('rev_list')) - - commits = list(self.rorepo.iter_commits('master', max_count=10)) + def test_commits(self): + mc = 10 + commits = list(self.rorepo.iter_commits('0.1.6', max_count=mc)) + assert len(commits) == mc c = commits[0] - assert_equal('4c8124ffcf4039d292442eeccabdeca5af5c5017', c.sha) - assert_equal(["634396b2f541a9f2d58b00be1a07f0c358b999b3"], [p.sha for p in c.parents]) - assert_equal("672eca9b7f9e09c22dcb128c283e8c3c8d7697a4", c.tree.sha) - assert_equal("Tom Preston-Werner", c.author.name) - assert_equal("tom@mojombo.com", c.author.email) - assert_equal(1191999972, c.authored_date) - assert_equal("Tom Preston-Werner", c.committer.name) - assert_equal("tom@mojombo.com", c.committer.email) - assert_equal(1191999972, c.committed_date) - assert_equal("implement Grit#heads", c.message) + assert_equal('9a4b1d4d11eee3c5362a4152216376e634bd14cf', c.sha) + assert_equal(["c76852d0bff115720af3f27acdb084c59361e5f6"], [p.sha for p in c.parents]) + assert_equal("ce41fc29549042f1aa09cc03174896cf23f112e3", c.tree.sha) + assert_equal("Michael Trier", c.author.name) + assert_equal("mtrier@gmail.com", c.author.email) + assert_equal(1232829715, c.authored_date) + assert_equal(5*3600, c.author_tz_offset) + assert_equal("Michael Trier", c.committer.name) + assert_equal("mtrier@gmail.com", c.committer.email) + assert_equal(1232829715, c.committed_date) + assert_equal(5*3600, c.committer_tz_offset) + assert_equal("Bumped version 0.1.6", c.message) c = commits[1] - assert_equal(tuple(), c.parents) - - c = commits[2] - assert_equal(["6e64c55896aabb9a7d8e9f8f296f426d21a78c2c", "7f874954efb9ba35210445be456c74e037ba6af2"], map(lambda p: p.sha, c.parents)) - assert_equal("Merge branch 'site'", c.summary) - - assert_true(git.called) + assert isinstance(c.parents, tuple) def test_trees(self): mc = 30 -- cgit v1.2.1 From 538820055ce1bf9dd07ecda48210832f96194504 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 2 Jun 2010 17:39:17 +0200 Subject: git.cmd: added test for stream section constraint used in git command, found bug of course which just didn't kick in yet --- test/git/test_repo.py | 651 ++++++++++++++++++++++++++++---------------------- 1 file changed, 363 insertions(+), 288 deletions(-) (limited to 'test') diff --git a/test/git/test_repo.py b/test/git/test_repo.py index ddf2b3e1..7dc645b6 100644 --- a/test/git/test_repo.py +++ b/test/git/test_repo.py @@ -10,321 +10,396 @@ from git import * from git.utils import join_path_native import tempfile import shutil +from cStringIO import StringIO class TestRepo(TestBase): - - @raises(InvalidGitRepositoryError) - def test_new_should_raise_on_invalid_repo_location(self): - Repo(tempfile.gettempdir()) + + @raises(InvalidGitRepositoryError) + def test_new_should_raise_on_invalid_repo_location(self): + Repo(tempfile.gettempdir()) - @raises(NoSuchPathError) - def test_new_should_raise_on_non_existant_path(self): - Repo("repos/foobar") + @raises(NoSuchPathError) + def test_new_should_raise_on_non_existant_path(self): + Repo("repos/foobar") - def test_repo_creation_from_different_paths(self): - r_from_gitdir = Repo(self.rorepo.git_dir) - assert r_from_gitdir.git_dir == self.rorepo.git_dir - assert r_from_gitdir.git_dir.endswith('.git') - assert not self.rorepo.git.working_dir.endswith('.git') - assert r_from_gitdir.git.working_dir == self.rorepo.git.working_dir + def test_repo_creation_from_different_paths(self): + r_from_gitdir = Repo(self.rorepo.git_dir) + assert r_from_gitdir.git_dir == self.rorepo.git_dir + assert r_from_gitdir.git_dir.endswith('.git') + assert not self.rorepo.git.working_dir.endswith('.git') + assert r_from_gitdir.git.working_dir == self.rorepo.git.working_dir - def test_description(self): - txt = "Test repository" - self.rorepo.description = txt - assert_equal(self.rorepo.description, txt) + def test_description(self): + txt = "Test repository" + self.rorepo.description = txt + assert_equal(self.rorepo.description, txt) - def test_heads_should_return_array_of_head_objects(self): - for head in self.rorepo.heads: - assert_equal(Head, head.__class__) + def test_heads_should_return_array_of_head_objects(self): + for head in self.rorepo.heads: + assert_equal(Head, head.__class__) - def test_heads_should_populate_head_data(self): - for head in self.rorepo.heads: - assert head.name - assert isinstance(head.commit,Commit) - # END for each head - - assert isinstance(self.rorepo.heads.master, Head) - assert isinstance(self.rorepo.heads['master'], Head) - - def test_tree_from_revision(self): - tree = self.rorepo.tree('0.1.6') - assert len(tree.sha) == 40 - assert tree.type == "tree" - assert self.rorepo.tree(tree) == tree - - # try from invalid revision that does not exist - self.failUnlessRaises(ValueError, self.rorepo.tree, 'hello world') + def test_heads_should_populate_head_data(self): + for head in self.rorepo.heads: + assert head.name + assert isinstance(head.commit,Commit) + # END for each head + + assert isinstance(self.rorepo.heads.master, Head) + assert isinstance(self.rorepo.heads['master'], Head) + + def test_tree_from_revision(self): + tree = self.rorepo.tree('0.1.6') + assert len(tree.sha) == 40 + assert tree.type == "tree" + assert self.rorepo.tree(tree) == tree + + # try from invalid revision that does not exist + self.failUnlessRaises(ValueError, self.rorepo.tree, 'hello world') - def test_commits(self): - mc = 10 - commits = list(self.rorepo.iter_commits('0.1.6', max_count=mc)) - assert len(commits) == mc - - c = commits[0] - assert_equal('9a4b1d4d11eee3c5362a4152216376e634bd14cf', c.sha) - assert_equal(["c76852d0bff115720af3f27acdb084c59361e5f6"], [p.sha for p in c.parents]) - assert_equal("ce41fc29549042f1aa09cc03174896cf23f112e3", c.tree.sha) - assert_equal("Michael Trier", c.author.name) - assert_equal("mtrier@gmail.com", c.author.email) - assert_equal(1232829715, c.authored_date) - assert_equal(5*3600, c.author_tz_offset) - assert_equal("Michael Trier", c.committer.name) - assert_equal("mtrier@gmail.com", c.committer.email) - assert_equal(1232829715, c.committed_date) - assert_equal(5*3600, c.committer_tz_offset) - assert_equal("Bumped version 0.1.6", c.message) + def test_commits(self): + mc = 10 + commits = list(self.rorepo.iter_commits('0.1.6', max_count=mc)) + assert len(commits) == mc + + c = commits[0] + assert_equal('9a4b1d4d11eee3c5362a4152216376e634bd14cf', c.sha) + assert_equal(["c76852d0bff115720af3f27acdb084c59361e5f6"], [p.sha for p in c.parents]) + assert_equal("ce41fc29549042f1aa09cc03174896cf23f112e3", c.tree.sha) + assert_equal("Michael Trier", c.author.name) + assert_equal("mtrier@gmail.com", c.author.email) + assert_equal(1232829715, c.authored_date) + assert_equal(5*3600, c.author_tz_offset) + assert_equal("Michael Trier", c.committer.name) + assert_equal("mtrier@gmail.com", c.committer.email) + assert_equal(1232829715, c.committed_date) + assert_equal(5*3600, c.committer_tz_offset) + assert_equal("Bumped version 0.1.6", c.message) - c = commits[1] - assert isinstance(c.parents, tuple) + c = commits[1] + assert isinstance(c.parents, tuple) - def test_trees(self): - mc = 30 - num_trees = 0 - for tree in self.rorepo.iter_trees('0.1.5', max_count=mc): - num_trees += 1 - assert isinstance(tree, Tree) - # END for each tree - assert num_trees == mc + def test_trees(self): + mc = 30 + num_trees = 0 + for tree in self.rorepo.iter_trees('0.1.5', max_count=mc): + num_trees += 1 + assert isinstance(tree, Tree) + # END for each tree + assert num_trees == mc - def _test_empty_repo(self, repo): - # test all kinds of things with an empty, freshly initialized repo. - # It should throw good errors - - # entries should be empty - assert len(repo.index.entries) == 0 - - # head is accessible - assert repo.head - assert repo.head.ref - assert not repo.head.is_valid() - - # we can change the head to some other ref - head_ref = Head.from_path(repo, Head.to_full_path('some_head')) - assert not head_ref.is_valid() - repo.head.ref = head_ref - - # is_dirty can handle all kwargs - for args in ((1, 0, 0), (0, 1, 0), (0, 0, 1)): - assert not repo.is_dirty(*args) - # END for each arg - - # we can add a file to the index ( if we are not bare ) - if not repo.bare: - pass - # END test repos with working tree - + def _test_empty_repo(self, repo): + # test all kinds of things with an empty, freshly initialized repo. + # It should throw good errors + + # entries should be empty + assert len(repo.index.entries) == 0 + + # head is accessible + assert repo.head + assert repo.head.ref + assert not repo.head.is_valid() + + # we can change the head to some other ref + head_ref = Head.from_path(repo, Head.to_full_path('some_head')) + assert not head_ref.is_valid() + repo.head.ref = head_ref + + # is_dirty can handle all kwargs + for args in ((1, 0, 0), (0, 1, 0), (0, 0, 1)): + assert not repo.is_dirty(*args) + # END for each arg + + # we can add a file to the index ( if we are not bare ) + if not repo.bare: + pass + # END test repos with working tree + - def test_init(self): - prev_cwd = os.getcwd() - os.chdir(tempfile.gettempdir()) - git_dir_rela = "repos/foo/bar.git" - del_dir_abs = os.path.abspath("repos") - git_dir_abs = os.path.abspath(git_dir_rela) - try: - # with specific path - for path in (git_dir_rela, git_dir_abs): - r = Repo.init(path=path, bare=True) - assert isinstance(r, Repo) - assert r.bare == True - assert os.path.isdir(r.git_dir) - - self._test_empty_repo(r) - shutil.rmtree(git_dir_abs) - # END for each path - - os.makedirs(git_dir_rela) - os.chdir(git_dir_rela) - r = Repo.init(bare=False) - r.bare == False - - self._test_empty_repo(r) - finally: - try: - shutil.rmtree(del_dir_abs) - except OSError: - pass - os.chdir(prev_cwd) - # END restore previous state - - def test_bare_property(self): - self.rorepo.bare + def test_init(self): + prev_cwd = os.getcwd() + os.chdir(tempfile.gettempdir()) + git_dir_rela = "repos/foo/bar.git" + del_dir_abs = os.path.abspath("repos") + git_dir_abs = os.path.abspath(git_dir_rela) + try: + # with specific path + for path in (git_dir_rela, git_dir_abs): + r = Repo.init(path=path, bare=True) + assert isinstance(r, Repo) + assert r.bare == True + assert os.path.isdir(r.git_dir) + + self._test_empty_repo(r) + shutil.rmtree(git_dir_abs) + # END for each path + + os.makedirs(git_dir_rela) + os.chdir(git_dir_rela) + r = Repo.init(bare=False) + r.bare == False + + self._test_empty_repo(r) + finally: + try: + shutil.rmtree(del_dir_abs) + except OSError: + pass + os.chdir(prev_cwd) + # END restore previous state + + def test_bare_property(self): + self.rorepo.bare - @patch_object(Repo, '__init__') - @patch_object(Git, '_call_process') - def test_init_with_options(self, git, repo): - git.return_value = True - repo.return_value = None + @patch_object(Repo, '__init__') + @patch_object(Git, '_call_process') + def test_init_with_options(self, git, repo): + git.return_value = True + repo.return_value = None - r = Repo.init("repos/foo/bar.git", **{'bare' : True,'template': "/baz/sweet"}) - assert isinstance(r, Repo) + r = Repo.init("repos/foo/bar.git", **{'bare' : True,'template': "/baz/sweet"}) + assert isinstance(r, Repo) - assert_true(git.called) - assert_true(repo.called) + assert_true(git.called) + assert_true(repo.called) - @patch_object(Repo, '__init__') - @patch_object(Git, '_call_process') - def test_clone(self, git, repo): - git.return_value = None - repo.return_value = None + @patch_object(Repo, '__init__') + @patch_object(Git, '_call_process') + def test_clone(self, git, repo): + git.return_value = None + repo.return_value = None - self.rorepo.clone("repos/foo/bar.git") + self.rorepo.clone("repos/foo/bar.git") - assert_true(git.called) - path = os.path.join(absolute_project_path(), '.git') - assert_equal(git.call_args, (('clone', path, 'repos/foo/bar.git'), {})) - assert_true(repo.called) + assert_true(git.called) + path = os.path.join(absolute_project_path(), '.git') + assert_equal(git.call_args, (('clone', path, 'repos/foo/bar.git'), {})) + assert_true(repo.called) - @patch_object(Repo, '__init__') - @patch_object(Git, '_call_process') - def test_clone_with_options(self, git, repo): - git.return_value = None - repo.return_value = None + @patch_object(Repo, '__init__') + @patch_object(Git, '_call_process') + def test_clone_with_options(self, git, repo): + git.return_value = None + repo.return_value = None - self.rorepo.clone("repos/foo/bar.git", **{'template': '/awesome'}) + self.rorepo.clone("repos/foo/bar.git", **{'template': '/awesome'}) - assert_true(git.called) - path = os.path.join(absolute_project_path(), '.git') - assert_equal(git.call_args, (('clone', path, 'repos/foo/bar.git'), - { 'template': '/awesome'})) - assert_true(repo.called) + assert_true(git.called) + path = os.path.join(absolute_project_path(), '.git') + assert_equal(git.call_args, (('clone', path, 'repos/foo/bar.git'), + { 'template': '/awesome'})) + assert_true(repo.called) - def test_daemon_export(self): - orig_val = self.rorepo.daemon_export - self.rorepo.daemon_export = not orig_val - assert self.rorepo.daemon_export == ( not orig_val ) - self.rorepo.daemon_export = orig_val - assert self.rorepo.daemon_export == orig_val + def test_daemon_export(self): + orig_val = self.rorepo.daemon_export + self.rorepo.daemon_export = not orig_val + assert self.rorepo.daemon_export == ( not orig_val ) + self.rorepo.daemon_export = orig_val + assert self.rorepo.daemon_export == orig_val - def test_alternates(self): - cur_alternates = self.rorepo.alternates - # empty alternates - self.rorepo.alternates = [] - assert self.rorepo.alternates == [] - alts = [ "other/location", "this/location" ] - self.rorepo.alternates = alts - assert alts == self.rorepo.alternates - self.rorepo.alternates = cur_alternates + def test_alternates(self): + cur_alternates = self.rorepo.alternates + # empty alternates + self.rorepo.alternates = [] + assert self.rorepo.alternates == [] + alts = [ "other/location", "this/location" ] + self.rorepo.alternates = alts + assert alts == self.rorepo.alternates + self.rorepo.alternates = cur_alternates - def test_repr(self): - path = os.path.join(os.path.abspath(GIT_REPO), '.git') - assert_equal('' % path, repr(self.rorepo)) + def test_repr(self): + path = os.path.join(os.path.abspath(GIT_REPO), '.git') + assert_equal('' % path, repr(self.rorepo)) - def test_is_dirty_with_bare_repository(self): - self.rorepo._bare = True - assert_false(self.rorepo.is_dirty()) + def test_is_dirty_with_bare_repository(self): + self.rorepo._bare = True + assert_false(self.rorepo.is_dirty()) - def test_is_dirty(self): - self.rorepo._bare = False - for index in (0,1): - for working_tree in (0,1): - for untracked_files in (0,1): - assert self.rorepo.is_dirty(index, working_tree, untracked_files) in (True, False) - # END untracked files - # END working tree - # END index - self.rorepo._bare = True - assert self.rorepo.is_dirty() == False + def test_is_dirty(self): + self.rorepo._bare = False + for index in (0,1): + for working_tree in (0,1): + for untracked_files in (0,1): + assert self.rorepo.is_dirty(index, working_tree, untracked_files) in (True, False) + # END untracked files + # END working tree + # END index + self.rorepo._bare = True + assert self.rorepo.is_dirty() == False - def test_head(self): - assert self.rorepo.head.reference.object == self.rorepo.active_branch.object + def test_head(self): + assert self.rorepo.head.reference.object == self.rorepo.active_branch.object - def test_index(self): - index = self.rorepo.index - assert isinstance(index, IndexFile) - - def test_tag(self): - assert self.rorepo.tag('refs/tags/0.1.5').commit - - def test_archive(self): - tmpfile = os.tmpfile() - self.rorepo.archive(tmpfile, '0.1.5') - assert tmpfile.tell() - - @patch_object(Git, '_call_process') - def test_should_display_blame_information(self, git): - git.return_value = fixture('blame') - b = self.rorepo.blame( 'master', 'lib/git.py') - assert_equal(13, len(b)) - assert_equal( 2, len(b[0]) ) - # assert_equal(25, reduce(lambda acc, x: acc + len(x[-1]), b)) - assert_equal(hash(b[0][0]), hash(b[9][0])) - c = b[0][0] - assert_true(git.called) - assert_equal(git.call_args, (('blame', 'master', '--', 'lib/git.py'), {'p': True})) - - assert_equal('634396b2f541a9f2d58b00be1a07f0c358b999b3', c.sha) - assert_equal('Tom Preston-Werner', c.author.name) - assert_equal('tom@mojombo.com', c.author.email) - assert_equal(1191997100, c.authored_date) - assert_equal('Tom Preston-Werner', c.committer.name) - assert_equal('tom@mojombo.com', c.committer.email) - assert_equal(1191997100, c.committed_date) - assert_equal('initial grit setup', c.message) - - # test the 'lines per commit' entries - tlist = b[0][1] - assert_true( tlist ) - assert_true( isinstance( tlist[0], basestring ) ) - assert_true( len( tlist ) < sum( len(t) for t in tlist ) ) # test for single-char bug - - def test_untracked_files(self): - base = self.rorepo.working_tree_dir - files = ( join_path_native(base, "__test_myfile"), - join_path_native(base, "__test_other_file") ) - num_recently_untracked = 0 - try: - for fpath in files: - fd = open(fpath,"wb") - fd.close() - # END for each filename - untracked_files = self.rorepo.untracked_files - num_recently_untracked = len(untracked_files) - - # assure we have all names - they are relative to the git-dir - num_test_untracked = 0 - for utfile in untracked_files: - num_test_untracked += join_path_native(base, utfile) in files - assert len(files) == num_test_untracked - finally: - for fpath in files: - if os.path.isfile(fpath): - os.remove(fpath) - # END handle files - - assert len(self.rorepo.untracked_files) == (num_recently_untracked - len(files)) - - def test_config_reader(self): - reader = self.rorepo.config_reader() # all config files - assert reader.read_only - reader = self.rorepo.config_reader("repository") # single config file - assert reader.read_only - - def test_config_writer(self): - for config_level in self.rorepo.config_level: - try: - writer = self.rorepo.config_writer(config_level) - assert not writer.read_only - except IOError: - # its okay not to get a writer for some configuration files if we - # have no permissions - pass - # END for each config level - - def test_creation_deletion(self): - # just a very quick test to assure it generally works. There are - # specialized cases in the test_refs module - head = self.rorepo.create_head("new_head", "HEAD~1") - self.rorepo.delete_head(head) - - tag = self.rorepo.create_tag("new_tag", "HEAD~2") - self.rorepo.delete_tag(tag) - - remote = self.rorepo.create_remote("new_remote", "git@server:repo.git") - self.rorepo.delete_remote(remote) - - def test_comparison_and_hash(self): - # this is only a preliminary test, more testing done in test_index - assert self.rorepo == self.rorepo and not (self.rorepo != self.rorepo) - assert len(set((self.rorepo, self.rorepo))) == 1 + def test_index(self): + index = self.rorepo.index + assert isinstance(index, IndexFile) + + def test_tag(self): + assert self.rorepo.tag('refs/tags/0.1.5').commit + + def test_archive(self): + tmpfile = os.tmpfile() + self.rorepo.archive(tmpfile, '0.1.5') + assert tmpfile.tell() + + @patch_object(Git, '_call_process') + def test_should_display_blame_information(self, git): + git.return_value = fixture('blame') + b = self.rorepo.blame( 'master', 'lib/git.py') + assert_equal(13, len(b)) + assert_equal( 2, len(b[0]) ) + # assert_equal(25, reduce(lambda acc, x: acc + len(x[-1]), b)) + assert_equal(hash(b[0][0]), hash(b[9][0])) + c = b[0][0] + assert_true(git.called) + assert_equal(git.call_args, (('blame', 'master', '--', 'lib/git.py'), {'p': True})) + + assert_equal('634396b2f541a9f2d58b00be1a07f0c358b999b3', c.sha) + assert_equal('Tom Preston-Werner', c.author.name) + assert_equal('tom@mojombo.com', c.author.email) + assert_equal(1191997100, c.authored_date) + assert_equal('Tom Preston-Werner', c.committer.name) + assert_equal('tom@mojombo.com', c.committer.email) + assert_equal(1191997100, c.committed_date) + assert_equal('initial grit setup', c.message) + + # test the 'lines per commit' entries + tlist = b[0][1] + assert_true( tlist ) + assert_true( isinstance( tlist[0], basestring ) ) + assert_true( len( tlist ) < sum( len(t) for t in tlist ) ) # test for single-char bug + + def test_untracked_files(self): + base = self.rorepo.working_tree_dir + files = ( join_path_native(base, "__test_myfile"), + join_path_native(base, "__test_other_file") ) + num_recently_untracked = 0 + try: + for fpath in files: + fd = open(fpath,"wb") + fd.close() + # END for each filename + untracked_files = self.rorepo.untracked_files + num_recently_untracked = len(untracked_files) + + # assure we have all names - they are relative to the git-dir + num_test_untracked = 0 + for utfile in untracked_files: + num_test_untracked += join_path_native(base, utfile) in files + assert len(files) == num_test_untracked + finally: + for fpath in files: + if os.path.isfile(fpath): + os.remove(fpath) + # END handle files + + assert len(self.rorepo.untracked_files) == (num_recently_untracked - len(files)) + + def test_config_reader(self): + reader = self.rorepo.config_reader() # all config files + assert reader.read_only + reader = self.rorepo.config_reader("repository") # single config file + assert reader.read_only + + def test_config_writer(self): + for config_level in self.rorepo.config_level: + try: + writer = self.rorepo.config_writer(config_level) + assert not writer.read_only + except IOError: + # its okay not to get a writer for some configuration files if we + # have no permissions + pass + # END for each config level + + def test_creation_deletion(self): + # just a very quick test to assure it generally works. There are + # specialized cases in the test_refs module + head = self.rorepo.create_head("new_head", "HEAD~1") + self.rorepo.delete_head(head) + + tag = self.rorepo.create_tag("new_tag", "HEAD~2") + self.rorepo.delete_tag(tag) + + remote = self.rorepo.create_remote("new_remote", "git@server:repo.git") + self.rorepo.delete_remote(remote) + + def test_comparison_and_hash(self): + # this is only a preliminary test, more testing done in test_index + assert self.rorepo == self.rorepo and not (self.rorepo != self.rorepo) + assert len(set((self.rorepo, self.rorepo))) == 1 + + def test_git_cmd(self): + # test CatFileContentStream, just to be very sure we have no fencepost errors + # last \n is the terminating newline that it expects + l1 = "0123456789\n" + l2 = "abcdefghijklmnopqrstxy\n" + l3 = "z\n" + d = "%s%s%s\n" % (l1, l2, l3) + + l1p = l1[:5] + + # full size + # size is without terminating newline + def mkfull(): + return Git.CatFileContentStream(len(d)-1, StringIO(d)) + + ts = 5 + def mktiny(): + return Git.CatFileContentStream(ts, StringIO(d)) + + # readlines no limit + s = mkfull() + lines = s.readlines() + assert len(lines) == 3 and lines[-1].endswith('\n') + assert s._stream.tell() == len(d) # must have scrubbed to the end + + # realines line limit + s = mkfull() + lines = s.readlines(5) + assert len(lines) == 1 + + # readlines on tiny sections + s = mktiny() + lines = s.readlines() + assert len(lines) == 1 and lines[0] == l1p + assert s._stream.tell() == ts+1 + + # readline no limit + s = mkfull() + assert s.readline() == l1 + assert s.readline() == l2 + assert s.readline() == l3 + assert s.readline() == '' + assert s._stream.tell() == len(d) + + # readline limit + s = mkfull() + assert s.readline(5) == l1p + assert s.readline() == l1[5:] + + # readline on tiny section + s = mktiny() + assert s.readline() == l1p + assert s.readline() == '' + assert s._stream.tell() == ts+1 + + # read no limit + s = mkfull() + assert s.read() == d[:-1] + assert s.read() == '' + assert s._stream.tell() == len(d) + + # read limit + s = mkfull() + assert s.read(5) == l1p + assert s.read(6) == l1[5:] + assert s._stream.tell() == 5 + 6 # its not yet done + + # read tiny + s = mktiny() + assert s.read(2) == l1[:2] + assert s._stream.tell() == 2 + assert s.read() == l1[2:ts] + assert s._stream.tell() == ts+1 -- cgit v1.2.1 From 282018b79cc8df078381097cb3aeb29ff56e83c6 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 2 Jun 2010 20:11:00 +0200 Subject: Added first design and frame for object database. In a first step, loose objects will be written using our utilities, and certain object retrieval functionality moves into the GitObjectDatabase which is used by the repo instance Added performance test for object database access, which shows quite respectable tree parsing performance, and okay blob access. Nonetheless, it will be hard to beat the c performance using a pure python implementation, but it can be a nice practice to write it anyway to allow more direct pack manipulations. Some could benefit from the ability to write packs as these can serve as local cache if alternates are used --- test/git/performance/test_odb.py | 61 ++++++++++++++++++++++++++++++++++++++++ test/git/test_odb.py | 12 ++++++++ 2 files changed, 73 insertions(+) create mode 100644 test/git/performance/test_odb.py create mode 100644 test/git/test_odb.py (limited to 'test') diff --git a/test/git/performance/test_odb.py b/test/git/performance/test_odb.py new file mode 100644 index 00000000..0ad2ce33 --- /dev/null +++ b/test/git/performance/test_odb.py @@ -0,0 +1,61 @@ +"""Performance tests for object store""" + +from time import time +import sys +import stat + +from lib import ( + TestBigRepoReadOnly + ) + + +class TestObjDBPerformance(TestBigRepoReadOnly): + + def test_random_access(self): + + # GET COMMITS + # TODO: use the actual db for this + st = time() + root_commit = self.gitrepo.commit(self.head_sha_2k) + commits = list(root_commit.traverse()) + nc = len(commits) + elapsed = time() - st + + print >> sys.stderr, "Retrieved %i commits from ObjectStore in %g s ( %f commits / s )" % (nc, elapsed, nc / elapsed) + + + # GET TREES + # walk all trees of all commits + st = time() + blobs_per_commit = list() + nt = 0 + for commit in commits: + tree = commit.tree + blobs = list() + for item in tree.traverse(): + nt += 1 + if item.type == 'blob': + blobs.append(item) + # direct access for speed + # END while trees are there for walking + blobs_per_commit.append(blobs) + # END for each commit + elapsed = time() - st + + print >> sys.stderr, "Retrieved %i objects from %i commits in %g s ( %f objects / s )" % (nt, len(commits), elapsed, nt / elapsed) + + # GET BLOBS + st = time() + nb = 0 + too_many = 15000 + for blob_list in blobs_per_commit: + for blob in blob_list: + blob.data + # END for each blobsha + nb += len(blob_list) + if nb > too_many: + break + # END for each bloblist + elapsed = time() - st + + print >> sys.stderr, "Retrieved %i blob and their data in %g s ( %f blobs / s )" % (nb, elapsed, nb / elapsed) diff --git a/test/git/test_odb.py b/test/git/test_odb.py new file mode 100644 index 00000000..6f92a5c1 --- /dev/null +++ b/test/git/test_odb.py @@ -0,0 +1,12 @@ +"""Test for object db""" + +from test.testlib import * +from git.odb.db import * + + +class TestDB(TestBase): + """Test the different db class implementations""" + + def test_loose_db(self): + self.fail("todo") + -- cgit v1.2.1 From 8b86f9b399a8f5af792a04025fdeefc02883f3e5 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 2 Jun 2010 22:40:52 +0200 Subject: initial version of loose object writing and simple cached object lookup appears to be working --- test/git/test_odb.py | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/test/git/test_odb.py b/test/git/test_odb.py index 6f92a5c1..bc92a493 100644 --- a/test/git/test_odb.py +++ b/test/git/test_odb.py @@ -2,11 +2,38 @@ from test.testlib import * from git.odb.db import * +from git import Blob + +from cStringIO import StringIO +import os class TestDB(TestBase): """Test the different db class implementations""" - def test_loose_db(self): - self.fail("todo") + # data + two_lines = "1234\nhello world" + + all_data = (two_lines, ) + + def _assert_object_writing(self, db): + """General tests to verify object writing, compatible to iObjectDBW + :note: requires write access to the database""" + # start in dry-run mode + for dry_run in range(1, -1, -1): + for data in self.all_data: + for hex_sha in range(2): + sha = db.to_object(Blob.type, len(data), StringIO(data), dry_run, hex_sha) + assert db.has_object(sha) != dry_run + assert len(sha) == 20 + hex_sha * 20 + # END for each sha type + # END for each data set + # END for each dry_run mode + + @with_bare_rw_repo + def test_writing(self, rwrepo): + ldb = LooseObjectDB(os.path.join(rwrepo.git_dir, 'objects')) + + # write data + self._assert_object_writing(ldb) -- cgit v1.2.1 From 6f8ce8901e21587cd2320562df412e05b5ab1731 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 2 Jun 2010 23:53:29 +0200 Subject: added frame for object reading, including simple test --- test/git/test_odb.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'test') diff --git a/test/git/test_odb.py b/test/git/test_odb.py index bc92a493..b2840719 100644 --- a/test/git/test_odb.py +++ b/test/git/test_odb.py @@ -3,6 +3,7 @@ from test.testlib import * from git.odb.db import * from git import Blob +from git.errors import BadObject from cStringIO import StringIO import os @@ -26,6 +27,18 @@ class TestDB(TestBase): sha = db.to_object(Blob.type, len(data), StringIO(data), dry_run, hex_sha) assert db.has_object(sha) != dry_run assert len(sha) == 20 + hex_sha * 20 + + # verify data - the slow way, we want to run code + if not dry_run: + type, size = db.object_info(sha) + assert Blob.type == type + assert size == len(data) + + type, size, stream = db.object(sha) + assert stream.read() == data + else: + self.failUnlessRaises(BadObject, db.object_info, sha) + self.failUnlessRaises(BadObject, db.object, sha) # END for each sha type # END for each data set # END for each dry_run mode -- cgit v1.2.1 From 38d59fc8ccccae8882fa48671377bf40a27915a7 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 3 Jun 2010 16:35:35 +0200 Subject: odb: implemented loose object streaming, which is impossible to do efficiently considering that it copies string buffers all the time --- test/git/performance/test_utils.py | 44 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 test/git/performance/test_utils.py (limited to 'test') diff --git a/test/git/performance/test_utils.py b/test/git/performance/test_utils.py new file mode 100644 index 00000000..381d7c8b --- /dev/null +++ b/test/git/performance/test_utils.py @@ -0,0 +1,44 @@ +"""Performance of utilities""" +from time import time +import sys +import stat + +from lib import ( + TestBigRepoReadOnly + ) + + +class TestUtilPerformance(TestBigRepoReadOnly): + + def test_access(self): + # compare dict vs. slot access + class Slotty(object): + __slots__ = "attr" + def __init__(self): + self.attr = 1 + + class Dicty(object): + def __init__(self): + self.attr = 1 + + class BigSlotty(object): + __slots__ = ('attr', ) + tuple('abcdefghijk') + def __init__(self): + for attr in self.__slots__: + setattr(self, attr, 1) + + class BigDicty(object): + def __init__(self): + for attr in BigSlotty.__slots__: + setattr(self, attr, 1) + + ni = 1000000 + for cls in (Slotty, Dicty, BigSlotty, BigDicty): + cli = cls() + st = time() + for i in xrange(ni): + cli.attr + # END for each access + elapsed = time() - st + print >> sys.stderr, "Accessed %s.attr %i times in %s s ( %f acc / s)" % (cls.__name__, ni, elapsed, ni / elapsed) + # END for each class type -- cgit v1.2.1 From 26e138cb47dccc859ff219f108ce9b7d96cbcbcd Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 3 Jun 2010 18:21:05 +0200 Subject: odb: fixed streamed decompression reader ( specific tests would still be missing ) and added performance tests which are extremely promising --- test/git/performance/test_streams.py | 91 ++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 test/git/performance/test_streams.py (limited to 'test') diff --git a/test/git/performance/test_streams.py b/test/git/performance/test_streams.py new file mode 100644 index 00000000..15924c08 --- /dev/null +++ b/test/git/performance/test_streams.py @@ -0,0 +1,91 @@ +"""Performance data streaming performance""" + +from test.testlib import * +from git.odb.db import * + +from array import array +from cStringIO import StringIO +from time import time +import os +import sys +import stat +import random + + +from lib import ( + TestBigRepoReadOnly + ) + + + +def make_memory_file(size_in_bytes, randomize=False): + """:return: tuple(size_of_stream, stream) + :param randomize: try to produce a very random stream""" + actual_size = size_in_bytes / 4 + producer = xrange(actual_size) + if randomize: + producer = list(producer) + random.shuffle(producer) + # END randomize + a = array('i', producer) + return actual_size*4, StringIO(a.tostring()) + + +class TestObjDBPerformance(TestBigRepoReadOnly): + + large_data_size_bytes = 1000*1000*10 # some MiB should do it + moderate_data_size_bytes = 1000*1000*1 # just 1 MiB + + @with_bare_rw_repo + def test_large_data_streaming(self, rwrepo): + ldb = LooseObjectDB(os.path.join(rwrepo.git_dir, 'objects')) + + for randomize in range(2): + desc = (randomize and 'random ') or '' + print >> sys.stderr, "Creating %s data ..." % desc + st = time() + size, stream = make_memory_file(self.large_data_size_bytes, randomize) + elapsed = time() - st + print >> sys.stderr, "Done (in %f s)" % elapsed + + # writing - due to the compression it will seem faster than it is + st = time() + sha = ldb.to_object('blob', size, stream) + elapsed = time() - st + assert ldb.has_object(sha) + fsize_kib = os.path.getsize(ldb.readable_db_object_path(sha)) / 1000 + + + size_kib = size / 1000 + print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, elapsed, size_kib / elapsed) + + # reading all at once + st = time() + type, size, shastream = ldb.object(sha) + shadata = shastream.read() + elapsed = time() - st + + stream.seek(0) + assert shadata == stream.getvalue() + print >> sys.stderr, "Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, elapsed, size_kib / elapsed) + + + # reading in chunks of 1 MiB + cs = 512*1000 + chunks = list() + st = time() + type, size, shastream = ldb.object(sha) + while True: + data = shastream.read(cs) + chunks.append(data) + if len(data) < cs: + break + # END read in chunks + elapsed = time() - st + + stream.seek(0) + assert ''.join(chunks) == stream.getvalue() + + cs_kib = cs / 1000 + print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, elapsed, size_kib / elapsed) + # END for each randomization factor -- cgit v1.2.1 From 4b4a514e51fbc7dc6ddcb27c188159d57b5d1fa9 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 3 Jun 2010 19:04:18 +0200 Subject: Added performance comparison to cgit ... and yes, git-python is faster :) --- test/git/performance/test_streams.py | 67 ++++++++++++++++++++++++++++++++---- 1 file changed, 60 insertions(+), 7 deletions(-) (limited to 'test') diff --git a/test/git/performance/test_streams.py b/test/git/performance/test_streams.py index 15924c08..6c2834b3 100644 --- a/test/git/performance/test_streams.py +++ b/test/git/performance/test_streams.py @@ -10,6 +10,7 @@ import os import sys import stat import random +import subprocess from lib import ( @@ -51,23 +52,24 @@ class TestObjDBPerformance(TestBigRepoReadOnly): # writing - due to the compression it will seem faster than it is st = time() sha = ldb.to_object('blob', size, stream) - elapsed = time() - st + elapsed_add = time() - st assert ldb.has_object(sha) - fsize_kib = os.path.getsize(ldb.readable_db_object_path(sha)) / 1000 + db_file = ldb.readable_db_object_path(sha) + fsize_kib = os.path.getsize(db_file) / 1000 size_kib = size / 1000 - print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, elapsed, size_kib / elapsed) + print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add) # reading all at once st = time() type, size, shastream = ldb.object(sha) shadata = shastream.read() - elapsed = time() - st + elapsed_readall = time() - st stream.seek(0) assert shadata == stream.getvalue() - print >> sys.stderr, "Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, elapsed, size_kib / elapsed) + print >> sys.stderr, "Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, elapsed_readall, size_kib / elapsed_readall) # reading in chunks of 1 MiB @@ -81,11 +83,62 @@ class TestObjDBPerformance(TestBigRepoReadOnly): if len(data) < cs: break # END read in chunks - elapsed = time() - st + elapsed_readchunks = time() - st stream.seek(0) assert ''.join(chunks) == stream.getvalue() cs_kib = cs / 1000 - print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, elapsed, size_kib / elapsed) + print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks) + + # del db file so git has something to do + os.remove(db_file) + + # VS. CGIT + ########## + # CGIT ! Can using the cgit programs be faster ? + proc = rwrepo.git.hash_object('-w', '--stdin', as_process=True, istream=subprocess.PIPE) + + # write file - pump everything in at once to be a fast as possible + data = stream.getvalue() # cache it + st = time() + proc.stdin.write(data) + proc.stdin.close() + gitsha = proc.stdout.read().strip() + proc.wait() + gelapsed_add = time() - st + del(data) + assert gitsha == sha # we do it the same way, right ? + + # as its the same sha, we reuse our path + fsize_kib = os.path.getsize(db_file) / 1000 + print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data to using git-hash-object in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, gelapsed_add, size_kib / gelapsed_add) + + # compare ... + print >> sys.stderr, "Git-Python is %f %% faster than git when adding big %s files" % (100.0 - (elapsed_add / gelapsed_add) * 100, desc) + + + # read all + st = time() + s, t, size, data = rwrepo.git.get_object_data(gitsha) + gelapsed_readall = time() - st + print >> sys.stderr, "Read %i KiB of %s data at once using git-cat-file in %f s ( %f Read KiB / s)" % (size_kib, desc, gelapsed_readall, size_kib / gelapsed_readall) + + # compare + print >> sys.stderr, "Git-Python is %f %% faster than git when reading big %sfiles" % (100.0 - (elapsed_readall / gelapsed_readall) * 100, desc) + + + # read chunks + st = time() + s, t, size, stream = rwrepo.git.stream_object_data(gitsha) + while True: + data = stream.read(cs) + if len(data) < cs: + break + # END read stream + gelapsed_readchunks = time() - st + print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from git-cat-file in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, gelapsed_readchunks, size_kib / gelapsed_readchunks) + + # compare + print >> sys.stderr, "Git-Python is %f %% faster than git when reading big %s files in chunks" % (100.0 - (elapsed_readchunks / gelapsed_readchunks) * 100, desc) # END for each randomization factor -- cgit v1.2.1 From 1e2b46138ba58033738a24dadccc265748fce2ca Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 3 Jun 2010 23:20:34 +0200 Subject: commit.create_from_tree now uses pure python implementation, fixed message parsing which truncated newlines although it was ilegitimate. Its up to the reader to truncate therse, nowhere in the git code I could find anyone adding newlines to commits where it is written Added performance tests for serialization, it does about 5k commits per second if writing to tmpfs --- test/git/performance/lib.py | 25 +++++++++++++-- test/git/performance/test_commit.py | 36 ++++++++++++++++++++-- test/git/performance/test_odb.py | 6 ++-- test/git/performance/test_streams.py | 4 +-- test/git/performance/test_utils.py | 4 +-- test/git/test_commit.py | 59 ++++++++++++++++++++++++++++++++++-- test/git/test_utils.py | 9 ++++-- 7 files changed, 125 insertions(+), 18 deletions(-) (limited to 'test') diff --git a/test/git/performance/lib.py b/test/git/performance/lib.py index 4b552b20..650bea82 100644 --- a/test/git/performance/lib.py +++ b/test/git/performance/lib.py @@ -1,6 +1,8 @@ """Contains library functions""" import os from test.testlib import * +import shutil +import tempfile from git import ( Repo @@ -25,7 +27,7 @@ def resolve_or_fail(env_var): #{ Base Classes -class TestBigRepoReadOnly(TestBase): +class TestBigRepoR(TestBase): """TestCase providing access to readonly 'big' repositories using the following member variables: @@ -40,7 +42,24 @@ class TestBigRepoReadOnly(TestBase): @classmethod def setUpAll(cls): - super(TestBigRepoReadOnly, cls).setUpAll() - cls.gitrepo = Repo(resolve_or_fail(k_env_git_repo)) + super(TestBigRepoR, cls).setUpAll() + cls.gitrorepo = Repo(resolve_or_fail(k_env_git_repo)) + +class TestBigRepoRW(TestBigRepoR): + """As above, but provides a big repository that we can write to. + + Provides ``self.gitrwrepo``""" + + @classmethod + def setUpAll(cls): + super(TestBigRepoRW, cls).setUpAll() + dirname = tempfile.mktemp() + os.mkdir(dirname) + cls.gitrwrepo = cls.gitrorepo.clone(dirname, shared=True, bare=True) + + @classmethod + def tearDownAll(cls): + shutil.rmtree(cls.gitrwrepo.working_tree_dir) + #} END base classes diff --git a/test/git/performance/test_commit.py b/test/git/performance/test_commit.py index b4a9d868..2398c93d 100644 --- a/test/git/performance/test_commit.py +++ b/test/git/performance/test_commit.py @@ -6,10 +6,12 @@ from lib import * from git import * +from test.git.test_commit import assert_commit_serialization +from cStringIO import StringIO from time import time import sys -class TestPerformance(TestBigRepoReadOnly): +class TestPerformance(TestBigRepoRW): # ref with about 100 commits in its history ref_100 = '0.1.6' @@ -48,7 +50,7 @@ class TestPerformance(TestBigRepoReadOnly): # bound to cat-file parsing performance nc = 0 st = time() - for c in self.gitrepo.commit(self.head_sha_2k).traverse(branch_first=False): + for c in self.gitrorepo.commit(self.head_sha_2k).traverse(branch_first=False): nc += 1 self._query_commit_info(c) # END for each traversed commit @@ -59,10 +61,38 @@ class TestPerformance(TestBigRepoReadOnly): # bound to stream parsing performance nc = 0 st = time() - for c in Commit.iter_items(self.gitrepo, self.head_sha_2k): + for c in Commit.iter_items(self.gitrorepo, self.head_sha_2k): nc += 1 self._query_commit_info(c) # END for each traversed commit elapsed_time = time() - st print >> sys.stderr, "Iterated %i Commits in %s [s] ( %f commits/s )" % (nc, elapsed_time, nc/elapsed_time) + def test_commit_serialization(self): + assert_commit_serialization(self.gitrwrepo, self.head_sha_2k, True) + + rwrepo = self.gitrwrepo + make_object = rwrepo.odb.to_object + # direct serialization - deserialization can be tested afterwards + # serialization is probably limited on IO + hc = rwrepo.commit(self.head_sha_2k) + + commits = list() + nc = 5000 + st = time() + for i in xrange(nc): + cm = Commit( rwrepo, Commit.NULL_HEX_SHA, hc.tree, + hc.author, hc.authored_date, hc.author_tz_offset, + hc.committer, hc.committed_date, hc.committer_tz_offset, + str(i), parents=hc.parents, encoding=hc.encoding) + + stream = StringIO() + cm._serialize(stream) + slen = stream.tell() + stream.seek(0) + + cm.sha = make_object(Commit.type, slen, stream) + # END commit creation + elapsed = time() - st + + print >> sys.stderr, "Serialized %i commits to loose objects in %f s ( %f commits / s )" % (nc, elapsed, nc / elapsed) diff --git a/test/git/performance/test_odb.py b/test/git/performance/test_odb.py index 0ad2ce33..7b1ee838 100644 --- a/test/git/performance/test_odb.py +++ b/test/git/performance/test_odb.py @@ -5,18 +5,18 @@ import sys import stat from lib import ( - TestBigRepoReadOnly + TestBigRepoR ) -class TestObjDBPerformance(TestBigRepoReadOnly): +class TestObjDBPerformance(TestBigRepoR): def test_random_access(self): # GET COMMITS # TODO: use the actual db for this st = time() - root_commit = self.gitrepo.commit(self.head_sha_2k) + root_commit = self.gitrorepo.commit(self.head_sha_2k) commits = list(root_commit.traverse()) nc = len(commits) elapsed = time() - st diff --git a/test/git/performance/test_streams.py b/test/git/performance/test_streams.py index 6c2834b3..d31bee14 100644 --- a/test/git/performance/test_streams.py +++ b/test/git/performance/test_streams.py @@ -14,7 +14,7 @@ import subprocess from lib import ( - TestBigRepoReadOnly + TestBigRepoR ) @@ -32,7 +32,7 @@ def make_memory_file(size_in_bytes, randomize=False): return actual_size*4, StringIO(a.tostring()) -class TestObjDBPerformance(TestBigRepoReadOnly): +class TestObjDBPerformance(TestBigRepoR): large_data_size_bytes = 1000*1000*10 # some MiB should do it moderate_data_size_bytes = 1000*1000*1 # just 1 MiB diff --git a/test/git/performance/test_utils.py b/test/git/performance/test_utils.py index 381d7c8b..47366d34 100644 --- a/test/git/performance/test_utils.py +++ b/test/git/performance/test_utils.py @@ -4,11 +4,11 @@ import sys import stat from lib import ( - TestBigRepoReadOnly + TestBigRepoR ) -class TestUtilPerformance(TestBigRepoReadOnly): +class TestUtilPerformance(TestBigRepoR): def test_access(self): # compare dict vs. slot access diff --git a/test/git/test_commit.py b/test/git/test_commit.py index ad7a0082..a5f184e6 100644 --- a/test/git/test_commit.py +++ b/test/git/test_commit.py @@ -7,6 +7,56 @@ from test.testlib import * from git import * +from cStringIO import StringIO +import time +import sys + + +def assert_commit_serialization(rwrepo, commit_id, print_performance_info=False): + """traverse all commits in the history of commit identified by commit_id and check + if the serialization works. + :param print_performance_info: if True, we will show how fast we are""" + ns = 0 # num serializations + nds = 0 # num deserializations + + st = time.time() + for cm in rwrepo.commit(commit_id).traverse(): + nds += 1 + + # assert that we deserialize commits correctly, hence we get the same + # sha on serialization + stream = StringIO() + cm._serialize(stream) + ns += 1 + streamlen = stream.tell() + stream.seek(0) + + csha = rwrepo.odb.to_object(Commit.type, streamlen, stream) + assert csha == cm.sha + + nc = Commit(rwrepo, Commit.NULL_HEX_SHA, cm.tree.sha, + cm.author, cm.authored_date, cm.author_tz_offset, + cm.committer, cm.committed_date, cm.committer_tz_offset, + cm.message, cm.parents, cm.encoding) + + assert nc.parents == cm.parents + stream = StringIO() + nc._serialize(stream) + ns += 1 + streamlen = stream.tell() + stream.seek(0) + nc.sha = rwrepo.odb.to_object(Commit.type, streamlen, stream) + + # if it worked, we have exactly the same contents ! + assert nc.sha == cm.sha + # END check commits + elapsed = time.time() - st + + if print_performance_info: + print >> sys.stderr, "Serialized %i and deserialized %i commits in %f s ( (%f, %f) commits / s" % (ns, nds, elapsed, ns/elapsed, nds/elapsed) + # END handle performance info + + class TestCommit(TestBase): def test_bake(self): @@ -19,7 +69,7 @@ class TestCommit(TestBase): assert commit.author == commit.committer assert isinstance(commit.authored_date, int) and isinstance(commit.committed_date, int) assert isinstance(commit.author_tz_offset, int) and isinstance(commit.committer_tz_offset, int) - assert commit.message == "Added missing information to docstrings of commit and stats module" + assert commit.message == "Added missing information to docstrings of commit and stats module\n" def test_stats(self): @@ -49,7 +99,7 @@ class TestCommit(TestBase): assert commit.committed_date == 1210193388 assert commit.author_tz_offset == 14400, commit.author_tz_offset assert commit.committer_tz_offset == 14400, commit.committer_tz_offset - assert commit.message == "initial project" + assert commit.message == "initial project\n" def test_traversal(self): start = self.rorepo.commit("a4d06724202afccd2b5c54f81bcf2bf26dea7fff") @@ -171,3 +221,8 @@ class TestCommit(TestBase): name_rev = self.rorepo.head.commit.name_rev assert isinstance(name_rev, basestring) + @with_bare_rw_repo + def test_serialization(self, rwrepo): + # create all commits of our repo + assert_commit_serialization(rwrepo, '0.1.6') + diff --git a/test/git/test_utils.py b/test/git/test_utils.py index 2c3c392b..83ef7e4b 100644 --- a/test/git/test_utils.py +++ b/test/git/test_utils.py @@ -116,8 +116,6 @@ class TestUtils(TestCase): os.remove(my_file) # END final cleanup - - def test_user_id(self): assert '@' in get_user_id() @@ -127,7 +125,12 @@ class TestUtils(TestCase): assert len(rval) == 2 assert isinstance(rval[0], int) and isinstance(rval[1], int) assert rval[0] == veri_time - assert rval[1] == offset + assert rval[1] == offset + + # now that we are here, test our conversion functions as well + utctz = altz_to_utctz_str(offset) + assert isinstance(utctz, basestring) + assert utctz_to_altz(verify_utctz(utctz)) == offset # END assert rval utility rfc = ("Thu, 07 Apr 2005 22:13:11 +0000", 0) -- cgit v1.2.1 From 1906ee4df9ae4e734288c5203cf79894dff76cab Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 3 Jun 2010 23:27:09 +0200 Subject: Fixed compatability issues with python 2.5, made sure all tests run --- test/git/performance/lib.py | 2 +- test/git/test_repo.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/test/git/performance/lib.py b/test/git/performance/lib.py index 650bea82..7d2a9f4a 100644 --- a/test/git/performance/lib.py +++ b/test/git/performance/lib.py @@ -60,6 +60,6 @@ class TestBigRepoRW(TestBigRepoR): @classmethod def tearDownAll(cls): - shutil.rmtree(cls.gitrwrepo.working_tree_dir) + shutil.rmtree(cls.gitrwrepo.working_dir) #} END base classes diff --git a/test/git/test_repo.py b/test/git/test_repo.py index 7dc645b6..d2c7c742 100644 --- a/test/git/test_repo.py +++ b/test/git/test_repo.py @@ -73,7 +73,7 @@ class TestRepo(TestBase): assert_equal("mtrier@gmail.com", c.committer.email) assert_equal(1232829715, c.committed_date) assert_equal(5*3600, c.committer_tz_offset) - assert_equal("Bumped version 0.1.6", c.message) + assert_equal("Bumped version 0.1.6\n", c.message) c = commits[1] assert isinstance(c.parents, tuple) -- cgit v1.2.1 From a1e80445ad5cb6da4c0070d7cb8af89da3b0803b Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 4 Jun 2010 14:41:15 +0200 Subject: initial version of new odb design to facilitate a channel based multi-threading implementation of all odb functions --- test/git/performance/test_commit.py | 2 +- test/git/performance/test_streams.py | 6 +++--- test/git/test_commit.py | 4 ++-- test/git/test_odb.py | 10 +++++----- 4 files changed, 11 insertions(+), 11 deletions(-) (limited to 'test') diff --git a/test/git/performance/test_commit.py b/test/git/performance/test_commit.py index 2398c93d..bca3ad8b 100644 --- a/test/git/performance/test_commit.py +++ b/test/git/performance/test_commit.py @@ -72,7 +72,7 @@ class TestPerformance(TestBigRepoRW): assert_commit_serialization(self.gitrwrepo, self.head_sha_2k, True) rwrepo = self.gitrwrepo - make_object = rwrepo.odb.to_object + make_object = rwrepo.odb.store # direct serialization - deserialization can be tested afterwards # serialization is probably limited on IO hc = rwrepo.commit(self.head_sha_2k) diff --git a/test/git/performance/test_streams.py b/test/git/performance/test_streams.py index d31bee14..30fd8048 100644 --- a/test/git/performance/test_streams.py +++ b/test/git/performance/test_streams.py @@ -51,7 +51,7 @@ class TestObjDBPerformance(TestBigRepoR): # writing - due to the compression it will seem faster than it is st = time() - sha = ldb.to_object('blob', size, stream) + sha = ldb.store('blob', size, stream) elapsed_add = time() - st assert ldb.has_object(sha) db_file = ldb.readable_db_object_path(sha) @@ -63,7 +63,7 @@ class TestObjDBPerformance(TestBigRepoR): # reading all at once st = time() - type, size, shastream = ldb.object(sha) + type, size, shastream = ldbstreamsha) shadata = shastream.read() elapsed_readall = time() - st @@ -76,7 +76,7 @@ class TestObjDBPerformance(TestBigRepoR): cs = 512*1000 chunks = list() st = time() - type, size, shastream = ldb.object(sha) + type, size, shastream = ldbstreamsha) while True: data = shastream.read(cs) chunks.append(data) diff --git a/test/git/test_commit.py b/test/git/test_commit.py index a5f184e6..e914b9a7 100644 --- a/test/git/test_commit.py +++ b/test/git/test_commit.py @@ -31,7 +31,7 @@ def assert_commit_serialization(rwrepo, commit_id, print_performance_info=False) streamlen = stream.tell() stream.seek(0) - csha = rwrepo.odb.to_object(Commit.type, streamlen, stream) + csha = rwrepo.odb.store(Commit.type, streamlen, stream) assert csha == cm.sha nc = Commit(rwrepo, Commit.NULL_HEX_SHA, cm.tree.sha, @@ -45,7 +45,7 @@ def assert_commit_serialization(rwrepo, commit_id, print_performance_info=False) ns += 1 streamlen = stream.tell() stream.seek(0) - nc.sha = rwrepo.odb.to_object(Commit.type, streamlen, stream) + nc.sha = rwrepo.odb.store(Commit.type, streamlen, stream) # if it worked, we have exactly the same contents ! assert nc.sha == cm.sha diff --git a/test/git/test_odb.py b/test/git/test_odb.py index b2840719..80597df6 100644 --- a/test/git/test_odb.py +++ b/test/git/test_odb.py @@ -18,26 +18,26 @@ class TestDB(TestBase): all_data = (two_lines, ) def _assert_object_writing(self, db): - """General tests to verify object writing, compatible to iObjectDBW + """General tests to verify object writing, compatible to ObjectDBW :note: requires write access to the database""" # start in dry-run mode for dry_run in range(1, -1, -1): for data in self.all_data: for hex_sha in range(2): - sha = db.to_object(Blob.type, len(data), StringIO(data), dry_run, hex_sha) + sha = db.store(Blob.type, len(data), StringIO(data), dry_run, hex_sha) assert db.has_object(sha) != dry_run assert len(sha) == 20 + hex_sha * 20 # verify data - the slow way, we want to run code if not dry_run: - type, size = db.object_info(sha) + type, size = db.info(sha) assert Blob.type == type assert size == len(data) - type, size, stream = db.object(sha) + type, size, stream = dbstreamsha) assert stream.read() == data else: - self.failUnlessRaises(BadObject, db.object_info, sha) + self.failUnlessRaises(BadObject, db.info, sha) self.failUnlessRaises(BadObject, db.object, sha) # END for each sha type # END for each data set -- cgit v1.2.1 From e746f96bcc29238b79118123028ca170adc4ff0f Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 4 Jun 2010 17:22:08 +0200 Subject: Fixed implementation after design change to deal with it - all tests run, but next there will have to be more through testing --- test/git/performance/test_commit.py | 2 +- test/git/performance/test_streams.py | 12 ++++----- test/git/performance/test_utils.py | 15 +++++++++++ test/git/test_commit.py | 12 ++++++--- test/git/test_odb.py | 52 +++++++++++++++++++++++------------- 5 files changed, 64 insertions(+), 29 deletions(-) (limited to 'test') diff --git a/test/git/performance/test_commit.py b/test/git/performance/test_commit.py index bca3ad8b..0571d0d9 100644 --- a/test/git/performance/test_commit.py +++ b/test/git/performance/test_commit.py @@ -91,7 +91,7 @@ class TestPerformance(TestBigRepoRW): slen = stream.tell() stream.seek(0) - cm.sha = make_object(Commit.type, slen, stream) + cm.sha = make_object(IStream(Commit.type, slen, stream)).sha # END commit creation elapsed = time() - st diff --git a/test/git/performance/test_streams.py b/test/git/performance/test_streams.py index 30fd8048..01ec9fc4 100644 --- a/test/git/performance/test_streams.py +++ b/test/git/performance/test_streams.py @@ -1,7 +1,7 @@ """Performance data streaming performance""" from test.testlib import * -from git.odb.db import * +from git.odb import * from array import array from cStringIO import StringIO @@ -51,7 +51,7 @@ class TestObjDBPerformance(TestBigRepoR): # writing - due to the compression it will seem faster than it is st = time() - sha = ldb.store('blob', size, stream) + sha = ldb.store(IStream('blob', size, stream)).sha elapsed_add = time() - st assert ldb.has_object(sha) db_file = ldb.readable_db_object_path(sha) @@ -63,8 +63,8 @@ class TestObjDBPerformance(TestBigRepoR): # reading all at once st = time() - type, size, shastream = ldbstreamsha) - shadata = shastream.read() + ostream = ldb.stream(sha) + shadata = ostream.read() elapsed_readall = time() - st stream.seek(0) @@ -76,9 +76,9 @@ class TestObjDBPerformance(TestBigRepoR): cs = 512*1000 chunks = list() st = time() - type, size, shastream = ldbstreamsha) + ostream = ldb.stream(sha) while True: - data = shastream.read(cs) + data = ostream.read(cs) chunks.append(data) if len(data) < cs: break diff --git a/test/git/performance/test_utils.py b/test/git/performance/test_utils.py index 47366d34..76adffec 100644 --- a/test/git/performance/test_utils.py +++ b/test/git/performance/test_utils.py @@ -42,3 +42,18 @@ class TestUtilPerformance(TestBigRepoR): elapsed = time() - st print >> sys.stderr, "Accessed %s.attr %i times in %s s ( %f acc / s)" % (cls.__name__, ni, elapsed, ni / elapsed) # END for each class type + + # check num of sequence-acceses + for cls in (list, tuple): + x = 10 + st = time() + s = cls(range(x)) + for i in xrange(ni): + s[0] + s[1] + s[2] + # END for + elapsed = time() - st + na = ni * 3 + print >> sys.stderr, "Accessed %s[x] %i times in %s s ( %f acc / s)" % (cls.__name__, na, elapsed, na / elapsed) + # END for each sequence diff --git a/test/git/test_commit.py b/test/git/test_commit.py index e914b9a7..e65e2e59 100644 --- a/test/git/test_commit.py +++ b/test/git/test_commit.py @@ -6,6 +6,7 @@ from test.testlib import * from git import * +from git.odb import IStream from cStringIO import StringIO import time @@ -31,8 +32,8 @@ def assert_commit_serialization(rwrepo, commit_id, print_performance_info=False) streamlen = stream.tell() stream.seek(0) - csha = rwrepo.odb.store(Commit.type, streamlen, stream) - assert csha == cm.sha + istream = rwrepo.odb.store(IStream(Commit.type, streamlen, stream)) + assert istream.sha == cm.sha nc = Commit(rwrepo, Commit.NULL_HEX_SHA, cm.tree.sha, cm.author, cm.authored_date, cm.author_tz_offset, @@ -45,7 +46,12 @@ def assert_commit_serialization(rwrepo, commit_id, print_performance_info=False) ns += 1 streamlen = stream.tell() stream.seek(0) - nc.sha = rwrepo.odb.store(Commit.type, streamlen, stream) + + # reuse istream + istream.size = streamlen + istream.stream = stream + istream.sha = None + nc.sha = rwrepo.odb.store(istream).sha # if it worked, we have exactly the same contents ! assert nc.sha == cm.sha diff --git a/test/git/test_odb.py b/test/git/test_odb.py index 80597df6..c3a03714 100644 --- a/test/git/test_odb.py +++ b/test/git/test_odb.py @@ -1,7 +1,8 @@ """Test for object db""" from test.testlib import * -from git.odb.db import * +from git.odb import * +from git.odb.stream import Sha1Writer from git import Blob from git.errors import BadObject @@ -20,26 +21,39 @@ class TestDB(TestBase): def _assert_object_writing(self, db): """General tests to verify object writing, compatible to ObjectDBW :note: requires write access to the database""" - # start in dry-run mode - for dry_run in range(1, -1, -1): + # start in 'dry-run' mode, using a simple sha1 writer + ostreams = (Sha1Writer, None) + for ostreamcls in ostreams: for data in self.all_data: - for hex_sha in range(2): - sha = db.store(Blob.type, len(data), StringIO(data), dry_run, hex_sha) - assert db.has_object(sha) != dry_run - assert len(sha) == 20 + hex_sha * 20 + dry_run = ostreamcls is not None + ostream = None + if ostreamcls is not None: + ostream = ostreamcls() + # END create ostream + + prev_ostream = db.set_ostream(ostream) + assert type(prev_ostream) in ostreams or prev_ostream in ostreams + + istream = IStream(Blob.type, len(data), StringIO(data)) + my_istream = db.store(istream) + sha = istream.sha + assert my_istream is istream + assert db.has_object(sha) != dry_run + assert len(sha) == 40 # for now we require 40 byte shas as default + + # verify data - the slow way, we want to run code + if not dry_run: + info = db.info(sha) + assert Blob.type == info.type + assert info.size == len(data) - # verify data - the slow way, we want to run code - if not dry_run: - type, size = db.info(sha) - assert Blob.type == type - assert size == len(data) - - type, size, stream = dbstreamsha) - assert stream.read() == data - else: - self.failUnlessRaises(BadObject, db.info, sha) - self.failUnlessRaises(BadObject, db.object, sha) - # END for each sha type + ostream = db.stream(sha) + assert ostream.read() == data + assert ostream.type == Blob.type + assert ostream.size == len(data) + else: + self.failUnlessRaises(BadObject, db.info, sha) + self.failUnlessRaises(BadObject, db.stream, sha) # END for each data set # END for each dry_run mode -- cgit v1.2.1