From 8c1a87d11df666d308d14e4ae7ee0e9d614296b6 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 2 Jun 2010 12:30:33 +0200 Subject: commit: refactored existing code to decode commits from streams - performance is slightly better git.cmd: added method to provide access to the content stream directly. This is more efficient if large objects are handled, if it is actually used test.helpers: removed unnecessary code --- test/git/test_commit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test/git/test_commit.py') diff --git a/test/git/test_commit.py b/test/git/test_commit.py index 48937c93..28b407ac 100644 --- a/test/git/test_commit.py +++ b/test/git/test_commit.py @@ -129,7 +129,7 @@ class TestCommit(TestBase): bisect_all=True) assert_true(git.called) - commits = Commit._iter_from_process_or_stream(self.rorepo, ListProcessAdapter(revs), True) + commits = Commit._iter_from_process_or_stream(self.rorepo, StringProcessAdapter(revs), True) expected_ids = ( 'cf37099ea8d1d8c7fbf9b6d12d7ec0249d3acb8b', '33ebe7acec14b25c5f84f35a664803fcab2f7781', -- cgit v1.2.1 From ae5a69f67822d81bbbd8f4af93be68703e730b37 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 2 Jun 2010 16:41:28 +0200 Subject: commit: redesigned revlist and commit parsing, commits are always retrieved from their object information directly. This is faster, and resolves issues with the rev-list format and empty commit messages Adjusted many tests to go with the changes, as they were still mocked. 
The mock was removed if necessary and replaced by code that actually executes --- test/git/test_commit.py | 310 ++++++++++++++++++++++++------------------------ 1 file changed, 153 insertions(+), 157 deletions(-) (limited to 'test/git/test_commit.py') diff --git a/test/git/test_commit.py b/test/git/test_commit.py index 28b407ac..ad7a0082 100644 --- a/test/git/test_commit.py +++ b/test/git/test_commit.py @@ -9,169 +9,165 @@ from git import * class TestCommit(TestBase): - def test_bake(self): + def test_bake(self): - commit = Commit(self.rorepo, **{'sha': '2454ae89983a4496a445ce347d7a41c0bb0ea7ae'}) - commit.author # bake + commit = Commit(self.rorepo, '2454ae89983a4496a445ce347d7a41c0bb0ea7ae') + commit.author # bake - assert_equal("Sebastian Thiel", commit.author.name) - assert_equal("byronimo@gmail.com", commit.author.email) - assert commit.author == commit.committer - assert isinstance(commit.authored_date, int) and isinstance(commit.committed_date, int) - assert isinstance(commit.author_tz_offset, int) and isinstance(commit.committer_tz_offset, int) - assert commit.message == "Added missing information to docstrings of commit and stats module" + assert_equal("Sebastian Thiel", commit.author.name) + assert_equal("byronimo@gmail.com", commit.author.email) + assert commit.author == commit.committer + assert isinstance(commit.authored_date, int) and isinstance(commit.committed_date, int) + assert isinstance(commit.author_tz_offset, int) and isinstance(commit.committer_tz_offset, int) + assert commit.message == "Added missing information to docstrings of commit and stats module" - def test_stats(self): - commit = Commit(self.rorepo, '33ebe7acec14b25c5f84f35a664803fcab2f7781') - stats = commit.stats - - def check_entries(d): - assert isinstance(d, dict) - for key in ("insertions", "deletions", "lines"): - assert key in d - # END assertion helper - assert stats.files - assert stats.total - - check_entries(stats.total) - assert "files" in stats.total - - for filepath, d 
in stats.files.items(): - check_entries(d) - # END for each stated file - - # assure data is parsed properly - michael = Actor._from_string("Michael Trier ") - assert commit.author == michael - assert commit.committer == michael - assert commit.authored_date == 1210193388 - assert commit.committed_date == 1210193388 - assert commit.author_tz_offset == 14400, commit.author_tz_offset - assert commit.committer_tz_offset == 14400, commit.committer_tz_offset - assert commit.message == "initial project" - - def test_traversal(self): - start = self.rorepo.commit("a4d06724202afccd2b5c54f81bcf2bf26dea7fff") - first = self.rorepo.commit("33ebe7acec14b25c5f84f35a664803fcab2f7781") - p0 = start.parents[0] - p1 = start.parents[1] - p00 = p0.parents[0] - p10 = p1.parents[0] - - # basic branch first, depth first - dfirst = start.traverse(branch_first=False) - bfirst = start.traverse(branch_first=True) - assert dfirst.next() == p0 - assert dfirst.next() == p00 - - assert bfirst.next() == p0 - assert bfirst.next() == p1 - assert bfirst.next() == p00 - assert bfirst.next() == p10 - - # at some point, both iterations should stop - assert list(bfirst)[-1] == first - stoptraverse = self.rorepo.commit("254d04aa3180eb8b8daf7b7ff25f010cd69b4e7d").traverse(as_edge=True) - l = list(stoptraverse) - assert len(l[0]) == 2 - - # ignore self - assert start.traverse(ignore_self=False).next() == start - - # depth - assert len(list(start.traverse(ignore_self=False, depth=0))) == 1 - - # prune - assert start.traverse(branch_first=1, prune=lambda i,d: i==p0).next() == p1 - - # predicate - assert start.traverse(branch_first=1, predicate=lambda i,d: i==p1).next() == p1 - - # traversal should stop when the beginning is reached - self.failUnlessRaises(StopIteration, first.traverse().next) - - # parents of the first commit should be empty ( as the only parent has a null - # sha ) - assert len(first.parents) == 0 - - def test_iteration(self): - # we can iterate commits - all_commits = 
Commit.list_items(self.rorepo, self.rorepo.head) - assert all_commits - assert all_commits == list(self.rorepo.iter_commits()) - - # this includes merge commits - mcomit = Commit(self.rorepo, 'd884adc80c80300b4cc05321494713904ef1df2d') - assert mcomit in all_commits - - # we can limit the result to paths - ltd_commits = list(self.rorepo.iter_commits(paths='CHANGES')) - assert ltd_commits and len(ltd_commits) < len(all_commits) - - # show commits of multiple paths, resulting in a union of commits - less_ltd_commits = list(Commit.iter_items(self.rorepo, 'master', paths=('CHANGES', 'AUTHORS'))) - assert len(ltd_commits) < len(less_ltd_commits) - - - @patch_object(Git, '_call_process') - def test_rev_list_bisect_all(self, git): - """ - 'git rev-list --bisect-all' returns additional information - in the commit header. This test ensures that we properly parse it. - """ + def test_stats(self): + commit = Commit(self.rorepo, '33ebe7acec14b25c5f84f35a664803fcab2f7781') + stats = commit.stats + + def check_entries(d): + assert isinstance(d, dict) + for key in ("insertions", "deletions", "lines"): + assert key in d + # END assertion helper + assert stats.files + assert stats.total + + check_entries(stats.total) + assert "files" in stats.total + + for filepath, d in stats.files.items(): + check_entries(d) + # END for each stated file + + # assure data is parsed properly + michael = Actor._from_string("Michael Trier ") + assert commit.author == michael + assert commit.committer == michael + assert commit.authored_date == 1210193388 + assert commit.committed_date == 1210193388 + assert commit.author_tz_offset == 14400, commit.author_tz_offset + assert commit.committer_tz_offset == 14400, commit.committer_tz_offset + assert commit.message == "initial project" + + def test_traversal(self): + start = self.rorepo.commit("a4d06724202afccd2b5c54f81bcf2bf26dea7fff") + first = self.rorepo.commit("33ebe7acec14b25c5f84f35a664803fcab2f7781") + p0 = start.parents[0] + p1 = start.parents[1] 
+ p00 = p0.parents[0] + p10 = p1.parents[0] + + # basic branch first, depth first + dfirst = start.traverse(branch_first=False) + bfirst = start.traverse(branch_first=True) + assert dfirst.next() == p0 + assert dfirst.next() == p00 + + assert bfirst.next() == p0 + assert bfirst.next() == p1 + assert bfirst.next() == p00 + assert bfirst.next() == p10 + + # at some point, both iterations should stop + assert list(bfirst)[-1] == first + stoptraverse = self.rorepo.commit("254d04aa3180eb8b8daf7b7ff25f010cd69b4e7d").traverse(as_edge=True) + l = list(stoptraverse) + assert len(l[0]) == 2 + + # ignore self + assert start.traverse(ignore_self=False).next() == start + + # depth + assert len(list(start.traverse(ignore_self=False, depth=0))) == 1 + + # prune + assert start.traverse(branch_first=1, prune=lambda i,d: i==p0).next() == p1 + + # predicate + assert start.traverse(branch_first=1, predicate=lambda i,d: i==p1).next() == p1 + + # traversal should stop when the beginning is reached + self.failUnlessRaises(StopIteration, first.traverse().next) + + # parents of the first commit should be empty ( as the only parent has a null + # sha ) + assert len(first.parents) == 0 + + def test_iteration(self): + # we can iterate commits + all_commits = Commit.list_items(self.rorepo, self.rorepo.head) + assert all_commits + assert all_commits == list(self.rorepo.iter_commits()) + + # this includes merge commits + mcomit = Commit(self.rorepo, 'd884adc80c80300b4cc05321494713904ef1df2d') + assert mcomit in all_commits + + # we can limit the result to paths + ltd_commits = list(self.rorepo.iter_commits(paths='CHANGES')) + assert ltd_commits and len(ltd_commits) < len(all_commits) + + # show commits of multiple paths, resulting in a union of commits + less_ltd_commits = list(Commit.iter_items(self.rorepo, 'master', paths=('CHANGES', 'AUTHORS'))) + assert len(ltd_commits) < len(less_ltd_commits) + + def test_iter_items(self): + # pretty not allowed + self.failUnlessRaises(ValueError, 
Commit.iter_items, self.rorepo, 'master', pretty="raw") + + def test_rev_list_bisect_all(self): + """ + 'git rev-list --bisect-all' returns additional information + in the commit header. This test ensures that we properly parse it. + """ + revs = self.rorepo.git.rev_list('933d23bf95a5bd1624fbcdf328d904e1fa173474', + first_parent=True, + bisect_all=True) - git.return_value = fixture('rev_list_bisect_all') + commits = Commit._iter_from_process_or_stream(self.rorepo, StringProcessAdapter(revs)) + expected_ids = ( + '7156cece3c49544abb6bf7a0c218eb36646fad6d', + '1f66cfbbce58b4b552b041707a12d437cc5f400a', + '33ebe7acec14b25c5f84f35a664803fcab2f7781', + '933d23bf95a5bd1624fbcdf328d904e1fa173474' + ) + for sha1, commit in zip(expected_ids, commits): + assert_equal(sha1, commit.sha) - revs = self.rorepo.git.rev_list('HEAD', - pretty='raw', - first_parent=True, - bisect_all=True) - assert_true(git.called) + def test_count(self): + assert self.rorepo.tag('refs/tags/0.1.5').commit.count( ) == 143 + + def test_list(self): + assert isinstance(Commit.list_items(self.rorepo, '0.1.5', max_count=5)['5117c9c8a4d3af19a9958677e45cda9269de1541'], Commit) - commits = Commit._iter_from_process_or_stream(self.rorepo, StringProcessAdapter(revs), True) - expected_ids = ( - 'cf37099ea8d1d8c7fbf9b6d12d7ec0249d3acb8b', - '33ebe7acec14b25c5f84f35a664803fcab2f7781', - 'a6604a00a652e754cb8b6b0b9f194f839fc38d7c', - '8df638c22c75ddc9a43ecdde90c0c9939f5009e7', - 'c231551328faa864848bde6ff8127f59c9566e90', - ) - for sha1, commit in zip(expected_ids, commits): - assert_equal(sha1, commit.sha) + def test_str(self): + commit = Commit(self.rorepo, 'abc') + assert_equal ("abc", str(commit)) - def test_count(self): - assert self.rorepo.tag('refs/tags/0.1.5').commit.count( ) == 143 - - def test_list(self): - assert isinstance(Commit.list_items(self.rorepo, '0.1.5', max_count=5)['5117c9c8a4d3af19a9958677e45cda9269de1541'], Commit) + def test_repr(self): + commit = Commit(self.rorepo, 'abc') + 
assert_equal('', repr(commit)) - def test_str(self): - commit = Commit(self.rorepo, 'abc') - assert_equal ("abc", str(commit)) - - def test_repr(self): - commit = Commit(self.rorepo, 'abc') - assert_equal('', repr(commit)) - - def test_equality(self): - commit1 = Commit(self.rorepo, 'abc') - commit2 = Commit(self.rorepo, 'abc') - commit3 = Commit(self.rorepo, 'zyx') - assert_equal(commit1, commit2) - assert_not_equal(commit2, commit3) - - def test_iter_parents(self): - # should return all but ourselves, even if skip is defined - c = self.rorepo.commit('0.1.5') - for skip in (0, 1): - piter = c.iter_parents(skip=skip) - first_parent = piter.next() - assert first_parent != c - assert first_parent == c.parents[0] - # END for each - - def test_base(self): - name_rev = self.rorepo.head.commit.name_rev - assert isinstance(name_rev, basestring) - + def test_equality(self): + commit1 = Commit(self.rorepo, 'abc') + commit2 = Commit(self.rorepo, 'abc') + commit3 = Commit(self.rorepo, 'zyx') + assert_equal(commit1, commit2) + assert_not_equal(commit2, commit3) + + def test_iter_parents(self): + # should return all but ourselves, even if skip is defined + c = self.rorepo.commit('0.1.5') + for skip in (0, 1): + piter = c.iter_parents(skip=skip) + first_parent = piter.next() + assert first_parent != c + assert first_parent == c.parents[0] + # END for each + + def test_base(self): + name_rev = self.rorepo.head.commit.name_rev + assert isinstance(name_rev, basestring) + -- cgit v1.2.1 From 1e2b46138ba58033738a24dadccc265748fce2ca Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 3 Jun 2010 23:20:34 +0200 Subject: commit.create_from_tree now uses pure python implementation, fixed message parsing which truncated newlines although it was illegitimate. 
It's up to the reader to truncate these, nowhere in the git code I could find anyone adding newlines to commits where it is written Added performance tests for serialization, it does about 5k commits per second if writing to tmpfs --- test/git/test_commit.py | 59 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 2 deletions(-) (limited to 'test/git/test_commit.py') diff --git a/test/git/test_commit.py b/test/git/test_commit.py index ad7a0082..a5f184e6 100644 --- a/test/git/test_commit.py +++ b/test/git/test_commit.py @@ -7,6 +7,56 @@ from test.testlib import * from git import * +from cStringIO import StringIO +import time +import sys + + +def assert_commit_serialization(rwrepo, commit_id, print_performance_info=False): + """traverse all commits in the history of commit identified by commit_id and check + if the serialization works. + :param print_performance_info: if True, we will show how fast we are""" + ns = 0 # num serializations + nds = 0 # num deserializations + + st = time.time() + for cm in rwrepo.commit(commit_id).traverse(): + nds += 1 + + # assert that we deserialize commits correctly, hence we get the same + # sha on serialization + stream = StringIO() + cm._serialize(stream) + ns += 1 + streamlen = stream.tell() + stream.seek(0) + + csha = rwrepo.odb.to_object(Commit.type, streamlen, stream) + assert csha == cm.sha + + nc = Commit(rwrepo, Commit.NULL_HEX_SHA, cm.tree.sha, + cm.author, cm.authored_date, cm.author_tz_offset, + cm.committer, cm.committed_date, cm.committer_tz_offset, + cm.message, cm.parents, cm.encoding) + + assert nc.parents == cm.parents + stream = StringIO() + nc._serialize(stream) + ns += 1 + streamlen = stream.tell() + stream.seek(0) + nc.sha = rwrepo.odb.to_object(Commit.type, streamlen, stream) + + # if it worked, we have exactly the same contents ! 
+ assert nc.sha == cm.sha + # END check commits + elapsed = time.time() - st + + if print_performance_info: + print >> sys.stderr, "Serialized %i and deserialized %i commits in %f s ( (%f, %f) commits / s" % (ns, nds, elapsed, ns/elapsed, nds/elapsed) + # END handle performance info + + class TestCommit(TestBase): def test_bake(self): @@ -19,7 +69,7 @@ class TestCommit(TestBase): assert commit.author == commit.committer assert isinstance(commit.authored_date, int) and isinstance(commit.committed_date, int) assert isinstance(commit.author_tz_offset, int) and isinstance(commit.committer_tz_offset, int) - assert commit.message == "Added missing information to docstrings of commit and stats module" + assert commit.message == "Added missing information to docstrings of commit and stats module\n" def test_stats(self): @@ -49,7 +99,7 @@ class TestCommit(TestBase): assert commit.committed_date == 1210193388 assert commit.author_tz_offset == 14400, commit.author_tz_offset assert commit.committer_tz_offset == 14400, commit.committer_tz_offset - assert commit.message == "initial project" + assert commit.message == "initial project\n" def test_traversal(self): start = self.rorepo.commit("a4d06724202afccd2b5c54f81bcf2bf26dea7fff") @@ -171,3 +221,8 @@ class TestCommit(TestBase): name_rev = self.rorepo.head.commit.name_rev assert isinstance(name_rev, basestring) + @with_bare_rw_repo + def test_serialization(self, rwrepo): + # create all commits of our repo + assert_commit_serialization(rwrepo, '0.1.6') + -- cgit v1.2.1 From a1e80445ad5cb6da4c0070d7cb8af89da3b0803b Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 4 Jun 2010 14:41:15 +0200 Subject: initial version of new odb design to facilitate a channel based multi-threading implementation of all odb functions --- test/git/test_commit.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'test/git/test_commit.py') diff --git a/test/git/test_commit.py b/test/git/test_commit.py index a5f184e6..e914b9a7 
100644 --- a/test/git/test_commit.py +++ b/test/git/test_commit.py @@ -31,7 +31,7 @@ def assert_commit_serialization(rwrepo, commit_id, print_performance_info=False) streamlen = stream.tell() stream.seek(0) - csha = rwrepo.odb.to_object(Commit.type, streamlen, stream) + csha = rwrepo.odb.store(Commit.type, streamlen, stream) assert csha == cm.sha nc = Commit(rwrepo, Commit.NULL_HEX_SHA, cm.tree.sha, @@ -45,7 +45,7 @@ def assert_commit_serialization(rwrepo, commit_id, print_performance_info=False) ns += 1 streamlen = stream.tell() stream.seek(0) - nc.sha = rwrepo.odb.to_object(Commit.type, streamlen, stream) + nc.sha = rwrepo.odb.store(Commit.type, streamlen, stream) # if it worked, we have exactly the same contents ! assert nc.sha == cm.sha -- cgit v1.2.1 From e746f96bcc29238b79118123028ca170adc4ff0f Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 4 Jun 2010 17:22:08 +0200 Subject: Fixed implementation after design change to deal with it - all tests run, but next there will have to be more thorough testing --- test/git/test_commit.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'test/git/test_commit.py') diff --git a/test/git/test_commit.py b/test/git/test_commit.py index e914b9a7..e65e2e59 100644 --- a/test/git/test_commit.py +++ b/test/git/test_commit.py @@ -6,6 +6,7 @@ from test.testlib import * from git import * +from git.odb import IStream from cStringIO import StringIO import time @@ -31,8 +32,8 @@ def assert_commit_serialization(rwrepo, commit_id, print_performance_info=False) streamlen = stream.tell() stream.seek(0) - csha = rwrepo.odb.store(Commit.type, streamlen, stream) - assert csha == cm.sha + istream = rwrepo.odb.store(IStream(Commit.type, streamlen, stream)) + assert istream.sha == cm.sha nc = Commit(rwrepo, Commit.NULL_HEX_SHA, cm.tree.sha, cm.author, cm.authored_date, cm.author_tz_offset, @@ -45,7 +46,12 @@ def assert_commit_serialization(rwrepo, commit_id, print_performance_info=False) ns += 1 streamlen = 
stream.tell() stream.seek(0) - nc.sha = rwrepo.odb.store(Commit.type, streamlen, stream) + + # reuse istream + istream.size = streamlen + istream.stream = stream + istream.sha = None + nc.sha = rwrepo.odb.store(istream).sha # if it worked, we have exactly the same contents ! assert nc.sha == cm.sha -- cgit v1.2.1