From 282018b79cc8df078381097cb3aeb29ff56e83c6 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 2 Jun 2010 20:11:00 +0200 Subject: Added first design and frame for object database. In a first step, loose objects will be written using our utilities, and certain object retrieval functionality moves into the GitObjectDatabase which is used by the repo instance Added performance test for object database access, which shows quite respectable tree parsing performance, and okay blob access. Nonetheless, it will be hard to beat the c performance using a pure python implementation, but it can be a nice practice to write it anyway to allow more direct pack manipulations. Some could benefit from the ability to write packs as these can serve as local cache if alternates are used --- test/git/performance/test_odb.py | 61 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 test/git/performance/test_odb.py (limited to 'test/git/performance/test_odb.py') diff --git a/test/git/performance/test_odb.py b/test/git/performance/test_odb.py new file mode 100644 index 00000000..0ad2ce33 --- /dev/null +++ b/test/git/performance/test_odb.py @@ -0,0 +1,61 @@ +"""Performance tests for object store""" + +from time import time +import sys +import stat + +from lib import ( + TestBigRepoReadOnly + ) + + +class TestObjDBPerformance(TestBigRepoReadOnly): + + def test_random_access(self): + + # GET COMMITS + # TODO: use the actual db for this + st = time() + root_commit = self.gitrepo.commit(self.head_sha_2k) + commits = list(root_commit.traverse()) + nc = len(commits) + elapsed = time() - st + + print >> sys.stderr, "Retrieved %i commits from ObjectStore in %g s ( %f commits / s )" % (nc, elapsed, nc / elapsed) + + + # GET TREES + # walk all trees of all commits + st = time() + blobs_per_commit = list() + nt = 0 + for commit in commits: + tree = commit.tree + blobs = list() + for item in tree.traverse(): + nt += 1 + if item.type == 'blob': + 
blobs.append(item) + # direct access for speed + # END while trees are there for walking + blobs_per_commit.append(blobs) + # END for each commit + elapsed = time() - st + + print >> sys.stderr, "Retrieved %i objects from %i commits in %g s ( %f objects / s )" % (nt, len(commits), elapsed, nt / elapsed) + + # GET BLOBS + st = time() + nb = 0 + too_many = 15000 + for blob_list in blobs_per_commit: + for blob in blob_list: + blob.data + # END for each blobsha + nb += len(blob_list) + if nb > too_many: + break + # END for each bloblist + elapsed = time() - st + + print >> sys.stderr, "Retrieved %i blob and their data in %g s ( %f blobs / s )" % (nb, elapsed, nb / elapsed) -- cgit v1.2.1 From 1e2b46138ba58033738a24dadccc265748fce2ca Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 3 Jun 2010 23:20:34 +0200 Subject: commit.create_from_tree now uses pure python implementation, fixed message parsing which truncated newlines although it was illegitimate. It's up to the reader to truncate these, nowhere in the git code I could find anyone adding newlines to commits where it is written Added performance tests for serialization, it does about 5k commits per second if writing to tmpfs --- test/git/performance/test_odb.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'test/git/performance/test_odb.py') diff --git a/test/git/performance/test_odb.py b/test/git/performance/test_odb.py index 0ad2ce33..7b1ee838 100644 --- a/test/git/performance/test_odb.py +++ b/test/git/performance/test_odb.py @@ -5,18 +5,18 @@ import sys import stat from lib import ( - TestBigRepoReadOnly + TestBigRepoR ) -class TestObjDBPerformance(TestBigRepoReadOnly): +class TestObjDBPerformance(TestBigRepoR): def test_random_access(self): # GET COMMITS # TODO: use the actual db for this st = time() - root_commit = self.gitrepo.commit(self.head_sha_2k) + root_commit = self.gitrorepo.commit(self.head_sha_2k) commits = list(root_commit.traverse()) nc = len(commits) elapsed = 
time() - st -- cgit v1.2.1