diff options
author | Sebastian Thiel <byronimo@gmail.com> | 2010-06-02 20:11:00 +0200 |
---|---|---|
committer | Sebastian Thiel <byronimo@gmail.com> | 2010-06-02 20:11:00 +0200 |
commit | 282018b79cc8df078381097cb3aeb29ff56e83c6 (patch) | |
tree | f4ad72281d65d062239fb571fdd693e328883e55 /test/git/performance/test_odb.py | |
parent | 538820055ce1bf9dd07ecda48210832f96194504 (diff) | |
download | gitpython-282018b79cc8df078381097cb3aeb29ff56e83c6.tar.gz |
Added the first design and framework for the object database. As a first step, loose objects will be written using our own utilities, and certain object-retrieval functionality moves into the GitObjectDatabase, which is used by the repo instance.
Added a performance test for object database access, which shows quite respectable tree-parsing performance and okay blob access. Nonetheless, it will be hard to beat the performance of the C implementation with a pure-Python one, but writing it anyway can be good practice and allows more direct pack manipulation. Some users could benefit from the ability to write packs, as these can serve as a local cache if alternates are used.
Diffstat (limited to 'test/git/performance/test_odb.py')
-rw-r--r-- | test/git/performance/test_odb.py | 61 |
1 files changed, 61 insertions, 0 deletions
# Reconstructed from the new-file diff:
#   diff --git a/test/git/performance/test_odb.py b/test/git/performance/test_odb.py
#   new file mode 100644, index 00000000..0ad2ce33
"""Performance tests for object store"""

from time import time
import sys
import stat

from lib import (
    TestBigRepoReadOnly
    )


def _per_second(count, elapsed):
    """Return ``count / elapsed`` as a throughput figure.

    ``time()`` may have a coarse resolution, so a fast section can measure an
    elapsed time of exactly zero.  In that case ``inf`` is returned instead of
    raising ``ZeroDivisionError`` and failing the test for a reason that has
    nothing to do with the code being measured.
    """
    if elapsed > 0:
        return count / elapsed
    return float('inf')


def _log(fmt, *args):
    """Write one formatted line to stderr.

    Uses ``sys.stderr.write`` rather than the ``print >>`` statement so the
    same line is emitted regardless of the interpreter's major version.
    """
    sys.stderr.write(fmt % args + "\n")


class TestObjDBPerformance(TestBigRepoReadOnly):
    """Timing test for reading commits, trees and blobs of a repository.

    NOTE(review): ``self.gitrepo`` and ``self.head_sha_2k`` are not defined
    here; presumably they are provided by ``TestBigRepoReadOnly`` -- confirm.
    """

    def test_random_access(self):
        """Time three phases and report each one's throughput on stderr.

        1. Resolve the commit ``self.head_sha_2k`` and collect everything its
           ``traverse()`` yields.
        2. Traverse the tree of each collected commit, counting every item
           and remembering the items whose ``type`` is ``'blob'``.
        3. Access ``data`` on the remembered blobs, stopping after the first
           per-commit list that pushes the total above ``too_many``.

        The test makes no assertions; it only prints measurements.
        """
        # GET COMMITS
        # TODO: use the actual db for this
        st = time()
        root_commit = self.gitrepo.commit(self.head_sha_2k)
        commits = list(root_commit.traverse())
        nc = len(commits)
        elapsed = time() - st

        _log("Retrieved %i commits from ObjectStore in %g s ( %f commits / s )",
             nc, elapsed, _per_second(nc, elapsed))

        # GET TREES
        # walk all trees of all commits
        st = time()
        blobs_per_commit = []
        nt = 0                      # every traversed item, not only blobs
        for commit in commits:
            blobs = []
            for item in commit.tree.traverse():
                nt += 1
                if item.type == 'blob':
                    blobs.append(item)
            # END for each item in the commit's tree
            blobs_per_commit.append(blobs)
        # END for each commit
        elapsed = time() - st

        _log("Retrieved %i objects from %i commits in %g s ( %f objects / s )",
             nt, len(commits), elapsed, _per_second(nt, elapsed))

        # GET BLOBS
        st = time()
        nb = 0
        too_many = 15000            # upper bound that keeps the run time reasonable
        for blob_list in blobs_per_commit:
            for blob in blob_list:
                # Only the access matters, the value is discarded
                # (presumably this is what loads the blob's content).
                blob.data
            # END for each blob
            nb += len(blob_list)
            # The limit is checked per list, so nb may end up above too_many.
            if nb > too_many:
                break
        # END for each bloblist
        elapsed = time() - st

        _log("Retrieved %i blob and their data in %g s ( %f blobs / s )",
             nb, elapsed, _per_second(nb, elapsed))