From 282018b79cc8df078381097cb3aeb29ff56e83c6 Mon Sep 17 00:00:00 2001
From: Sebastian Thiel <byronimo@gmail.com>
Date: Wed, 2 Jun 2010 20:11:00 +0200
Subject: Added first design and frame for object database. In a first step,
 loose objects will be written using our utilities, and certain object
 retrieval functionality moves into the GitObjectDatabase which is used by the
 repo instance. Added performance test for object database access, which shows
 quite respectable tree parsing performance, and okay blob access.
 Nonetheless, it will be hard to beat the C performance using a pure Python
 implementation, but it can be a nice practice to write it anyway to allow
 more direct pack manipulations. Some could benefit from the ability to write
 packs, as these can serve as a local cache if alternates are used.

---
 test/git/performance/test_odb.py | 61 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)
 create mode 100644 test/git/performance/test_odb.py

(limited to 'test/git/performance/test_odb.py')

diff --git a/test/git/performance/test_odb.py b/test/git/performance/test_odb.py
new file mode 100644
index 00000000..0ad2ce33
--- /dev/null
+++ b/test/git/performance/test_odb.py
@@ -0,0 +1,61 @@
+"""Performance tests for object store"""
+
+from time import time
+import sys
+import stat
+
+from lib import (
+	TestBigRepoReadOnly
+	)
+
+
+class TestObjDBPerformance(TestBigRepoReadOnly):
+	
+	def test_random_access(self):
+		
+		# GET COMMITS
+		# TODO: use the actual db for this
+		st = time()
+		root_commit = self.gitrepo.commit(self.head_sha_2k)
+		commits = list(root_commit.traverse())
+		nc = len(commits)
+		elapsed = time() - st
+		
+		print >> sys.stderr, "Retrieved %i commits from ObjectStore in %g s ( %f commits / s )" % (nc, elapsed, nc / elapsed)
+			
+			
+		# GET TREES
+		# walk all trees of all commits
+		st = time()
+		blobs_per_commit = list()
+		nt = 0
+		for commit in commits:
+			tree = commit.tree
+			blobs = list()
+			for item in tree.traverse():
+				nt += 1
+				if item.type == 'blob':
+					blobs.append(item)
+				# direct access for speed
+			# END while trees are there for walking
+			blobs_per_commit.append(blobs)
+		# END for each commit
+		elapsed = time() - st
+		
+		print >> sys.stderr, "Retrieved %i objects from %i commits in %g s ( %f objects / s )" % (nt, len(commits), elapsed, nt / elapsed)
+		
+		# GET BLOBS
+		st = time()
+		nb = 0
+		too_many = 15000
+		for blob_list in blobs_per_commit:
+			for blob in blob_list:
+				blob.data
+			# END for each blobsha
+			nb += len(blob_list)
+			if nb > too_many:
+				break
+		# END for each bloblist
+		elapsed = time() - st
+		
+		print >> sys.stderr, "Retrieved %i blob and their data in %g s ( %f blobs / s )" % (nb, elapsed, nb / elapsed)
-- 
cgit v1.2.1


From 1e2b46138ba58033738a24dadccc265748fce2ca Mon Sep 17 00:00:00 2001
From: Sebastian Thiel <byronimo@gmail.com>
Date: Thu, 3 Jun 2010 23:20:34 +0200
Subject: commit.create_from_tree now uses pure python implementation, fixed
 message parsing which truncated newlines although it was illegitimate. It's up
 to the reader to truncate these; nowhere in the git code could I find anyone
 adding newlines to commits where it is written. Added performance tests for
 serialization; it does about 5k commits per second when writing to tmpfs.

---
 test/git/performance/test_odb.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'test/git/performance/test_odb.py')

diff --git a/test/git/performance/test_odb.py b/test/git/performance/test_odb.py
index 0ad2ce33..7b1ee838 100644
--- a/test/git/performance/test_odb.py
+++ b/test/git/performance/test_odb.py
@@ -5,18 +5,18 @@ import sys
 import stat
 
 from lib import (
-	TestBigRepoReadOnly
+	TestBigRepoR
 	)
 
 
-class TestObjDBPerformance(TestBigRepoReadOnly):
+class TestObjDBPerformance(TestBigRepoR):
 	
 	def test_random_access(self):
 		
 		# GET COMMITS
 		# TODO: use the actual db for this
 		st = time()
-		root_commit = self.gitrepo.commit(self.head_sha_2k)
+		root_commit = self.gitrorepo.commit(self.head_sha_2k)
 		commits = list(root_commit.traverse())
 		nc = len(commits)
 		elapsed = time() - st
-- 
cgit v1.2.1