author    | Sebastian Thiel <byronimo@gmail.com> | 2010-11-25 18:10:33 +0100
committer | Sebastian Thiel <byronimo@gmail.com> | 2010-11-25 18:18:15 +0100
commit    | f8ce24a835cae8c623e2936bec2618a8855c605b (patch)
tree      | d4c1d392579e24285381613a4ac1b7cc2d6b6fae /git/test/performance/test_streams.py
parent    | 65747a216c67c3101c6ae2edaa8119d786b793cb (diff)
parent    | 9004e3a1cf33110f2cbc458f1dc3259c930ad9b4 (diff)
download  | gitpython-f8ce24a835cae8c623e2936bec2618a8855c605b.tar.gz
#######->WARNING<-####### Directory structure changed, see commit message
If you use git-python as a submodule of your own project and alter sys.path to import it,
you will have to adjust your code to take the changed directory structure into account.
Previously, you would put the path
./git-python/lib
into your sys.path. All modules have moved one level up into the 'git' subdirectory, which means that the 'git-python' directory
now contains the 'git' root package. To allow git to be found, add ./git-python to your path instead, as in the sketch below.
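For illustration, a minimal sketch of that adjustment, assuming the superproject keeps the submodule checked out at ./git-python (project_root is a hypothetical name for wherever your project lives):

    # old layout: the package lived below git-python/lib
    #   sys.path.insert(0, os.path.join(project_root, 'git-python', 'lib'))
    # new layout: the 'git' root package sits directly inside git-python/
    import os, sys
    sys.path.insert(0, os.path.join(project_root, 'git-python'))  # project_root: hypothetical
    import git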
To finalize your update, run the following command
git submodule update --init --recursive
As there will be left-over directories, consider running git clean afterwards; see the example below.
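For example, a dry run first shows what would be deleted before anything is removed (standard git-clean flags, not specific to this commit):

    git clean -n -d    # list untracked files and directories that would be removed
    git clean -f -d    # remove them for real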
Diffstat (limited to 'git/test/performance/test_streams.py')
-rw-r--r-- | git/test/performance/test_streams.py | 131
1 file changed, 131 insertions, 0 deletions
diff --git a/git/test/performance/test_streams.py b/git/test/performance/test_streams.py
new file mode 100644
index 00000000..7f17d722
--- /dev/null
+++ b/git/test/performance/test_streams.py
@@ -0,0 +1,131 @@
+"""Performance tests for data streaming"""
+
+from git.test.lib import *
+from gitdb import *
+from gitdb.util import bin_to_hex
+
+from time import time
+import os
+import sys
+import stat
+import subprocess
+
+from gitdb.test.lib import make_memory_file
+
+from lib import (
+    TestBigRepoR
+    )
+
+
+class TestObjDBPerformance(TestBigRepoR):
+
+    large_data_size_bytes = 1000*1000*10    # some MiB should do it
+    moderate_data_size_bytes = 1000*1000*1  # just 1 MiB
+
+    @with_rw_repo('HEAD', bare=True)
+    def test_large_data_streaming(self, rwrepo):
+        # TODO: This part overlaps with the same file in gitdb.test.performance.test_stream
+        # It should be shared if possible
+        ldb = LooseObjectDB(os.path.join(rwrepo.git_dir, 'objects'))
+
+        for randomize in range(2):
+            desc = (randomize and 'random ') or ''
+            print >> sys.stderr, "Creating %s data ..." % desc
+            st = time()
+            size, stream = make_memory_file(self.large_data_size_bytes, randomize)
+            elapsed = time() - st
+            print >> sys.stderr, "Done (in %f s)" % elapsed
+
+            # writing - due to the compression it will seem faster than it is
+            st = time()
+            binsha = ldb.store(IStream('blob', size, stream)).binsha
+            elapsed_add = time() - st
+            assert ldb.has_object(binsha)
+            db_file = ldb.readable_db_object_path(bin_to_hex(binsha))
+            fsize_kib = os.path.getsize(db_file) / 1000
+
+            size_kib = size / 1000
+            print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add)
+
+            # reading all at once
+            st = time()
+            ostream = ldb.stream(binsha)
+            shadata = ostream.read()
+            elapsed_readall = time() - st
+
+            stream.seek(0)
+            assert shadata == stream.getvalue()
+            print >> sys.stderr, "Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, elapsed_readall, size_kib / elapsed_readall)
+
+            # reading in chunks of 512 KiB
+            cs = 512*1000
+            chunks = list()
+            st = time()
+            ostream = ldb.stream(binsha)
+            while True:
+                data = ostream.read(cs)
+                chunks.append(data)
+                if len(data) < cs:
+                    break
+            # END read in chunks
+            elapsed_readchunks = time() - st
+
+            stream.seek(0)
+            assert ''.join(chunks) == stream.getvalue()
+
+            cs_kib = cs / 1000
+            print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks)
+
+            # del db file so git has something to do
+            os.remove(db_file)
+
+            # VS. CGIT
+            ##########
+            # CGIT! Can using the cgit programs be faster?
+            proc = rwrepo.git.hash_object('-w', '--stdin', as_process=True, istream=subprocess.PIPE)
+
+            # write file - pump everything in at once to be as fast as possible
+            data = stream.getvalue()    # cache it
+            st = time()
+            proc.stdin.write(data)
+            proc.stdin.close()
+            gitsha = proc.stdout.read().strip()
+            proc.wait()
+            gelapsed_add = time() - st
+            del(data)
+            assert gitsha == bin_to_hex(binsha)    # we do it the same way, right?
+
+            # as it's the same sha, we reuse our path
+            fsize_kib = os.path.getsize(db_file) / 1000
+            print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data using git-hash-object in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, gelapsed_add, size_kib / gelapsed_add)
+
+            # compare ...
+            print >> sys.stderr, "Git-Python is %f %% faster than git when adding big %s files" % (100.0 - (elapsed_add / gelapsed_add) * 100, desc)
+
+            # read all
+            st = time()
+            s, t, size, data = rwrepo.git.get_object_data(gitsha)
+            gelapsed_readall = time() - st
+            print >> sys.stderr, "Read %i KiB of %s data at once using git-cat-file in %f s ( %f Read KiB / s)" % (size_kib, desc, gelapsed_readall, size_kib / gelapsed_readall)
+
+            # compare
+            print >> sys.stderr, "Git-Python is %f %% faster than git when reading big %s files" % (100.0 - (elapsed_readall / gelapsed_readall) * 100, desc)
+
+            # read chunks
+            st = time()
+            s, t, size, stream = rwrepo.git.stream_object_data(gitsha)
+            while True:
+                data = stream.read(cs)
+                if len(data) < cs:
+                    break
+            # END read stream
+            gelapsed_readchunks = time() - st
+            print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from git-cat-file in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, gelapsed_readchunks, size_kib / gelapsed_readchunks)
+
+            # compare
+            print >> sys.stderr, "Git-Python is %f %% faster than git when reading big %s files in chunks" % (100.0 - (elapsed_readchunks / gelapsed_readchunks) * 100, desc)
+        # END for each randomization factor
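For reference, the loose-object round-trip this test benchmarks boils down to a handful of calls. Below is a minimal sketch using only the gitdb API as it appears in the diff above; the objects path is hypothetical, and the snippet keeps the Python 2 idiom of the test code:

    from cStringIO import StringIO
    from gitdb import LooseObjectDB, IStream
    from gitdb.util import bin_to_hex

    ldb = LooseObjectDB('/path/to/repo/.git/objects')    # hypothetical location

    data = 'x' * 1000 * 1000                             # 1 MB payload
    # store() compresses and hashes the stream, returning an object carrying the binary sha
    binsha = ldb.store(IStream('blob', len(data), StringIO(data))).binsha
    assert ldb.has_object(binsha)

    ostream = ldb.stream(binsha)                         # read the object back as a stream
    assert ostream.read() == data
    print bin_to_hex(binsha)                             # hex sha of the stored blob

The cgit comparison performs the same write through git hash-object -w --stdin and asserts that the resulting sha matches, which confirms both paths store identical objects.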