author    Antoine Musso <hashar@free.fr>  2014-11-16 20:15:50 +0100
committer Antoine Musso <hashar@free.fr>  2014-11-16 20:46:41 +0100
commit    f5d11b750ecc982541d1f936488248f0b42d75d3 (patch)
tree      8be522510315f5adc32c0c55acd45dc1074294da /git/test/performance
parent    7aba59a2609ec768d5d495dafd23a4bce8179741 (diff)
download  gitpython-f5d11b750ecc982541d1f936488248f0b42d75d3.tar.gz
pep8 linting (whitespaces)
W191 indentation contains tabs
E221 multiple spaces before operator
E222 multiple spaces after operator
E225 missing whitespace around operator
E271 multiple spaces after keyword
W292 no newline at end of file
W293 blank line contains whitespace
W391 blank line at end of file
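
For reference, these warning codes come from the pep8 checker (later renamed pycodestyle). Below is a minimal sketch of how such a check could be re-run over this directory; the pycodestyle package and the repository-relative path are assumptions, not part of this commit:

    import pycodestyle

    # Select exactly the warning classes fixed in this commit.
    style = pycodestyle.StyleGuide(
        select=['W191', 'E221', 'E222', 'E225', 'E271',
                'W292', 'W293', 'W391'])
    report = style.check_files(['git/test/performance'])
    print('%d violations found' % report.total_errors)
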
Diffstat (limited to 'git/test/performance')
-rw-r--r--  git/test/performance/lib.py           20
-rw-r--r--  git/test/performance/test_commit.py   22
-rw-r--r--  git/test/performance/test_odb.py      14
-rw-r--r--  git/test/performance/test_streams.py  44
-rw-r--r--  git/test/performance/test_utils.py    40
5 files changed, 70 insertions, 70 deletions
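
As a hedged sketch, the same change set could be inspected programmatically with GitPython itself; the local clone path below is hypothetical:

    from git import Repo

    repo = Repo('path/to/gitpython')  # hypothetical local clone
    commit = repo.commit('f5d11b750ecc982541d1f936488248f0b42d75d3')
    # Diff against the first parent, restricted to the performance tests.
    for diff in commit.parents[0].diff(commit, paths='git/test/performance'):
        print(diff.a_path)
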
diff --git a/git/test/performance/lib.py b/git/test/performance/lib.py
index d8313dac..acf2e4d5 100644
--- a/git/test/performance/lib.py
+++ b/git/test/performance/lib.py
@@ -35,21 +35,21 @@ def resolve_or_fail(env_var):
class TestBigRepoR(TestBase):
"""TestCase providing access to readonly 'big' repositories using the following
member variables:
-
+
* gitrorepo
-
+
* Read-Only git repository - actually the repo of git itself
-
+
* puregitrorepo
-
+
* As gitrepo, but uses pure python implementation
"""
-
+
#{ Invariants
head_sha_2k = '235d521da60e4699e5bd59ac658b5b48bd76ddca'
head_sha_50 = '32347c375250fd470973a5d76185cac718955fd5'
#} END invariants
-
+
@classmethod
def setUp(cls):
super(TestBigRepoR, cls).setUp()
@@ -60,9 +60,9 @@ class TestBigRepoR(TestBase):
class TestBigRepoRW(TestBigRepoR):
"""As above, but provides a big repository that we can write to.
-
+
Provides ``self.gitrwrepo`` and ``self.puregitrwrepo``"""
-
+
@classmethod
def setUp(cls):
super(TestBigRepoRW, cls).setUp()
@@ -70,9 +70,9 @@ class TestBigRepoRW(TestBigRepoR):
os.mkdir(dirname)
cls.gitrwrepo = cls.gitrorepo.clone(dirname, shared=True, bare=True, odbt=GitCmdObjectDB)
cls.puregitrwrepo = Repo(dirname, odbt=GitDB)
-
+
@classmethod
def tearDownAll(cls):
shutil.rmtree(cls.gitrwrepo.working_dir)
-
+
#} END base classes
diff --git a/git/test/performance/test_commit.py b/git/test/performance/test_commit.py
index 1bdfcfa2..c3d89931 100644
--- a/git/test/performance/test_commit.py
+++ b/git/test/performance/test_commit.py
@@ -26,15 +26,15 @@ class TestPerformance(TestBigRepoRW):
c.committer_tz_offset
c.message
c.parents
-
+
def test_iteration(self):
no = 0
nc = 0
-
+
# find the first commit containing the given path - always do a full
# iteration ( restricted to the path in question ), but in fact it should
# return quite a lot of commits; we just take one and hence abort the operation
-
+
st = time()
for c in self.rorepo.iter_commits(self.ref_100):
nc += 1
@@ -46,7 +46,7 @@ class TestPerformance(TestBigRepoRW):
# END for each commit
elapsed_time = time() - st
print >> sys.stderr, "Traversed %i Trees and a total of %i unchached objects in %s [s] ( %f objs/s )" % (nc, no, elapsed_time, no/elapsed_time)
-
+
def test_commit_traversal(self):
# bound to cat-file parsing performance
nc = 0
@@ -57,7 +57,7 @@ class TestPerformance(TestBigRepoRW):
# END for each traversed commit
elapsed_time = time() - st
print >> sys.stderr, "Traversed %i Commits in %s [s] ( %f commits/s )" % (nc, elapsed_time, nc/elapsed_time)
-
+
def test_commit_iteration(self):
# bound to stream parsing performance
nc = 0
@@ -68,16 +68,16 @@ class TestPerformance(TestBigRepoRW):
# END for each traversed commit
elapsed_time = time() - st
print >> sys.stderr, "Iterated %i Commits in %s [s] ( %f commits/s )" % (nc, elapsed_time, nc/elapsed_time)
-
+
def test_commit_serialization(self):
assert_commit_serialization(self.gitrwrepo, self.head_sha_2k, True)
-
+
rwrepo = self.gitrwrepo
make_object = rwrepo.odb.store
# direct serialization - deserialization can be tested afterwards
# serialization is probably limited on IO
hc = rwrepo.commit(self.head_sha_2k)
-
+
commits = list()
nc = 5000
st = time()
@@ -86,14 +86,14 @@ class TestPerformance(TestBigRepoRW):
hc.author, hc.authored_date, hc.author_tz_offset,
hc.committer, hc.committed_date, hc.committer_tz_offset,
str(i), parents=hc.parents, encoding=hc.encoding)
-
+
stream = StringIO()
cm._serialize(stream)
slen = stream.tell()
stream.seek(0)
-
+
cm.binsha = make_object(IStream(Commit.type, slen, stream)).binsha
# END commit creation
elapsed = time() - st
-
+
print >> sys.stderr, "Serialized %i commits to loose objects in %f s ( %f commits / s )" % (nc, elapsed, nc / elapsed)
diff --git a/git/test/performance/test_odb.py b/git/test/performance/test_odb.py
index ccc13eb4..57a953ab 100644
--- a/git/test/performance/test_odb.py
+++ b/git/test/performance/test_odb.py
@@ -10,7 +10,7 @@ from lib import (
class TestObjDBPerformance(TestBigRepoR):
-
+
def test_random_access(self):
results = [ ["Iterate Commits"], ["Iterate Blobs"], ["Retrieve Blob Data"] ]
for repo in (self.gitrorepo, self.puregitrorepo):
@@ -20,10 +20,10 @@ class TestObjDBPerformance(TestBigRepoR):
commits = list(root_commit.traverse())
nc = len(commits)
elapsed = time() - st
-
+
print >> sys.stderr, "%s: Retrieved %i commits from ObjectStore in %g s ( %f commits / s )" % (type(repo.odb), nc, elapsed, nc / elapsed)
results[0].append(elapsed)
-
+
# GET TREES
# walk all trees of all commits
st = time()
@@ -41,10 +41,10 @@ class TestObjDBPerformance(TestBigRepoR):
blobs_per_commit.append(blobs)
# END for each commit
elapsed = time() - st
-
+
print >> sys.stderr, "%s: Retrieved %i objects from %i commits in %g s ( %f objects / s )" % (type(repo.odb), nt, len(commits), elapsed, nt / elapsed)
results[1].append(elapsed)
-
+
# GET BLOBS
st = time()
nb = 0
@@ -59,11 +59,11 @@ class TestObjDBPerformance(TestBigRepoR):
break
# END for each bloblist
elapsed = time() - st
-
+
print >> sys.stderr, "%s: Retrieved %i blob (%i KiB) and their data in %g s ( %f blobs / s, %f KiB / s )" % (type(repo.odb), nb, data_bytes/1000, elapsed, nb / elapsed, (data_bytes / 1000) / elapsed)
results[2].append(elapsed)
# END for each repo type
-
+
# final results
for test_name, a, b in results:
print >> sys.stderr, "%s: %f s vs %f s, pure is %f times slower" % (test_name, a, b, b / a)
diff --git a/git/test/performance/test_streams.py b/git/test/performance/test_streams.py
index 93e88841..cac53a06 100644
--- a/git/test/performance/test_streams.py
+++ b/git/test/performance/test_streams.py
@@ -18,16 +18,16 @@ from lib import (
class TestObjDBPerformance(TestBigRepoR):
-
+
large_data_size_bytes = 1000*1000*10 # some MiB should do it
moderate_data_size_bytes = 1000*1000*1 # just 1 MiB
-
+
@with_rw_repo('HEAD', bare=True)
def test_large_data_streaming(self, rwrepo):
# TODO: This part overlaps with the same file in gitdb.test.performance.test_stream
# It should be shared if possible
ldb = LooseObjectDB(os.path.join(rwrepo.git_dir, 'objects'))
-
+
for randomize in range(2):
desc = (randomize and 'random ') or ''
print >> sys.stderr, "Creating %s data ..." % desc
@@ -35,7 +35,7 @@ class TestObjDBPerformance(TestBigRepoR):
size, stream = make_memory_file(self.large_data_size_bytes, randomize)
elapsed = time() - st
print >> sys.stderr, "Done (in %f s)" % elapsed
-
+
# writing - due to the compression it will seem faster than it is
st = time()
binsha = ldb.store(IStream('blob', size, stream)).binsha
@@ -43,22 +43,22 @@ class TestObjDBPerformance(TestBigRepoR):
assert ldb.has_object(binsha)
db_file = ldb.readable_db_object_path(bin_to_hex(binsha))
fsize_kib = os.path.getsize(db_file) / 1000
-
-
+
+
size_kib = size / 1000
print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add)
-
+
# reading all at once
st = time()
ostream = ldb.stream(binsha)
shadata = ostream.read()
elapsed_readall = time() - st
-
+
stream.seek(0)
assert shadata == stream.getvalue()
print >> sys.stderr, "Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, elapsed_readall, size_kib / elapsed_readall)
-
-
+
+
# reading in chunks of 1 MiB
cs = 512*1000
chunks = list()
@@ -71,21 +71,21 @@ class TestObjDBPerformance(TestBigRepoR):
break
# END read in chunks
elapsed_readchunks = time() - st
-
+
stream.seek(0)
assert ''.join(chunks) == stream.getvalue()
-
+
cs_kib = cs / 1000
print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks)
-
+
# del db file so git has something to do
os.remove(db_file)
-
+
# VS. CGIT
##########
# CGIT ! Can using the cgit programs be faster ?
proc = rwrepo.git.hash_object('-w', '--stdin', as_process=True, istream=subprocess.PIPE)
-
+
# write file - pump everything in at once to be as fast as possible
data = stream.getvalue() # cache it
st = time()
@@ -96,15 +96,15 @@ class TestObjDBPerformance(TestBigRepoR):
gelapsed_add = time() - st
del(data)
assert gitsha == bin_to_hex(binsha) # we do it the same way, right ?
-
+
# as it's the same sha, we reuse our path
fsize_kib = os.path.getsize(db_file) / 1000
print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data to using git-hash-object in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, gelapsed_add, size_kib / gelapsed_add)
-
+
# compare ...
print >> sys.stderr, "Git-Python is %f %% faster than git when adding big %s files" % (100.0 - (elapsed_add / gelapsed_add) * 100, desc)
-
-
+
+
# read all
st = time()
s, t, size, data = rwrepo.git.get_object_data(gitsha)
@@ -113,8 +113,8 @@ class TestObjDBPerformance(TestBigRepoR):
# compare
print >> sys.stderr, "Git-Python is %f %% faster than git when reading big %sfiles" % (100.0 - (elapsed_readall / gelapsed_readall) * 100, desc)
-
-
+
+
# read chunks
st = time()
s, t, size, stream = rwrepo.git.stream_object_data(gitsha)
@@ -125,7 +125,7 @@ class TestObjDBPerformance(TestBigRepoR):
# END read stream
gelapsed_readchunks = time() - st
print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from git-cat-file in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, gelapsed_readchunks, size_kib / gelapsed_readchunks)
-
+
# compare
print >> sys.stderr, "Git-Python is %f %% faster than git when reading big %s files in chunks" % (100.0 - (elapsed_readchunks / gelapsed_readchunks) * 100, desc)
# END for each randomization factor
diff --git a/git/test/performance/test_utils.py b/git/test/performance/test_utils.py
index 8637af48..7de77970 100644
--- a/git/test/performance/test_utils.py
+++ b/git/test/performance/test_utils.py
@@ -9,29 +9,29 @@ from lib import (
class TestUtilPerformance(TestBigRepoR):
-
+
def test_access(self):
# compare dict vs. slot access
class Slotty(object):
__slots__ = "attr"
def __init__(self):
self.attr = 1
-
+
class Dicty(object):
def __init__(self):
self.attr = 1
-
+
class BigSlotty(object):
__slots__ = ('attr', ) + tuple('abcdefghijk')
def __init__(self):
for attr in self.__slots__:
setattr(self, attr, 1)
-
+
class BigDicty(object):
def __init__(self):
for attr in BigSlotty.__slots__:
setattr(self, attr, 1)
-
+
ni = 1000000
for cls in (Slotty, Dicty, BigSlotty, BigDicty):
cli = cls()
@@ -42,7 +42,7 @@ class TestUtilPerformance(TestBigRepoR):
elapsed = time() - st
print >> sys.stderr, "Accessed %s.attr %i times in %s s ( %f acc / s)" % (cls.__name__, ni, elapsed, ni / elapsed)
# END for each class type
-
+
# check the number of sequence accesses
for cls in (list, tuple):
x = 10
@@ -57,7 +57,7 @@ class TestUtilPerformance(TestBigRepoR):
na = ni * 3
print >> sys.stderr, "Accessed %s[x] %i times in %s s ( %f acc / s)" % (cls.__name__, na, elapsed, na / elapsed)
# END for each sequence
-
+
def test_instantiation(self):
ni = 100000
max_num_items = 4
@@ -83,7 +83,7 @@ class TestUtilPerformance(TestBigRepoR):
print >> sys.stderr, "Created %i %ss of size %i in %f s ( %f inst / s)" % (ni, cls.__name__, mni, elapsed, ni / elapsed)
# END for each type
# END for each item count
-
+
# tuple and tuple direct
st = time()
for i in xrange(ni):
@@ -91,19 +91,19 @@ class TestUtilPerformance(TestBigRepoR):
# END for each item
elapsed = time() - st
print >> sys.stderr, "Created %i tuples (1,2,3,4) in %f s ( %f tuples / s)" % (ni, elapsed, ni / elapsed)
-
+
st = time()
for i in xrange(ni):
t = tuple((1,2,3,4))
# END for each item
elapsed = time() - st
print >> sys.stderr, "Created %i tuples tuple((1,2,3,4)) in %f s ( %f tuples / s)" % (ni, elapsed, ni / elapsed)
-
+
def test_unpacking_vs_indexing(self):
ni = 1000000
list_items = [1,2,3,4]
tuple_items = (1,2,3,4)
-
+
for sequence in (list_items, tuple_items):
st = time()
for i in xrange(ni):
@@ -111,14 +111,14 @@ class TestUtilPerformance(TestBigRepoR):
# END for each iteration
elapsed = time() - st
print >> sys.stderr, "Unpacked %i %ss of size %i in %f s ( %f acc / s)" % (ni, type(sequence).__name__, len(sequence), elapsed, ni / elapsed)
-
+
st = time()
for i in xrange(ni):
one, two, three, four = sequence[0], sequence[1], sequence[2], sequence[3]
# END for each iteration
elapsed = time() - st
print >> sys.stderr, "Unpacked %i %ss of size %i individually in %f s ( %f acc / s)" % (ni, type(sequence).__name__, len(sequence), elapsed, ni / elapsed)
-
+
st = time()
for i in xrange(ni):
one, two = sequence[0], sequence[1]
@@ -126,14 +126,14 @@ class TestUtilPerformance(TestBigRepoR):
elapsed = time() - st
print >> sys.stderr, "Unpacked %i %ss of size %i individually (2 of 4) in %f s ( %f acc / s)" % (ni, type(sequence).__name__, len(sequence), elapsed, ni / elapsed)
# END for each sequence
-
+
def test_large_list_vs_iteration(self):
# what costs more: alloc/realloc of lists, or the cpu strain of iterators ?
def slow_iter(ni):
for i in xrange(ni):
yield i
# END slow iter - be closer to the real world
-
+
# alloc doesn't play a role here it seems
for ni in (500, 1000, 10000, 20000, 40000):
st = time()
@@ -142,7 +142,7 @@ class TestUtilPerformance(TestBigRepoR):
# END for each item
elapsed = time() - st
print >> sys.stderr, "Iterated %i items from list in %f s ( %f acc / s)" % (ni, elapsed, ni / elapsed)
-
+
st = time()
for i in slow_iter(ni):
i
@@ -150,14 +150,14 @@ class TestUtilPerformance(TestBigRepoR):
elapsed = time() - st
print >> sys.stderr, "Iterated %i items from iterator in %f s ( %f acc / s)" % (ni, elapsed, ni / elapsed)
# END for each number of iterations
-
+
def test_type_vs_inst_class(self):
class NewType(object):
pass
-
+
# let's see which way is faster
inst = NewType()
-
+
ni = 1000000
st = time()
for i in xrange(ni):
@@ -165,7 +165,7 @@ class TestUtilPerformance(TestBigRepoR):
# END for each item
elapsed = time() - st
print >> sys.stderr, "Created %i items using inst.__class__ in %f s ( %f items / s)" % (ni, elapsed, ni / elapsed)
-
+
st = time()
for i in xrange(ni):
type(inst)()