# Source path: root/gitdb/test/performance/test_pack_streaming.py
# Source blob: fe160ea54aa9ba1dc86653cad71179ece1847405
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Specific test for pack streams only"""
from __future__ import print_function

from gitdb.test.performance.lib import (
    TestBigRepoR 
)

from gitdb.db.pack import PackedDB
from gitdb.stream import NullStream
from gitdb.pack import PackEntity
from gitdb.test.lib import skip_on_travis_ci

import os
import sys
from time import time

class CountedNullStream(NullStream):
    """NullStream variant that keeps a running total over all write() calls.

    The total is built from whatever ``NullStream.write`` returns for each
    chunk (presumably the number of bytes it was given - confirm against
    gitdb.stream.NullStream).
    """
    __slots__ = '_bw'

    def __init__(self):
        # Accumulated return values of NullStream.write.
        self._bw = 0

    def write(self, d):
        """Pass ``d`` on to NullStream.write and add its result to the total.

        Note that nothing is returned from here.
        """
        written = NullStream.write(self, d)
        self._bw += written

    def bytes_written(self):
        """:return: the total accumulated by all write() calls so far"""
        return self._bw
    

class TestPackStreamingPerformance(TestBigRepoR):
    """Timing runs for obtaining object streams from the packs below
    ``self.gitrepopath`` and for writing such streams into a new pack.

    The tests only report their measurements on stderr, nothing is asserted.
    """

    @skip_on_travis_ci
    def test_pack_writing(self):
        """Time stream lookup by sha, then time writing a pack from streams.

        The pack data is sent to a CountedNullStream, so it is only counted
        and never stored.
        """
        # see how fast we can write a pack from object streams.
        # This will not be fast, as we take time for decompressing the streams as well
        ostream = CountedNullStream()
        pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))

        ni = 1000       # upper bound of streams to obtain / objects to write
        count = 0       # streams actually obtained
        st = time()
        for sha in pdb.sha_iter():
            count += 1
            pdb.stream(sha)
            if count == ni:
                break
        # END gather objects for pack-writing
        # Clamp to 1ms: a coarse clock may report 0.0, which would make the
        # rate computation below raise ZeroDivisionError.
        elapsed = max(time() - st, 0.001)
        # Report what was actually processed, the database may hold fewer
        # than ni objects.
        print("PDB Streaming: Got %i streams by sha in in %f s ( %f streams/s )" % (count, elapsed, count / elapsed), file=sys.stderr)

        st = time()
        PackEntity.write_pack((pdb.stream(sha) for sha in pdb.sha_iter()), ostream.write, object_count=ni)
        elapsed = max(time() - st, 0.001)
        total_kb = ostream.bytes_written() / 1000
        # print() is the function here (see the __future__ import), the target
        # stream has to be given through the 'file' keyword. Passing sys.stderr
        # positionally would just print its repr to stdout.
        print("PDB Streaming: Wrote pack of size %i kb in %f s (%f kb/s)" % (total_kb, elapsed, total_kb / elapsed), file=sys.stderr)

    @skip_on_travis_ci
    def test_stream_reading(self):
        """Time obtaining streams by sha and reading each of them completely.

        Reports the number of streams read, the sum of their ``size``
        attributes in KiB and the resulting rates on stderr.
        """
        pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))

        # streaming only, meant for --with-profile runs
        ni = 5000       # upper bound of streams to read
        count = 0       # streams actually read
        pdb_stream = pdb.stream     # bound once to skip the attribute lookup per iteration
        total_size = 0
        st = time()
        for sha in pdb.sha_iter():
            if count == ni:
                break
            stream = pdb_stream(sha)
            stream.read()
            total_size += stream.size
            count += 1
        # END for each sha to read
        # Clamp to 1ms to prevent ZeroDivisionError on coarse clocks.
        elapsed = max(time() - st, 0.001)
        # The value is labelled KiB, hence 1024 bytes per unit.
        total_kib = total_size / 1024
        # Use the 'file' keyword to write to stderr, and the actual stream
        # count as fewer than ni objects may have been available.
        print("PDB Streaming: Got %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (count, total_kib, total_kib / elapsed, elapsed, count / elapsed), file=sys.stderr)