summaryrefslogtreecommitdiff
path: root/git/test/performance/db/packedodb_impl.py
blob: 1ca7c8a0e57126d31d259223d791391d92064e14 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Performance tests for object store"""
from git.test.performance.lib import (
    TestBigRepoR, 
    GlobalsItemDeletorMetaCls
    )

from git.exc import UnsupportedOperation

import sys
import os
from time import time
import random


class PerfBaseDeletorMetaClass(GlobalsItemDeletorMetaCls):
    """Metaclass used by the performance test base class defined in this module.

    NOTE(review): presumably ``GlobalsItemDeletorMetaCls`` removes the name given
    in ``ModuleToDelete`` from the globals of modules defining subclasses, so that
    the abstract base is not collected and run as a test itself - confirm against
    git.test.performance.lib."""
    # Name of the class in this module which uses this metaclass
    ModuleToDelete = 'TestPurePackedODBPerformanceBase'

class TestPurePackedODBPerformanceBase(TestBigRepoR):
    """Base for performance tests which run against a packed object database.

    Subclasses select the implementation under test by setting ``PackedODBCls``.
    ``setUpAll`` instantiates it once, using the path returned by
    ``cls.rorepo.db_path("pack")``, and stores the instance as ``cls.ropdb``.
    Timings are written to stderr, they are not compared against any limit."""
    __metaclass__ = PerfBaseDeletorMetaClass

    #{ Configuration
    # Type of the packed object database to benchmark, it is called with the
    # path of the pack directory as only argument
    PackedODBCls = None
    #} END configuration

    @classmethod
    def setUpAll(cls):
        """Create the database under test once for all tests of the class.

        :raise AssertionError: if the subclass did not set ``PackedODBCls``"""
        super(TestPurePackedODBPerformanceBase, cls).setUpAll()
        if cls.PackedODBCls is None:
            raise AssertionError("PackedODBCls must be set in subclass")
        #END assert configuration
        cls.ropdb = cls.PackedODBCls(cls.rorepo.db_path("pack"))

    @staticmethod
    def _per_second(count, elapsed):
        """:return: ``count / elapsed`` as float, or infinity if ``elapsed`` is not
            positive, which can happen with tiny repositories or coarse timers
        :param count: amount of items processed
        :param elapsed: duration in seconds it took to process them"""
        if elapsed > 0:
            return count / float(elapsed)
        # END handle measurable duration
        return float('inf')

    def test_pack_random_access(self):
        """Time sha iteration, pack lookups, info and stream retrieval as well as
        reading whole streams, and report the results on stderr"""
        pdb = self.ropdb
        per_second = self._per_second

        # sha lookup
        st = time()
        sha_list = list(pdb.sha_iter())
        elapsed = time() - st
        ns = len(sha_list)
        print >> sys.stderr, "PDB: looked up %i shas by index in %f s ( %f shas/s )" % (ns, elapsed, per_second(ns, elapsed))

        # pack lookup for each sha, in the order the database yielded them
        pdb_pack_info = pdb._pack_info
        st = time()
        for sha in sha_list:
            pdb_pack_info(sha)
        # END for each sha to look up
        elapsed = time() - st

        # discard cache
        del(pdb._entities)
        pdb.entities()
        print >> sys.stderr, "PDB: looked up %i sha in %i packs in %f s ( %f shas/s )" % (ns, len(pdb.entities()), elapsed, per_second(ns, elapsed))

        # query info and streams only
        max_items = 10000           # can wait longer when testing memory
        shas = sha_list[:max_items]
        # the repository may hold less than max_items objects, report what was actually done
        num_items = len(shas)
        for pdb_fun in (pdb.info, pdb.stream):
            st = time()
            for sha in shas:
                pdb_fun(sha)
            elapsed = time() - st
            print >> sys.stderr, "PDB: Obtained %i object %s by sha in %f s ( %f items/s )" % (num_items, pdb_fun.__name__.upper(), elapsed, per_second(num_items, elapsed))
        # END for each function

        # retrieve stream and read all
        max_items = 5000
        shas = sha_list[:max_items]
        num_items = len(shas)
        pdb_stream = pdb.stream
        total_size = 0
        st = time()
        for sha in shas:
            stream = pdb_stream(sha)
            stream.read()
            total_size += stream.size
        # END for each stream to read
        elapsed = time() - st
        # the report is labelled KiB, which is 1024 bytes
        total_kib = total_size / 1024.0
        print >> sys.stderr, "PDB: Obtained %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (num_items, total_kib, per_second(total_kib, elapsed), elapsed, per_second(num_items, elapsed))

    def test_correctness(self):
        """Verify the stream of each object in each pack, once without and once
        with crc checking, and report the throughput on stderr.

        Objects whose verification raises UnsupportedOperation are skipped and
        not counted.

        :raise AssertionError: if an object stream is reported to be invalid"""
        pdb = self.ropdb
        # NOTE: checking big repositories takes a long time
        print >> sys.stderr, "Endurance run: verify streaming of objects (crc and sha)"
        for crc in range(2):
            count = 0
            st = time()
            for entity in pdb.entities():
                pack_verify = entity.is_valid_stream
                pack_index = entity.index()
                sha_by_index = pack_index.sha
                for index in xrange(pack_index.size()):
                    try:
                        is_valid = pack_verify(sha_by_index(index), use_crc=crc)
                    except UnsupportedOperation:
                        continue
                    # END ignore old indices

                    # explicit check, an assert statement would not even call
                    # the verification when running with -O
                    if not is_valid:
                        raise AssertionError("Invalid stream of object at index %i (crc=%i)" % (index, crc))
                    # END handle invalid stream
                    count += 1
                # END for each index
            # END for each entity
            elapsed = time() - st
            print >> sys.stderr, "PDB: verified %i objects (crc=%i) in %f s ( %f objects/s )" % (count, crc, elapsed, self._per_second(count, elapsed))
        # END for each verify mode