# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Base classes for object db testing"""
from git.test.lib import (
    with_rw_directory,
    with_packs_rw,
    ZippedStoreShaWriter,
    fixture_path,
    TestBase,
    rorepo_dir,
)

from git.stream import Sha1Writer
from git.base import (
    IStream,
    OStream,
    OInfo
)

from git.exc import BadObject
from git.typ import str_blob_type

from async import IteratorReader
from cStringIO import StringIO
from struct import pack


__all__ = ('TestDBBase', 'with_rw_directory', 'with_packs_rw', 'fixture_path')


class TestDBBase(TestBase):

    """Base Class providing default functionality to all tests such as:

    - Utility functions provided by the TestCase base of the unittest method such as::
        self.fail("todo")
        self.failUnlessRaises(...)

    - Class level repository which is considered read-only as it is shared among 
      all test cases in your type.
      Access it using:: 
       self.rorepo  # 'ro' stands for read-only

      The rorepo is in fact your current project's git repo. If you refer to specific 
      shas for your objects, be sure you choose some that are part of the immutable portion 
      of the project history ( to assure tests don't fail for others ).

      Derived types can override the default repository type to create a different
      read-only repo, allowing to test their specific type
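
      For instance, a derived type might configure itself as follows, where
      ``LooseObjectDB`` stands in for whatever concrete repository type you
      wish to test::

        class TestLooseDB(TestDBBase):
            RepoCls = LooseObjectDB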
    """

    # data
    two_lines = "1234\nhello world"
    all_data = (two_lines, )

    #{ Configuration
    # The repository type to instantiate. It takes at least a path to operate upon
    # during instantiation.
    RepoCls = None

    # if True, a read-only repo will be provided and RepoCls must be set.
    # Otherwise it may remain unset
    needs_ro_repo = True
    #} END configuration

    @classmethod
    def setUpClass(cls):
        """
        Dynamically add a read-only repository to our actual type. This way,
        each test type has its own repository.
        """
        if cls.needs_ro_repo:
            if cls is not TestDBBase:
                assert cls.RepoCls is not None, "RepoCls class member must be set in %s" % cls
            cls.rorepo = cls.RepoCls(rorepo_dir())
        # END handle rorepo

    def _assert_object_writing_simple(self, db):
        # write a bunch of objects and query their streams and info
        null_objs = db.size()
        ni = 250
        for i in xrange(ni):
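            # 4 bytes of big-endian packed integer give each object unique content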
            data = pack(">L", i)
            istream = IStream(str_blob_type, len(data), StringIO(data))
            new_istream = db.store(istream)
            assert new_istream is istream
            assert db.has_object(istream.binsha)

            info = db.info(istream.binsha)
            assert isinstance(info, OInfo)
            assert info.type == istream.type and info.size == istream.size

            stream = db.stream(istream.binsha)
            assert isinstance(stream, OStream)
            assert stream.binsha == info.binsha and stream.type == info.type
            assert stream.read() == data
        # END for each item

        assert db.size() == null_objs + ni
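        # sha_iter yields the binary (20-byte) shas of all objects in the database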
        shas = list(db.sha_iter())
        assert len(shas) == db.size()
        assert len(shas[0]) == 20

    def _assert_object_writing(self, db):
        """General tests to verify object writing, compatible to ObjectDBW
        :note: requires write access to the database"""
        # start in 'dry-run' mode, using a simple sha1 writer
        ostreams = (ZippedStoreShaWriter, None)
        for ostreamcls in ostreams:
            for data in self.all_data:
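                # a custom output stream diverts the write away from the database:
                # the object gets serialized and hashed, but nothing is stored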
                dry_run = ostreamcls is not None
                ostream = None
                if ostreamcls is not None:
                    ostream = ostreamcls()
                    assert isinstance(ostream, Sha1Writer)
                # END create ostream

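                # set_ostream returns the previously installed stream, which may be None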
                prev_ostream = db.set_ostream(ostream)
                assert type(prev_ostream) in ostreams or prev_ostream in ostreams

                istream = IStream(str_blob_type, len(data), StringIO(data))

                # store returns same istream instance, with new sha set
                my_istream = db.store(istream)
                sha = istream.binsha
                assert my_istream is istream
                assert db.has_object(sha) != dry_run
                assert len(sha) == 20

                # verify data - the slow way, we want to run code
                if not dry_run:
                    info = db.info(sha)
                    assert str_blob_type == info.type
                    assert info.size == len(data)

                    ostream = db.stream(sha)
                    assert ostream.read() == data
                    assert ostream.type == str_blob_type
                    assert ostream.size == len(data)
                else:
                    self.failUnlessRaises(BadObject, db.info, sha)
                    self.failUnlessRaises(BadObject, db.stream, sha)

                    # DIRECT STREAM COPY
                    # our data has been written in object format to the StringIO
                    # we passed as output stream. No physical database representation
                    # was created.
                    # Test direct stream copy of object streams, the result must be
                    # identical to what we fed in
                    ostream.seek(0)
                    istream.stream = ostream
                    assert istream.binsha is not None
                    prev_sha = istream.binsha

                    db.set_ostream(ZippedStoreShaWriter())
                    db.store(istream)
                    assert istream.binsha == prev_sha
                    new_ostream = db.ostream()

                    # note: only works as long as our store write uses the same compression
                    # level, which is zip_best
                    assert ostream.getvalue() == new_ostream.getvalue()
            # END for each data set
        # END for each dry_run mode

    def _assert_object_writing_async(self, db):
        """Test generic object writing using asynchronous access"""
        ni = 5000

        def istream_generator(offset=0, ni=ni):
            for data_src in xrange(ni):
                data = str(data_src + offset)
                yield IStream(str_blob_type, len(data), StringIO(data))
            # END for each item
        # END generator utility

        # for now, we are fairly trusting here, as we expect it to work if it worked
        # in the single-stream case

        # write objects
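        # IteratorReader adapts our generator into a reader the async pool can
        # pull items from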
        reader = IteratorReader(istream_generator())
        istream_reader = db.store_async(reader)
        istreams = istream_reader.read()        # read all
        assert istream_reader.task().error() is None
        assert len(istreams) == ni

        for stream in istreams:
            assert stream.error is None
            assert len(stream.binsha) == 20
            assert isinstance(stream, IStream)
        # END assert each stream

        # test has-object-async - we must have all previously added ones
        reader = IteratorReader(istream.binsha for istream in istreams)
        hasobject_reader = db.has_object_async(reader)
        count = 0
        for sha, has_object in hasobject_reader:
            assert has_object
            count += 1
        # END for each sha
        assert count == ni

        # read the objects we have just written
        reader = IteratorReader(istream.binsha for istream in istreams)
        ostream_reader = db.stream_async(reader)

        # read items individually to prevent hitting possible sys-limits
        count = 0
        for ostream in ostream_reader:
            assert isinstance(ostream, OStream)
            count += 1
        # END for each ostream
        assert ostream_reader.task().error() is None
        assert count == ni

        # get info about our items
        reader = IteratorReader(istream.binsha for istream in istreams)
        info_reader = db.info_async(reader)

        count = 0
        for oinfo in info_reader:
            assert isinstance(oinfo, OInfo)
            count += 1
        # END for each oinfo instance
        assert count == ni

        # combined read-write using a converter
        # add 2500 items, and obtain their output streams
        nni = 2500
        reader = IteratorReader(istream_generator(offset=ni, ni=nni))
        istream_to_sha = lambda istreams: [istream.binsha for istream in istreams]

        istream_reader = db.store_async(reader)
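        # the post-callback converts each batch of stored IStreams into their
        # binshas, so stream_async can consume the reader's output directly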
        istream_reader.set_post_cb(istream_to_sha)

        ostream_reader = db.stream_async(istream_reader)

        count = 0
        # read it individually, otherwise we might run into the ulimit
        for ostream in ostream_reader:
            assert isinstance(ostream, OStream)
            count += 1
        # END for each ostream
        assert count == nni