git/test/db/lib.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208

# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Base classes for object db testing"""
from git.test.lib import (
	with_rw_directory,
	with_packs_rw,
	ZippedStoreShaWriter,
	fixture_path,
	TestBase
	)

from git.stream import Sha1Writer
from git.base import (
							IStream,
							OStream,
							OInfo
						)
				
from git.exc import BadObject
from git.typ import str_blob_type

from async import IteratorReader
from cStringIO import StringIO
from struct import pack


# Public names of this module: the database test base class defined below, plus
# the helpers with_rw_directory, with_packs_rw and fixture_path, which are
# imported from git.test.lib above and re-exported here.
__all__ = ('TestDBBase', 'with_rw_directory', 'with_packs_rw', 'fixture_path')
		
class TestDBBase(TestBase):
	"""Base class providing testing routines on databases.

	Each ``_assert_*`` helper receives the database under test as its only
	argument, exercises it and verifies the results with ``assert``
	statements. None of the helpers returns a value."""

	# data
	two_lines = "1234\nhello world"
	all_data = (two_lines, )


	def _assert_object_writing_simple(self, db):
		"""Store a series of small blobs in ``db`` and read each of them back.

		For every stored object the info and stream queries are checked against
		the input stream, and finally the object count and sha iteration of the
		database are verified.

		:param db: database to test. ``size``, ``store``, ``has_object``,
			``info``, ``stream`` and ``sha_iter`` are called on it
		:note: objects are stored in the database, so it must be writable"""
		# write a bunch of objects and query their streams and info
		null_objs = db.size()
		ni = 250
		for i in xrange(ni):
			# unique 4 byte payload per iteration, hence a unique object each time
			data = pack(">L", i)
			istream = IStream(str_blob_type, len(data), StringIO(data))
			new_istream = db.store(istream)
			assert new_istream is istream
			assert db.has_object(istream.binsha)

			info = db.info(istream.binsha)
			assert isinstance(info, OInfo)
			assert info.type == istream.type and info.size == istream.size

			stream = db.stream(istream.binsha)
			assert isinstance(stream, OStream)
			assert stream.binsha == info.binsha and stream.type == info.type
			assert stream.read() == data
		# END for each item

		# exactly ni objects must have been added, and iteration must yield all
		# of them as 20 byte binary shas
		assert db.size() == null_objs + ni
		shas = list(db.sha_iter())
		assert len(shas) == db.size()
		assert len(shas[0]) == 20


	def _assert_object_writing(self, db):
		"""General tests to verify object writing, compatible to ObjectDBW

		Every item of ``all_data`` is stored twice: first in 'dry-run' mode with
		a ``ZippedStoreShaWriter`` set as output stream, then with the output
		stream set to None.

		:param db: database to test. ``set_ostream``, ``ostream``, ``store``,
			``has_object``, ``info`` and ``stream`` are called on it
		:note: requires write access to the database"""
		# start in 'dry-run' mode, using a simple sha1 writer
		ostreams = (ZippedStoreShaWriter, None)
		for ostreamcls in ostreams:
			for data in self.all_data:
				# with an explicit output stream set, the object is expected not
				# to show up in the database itself
				dry_run = ostreamcls is not None
				ostream = None
				if ostreamcls is not None:
					ostream = ostreamcls()
					assert isinstance(ostream, Sha1Writer)
				# END create ostream

				# the previously set stream is either None or one of our writers
				prev_ostream = db.set_ostream(ostream)
				assert type(prev_ostream) in ostreams or prev_ostream in ostreams

				istream = IStream(str_blob_type, len(data), StringIO(data))

				# store returns same istream instance, with new sha set
				my_istream = db.store(istream)
				sha = istream.binsha
				assert my_istream is istream
				assert db.has_object(sha) != dry_run
				assert len(sha) == 20

				# verify data - the slow way, we want to run code
				if not dry_run:
					info = db.info(sha)
					assert str_blob_type == info.type
					assert info.size == len(data)

					ostream = db.stream(sha)
					assert ostream.read() == data
					assert ostream.type == str_blob_type
					assert ostream.size == len(data)
				else:
					# assertRaises replaces the deprecated failUnlessRaises alias
					self.assertRaises(BadObject, db.info, sha)
					self.assertRaises(BadObject, db.stream, sha)

					# DIRECT STREAM COPY
					# our data has been written in object format to the writer
					# we passed as output stream. No physical database representation
					# was created.
					# Test direct stream copy of object streams, the result must be
					# identical to what we fed in
					ostream.seek(0)
					istream.stream = ostream
					assert istream.binsha is not None
					prev_sha = istream.binsha

					# storing an istream which already has a sha must keep that sha
					db.set_ostream(ZippedStoreShaWriter())
					db.store(istream)
					assert istream.binsha == prev_sha
					new_ostream = db.ostream()

					# note: only works as long our store write uses the same compression
					# level, which is zip_best
					assert ostream.getvalue() == new_ostream.getvalue()
			# END for each data set
		# END for each dry_run mode

	def _assert_object_writing_async(self, db):
		"""Test generic object writing using asynchronous access

		5000 objects are stored through ``store_async`` and then queried through
		``has_object_async``, ``stream_async`` and ``info_async``. Afterwards
		another 2500 objects are stored with the store reader being fed directly
		into ``stream_async``.

		:param db: database to test, the ``*_async`` methods named above are
			called on it
		:note: objects are stored in the database, so it must be writable"""
		ni = 5000
		def istream_generator(offset=0, ni=ni):
			"""Yield ``ni`` blob input streams whose data is the decimal string
			of ``offset``, ``offset + 1``, ..."""
			for data_src in xrange(ni):
				data = str(data_src + offset)
				yield IStream(str_blob_type, len(data), StringIO(data))
			# END for each item
		# END generator utility

		# for now, we are very trusty here as we expect it to work if it worked
		# in the single-stream case

		# write objects
		reader = IteratorReader(istream_generator())
		istream_reader = db.store_async(reader)
		istreams = istream_reader.read()		# read all
		assert istream_reader.task().error() is None
		assert len(istreams) == ni

		for stream in istreams:
			assert stream.error is None
			assert len(stream.binsha) == 20
			assert isinstance(stream, IStream)
		# END assert each stream

		# test has-object-async - we must have all previously added ones
		reader = IteratorReader( istream.binsha for istream in istreams )
		hasobject_reader = db.has_object_async(reader)
		count = 0
		for sha, has_object in hasobject_reader:
			assert has_object
			count += 1
		# END for each sha
		assert count == ni

		# read the objects we have just written
		reader = IteratorReader( istream.binsha for istream in istreams )
		ostream_reader = db.stream_async(reader)

		# read items individually to prevent hitting possible sys-limits
		count = 0
		for ostream in ostream_reader:
			assert isinstance(ostream, OStream)
			count += 1
		# END for each ostream
		assert ostream_reader.task().error() is None
		assert count == ni

		# get info about our items
		reader = IteratorReader( istream.binsha for istream in istreams )
		info_reader = db.info_async(reader)

		count = 0
		for oinfo in info_reader:
			assert isinstance(oinfo, OInfo)
			count += 1
		# END for each oinfo instance
		assert count == ni


		# combined read-write using a converter
		# add 2500 items, and obtain their output streams
		nni = 2500
		# the offset keeps the data distinct from the items written above
		reader = IteratorReader(istream_generator(offset=ni, ni=nni))
		istream_to_sha = lambda istreams: [ istream.binsha for istream in istreams ]

		# the post-callback turns the stored istreams into their binary shas,
		# which is what stream_async takes as input
		istream_reader = db.store_async(reader)
		istream_reader.set_post_cb(istream_to_sha)

		ostream_reader = db.stream_async(istream_reader)

		count = 0
		# read it individually, otherwise we might run into the ulimit
		for ostream in ostream_reader:
			assert isinstance(ostream, OStream)
			count += 1
		# END for each ostream
		assert count == nni