lib/git/odb/fun.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114

"""Contains basic c-functions which usually contain performance critical code
Keeping this code separate from the beginning makes it easier to out-source
it into c later, if required"""

from git.errors import (
	BadObjectType
	)

import zlib
decompressobj = zlib.decompressobj


# INVARIANTS
type_id_to_type_map = 	{
							1 : "commit",
							2 : "tree",
							3 : "blob",
							4 : "tag"
						}

# used when dealing with larger streams
chunk_size = 1000*1000


#{ Routines

def is_loose_object(m):
	""":return: True the file contained in memory map m appears to be a loose object.
	Only the first two bytes are needed"""
	b0, b1 = map(ord, m[:2])
	word = (b0 << 8) + b1
	return b0 == 0x78 and (word % 31) == 0

def loose_object_header_info(m):
	""":return: tuple(type_string, uncompressed_size_in_bytes) the type string of the 
		object as well as its uncompressed size in bytes.
	:param m: memory map from which to read the compressed object data"""
	decompress_size = 8192		# is used in cgit as well
	hdr = decompressobj().decompress(m, decompress_size)
	type_name, size = hdr[:hdr.find("\0")].split(" ")
	return type_name, int(size)
	
def object_header_info(m):
	""":return: tuple(type_string, uncompressed_size_in_bytes 
	:param mmap: mapped memory map. It will be 
		seeked to the actual start of the object contents, which can be used
		to initialize a zlib decompress object.
	:note: This routine can only handle new-style objects which are assumably contained
		in packs
		"""
	assert not is_loose_object(m), "Use loose_object_header_info instead"
	
	c = b0							# first byte
	i = 1							# next char to read
	type_id = (c >> 4) & 7			# numeric type
	size = c & 15					# starting size
	s = 4							# starting bit-shift size
	while c & 0x80:
		c = ord(m[i])
		i += 1
		size += (c & 0x7f) << s
		s += 7
	# END character loop
	
	# finally seek the map to the start of the data stream
	m.seek(i)
	try:
		return (type_id_to_type_map[type_id], size)
	except KeyError:
		# invalid object type - we could try to be smart now and decode part 
		# of the stream to get the info, problem is that we had trouble finding 
		# the exact start of the content stream
		raise BadObjectType(type_id)
	# END handle exceptions
	
def write_object(type, size, source_stream, target_stream, close_target_stream=True, 
					chunk_size=chunk_size):
	"""Write the object as identified by type, size and source_stream into the 
	target_stream
	
	:param type: type string of the object
	:param size: amount of bytes to write from source_stream
	:param source_stream: stream as file-like object providing at least size bytes
	:param target_stream: stream as file-like object to receive the data
	:param close_target_stream: if True, the target stream will be closed when 
		the routine exits, even if an error is thrown
	:param chunk_size: size of chunks to read from source. Larger values can be beneficial
		for io performance, but cost more memory as well
	:return: The actual amount of bytes written to stream, which includes the header and a trailing newline"""
	tbw = 0												# total num bytes written
	dbw = 0												# num data bytes written
	try:
		# WRITE HEADER: type SP size NULL
		tbw += target_stream.write("%s %i\0" % (type, size))
	
		# WRITE ALL DATA UP TO SIZE
		while True:
			cs = min(chunk_size, size-dbw)
			data_len = target_stream.write(source_stream.read(cs))
			dbw += data_len
			if data_len < cs or dbw == size:
				tbw += dbw
				break
			# END check for stream end
		# END duplicate data
		return tbw
	finally:
		if close_target_stream:
			target_stream.close()
		# END handle stream closing
	# END assure file was closed
	
	
#} END routines