import binascii
import os
import zlib
from cStringIO import StringIO
import errno

from git.utils import make_sha
from fun import chunk_size

__all__ = ('FDCompressedSha1Writer', 'DecompressMemMapReader')


#{ Routines

hex_to_bin = binascii.a2b_hex
bin_to_hex = binascii.b2a_hex

def to_hex_sha(sha):
    """:return: hexified version of sha"""
    if len(sha) == 40:
        return sha
    return bin_to_hex(sha)

def to_bin_sha(sha):
    """:return: binary version of sha"""
    if len(sha) == 20:
        return sha
    return hex_to_bin(sha)

# errors
ENOENT = errno.ENOENT

# os shortcuts
exists = os.path.exists
mkdir = os.mkdir
isdir = os.path.isdir
rename = os.rename
dirname = os.path.dirname
join = os.path.join
read = os.read
write = os.write
close = os.close

# ZLIB configuration
# used when compressing objects
Z_BEST_SPEED = 1

#} END Routines


#{ Classes

class FDCompressedSha1Writer(object):
    """Digests data written to it, making the sha available, then compresses the
    data and writes it to the file descriptor

    :note: operates on raw file descriptors
    :note: for this to work, you have to use the close-method of this instance"""
    __slots__ = ("fd", "sha1", "zip")

    # default exception
    exc = IOError("Failed to write all bytes to filedescriptor")

    def __init__(self, fd):
        self.fd = fd
        self.sha1 = make_sha("")
        self.zip = zlib.compressobj(Z_BEST_SPEED)

    def write(self, data):
        """:raise IOError: If not all bytes could be written
        :return: length of incoming data"""
        self.sha1.update(data)
        cdata = self.zip.compress(data)
        bytes_written = write(self.fd, cdata)
        if bytes_written != len(cdata):
            raise self.exc
        # report the uncompressed byte count, as documented
        return len(data)

    def sha(self, as_hex=False):
        """:return: sha so far
        :param as_hex: if True, the sha will be hex-encoded, binary otherwise"""
        if as_hex:
            return self.sha1.hexdigest()
        return self.sha1.digest()

    def close(self):
        remainder = self.zip.flush()
        if write(self.fd, remainder) != len(remainder):
            raise self.exc
        return close(self.fd)


class DecompressMemMapReader(object):
    """Reads data in chunks from a memory map and decompresses it. The client sees
    only the uncompressed data; file-like read calls are handled with on-demand
    buffered decompression accordingly.

    A constraint on the total size of bytes is enforced, simulating a logical file
    within a possibly larger physical memory area.

    To read efficiently, you clearly don't want to read individual bytes; instead,
    read at least a few kilobytes at a time.

    :note: The chunk-size should be carefully selected, as it will involve quite a
        bit of string copying due to the way zlib is implemented. It is very
        wasteful, hence we try to find a good tradeoff between allocation time and
        the number of times we actually allocate. A custom zlib implementation
        would be good here to better support streamed reading - it would only need
        to keep the mmap and decompress it into chunks, that's all ..."""
    __slots__ = ('_m', '_zip', '_buf', '_buflen', '_br', '_cws', '_cwe', '_s', '_cs', '_close')

    def __init__(self, m, close_on_deletion, cs=128*1024):
        """Initialize with mmap and chunk_size for stream reading"""
        self._m = m
        self._zip = zlib.decompressobj()
        self._buf = None                    # buffer of decompressed bytes
        self._buflen = 0                    # length of bytes in buffer
        self._s = 0                         # size of uncompressed data to read in total
        self._br = 0                        # num uncompressed bytes read
        self._cws = 0                       # start byte of compression window
        self._cwe = 0                       # end byte of compression window
        self._cs = cs                       # chunk size (when reading from zip)
        self._close = close_on_deletion     # close the memmap on deletion?
    def __del__(self):
        if self._close:
            self._m.close()
        # END handle resource freeing

    def initialize(self, size=0):
        """Initialize this instance for acting as a read-only stream for size bytes.

        :param size: size in bytes to be decompressed before being depleted.
            If 0, default object header information is parsed from the data,
            returning a tuple of (type_string, uncompressed_size).
            If not 0, the size will be used, and None is returned.
        :note: must only be called exactly once"""
        if size:
            self._s = size
            return
        # END handle size

        # read header
        maxb = 8192
        self._s = maxb
        hdr = self.read(maxb)
        hdrend = hdr.find("\0")
        type, size = hdr[:hdrend].split(" ")
        self._s = int(size)

        # adjust internal state to match actual header length that we ignore
        # The buffer will be depleted first on future reads
        self._br = 0
        hdrend += 1                         # count terminating \0
        self._buf = StringIO(hdr[hdrend:])
        self._buflen = len(hdr) - hdrend

        return type, size

    def read(self, size=-1):
        if size < 1:
            size = self._s - self._br
        else:
            size = min(size, self._s - self._br)
        # END clamp size

        if size == 0:
            return str()
        # END handle depletion

        # deplete the buffer, then just continue using the decompress object
        # which has an own buffer. We just need this to transparently parse the
        # header from the zlib stream
        dat = str()
        if self._buf:
            if self._buflen >= size:
                # have enough data
                dat = self._buf.read(size)
                self._buflen -= size
                self._br += size
                return dat
            else:
                dat = self._buf.getvalue()      # ouch, duplicates data
                size -= self._buflen
                self._br += self._buflen
                self._buflen = 0
                self._buf = None
            # END handle buffer len
        # END handle buffer

        # decompress some data
        # Abstract: zlib needs to operate on chunks of our memory map (which may
        # be large), as it will otherwise always fill in the 'unconsumed_tail'
        # attribute, which possibly reads our whole map to the end, forcing
        # everything to be read from disk even though just a portion was
        # requested. As this would be a no-go, we work around it by passing only
        # chunks of data, moving the window into the memory map along as we
        # decompress, which keeps the tail smaller than our chunk-size. This
        # causes 'only' the chunk to be copied once, and another copy of a part
        # of it when it creates the unconsumed tail. We have to use the tail to
        # hand in the appropriate amount of bytes during the next read.
        if self._zip.unconsumed_tail:
            # move the window, making it as large as size demands. For code-clarity,
            # we just take the chunk from our map again instead of reusing the
            # unconsumed tail. The latter would save some memory copying, but we
            # could end up not getting enough data uncompressed, so we would have
            # to sort that out as well. Now we just assume the worst case, hence
            # the data is uncompressed and the window needs to be as large as the
            # uncompressed bytes we want to read.
            self._cws = self._cwe - len(self._zip.unconsumed_tail)
            self._cwe = self._cws + size
            indata = self._m[self._cws:self._cwe]       # another copy ... :(
        else:
            # everything was consumed - the new window starts where the old one ended
            self._cws = self._cwe
            self._cwe = self._cws + size
            indata = self._m[self._cws:self._cwe]       # ... copy it again :(
        # END handle tail

        dcompdat = self._zip.decompress(indata, size)
        self._br += len(dcompdat)

        if dat:
            return dat + dcompdat
        return dcompdat

#} END classes
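
#{ Examples

# The window logic in DecompressMemMapReader.read builds on two zlib features:
# the optional 'max_length' argument of decompressobj().decompress() and the
# 'unconsumed_tail' attribute, which receives whatever input was not needed to
# produce the requested output. A minimal sketch of that behavior in isolation;
# the function name and payload below are made up for illustration only:
def _demo_unconsumed_tail():
    payload = "x" * 4096
    dobj = zlib.decompressobj()
    # ask for at most 512 decompressed bytes - zlib stops early and keeps the
    # unprocessed remainder of the input in 'unconsumed_tail'
    head = dobj.decompress(zlib.compress(payload), 512)
    assert len(head) <= 512 and dobj.unconsumed_tail
    # feed the tail back in to continue exactly where decompression stopped
    rest = dobj.decompress(dobj.unconsumed_tail)
    assert head + rest == payload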
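
# A hypothetical round-trip through both classes, assuming a POSIX platform;
# the file name 'example.tmp' and the demo payload exist only for this sketch.
# Note that the sha covers the uncompressed stream, while the file receives
# the zlib-compressed bytes.
if __name__ == '__main__':
    import mmap

    # write side: hash the uncompressed data while writing compressed bytes
    fd = os.open('example.tmp', os.O_CREAT | os.O_WRONLY, 0644)
    writer = FDCompressedSha1Writer(fd)
    writer.write("blob 11\0hello world")
    print "sha1 of uncompressed stream:", writer.sha(as_hex=True)
    writer.close()                              # flushes zlib, then closes the fd

    # read side: stream the decompressed bytes back through a memory map
    fd = os.open('example.tmp', os.O_RDONLY)
    m = mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
    reader = DecompressMemMapReader(m, close_on_deletion=True)
    type_string, size = reader.initialize()
    assert (type_string, int(size)) == ('blob', 11)
    assert reader.read() == 'hello world'
    close(fd)
    os.remove('example.tmp')

#} END examples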