summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Sebastian Thiel <byronimo@gmail.com> 2010-06-22 00:05:37 +0200
committer Sebastian Thiel <byronimo@gmail.com> 2010-06-22 00:05:37 +0200
commit 69dd8750be1fbf55010a738dc1ced4655e727f23 (patch)
tree 1d49d5ff0218a358d63ae1cab93c56731cc5ea46
parent 1044116d25f0311033e0951d2ab30579bba4b051 (diff)
download gitpython-69dd8750be1fbf55010a738dc1ced4655e727f23.tar.gz
index.write_tree: initial version implemented, although it's not yet working correctly; a test to explicitly compare the git version with the python implementation is still missing
Tree and Index internally use 20-byte shas, converting them only as needed to reduce memory footprint and processing time. objects: started own 'fun' module containing the most important tree functions; more are likely to be added soon
-rw-r--r-- CHANGES 7
-rw-r--r-- lib/git/__init__.py 2
-rw-r--r-- lib/git/db.py 7
-rw-r--r-- lib/git/diff.py 2
-rw-r--r-- lib/git/errors.py 70
-rw-r--r-- lib/git/index/base.py 75
-rw-r--r-- lib/git/index/fun.py 73
-rw-r--r-- lib/git/index/typ.py 22
-rw-r--r-- lib/git/objects/base.py 1
-rw-r--r-- lib/git/objects/fun.py 66
-rw-r--r-- lib/git/objects/tree.py 121
-rw-r--r-- lib/git/utils.py 14
-rw-r--r-- test/git/test_index.py 46
13 files changed, 298 insertions, 208 deletions
diff --git a/CHANGES b/CHANGES
index 87742f63..2f93b3e7 100644
--- a/CHANGES
+++ b/CHANGES
@@ -5,6 +5,13 @@ CHANGES
===
* ConcurrentWriteOperation was removed, and replaced by LockedFD
* IndexFile.get_entries_key was renamed to entry_key
+ * IndexEntry instances contained in IndexFile.entries now use binary sha's. Use
+ the .hexsha property to obtain the hexadecimal version
+ * IndexFile.write_tree: removed missing_ok keyword, its always True now
+ Instead of raising GitCommandError it raises UnmergedEntriesError
+ * diff.Diff.null_hex_sha renamed to NULL_HEX_SHA, to be conforming with
+ the naming in the Object base class
+
0.2 Beta 2
===========
diff --git a/lib/git/__init__.py b/lib/git/__init__.py
index 7a0d000d..7860a3c1 100644
--- a/lib/git/__init__.py
+++ b/lib/git/__init__.py
@@ -28,7 +28,7 @@ from git.config import GitConfigParser
from git.objects import *
from git.refs import *
from git.diff import *
-from git.errors import InvalidGitRepositoryError, NoSuchPathError, GitCommandError
+from git.errors import *
from git.cmd import Git
from git.repo import Repo
from git.remote import *
diff --git a/lib/git/db.py b/lib/git/db.py
index f3698b76..b7cf0fc7 100644
--- a/lib/git/db.py
+++ b/lib/git/db.py
@@ -4,9 +4,12 @@ from gitdb.base import (
OStream
)
+from gitdb.util import to_hex_sha
+
from gitdb.db import GitDB
from gitdb.db import LooseObjectDB
+
__all__ = ('GitCmdObjectDB', 'GitDB' )
#class GitCmdObjectDB(CompoundDB, ObjectDBW):
@@ -24,11 +27,11 @@ class GitCmdObjectDB(LooseObjectDB):
self._git = git
def info(self, sha):
- t = self._git.get_object_header(sha)
+ t = self._git.get_object_header(to_hex_sha(sha))
return OInfo(*t)
def stream(self, sha):
"""For now, all lookup is done by git itself"""
- t = self._git.stream_object_data(sha)
+ t = self._git.stream_object_data(to_hex_sha(sha))
return OStream(*t)
diff --git a/lib/git/diff.py b/lib/git/diff.py
index 9df0c499..f9a0a66f 100644
--- a/lib/git/diff.py
+++ b/lib/git/diff.py
@@ -196,7 +196,7 @@ class Diff(object):
\.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
""", re.VERBOSE | re.MULTILINE)
# can be used for comparisons
- null_hex_sha = "0"*40
+ NULL_HEX_SHA = "0"*40
__slots__ = ("a_blob", "b_blob", "a_mode", "b_mode", "new_file", "deleted_file",
"rename_from", "rename_to", "diff")
diff --git a/lib/git/errors.py b/lib/git/errors.py
index 5fe99915..93919d5e 100644
--- a/lib/git/errors.py
+++ b/lib/git/errors.py
@@ -6,43 +6,51 @@
""" Module containing all exceptions thrown througout the git package, """
class InvalidGitRepositoryError(Exception):
- """ Thrown if the given repository appears to have an invalid format. """
+ """ Thrown if the given repository appears to have an invalid format. """
class NoSuchPathError(OSError):
- """ Thrown if a path could not be access by the system. """
+ """ Thrown if a path could not be access by the system. """
class GitCommandError(Exception):
- """ Thrown if execution of the git command fails with non-zero status code. """
- def __init__(self, command, status, stderr=None):
- self.stderr = stderr
- self.status = status
- self.command = command
-
- def __str__(self):
- return ("'%s' returned exit status %i: %s" %
- (' '.join(str(i) for i in self.command), self.status, self.stderr))
+ """ Thrown if execution of the git command fails with non-zero status code. """
+ def __init__(self, command, status, stderr=None):
+ self.stderr = stderr
+ self.status = status
+ self.command = command
+
+ def __str__(self):
+ return ("'%s' returned exit status %i: %s" %
+ (' '.join(str(i) for i in self.command), self.status, self.stderr))
class CheckoutError( Exception ):
- """Thrown if a file could not be checked out from the index as it contained
- changes.
-
- The .failed_files attribute contains a list of relative paths that failed
- to be checked out as they contained changes that did not exist in the index.
-
- The .failed_reasons attribute contains a string informing about the actual
- cause of the issue.
-
- The .valid_files attribute contains a list of relative paths to files that
- were checked out successfully and hence match the version stored in the
- index"""
- def __init__(self, message, failed_files, valid_files, failed_reasons):
- Exception.__init__(self, message)
- self.failed_files = failed_files
- self.failed_reasons = failed_reasons
- self.valid_files = valid_files
-
- def __str__(self):
- return Exception.__str__(self) + ":%s" % self.failed_files
+ """Thrown if a file could not be checked out from the index as it contained
+ changes.
+
+ The .failed_files attribute contains a list of relative paths that failed
+ to be checked out as they contained changes that did not exist in the index.
+
+ The .failed_reasons attribute contains a string informing about the actual
+ cause of the issue.
+
+ The .valid_files attribute contains a list of relative paths to files that
+ were checked out successfully and hence match the version stored in the
+ index"""
+ def __init__(self, message, failed_files, valid_files, failed_reasons):
+ Exception.__init__(self, message)
+ self.failed_files = failed_files
+ self.failed_reasons = failed_reasons
+ self.valid_files = valid_files
+
+ def __str__(self):
+ return Exception.__str__(self) + ":%s" % self.failed_files
+
+
+class CacheError(Exception):
+ """Base for all errors related to the git index, which is called cache internally"""
+
+class UnmergedEntriesError(CacheError):
+ """Thrown if an operation cannot proceed as there are still unmerged
+ entries in the cache"""
diff --git a/lib/git/index/base.py b/lib/git/index/base.py
index a605c3ec..96a9430c 100644
--- a/lib/git/index/base.py
+++ b/lib/git/index/base.py
@@ -5,13 +5,13 @@
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Module containing Index implementation, allowing to perform all kinds of index
manipulations such as querying and merging. """
-import binascii
import tempfile
import os
import sys
import subprocess
import glob
from cStringIO import StringIO
+from binascii import b2a_hex
from stat import (
S_ISLNK,
@@ -25,16 +25,12 @@ from stat import (
from typ import (
BaseIndexEntry,
IndexEntry,
- CE_NAMEMASK,
- CE_STAGESHIFT
)
from util import (
TemporaryFileSwap,
post_clear_cache,
default_index,
- pack,
- unpack
)
import git.objects
@@ -60,20 +56,17 @@ from git.utils import (
LockedFD,
join_path_native,
file_contents_ro,
- LockFile
- )
-
-
-from gitdb.base import (
- IStream
)
from fun import (
write_cache,
read_cache,
+ write_tree_from_cache,
entry_key
)
+from gitdb.base import IStream
+
__all__ = ( 'IndexFile', 'CheckoutError' )
@@ -161,10 +154,15 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
self.version, self.entries, self._extension_data, conten_sha = read_cache(stream)
return self
- def _serialize(self, stream, ignore_tree_extension_data=False):
+ def _entries_sorted(self):
+ """:return: list of entries, in a sorted fashion, first by path, then by stage"""
entries_sorted = self.entries.values()
- entries_sorted.sort(key=lambda e: (e[3], e.stage)) # use path/stage as sort key
- write_cache(entries_sorted,
+ entries_sorted.sort(key=lambda e: (e.path, e.stage)) # use path/stage as sort key
+ return entries_sorted
+
+ def _serialize(self, stream, ignore_tree_extension_data=False):
+ entries = self._entries_sorted()
+ write_cache(entries,
stream,
(ignore_tree_extension_data and None) or self._extension_data)
return self
@@ -403,7 +401,7 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
# TODO: is it necessary to convert the mode ? We did that when adding
# it to the index, right ?
mode = self._stat_mode_to_index_mode(entry.mode)
- blob = Blob(self.repo, entry.sha, mode, entry.path)
+ blob = Blob(self.repo, entry.hexsha, mode, entry.path)
blob.size = entry.size
output = (entry.stage, blob)
if predicate(output):
@@ -490,33 +488,31 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
# allows to lazily reread on demand
return self
- def _write_tree(self, missing_ok=False):
+ def write_tree(self):
"""Writes this index to a corresponding Tree object into the repository's
object database and return it.
-
- :param missing_ok:
- If True, missing objects referenced by this index will not result
- in an error.
-
- :return: Tree object representing this index"""
+
+ :return: Tree object representing this index
+ :note: The tree will be written even if one or more objects the tree refers to
+ does not yet exist in the object database. This could happen if you added
+ Entries to the index directly.
+ :raise ValueError: if there are no entries in the cache
+ :raise UnmergedEntriesError: """
# we obtain no lock as we just flush our contents to disk as tree
if not self.entries:
raise ValueError("Cannot write empty index")
+ # TODO: use memory db, this helps to prevent IO if the resulting tree
+ # already exists
+ entries = self._entries_sorted()
+ binsha, tree_items = write_tree_from_cache(entries, self.repo.odb, slice(0, len(entries)))
+ # note: additional deserialization could be saved if write_tree_from_cache
+ # would return sorted tree entries
+ root_tree = Tree(self.repo, b2a_hex(binsha), path='')
+ root_tree._cache = tree_items
+ return root_tree
- return Tree(self.repo, tree_sha, 0, '')
-
- def write_tree(self, missing_ok = False):
- index_path = self._index_path()
- tmp_index_mover = TemporaryFileSwap(index_path)
-
- self.write(index_path, ignore_tree_extension_data=True)
- tree_sha = self.repo.git.write_tree(missing_ok=missing_ok)
-
- del(tmp_index_mover) # as soon as possible
- return Tree(self.repo, tree_sha, 0, '')
-
def _process_diff_args(self, args):
try:
args.pop(args.index(self))
@@ -525,7 +521,6 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
# END remove self
return args
-
def _to_relative_path(self, path):
""":return: Version of path relative to our git directory or raise ValueError
if it is not within our git direcotory"""
@@ -599,7 +594,7 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
- BaseIndexEntry or type
Handling equals the one of Blob objects, but the stage may be
- explicitly set.
+ explicitly set. Please note that Index Entries require binary sha's.
:param force:
If True, otherwise ignored or excluded files will be
@@ -666,7 +661,7 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
fprogress(filepath, True, filepath)
return BaseIndexEntry((self._stat_mode_to_index_mode(st.st_mode),
- istream.sha, 0, filepath))
+ istream.binsha, 0, filepath))
# END utility method
@@ -691,14 +686,14 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
# HANLDE ENTRY OBJECT CREATION
# create objects if required, otherwise go with the existing shas
- null_entries_indices = [ i for i,e in enumerate(entries) if e.sha == Object.NULL_HEX_SHA ]
+ null_entries_indices = [ i for i,e in enumerate(entries) if e.binsha == Object.NULL_BIN_SHA ]
if null_entries_indices:
for ei in null_entries_indices:
null_entry = entries[ei]
new_entry = store_path(null_entry.path)
# update null entry
- entries[ei] = BaseIndexEntry((null_entry.mode, new_entry.sha, null_entry.stage, null_entry.path))
+ entries[ei] = BaseIndexEntry((null_entry.mode, new_entry.binsha, null_entry.stage, null_entry.path))
# END for each entry index
# END null_entry handling
@@ -707,7 +702,7 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
# all object sha's
if path_rewriter:
for i,e in enumerate(entries):
- entries[i] = BaseIndexEntry((e.mode, e.sha, e.stage, path_rewriter(e)))
+ entries[i] = BaseIndexEntry((e.mode, e.binsha, e.stage, path_rewriter(e)))
# END for each entry
# END handle path rewriting
diff --git a/lib/git/index/fun.py b/lib/git/index/fun.py
index 2e653ea6..557941d5 100644
--- a/lib/git/index/fun.py
+++ b/lib/git/index/fun.py
@@ -2,6 +2,11 @@
Contains standalone functions to accompany the index implementation and make it
more versatile
"""
+from stat import S_IFDIR
+from cStringIO import StringIO
+
+from git.errors import UnmergedEntriesError
+from git.objects.fun import tree_to_stream
from git.utils import (
IndexFileSHA1Writer,
)
@@ -16,12 +21,11 @@ from util import (
unpack
)
-from binascii import (
- hexlify,
- unhexlify
- )
+from gitdb.base import IStream
+from gitdb.typ import str_tree_type
+from binascii import a2b_hex
-__all__ = ('write_cache', 'read_cache' )
+__all__ = ('write_cache', 'read_cache', 'write_tree_from_cache', 'entry_key' )
def write_cache_entry(entry, stream):
"""Write the given entry to the stream"""
@@ -34,7 +38,7 @@ def write_cache_entry(entry, stream):
assert plen == len(path), "Path %s too long to fit into index" % entry[3]
flags = plen | entry[2]
write(pack(">LLLLLL20sH", entry[6], entry[7], entry[0],
- entry[8], entry[9], entry[10], unhexlify(entry[1]), flags))
+ entry[8], entry[9], entry[10], entry[1], flags))
write(path)
real_size = ((stream.tell() - beginoffset + 8) & ~7)
write("\0" * ((beginoffset + real_size) - stream.tell()))
@@ -80,7 +84,7 @@ def read_entry(stream):
real_size = ((stream.tell() - beginoffset + 8) & ~7)
data = stream.read((beginoffset + real_size) - stream.tell())
- return IndexEntry((mode, hexlify(sha), flags, path, ctime, mtime, dev, ino, uid, gid, size))
+ return IndexEntry((mode, sha, flags, path, ctime, mtime, dev, ino, uid, gid, size))
def read_header(stream):
"""Return tuple(version_long, num_entries) from the given stream"""
@@ -136,3 +140,58 @@ def read_cache(stream):
return (version, entries, extension_data, content_sha)
+def write_tree_from_cache(entries, odb, sl, si=0):
+ """Create a tree from the given sorted list of entries and put the respective
+ trees into the given object database
+ :param entries: **sorted** list of IndexEntries
+ :param odb: object database to store the trees in
+ :param si: start index at which we should start creating subtrees
+ :param sl: slice indicating the range we should process on the entries list
+ :return: tuple(binsha, list(tree_entry, ...)) a tuple of a sha and a list of
+ tree entries being a tuple of hexsha, mode, name"""
+ tree_items = list()
+ ci = sl.start
+ end = sl.stop
+ while ci < end:
+ entry = entries[ci]
+ if entry.stage != 0:
+ raise UnmergedEntriesError(entry)
+ # END abort on unmerged
+ ci += 1
+ rbound = entry.path.find('/', si)
+ if rbound == -1:
+ # its not a tree
+ tree_items.append((entry.binsha, entry.mode, entry.path[si:]))
+ else:
+ # find common base range
+ base = entry.path[si:rbound]
+ xi = ci
+ while xi < end:
+ oentry = entries[xi]
+ xi += 1
+ orbound = oentry.path.find('/')
+ if orbound == -1 or oentry.path[si:orbound] != base:
+ break
+ # END abort on base mismatch
+ # END find common base
+
+ # enter recursion
+ # ci - 1 as we want to count our current item as well
+ sha, tree_entry_list = write_tree_from_cache(entries, odb, slice(ci-1, xi), rbound+1)
+ tree_items.append((sha, S_IFDIR, base))
+
+ # skip ahead
+ ci = xi
+ # END handle bounds
+ # END for each entry
+
+ # finally create the tree
+ sio = StringIO()
+ tree_to_stream(tree_items, sio.write)
+ sio.seek(0)
+
+ istream = odb.store(IStream(str_tree_type, len(sio.getvalue()), sio))
+ return (istream.binsha, tree_items)
+
+
+
diff --git a/lib/git/index/typ.py b/lib/git/index/typ.py
index b5dac58a..6ef1d2f2 100644
--- a/lib/git/index/typ.py
+++ b/lib/git/index/typ.py
@@ -5,6 +5,11 @@ from util import (
unpack
)
+from binascii import (
+ b2a_hex,
+ a2b_hex
+ )
+
__all__ = ('BlobFilter', 'BaseIndexEntry', 'IndexEntry')
#{ Invariants
@@ -50,7 +55,7 @@ class BaseIndexEntry(tuple):
use numeric indices for performance reasons. """
def __str__(self):
- return "%o %s %i\t%s" % (self.mode, self.sha, self.stage, self.path)
+ return "%o %s %i\t%s" % (self.mode, self.hexsha, self.stage, self.path)
@property
def mode(self):
@@ -58,9 +63,14 @@ class BaseIndexEntry(tuple):
return self[0]
@property
- def sha(self):
- """ hex sha of the blob """
+ def binsha(self):
+ """binary sha of the blob """
return self[1]
+
+ @property
+ def hexsha(self):
+ """hex version of our sha"""
+ return b2a_hex(self[1])
@property
def stage(self):
@@ -88,7 +98,7 @@ class BaseIndexEntry(tuple):
@classmethod
def from_blob(cls, blob, stage = 0):
""":return: Fully equipped BaseIndexEntry at the given stage"""
- return cls((blob.mode, blob.sha, stage << CE_STAGESHIFT, blob.path))
+ return cls((blob.mode, a2b_hex(blob.sha), stage << CE_STAGESHIFT, blob.path))
class IndexEntry(BaseIndexEntry):
@@ -145,12 +155,12 @@ class IndexEntry(BaseIndexEntry):
:param base: Instance of type BaseIndexEntry"""
time = pack(">LL", 0, 0)
- return IndexEntry((base.mode, base.sha, base.flags, base.path, time, time, 0, 0, 0, 0, 0))
+ return IndexEntry((base.mode, base.binsha, base.flags, base.path, time, time, 0, 0, 0, 0, 0))
@classmethod
def from_blob(cls, blob, stage = 0):
""":return: Minimal entry resembling the given blob object"""
time = pack(">LL", 0, 0)
- return IndexEntry((blob.mode, blob.sha, stage << CE_STAGESHIFT, blob.path, time, time, 0, 0, 0, 0, blob.size))
+ return IndexEntry((blob.mode, a2b_hex(blob.sha), stage << CE_STAGESHIFT, blob.path, time, time, 0, 0, 0, 0, blob.size))
diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py
index 36bbccb2..90aa8ca2 100644
--- a/lib/git/objects/base.py
+++ b/lib/git/objects/base.py
@@ -21,6 +21,7 @@ class Object(LazyMixin):
inst.data # byte string containing the whole data of the object
"""
NULL_HEX_SHA = '0'*40
+ NULL_BIN_SHA = '\0'*20
TYPES = ("blob", "tree", "commit", "tag")
__slots__ = ("repo", "sha", "size", "data" )
type = None # to be set by subclass
diff --git a/lib/git/objects/fun.py b/lib/git/objects/fun.py
new file mode 100644
index 00000000..7882437d
--- /dev/null
+++ b/lib/git/objects/fun.py
@@ -0,0 +1,66 @@
+"""Module with functions which are supposed to be as fast as possible"""
+
+__all__ = ('tree_to_stream', 'tree_entries_from_data')
+
+def tree_to_stream(entries, write):
+ """Write the give list of entries into a stream using its write method
+ :param entries: **sorted** list of tuples with (binsha, mode, name)
+ :param write: write method which takes a data string"""
+ ord_zero = ord('0')
+ bit_mask = 7 # 3 bits set
+
+ for binsha, mode, name in entries:
+ mode_str = ''
+ for i in xrange(6):
+ mode_str = chr(((mode >> (i*3)) & bit_mask) + ord_zero) + mode_str
+ # END for each 8 octal value
+
+ # git slices away the first octal if its zero
+ if mode_str[0] == '0':
+ mode_str = mode_str[1:]
+ # END save a byte
+
+ write("%s %s\0%s" % (mode_str, name, binsha))
+ # END for each item
+
+
+def tree_entries_from_data(data):
+ """Reads the binary representation of a tree and returns tuples of Tree items
+ :param data: data block with tree data
+ :return: list(tuple(binsha, mode, tree_relative_path), ...)"""
+ ord_zero = ord('0')
+ len_data = len(data)
+ i = 0
+ out = list()
+ while i < len_data:
+ mode = 0
+
+ # read mode
+ # Some git versions truncate the leading 0, some don't
+ # The type will be extracted from the mode later
+ while data[i] != ' ':
+ # move existing mode integer up one level being 3 bits
+ # and add the actual ordinal value of the character
+ mode = (mode << 3) + (ord(data[i]) - ord_zero)
+ i += 1
+ # END while reading mode
+
+ # byte is space now, skip it
+ i += 1
+
+ # parse name, it is NULL separated
+
+ ns = i
+ while data[i] != '\0':
+ i += 1
+ # END while not reached NULL
+ name = data[ns:i]
+
+ # byte is NULL, get next 20
+ i += 1
+ sha = data[i:i+20]
+ i = i + 20
+
+ out.append((sha, mode, name))
+ # END for each byte in data stream
+ return out
diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py
index eb8aa9eb..6b1d13c1 100644
--- a/lib/git/objects/tree.py
+++ b/lib/git/objects/tree.py
@@ -5,21 +5,21 @@
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
import os
+import utils
+import base
+
from blob import Blob
from submodule import Submodule
-import base
-import binascii
import git.diff as diff
-import utils
-from git.utils import join_path
-
join = os.path.join
-def sha_to_hex(sha):
- """Takes a string and returns the hex of the sha within"""
- hexsha = binascii.hexlify(sha)
- return hexsha
-
+from fun import (
+ tree_entries_from_data,
+ tree_to_stream
+ )
+
+from gitdb.util import to_bin_sha
+from binascii import b2a_hex
class TreeModifier(object):
"""A utility class providing methods to alter the underlying cache in a list-like
@@ -51,25 +51,24 @@ class TreeModifier(object):
#} END interface
#{ Mutators
- def add(self, hexsha, mode, name, force=False):
+ def add(self, sha, mode, name, force=False):
"""Add the given item to the tree. If an item with the given name already
exists, nothing will be done, but a ValueError will be raised if the
sha and mode of the existing item do not match the one you add, unless
force is True
- :param hexsha: The 40 byte sha of the item to add
+ :param sha: The 20 or 40 byte sha of the item to add
:param mode: int representing the stat compatible mode of the item
:param force: If True, an item with your name and information will overwrite
any existing item with the same name, no matter which information it has
:return: self"""
if '/' in name:
raise ValueError("Name must not contain '/' characters")
- if len(hexsha) != 40:
- raise ValueError("Hexsha required, got %r" % hexsha)
if (mode >> 12) not in Tree._map_id_to_type:
raise ValueError("Invalid object type according to mode %o" % mode)
-
+
+ sha = to_bin_sha(sha)
index = self._index_by_name(name)
- item = (hexsha, mode, name)
+ item = (sha, mode, name)
if index == -1:
self._cache.append(item)
else:
@@ -77,18 +76,19 @@ class TreeModifier(object):
self._cache[index] = item
else:
ex_item = self._cache[index]
- if ex_item[0] != hexsha or ex_item[1] != mode:
+ if ex_item[0] != sha or ex_item[1] != mode:
raise ValueError("Item %r existed with different properties" % name)
# END handle mismatch
# END handle force
# END handle name exists
return self
- def add_unchecked(self, hexsha, mode, name):
+ def add_unchecked(self, binsha, mode, name):
"""Add the given item to the tree, its correctness is assumed, which
puts the caller into responsibility to assure the input is correct.
- For more information on the parameters, see ``add``"""
- self._cache.append((hexsha, mode, name))
+ For more information on the parameters, see ``add``
+ :param binsha: 20 byte binary sha"""
+ self._cache.append((binsha, mode, name))
def __delitem__(self, name):
"""Deletes an item with the given name if it exists"""
@@ -146,70 +146,21 @@ class Tree(base.IndexObject, diff.Diffable, utils.Traversable, utils.Serializabl
def _set_cache_(self, attr):
if attr == "_cache":
# Set the data when we need it
- self._cache = self._get_tree_cache(self.data)
+ self._cache = tree_entries_from_data(self.data)
else:
super(Tree, self)._set_cache_(attr)
- def _get_tree_cache(self, data):
- """ :return: list(object_instance, ...)
- :param data: data string containing our serialized information"""
- return list(self._iter_from_data(data))
-
def _iter_convert_to_object(self, iterable):
"""Iterable yields tuples of (hexsha, mode, name), which will be converted
to the respective object representation"""
- for hexsha, mode, name in iterable:
+ for binsha, mode, name in iterable:
path = join(self.path, name)
type_id = mode >> 12
try:
- yield self._map_id_to_type[type_id](self.repo, hexsha, mode, path)
+ yield self._map_id_to_type[type_id](self.repo, b2a_hex(binsha), mode, path)
except KeyError:
raise TypeError( "Unknown type %i found in tree data for path '%s'" % (type_id, path))
# END for each item
-
- def _iter_from_data(self, data):
- """
- Reads the binary non-pretty printed representation of a tree and converts
- it into Blob, Tree or Commit objects.
-
- Note: This method was inspired by the parse_tree method in dulwich.
-
- :yield: Tuple(hexsha, mode, tree_relative_path)
- """
- ord_zero = ord('0')
- len_data = len(data)
- i = 0
- while i < len_data:
- mode = 0
-
- # read mode
- # Some git versions truncate the leading 0, some don't
- # The type will be extracted from the mode later
- while data[i] != ' ':
- # move existing mode integer up one level being 3 bits
- # and add the actual ordinal value of the character
- mode = (mode << 3) + (ord(data[i]) - ord_zero)
- i += 1
- # END while reading mode
-
- # byte is space now, skip it
- i += 1
-
- # parse name, it is NULL separated
-
- ns = i
- while data[i] != '\0':
- i += 1
- # END while not reached NULL
- name = data[ns:i]
-
- # byte is NULL, get next 20
- i += 1
- sha = data[i:i+20]
- i = i + 20
-
- yield (sha_to_hex(sha), mode, name)
- # END for each byte in data stream
def __div__(self, file):
"""
@@ -250,7 +201,7 @@ class Tree(base.IndexObject, diff.Diffable, utils.Traversable, utils.Serializabl
else:
for info in self._cache:
if info[2] == file: # [2] == name
- return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join(self.path, info[2]))
+ return self._map_id_to_type[info[1] >> 12](self.repo, b2a_hex(info[0]), info[1], join(self.path, info[2]))
# END for each obj
raise KeyError( msg % file )
# END handle long paths
@@ -304,7 +255,7 @@ class Tree(base.IndexObject, diff.Diffable, utils.Traversable, utils.Serializabl
def __getitem__(self, item):
if isinstance(item, int):
info = self._cache[item]
- return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join(self.path, info[2]))
+ return self._map_id_to_type[info[1] >> 12](self.repo, b2a_hex(info[0]), info[1], join(self.path, info[2]))
if isinstance(item, basestring):
# compatability
@@ -335,32 +286,16 @@ class Tree(base.IndexObject, diff.Diffable, utils.Traversable, utils.Serializabl
def __reversed__(self):
return reversed(self._iter_convert_to_object(self._cache))
- def _serialize(self, stream, presort=False):
+ def _serialize(self, stream):
"""Serialize this tree into the stream. Please note that we will assume
our tree data to be in a sorted state. If this is not the case, serialization
will not generate a correct tree representation as these are assumed to be sorted
by algorithms"""
- ord_zero = ord('0')
- bit_mask = 7 # 3 bits set
- hex_to_bin = binascii.a2b_hex
-
- for hexsha, mode, name in self._cache:
- mode_str = ''
- for i in xrange(6):
- mode_str = chr(((mode >> (i*3)) & bit_mask) + ord_zero) + mode_str
- # END for each 8 octal value
-
- # git slices away the first octal if its zero
- if mode_str[0] == '0':
- mode_str = mode_str[1:]
- # END save a byte
-
- stream.write("%s %s\0%s" % (mode_str, name, hex_to_bin(hexsha)))
- # END for each item
+ tree_to_stream(self._cache, stream.write)
return self
def _deserialize(self, stream):
- self._cache = self._get_tree_cache(stream.read())
+ self._cache = tree_entries_from_data(stream.read())
return self
diff --git a/lib/git/utils.py b/lib/git/utils.py
index 3fb7fbf8..38501292 100644
--- a/lib/git/utils.py
+++ b/lib/git/utils.py
@@ -10,12 +10,14 @@ import time
import tempfile
from gitdb.util import (
- make_sha,
- FDStreamWrapper,
- LockedFD,
- file_contents_ro,
- LazyMixin
- )
+ make_sha,
+ FDStreamWrapper,
+ LockedFD,
+ file_contents_ro,
+ LazyMixin,
+ to_hex_sha,
+ to_bin_sha
+ )
def stream_copy(source, destination, chunk_size=512*1024):
diff --git a/test/git/test_index.py b/test/git/test_index.py
index 21d610db..efb1b477 100644
--- a/test/git/test_index.py
+++ b/test/git/test_index.py
@@ -55,7 +55,7 @@ class TestIndex(TestBase):
last_val = None
entry = index.entries.itervalues().next()
for attr in ("path","ctime","mtime","dev","inode","mode","uid",
- "gid","size","sha","stage"):
+ "gid","size","binsha", "hexsha", "stage"):
val = getattr(entry, attr)
# END for each method
@@ -128,7 +128,7 @@ class TestIndex(TestBase):
# writing a tree should fail with an unmerged index
- self.failUnlessRaises(GitCommandError, three_way_index.write_tree)
+ self.failUnlessRaises(UnmergedEntriesError, three_way_index.write_tree)
# removed unmerged entries
unmerged_blob_map = three_way_index.unmerged_blobs()
@@ -159,27 +159,27 @@ class TestIndex(TestBase):
manifest_entry = rw_repo.index.entries[manifest_key]
rw_repo.index.merge_tree(next_commit)
# only one change should be recorded
- assert manifest_entry.sha != rw_repo.index.entries[manifest_key].sha
+ assert manifest_entry.binsha != rw_repo.index.entries[manifest_key].binsha
rw_repo.index.reset(rw_repo.head)
- assert rw_repo.index.entries[manifest_key].sha == manifest_entry.sha
+ assert rw_repo.index.entries[manifest_key].binsha == manifest_entry.binsha
# FAKE MERGE
#############
# Add a change with a NULL sha that should conflict with next_commit. We
# pretend there was a change, but we do not even bother adding a proper
# sha for it ( which makes things faster of course )
- manifest_fake_entry = BaseIndexEntry((manifest_entry[0], Diff.null_hex_sha, 0, manifest_entry[3]))
+ manifest_fake_entry = BaseIndexEntry((manifest_entry[0], "\0"*20, 0, manifest_entry[3]))
rw_repo.index.add([manifest_fake_entry])
# add actually resolves the null-hex-sha for us as a feature, but we can
# edit the index manually
- assert rw_repo.index.entries[manifest_key].sha != Diff.null_hex_sha
+ assert rw_repo.index.entries[manifest_key].binsha != Object.NULL_BIN_SHA
# must operate on the same index for this ! Its a bit problematic as
# it might confuse people
index = rw_repo.index
index.entries[manifest_key] = IndexEntry.from_base(manifest_fake_entry)
index.write()
- assert rw_repo.index.entries[manifest_key].sha == Diff.null_hex_sha
+ assert rw_repo.index.entries[manifest_key].hexsha == Diff.NULL_HEX_SHA
# a three way merge would result in a conflict and fails as the command will
# not overwrite any entries in our index and hence leave them unmerged. This is
@@ -189,10 +189,11 @@ class TestIndex(TestBase):
# the only way to get the merged entries is to safe the current index away into a tree,
# which is like a temporary commit for us. This fails as well as the NULL sha deos not
# have a corresponding object
- self.failUnlessRaises(GitCommandError, index.write_tree)
+ # NOTE: missing_ok is not a kwarg anymore, missing_ok is always true
+ # self.failUnlessRaises(GitCommandError, index.write_tree)
- # if missing objects are okay, this would work though
- tree = index.write_tree(missing_ok = True)
+ # if missing objects are okay, this would work though ( they are always okay now )
+ tree = index.write_tree()
# now make a proper three way merge with unmerged entries
unmerged_tree = IndexFile.from_tree(rw_repo, parent_commit, tree, next_commit)
@@ -348,7 +349,7 @@ class TestIndex(TestBase):
if type_id == 0: # path
yield entry.path
elif type_id == 1: # blob
- yield Blob(rw_repo, entry.sha, entry.mode, entry.path)
+ yield Blob(rw_repo, entry.hexsha, entry.mode, entry.path)
elif type_id == 2: # BaseIndexEntry
yield BaseIndexEntry(entry[:4])
elif type_id == 3: # IndexEntry
@@ -442,18 +443,19 @@ class TestIndex(TestBase):
old_blob = new_commit.parents[0].tree.blobs[0]
entries = index.reset(new_commit).add([old_blob], fprogress=self._fprogress_add)
self._assert_fprogress(entries)
- assert index.entries[(old_blob.path,0)].sha == old_blob.sha and len(entries) == 1
+ assert index.entries[(old_blob.path,0)].hexsha == old_blob.sha and len(entries) == 1
# mode 0 not allowed
- null_sha = "0"*40
- self.failUnlessRaises(ValueError, index.reset(new_commit).add, [BaseIndexEntry((0, null_sha,0,"doesntmatter"))])
+ null_hex_sha = Diff.NULL_HEX_SHA
+ null_bin_sha = "\0" * 20
+ self.failUnlessRaises(ValueError, index.reset(new_commit).add, [BaseIndexEntry((0, null_bin_sha,0,"doesntmatter"))])
# add new file
new_file_relapath = "my_new_file"
new_file_path = self._make_file(new_file_relapath, "hello world", rw_repo)
- entries = index.reset(new_commit).add([BaseIndexEntry((010644, null_sha, 0, new_file_relapath))], fprogress=self._fprogress_add)
+ entries = index.reset(new_commit).add([BaseIndexEntry((010644, null_bin_sha, 0, new_file_relapath))], fprogress=self._fprogress_add)
self._assert_fprogress(entries)
- assert len(entries) == 1 and entries[0].sha != null_sha
+ assert len(entries) == 1 and entries[0].hexsha != null_hex_sha
# add symlink
if sys.platform != "win32":
@@ -467,21 +469,21 @@ class TestIndex(TestBase):
assert S_ISLNK(index.entries[index.entry_key("my_real_symlink", 0)].mode)
# we expect only the target to be written
- assert index.repo.odb.stream(entries[0].sha).read() == target
+ assert index.repo.odb.stream(entries[0].binsha).read() == target
# END real symlink test
# add fake symlink and assure it checks-our as symlink
fake_symlink_relapath = "my_fake_symlink"
link_target = "/etc/that"
fake_symlink_path = self._make_file(fake_symlink_relapath, link_target, rw_repo)
- fake_entry = BaseIndexEntry((0120000, null_sha, 0, fake_symlink_relapath))
+ fake_entry = BaseIndexEntry((0120000, null_hex_sha, 0, fake_symlink_relapath))
entries = index.reset(new_commit).add([fake_entry], fprogress=self._fprogress_add)
self._assert_fprogress(entries)
- assert entries[0].sha != null_sha
+ assert entries[0].hexsha != null_hex_sha
assert len(entries) == 1 and S_ISLNK(entries[0].mode)
# assure this also works with an alternate method
- full_index_entry = IndexEntry.from_base(BaseIndexEntry((0120000, entries[0].sha, 0, entries[0].path)))
+ full_index_entry = IndexEntry.from_base(BaseIndexEntry((0120000, entries[0].binsha, 0, entries[0].path)))
entry_key = index.entry_key(full_index_entry)
index.reset(new_commit)
@@ -493,7 +495,7 @@ class TestIndex(TestBase):
assert S_ISLNK(new_entry.mode)
# a tree created from this should contain the symlink
- tree = index.write_tree(True)
+ tree = index.write_tree()
assert fake_symlink_relapath in tree
# checkout the fakelink, should be a link then
@@ -567,3 +569,5 @@ class TestIndex(TestBase):
for filenum in range(len(paths)):
assert index.entry_key(str(filenum), 0) in index.entries
+ def test_compare_write_tree(self):
+ self.fail("compare git-write-tree with python implementation, must have same output")