diff options
author | Sebastian Thiel <byronimo@gmail.com> | 2015-01-06 16:11:34 +0100 |
---|---|---|
committer | Sebastian Thiel <byronimo@gmail.com> | 2015-01-06 16:11:34 +0100 |
commit | 56e942318f3c493c8dcd4759f806034331ebeda5 (patch) | |
tree | 82cdca65cd197f36ea3680171186e0ddcf234266 /git/objects | |
parent | d46e3fe9cb0dea2617cd9231d29bf6919b0f1e91 (diff) | |
parent | 68f8a43d1b643318732f30ee1cd75e1d315a4537 (diff) | |
download | gitpython-56e942318f3c493c8dcd4759f806034331ebeda5.tar.gz |
Merge branch 'py3' into 0.3
Conflicts:
git/refs/log.py
Diffstat (limited to 'git/objects')
-rw-r--r-- | git/objects/__init__.py | 7 | ||||
-rw-r--r-- | git/objects/base.py | 11 | ||||
-rw-r--r-- | git/objects/blob.py | 3 | ||||
-rw-r--r-- | git/objects/commit.py | 99 | ||||
-rw-r--r-- | git/objects/fun.py | 33 | ||||
-rw-r--r-- | git/objects/submodule/base.py | 44 | ||||
-rw-r--r-- | git/objects/submodule/root.py | 7 | ||||
-rw-r--r-- | git/objects/submodule/util.py | 4 | ||||
-rw-r--r-- | git/objects/tag.py | 12 | ||||
-rw-r--r-- | git/objects/tree.py | 31 | ||||
-rw-r--r-- | git/objects/util.py | 16 |
11 files changed, 149 insertions, 118 deletions
diff --git a/git/objects/__init__.py b/git/objects/__init__.py index 70fc52cb..ee642876 100644 --- a/git/objects/__init__.py +++ b/git/objects/__init__.py @@ -7,9 +7,10 @@ import inspect from .base import * # Fix import dependency - add IndexObject to the util module, so that it can be # imported by the submodule.base -from .submodule import util -util.IndexObject = IndexObject -util.Object = Object +from .submodule import util as smutil +smutil.IndexObject = IndexObject +smutil.Object = Object +del(smutil) from .submodule.base import * from .submodule.root import * diff --git a/git/objects/base.py b/git/objects/base.py index 20147e57..3f595d9d 100644 --- a/git/objects/base.py +++ b/git/objects/base.py @@ -3,8 +3,8 @@ # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php +from .util import get_object_type_by_name from git.util import LazyMixin, join_path_native, stream_copy -from util import get_object_type_by_name from gitdb.util import ( bin_to_hex, basename @@ -21,7 +21,7 @@ class Object(LazyMixin): """Implements an Object which may be Blobs, Trees, Commits and Tags""" NULL_HEX_SHA = '0' * 40 - NULL_BIN_SHA = '\0' * 20 + NULL_BIN_SHA = b'\0' * 20 TYPES = (dbtyp.str_blob_type, dbtyp.str_tree_type, dbtyp.str_commit_type, dbtyp.str_tag_type) __slots__ = ("repo", "binsha", "size") @@ -60,7 +60,7 @@ class Object(LazyMixin): :param sha1: 20 byte binary sha1""" if sha1 == cls.NULL_BIN_SHA: # the NULL binsha is always the root commit - return get_object_type_by_name('commit')(repo, sha1) + return get_object_type_by_name(b'commit')(repo, sha1) # END handle special case oinfo = repo.odb.info(sha1) inst = get_object_type_by_name(oinfo.type)(repo, oinfo.binsha) @@ -94,7 +94,7 @@ class Object(LazyMixin): def __str__(self): """:return: string of our SHA1 as understood by all git commands""" - return bin_to_hex(self.binsha) + return self.hexsha def __repr__(self): """:return: string with pythonic representation of our object""" @@ -103,7 +103,8 @@ class Object(LazyMixin): @property def hexsha(self): """:return: 40 byte hex version of our 20 byte binary sha""" - return bin_to_hex(self.binsha) + # b2a_hex produces bytes + return bin_to_hex(self.binsha).decode('ascii') @property def data_stream(self): diff --git a/git/objects/blob.py b/git/objects/blob.py index b05e5b84..322f6992 100644 --- a/git/objects/blob.py +++ b/git/objects/blob.py @@ -3,9 +3,8 @@ # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php - from mimetypes import guess_type -import base +from . import base __all__ = ('Blob', ) diff --git a/git/objects/commit.py b/git/objects/commit.py index 9c733695..8f93d1b9 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -4,6 +4,8 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php +from gitdb import IStream +from gitdb.util import hex_to_bin from git.util import ( Actor, Iterable, @@ -11,26 +13,24 @@ from git.util import ( finalize_process ) from git.diff import Diffable -from tree import Tree -from gitdb import IStream -from cStringIO import StringIO -import base -from gitdb.util import ( - hex_to_bin -) -from util import ( +from .tree import Tree +from . import base +from .util import ( Traversable, Serializable, parse_date, altz_to_utctz_str, parse_actor_and_date ) +from git.compat import text_type + from time import ( time, altzone ) import os +from io import BytesIO import logging log = logging.getLogger('git.objects.commit') @@ -62,7 +62,7 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): "author", "authored_date", "author_tz_offset", "committer", "committed_date", "committer_tz_offset", "message", "parents", "encoding", "gpgsig") - _id_attribute_ = "binsha" + _id_attribute_ = "hexsha" def __init__(self, repo, binsha, tree=None, author=None, authored_date=None, author_tz_offset=None, committer=None, committed_date=None, committer_tz_offset=None, @@ -133,7 +133,7 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): if attr in Commit.__slots__: # read the data in a chunk, its faster - then provide a file wrapper binsha, typename, self.size, stream = self.repo.odb.stream(self.binsha) - self._deserialize(StringIO(stream.read())) + self._deserialize(BytesIO(stream.read())) else: super(Commit, self)._set_cache_(attr) # END handle attrs @@ -345,7 +345,7 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): committer, committer_time, committer_offset, message, parent_commits, conf_encoding) - stream = StringIO() + stream = BytesIO() new_commit._serialize(stream) streamlen = stream.tell() stream.seek(0) @@ -373,43 +373,36 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): def _serialize(self, stream): write = stream.write - write("tree %s\n" % self.tree) + write(("tree %s\n" % self.tree).encode('ascii')) for p in self.parents: - write("parent %s\n" % p) + write(("parent %s\n" % p).encode('ascii')) a = self.author aname = a.name - if isinstance(aname, unicode): - aname = aname.encode(self.encoding) - # END handle unicode in name - c = self.committer fmt = "%s %s <%s> %s %s\n" - write(fmt % ("author", aname, a.email, - self.authored_date, - altz_to_utctz_str(self.author_tz_offset))) + write((fmt % ("author", aname, a.email, + self.authored_date, + altz_to_utctz_str(self.author_tz_offset))).encode(self.encoding)) # encode committer aname = c.name - if isinstance(aname, unicode): - aname = aname.encode(self.encoding) - # END handle unicode in name - write(fmt % ("committer", aname, c.email, - self.committed_date, - altz_to_utctz_str(self.committer_tz_offset))) + write((fmt % ("committer", aname, c.email, + self.committed_date, + altz_to_utctz_str(self.committer_tz_offset))).encode(self.encoding)) if self.encoding != self.default_encoding: - write("encoding %s\n" % self.encoding) + write(("encoding %s\n" % self.encoding).encode('ascii')) if self.gpgsig: - write("gpgsig") + write(b"gpgsig") for sigline in self.gpgsig.rstrip("\n").split("\n"): - write(" " + sigline + "\n") + write((" " + sigline + "\n").encode('ascii')) - write("\n") + write(b"\n") # write plain bytes, be sure its encoded according to our encoding - if isinstance(self.message, unicode): + if isinstance(self.message, text_type): write(self.message.encode(self.encoding)) else: write(self.message) @@ -426,23 +419,25 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): next_line = None while True: parent_line = readline() - if not parent_line.startswith('parent'): + if not parent_line.startswith(b'parent'): next_line = parent_line break # END abort reading parents - self.parents.append(type(self)(self.repo, hex_to_bin(parent_line.split()[-1]))) + self.parents.append(type(self)(self.repo, hex_to_bin(parent_line.split()[-1].decode('ascii')))) # END for each parent line self.parents = tuple(self.parents) - self.author, self.authored_date, self.author_tz_offset = parse_actor_and_date(next_line) - self.committer, self.committed_date, self.committer_tz_offset = parse_actor_and_date(readline()) + # we don't know actual author encoding before we have parsed it, so keep the lines around + author_line = next_line + committer_line = readline() # we might run into one or more mergetag blocks, skip those for now next_line = readline() - while next_line.startswith('mergetag '): + while next_line.startswith(b'mergetag '): next_line = readline() while next_line.startswith(' '): next_line = readline() + # end skip mergetags # now we can have the encoding line, or an empty line followed by the optional # message. @@ -451,39 +446,40 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): # read headers enc = next_line buf = enc.strip() - while buf != "": - if buf[0:10] == "encoding ": - self.encoding = buf[buf.find(' ') + 1:] - elif buf[0:7] == "gpgsig ": - sig = buf[buf.find(' ') + 1:] + "\n" + while buf: + if buf[0:10] == b"encoding ": + self.encoding = buf[buf.find(' ') + 1:].decode('ascii') + elif buf[0:7] == b"gpgsig ": + sig = buf[buf.find(b' ') + 1:] + b"\n" is_next_header = False while True: sigbuf = readline() - if sigbuf == "": + if not sigbuf: break - if sigbuf[0:1] != " ": + if sigbuf[0:1] != b" ": buf = sigbuf.strip() is_next_header = True break sig += sigbuf[1:] - self.gpgsig = sig.rstrip("\n") + # end read all signature + self.gpgsig = sig.rstrip(b"\n").decode('ascii') if is_next_header: continue buf = readline().strip() - # decode the authors name + try: - self.author.name = self.author.name.decode(self.encoding) + self.author, self.authored_date, self.author_tz_offset = \ + parse_actor_and_date(author_line.decode(self.encoding)) except UnicodeDecodeError: - log.error("Failed to decode author name '%s' using encoding %s", self.author.name, self.encoding, + log.error("Failed to decode author line '%s' using encoding %s", author_line, self.encoding, exc_info=True) - # END handle author's encoding - # decode committer name try: - self.committer.name = self.committer.name.decode(self.encoding) + self.committer, self.committed_date, self.committer_tz_offset = \ + parse_actor_and_date(committer_line.decode(self.encoding)) except UnicodeDecodeError: - log.error("Failed to decode committer name '%s' using encoding %s", self.committer.name, self.encoding, + log.error("Failed to decode committer line '%s' using encoding %s", committer_line, self.encoding, exc_info=True) # END handle author's encoding @@ -495,6 +491,7 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): except UnicodeDecodeError: log.error("Failed to decode message '%s' using encoding %s", self.message, self.encoding, exc_info=True) # END exception handling + return self #} END serializable implementation diff --git a/git/objects/fun.py b/git/objects/fun.py index 416a52e6..c04f80b5 100644 --- a/git/objects/fun.py +++ b/git/objects/fun.py @@ -1,5 +1,12 @@ """Module with functions which are supposed to be as fast as possible""" from stat import S_ISDIR +from git.compat import ( + byte_ord, + defenc, + xrange, + text_type, + bchr +) __all__ = ('tree_to_stream', 'tree_entries_from_data', 'traverse_trees_recursive', 'traverse_tree_recursive') @@ -13,13 +20,13 @@ def tree_to_stream(entries, write): bit_mask = 7 # 3 bits set for binsha, mode, name in entries: - mode_str = '' + mode_str = b'' for i in xrange(6): - mode_str = chr(((mode >> (i * 3)) & bit_mask) + ord_zero) + mode_str + mode_str = bchr(((mode >> (i * 3)) & bit_mask) + ord_zero) + mode_str # END for each 8 octal value # git slices away the first octal if its zero - if mode_str[0] == '0': + if byte_ord(mode_str[0]) == ord_zero: mode_str = mode_str[1:] # END save a byte @@ -28,17 +35,18 @@ def tree_to_stream(entries, write): # hence we must convert to an utf8 string for it to work properly. # According to my tests, this is exactly what git does, that is it just # takes the input literally, which appears to be utf8 on linux. - if isinstance(name, unicode): - name = name.encode("utf8") - write("%s %s\0%s" % (mode_str, name, binsha)) + if isinstance(name, text_type): + name = name.encode(defenc) + write(b''.join((mode_str, b' ', name, b'\0', binsha))) # END for each item def tree_entries_from_data(data): """Reads the binary representation of a tree and returns tuples of Tree items - :param data: data block with tree data + :param data: data block with tree data (as bytes) :return: list(tuple(binsha, mode, tree_relative_path), ...)""" ord_zero = ord('0') + space_ord = ord(' ') len_data = len(data) i = 0 out = list() @@ -48,10 +56,10 @@ def tree_entries_from_data(data): # read mode # Some git versions truncate the leading 0, some don't # The type will be extracted from the mode later - while data[i] != ' ': + while byte_ord(data[i]) != space_ord: # move existing mode integer up one level being 3 bits # and add the actual ordinal value of the character - mode = (mode << 3) + (ord(data[i]) - ord_zero) + mode = (mode << 3) + (byte_ord(data[i]) - ord_zero) i += 1 # END while reading mode @@ -61,7 +69,7 @@ def tree_entries_from_data(data): # parse name, it is NULL separated ns = i - while data[i] != '\0': + while byte_ord(data[i]) != 0: i += 1 # END while not reached NULL @@ -69,12 +77,9 @@ def tree_entries_from_data(data): # Only use the respective unicode object if the byte stream was encoded name = data[ns:i] try: - name_enc = name.decode("utf-8") + name = name.decode(defenc) except UnicodeDecodeError: pass - else: - if len(name) > len(name_enc): - name = name_enc # END handle encoding # byte is NULL, get next 20 diff --git a/git/objects/submodule/base.py b/git/objects/submodule/base.py index d6f8982b..0ec6f656 100644 --- a/git/objects/submodule/base.py +++ b/git/objects/submodule/base.py @@ -1,5 +1,5 @@ -import util -from util import ( +from . import util +from .util import ( mkhead, sm_name, sm_section, @@ -8,7 +8,7 @@ from util import ( find_first_remote_branch ) from git.objects.util import Traversable -from StringIO import StringIO # need a dict to set bloody .name field +from io import BytesIO # need a dict to set bloody .name field from git.util import ( Iterable, join_path_native, @@ -17,11 +17,15 @@ from git.util import ( rmtree ) -from git.config import SectionConstraint +from git.config import ( + SectionConstraint, + cp +) from git.exc import ( InvalidGitRepositoryError, NoSuchPathError ) +from git.compat import string_types import stat import git @@ -93,7 +97,7 @@ class Submodule(util.IndexObject, Iterable, Traversable): if url is not None: self._url = url if branch_path is not None: - assert isinstance(branch_path, basestring) + assert isinstance(branch_path, string_types) self._branch_path = branch_path if name is not None: self._name = name @@ -186,8 +190,8 @@ class Submodule(util.IndexObject, Iterable, Traversable): @classmethod def _sio_modules(cls, parent_commit): - """:return: Configuration file as StringIO - we only access it through the respective blob's data""" - sio = StringIO(parent_commit.tree[cls.k_modules_file].data_stream.read()) + """:return: Configuration file as BytesIO - we only access it through the respective blob's data""" + sio = BytesIO(parent_commit.tree[cls.k_modules_file].data_stream.read()) sio.name = cls.k_modules_file return sio @@ -301,6 +305,7 @@ class Submodule(util.IndexObject, Iterable, Traversable): writer.set_value(cls.k_head_option, br.path) sm._branch_path = br.path # END handle path + writer.release() del(writer) # we deliberatly assume that our head matches our index ! @@ -418,7 +423,9 @@ class Submodule(util.IndexObject, Iterable, Traversable): # the default implementation will be offended and not update the repository # Maybe this is a good way to assure it doesn't get into our way, but # we want to stay backwards compatible too ... . Its so redundant ! - self.repo.config_writer().set_value(sm_section(self.name), 'url', self.url) + writer = self.repo.config_writer() + writer.set_value(sm_section(self.name), 'url', self.url) + writer.release() # END handle dry_run # END handle initalization @@ -575,6 +582,7 @@ class Submodule(util.IndexObject, Iterable, Traversable): writer = self.config_writer(index=index) # auto-write writer.set_value('path', module_path) self.path = module_path + writer.release() del(writer) # END handle configuration flag except Exception: @@ -699,8 +707,12 @@ class Submodule(util.IndexObject, Iterable, Traversable): # now git config - need the config intact, otherwise we can't query # inforamtion anymore - self.repo.config_writer().remove_section(sm_section(self.name)) - self.config_writer().remove_section() + writer = self.repo.config_writer() + writer.remove_section(sm_section(self.name)) + writer.release() + writer = self.config_writer() + writer.remove_section() + writer.release() # END delete configuration # void our data not to delay invalid access @@ -799,14 +811,18 @@ class Submodule(util.IndexObject, Iterable, Traversable): """ :return: True if the submodule exists, False otherwise. Please note that a submodule may exist (in the .gitmodules file) even though its module - doesn't exist""" + doesn't exist on disk""" # keep attributes for later, and restore them if we have no valid data # this way we do not actually alter the state of the object loc = locals() for attr in self._cache_attrs: - if hasattr(self, attr): - loc[attr] = getattr(self, attr) - # END if we have the attribute cache + try: + if hasattr(self, attr): + loc[attr] = getattr(self, attr) + # END if we have the attribute cache + except cp.NoSectionError: + # on PY3, this can happen apparently ... don't know why this doesn't happen on PY2 + pass # END for each attr self._clear_cache() diff --git a/git/objects/submodule/root.py b/git/objects/submodule/root.py index 708749c7..8c9afff1 100644 --- a/git/objects/submodule/root.py +++ b/git/objects/submodule/root.py @@ -1,5 +1,8 @@ -from base import Submodule, UpdateProgress -from util import ( +from .base import ( + Submodule, + UpdateProgress +) +from .util import ( find_first_remote_branch ) from git.exc import InvalidGitRepositoryError diff --git a/git/objects/submodule/util.py b/git/objects/submodule/util.py index 01bd03b3..5604dec7 100644 --- a/git/objects/submodule/util.py +++ b/git/objects/submodule/util.py @@ -1,7 +1,7 @@ import git from git.exc import InvalidGitRepositoryError from git.config import GitConfigParser -from StringIO import StringIO +from io import BytesIO import weakref __all__ = ('sm_section', 'sm_name', 'mkhead', 'unbare_repo', 'find_first_remote_branch', @@ -83,7 +83,7 @@ class SubmoduleConfigParser(GitConfigParser): """Flush changes in our configuration file to the index""" assert self._smref is not None # should always have a file here - assert not isinstance(self._file_or_files, StringIO) + assert not isinstance(self._file_or_files, BytesIO) sm = self._smref() if sm is not None: diff --git a/git/objects/tag.py b/git/objects/tag.py index 3c379579..c8684447 100644 --- a/git/objects/tag.py +++ b/git/objects/tag.py @@ -4,12 +4,13 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php """ Module containing all object based types. """ -import base -from gitdb.util import hex_to_bin -from util import ( +from . import base +from .util import ( get_object_type_by_name, parse_actor_and_date ) +from gitdb.util import hex_to_bin +from git.compat import defenc __all__ = ("TagObject", ) @@ -52,11 +53,12 @@ class TagObject(base.Object): """Cache all our attributes at once""" if attr in TagObject.__slots__: ostream = self.repo.odb.stream(self.binsha) - lines = ostream.read().splitlines() + lines = ostream.read().decode(defenc).splitlines() obj, hexsha = lines[0].split(" ") # object <hexsha> type_token, type_name = lines[1].split(" ") # type <type_name> - self.object = get_object_type_by_name(type_name)(self.repo, hex_to_bin(hexsha)) + self.object = \ + get_object_type_by_name(type_name.encode('ascii'))(self.repo, hex_to_bin(hexsha)) self.tag = lines[2][4:] # tag <tag name> diff --git a/git/objects/tree.py b/git/objects/tree.py index c77e6056..f9bee01e 100644 --- a/git/objects/tree.py +++ b/git/objects/tree.py @@ -3,22 +3,21 @@ # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -import util -from base import IndexObject from git.util import join_path -from blob import Blob -from submodule.base import Submodule import git.diff as diff +from gitdb.util import to_bin_sha -from fun import ( +from . import util +from .base import IndexObject +from .blob import Blob +from .submodule.base import Submodule +from git.compat import string_types + +from .fun import ( tree_entries_from_data, tree_to_stream ) -from gitdb.util import ( - to_bin_sha, -) - __all__ = ("TreeModifier", "Tree") @@ -160,7 +159,7 @@ class Tree(IndexObject, diff.Diffable, util.Traversable, util.Serializable): raise TypeError("Unknown mode %o found in tree data for path '%s'" % (mode, path)) # END for each item - def __div__(self, file): + def join(self, file): """Find the named object in this tree's contents :return: ``git.Blob`` or ``git.Tree`` or ``git.Submodule`` @@ -193,6 +192,14 @@ class Tree(IndexObject, diff.Diffable, util.Traversable, util.Serializable): raise KeyError(msg % file) # END handle long paths + def __div__(self, file): + """For PY2 only""" + return self.join(file) + + def __truediv__(self, file): + """For PY3 only""" + return self.join(file) + @property def trees(self): """:return: list(Tree, ...) list of trees directly below this tree""" @@ -234,9 +241,9 @@ class Tree(IndexObject, diff.Diffable, util.Traversable, util.Serializable): info = self._cache[item] return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join_path(self.path, info[2])) - if isinstance(item, basestring): + if isinstance(item, string_types): # compatability - return self.__div__(item) + return self.join(item) # END index is basestring raise TypeError("Invalid index type: %r" % item) diff --git a/git/objects/util.py b/git/objects/util.py index fdf9622b..cefef862 100644 --- a/git/objects/util.py +++ b/git/objects/util.py @@ -46,17 +46,17 @@ def get_object_type_by_name(object_type_name): :param object_type_name: Member of TYPES :raise ValueError: In case object_type_name is unknown""" - if object_type_name == "commit": - import commit + if object_type_name == b"commit": + from . import commit return commit.Commit - elif object_type_name == "tag": - import tag + elif object_type_name == b"tag": + from . import tag return tag.TagObject - elif object_type_name == "blob": - import blob + elif object_type_name == b"blob": + from . import blob return blob.Blob - elif object_type_name == "tree": - import tree + elif object_type_name == b"tree": + from . import tree return tree.Tree else: raise ValueError("Cannot handle unknown object type: %s" % object_type_name) |