From f6aa8d116eb33293c0a9d6d600eb7c32832758b9 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 4 Jan 2015 19:14:33 +0100 Subject: initial set of adjustments to make (most) imports work. More to come, especially when it's about strings --- git/objects/commit.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'git/objects/commit.py') diff --git a/git/objects/commit.py b/git/objects/commit.py index 9c733695..5b6b9a33 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -4,6 +4,8 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php +from gitdb import IStream +from gitdb.util import hex_to_bin from git.util import ( Actor, Iterable, @@ -11,26 +13,23 @@ from git.util import ( finalize_process ) from git.diff import Diffable -from tree import Tree -from gitdb import IStream -from cStringIO import StringIO -import base -from gitdb.util import ( - hex_to_bin -) -from util import ( +from .tree import Tree +from . import base +from .util import ( Traversable, Serializable, parse_date, altz_to_utctz_str, parse_actor_and_date ) + from time import ( time, altzone ) import os +from io import StringIO import logging log = logging.getLogger('git.objects.commit') -- cgit v1.2.1 From ae2ff0f9d704dc776a1934f72a339da206a9fff4 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 4 Jan 2015 19:50:28 +0100 Subject: Dum brute force conversion of all types. However, StringIO really is ByteIO in most cases, and py2.7 should run but doesn't. This should be made work first. --- git/objects/commit.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'git/objects/commit.py') diff --git a/git/objects/commit.py b/git/objects/commit.py index 5b6b9a33..c9d7ddc8 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -23,6 +23,7 @@ from .util import ( altz_to_utctz_str, parse_actor_and_date ) +from git.compat import text_type from time import ( time, @@ -378,7 +379,7 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): a = self.author aname = a.name - if isinstance(aname, unicode): + if isinstance(aname, text_type): aname = aname.encode(self.encoding) # END handle unicode in name @@ -390,7 +391,7 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): # encode committer aname = c.name - if isinstance(aname, unicode): + if isinstance(aname, text_type): aname = aname.encode(self.encoding) # END handle unicode in name write(fmt % ("committer", aname, c.email, @@ -408,7 +409,7 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): write("\n") # write plain bytes, be sure its encoded according to our encoding - if isinstance(self.message, unicode): + if isinstance(self.message, text_type): write(self.message.encode(self.encoding)) else: write(self.message) -- cgit v1.2.1 From bc8c91200a7fb2140aadd283c66b5ab82f9ad61e Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 5 Jan 2015 10:09:51 +0100 Subject: Fixed io types to make tests work on PY2 once again. Now it's about going through PY3 issues --- git/objects/commit.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'git/objects/commit.py') diff --git a/git/objects/commit.py b/git/objects/commit.py index c9d7ddc8..79d460ad 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -30,7 +30,7 @@ from time import ( altzone ) import os -from io import StringIO +from io import BytesIO import logging log = logging.getLogger('git.objects.commit') @@ -133,7 +133,7 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): if attr in Commit.__slots__: # read the data in a chunk, its faster - then provide a file wrapper binsha, typename, self.size, stream = self.repo.odb.stream(self.binsha) - self._deserialize(StringIO(stream.read())) + self._deserialize(BytesIO(stream.read())) else: super(Commit, self)._set_cache_(attr) # END handle attrs @@ -345,7 +345,7 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): committer, committer_time, committer_offset, message, parent_commits, conf_encoding) - stream = StringIO() + stream = BytesIO() new_commit._serialize(stream) streamlen = stream.tell() stream.seek(0) -- cgit v1.2.1 From 3d0556a31916a709e9da3eafb92fc6b8bf69896c Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 5 Jan 2015 16:10:50 +0100 Subject: Added test of #147 to verify it works. Applied a few more fixes to commit implementation, possibly not the last --- git/objects/commit.py | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) (limited to 'git/objects/commit.py') diff --git a/git/objects/commit.py b/git/objects/commit.py index 79d460ad..5ad7902b 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -373,40 +373,33 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): def _serialize(self, stream): write = stream.write - write("tree %s\n" % self.tree) + write(("tree %s\n" % self.tree).encode('ascii')) for p in self.parents: - write("parent %s\n" % p) + write(("parent %s\n" % p).encode('ascii')) a = self.author aname = a.name - if isinstance(aname, text_type): - aname = aname.encode(self.encoding) - # END handle unicode in name - c = self.committer fmt = "%s %s <%s> %s %s\n" - write(fmt % ("author", aname, a.email, + write((fmt % ("author", aname, a.email, self.authored_date, - altz_to_utctz_str(self.author_tz_offset))) + altz_to_utctz_str(self.author_tz_offset))).encode(self.encoding)) # encode committer aname = c.name - if isinstance(aname, text_type): - aname = aname.encode(self.encoding) - # END handle unicode in name - write(fmt % ("committer", aname, c.email, + write((fmt % ("committer", aname, c.email, self.committed_date, - altz_to_utctz_str(self.committer_tz_offset))) + altz_to_utctz_str(self.committer_tz_offset))).encode(self.encoding)) if self.encoding != self.default_encoding: - write("encoding %s\n" % self.encoding) + write(("encoding %s\n" % self.encoding).encode('ascii')) if self.gpgsig: write("gpgsig") for sigline in self.gpgsig.rstrip("\n").split("\n"): - write(" " + sigline + "\n") + write((" " + sigline + "\n").encode('ascii')) - write("\n") + write(b"\n") # write plain bytes, be sure its encoded according to our encoding if isinstance(self.message, text_type): -- cgit v1.2.1 From 8a308613467a1510f8dac514624abae4e10c0779 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 5 Jan 2015 16:44:54 +0100 Subject: Fixes test_blob and improved commit writing/reading --- git/objects/commit.py | 44 ++++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 20 deletions(-) (limited to 'git/objects/commit.py') diff --git a/git/objects/commit.py b/git/objects/commit.py index 5ad7902b..f8b5c969 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -419,23 +419,25 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): next_line = None while True: parent_line = readline() - if not parent_line.startswith('parent'): + if not parent_line.startswith(b'parent'): next_line = parent_line break # END abort reading parents - self.parents.append(type(self)(self.repo, hex_to_bin(parent_line.split()[-1]))) + self.parents.append(type(self)(self.repo, hex_to_bin(parent_line.split()[-1].decode('ascii')))) # END for each parent line self.parents = tuple(self.parents) - self.author, self.authored_date, self.author_tz_offset = parse_actor_and_date(next_line) - self.committer, self.committed_date, self.committer_tz_offset = parse_actor_and_date(readline()) + # we don't know actual author encoding before we have parsed it, so keep the lines around + author_line = next_line + committer_line = readline() # we might run into one or more mergetag blocks, skip those for now next_line = readline() - while next_line.startswith('mergetag '): + while next_line.startswith(b'mergetag '): next_line = readline() while next_line.startswith(' '): next_line = readline() + # end skip mergetags # now we can have the encoding line, or an empty line followed by the optional # message. @@ -444,39 +446,40 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): # read headers enc = next_line buf = enc.strip() - while buf != "": - if buf[0:10] == "encoding ": - self.encoding = buf[buf.find(' ') + 1:] - elif buf[0:7] == "gpgsig ": - sig = buf[buf.find(' ') + 1:] + "\n" + while buf: + if buf[0:10] == b"encoding ": + self.encoding = buf[buf.find(' ') + 1:].decode('ascii') + elif buf[0:7] == b"gpgsig ": + sig = buf[buf.find(b' ') + 1:] + b"\n" is_next_header = False while True: sigbuf = readline() - if sigbuf == "": + if not sigbuf: break - if sigbuf[0:1] != " ": + if sigbuf[0:1] != b" ": buf = sigbuf.strip() is_next_header = True break sig += sigbuf[1:] - self.gpgsig = sig.rstrip("\n") + # end read all signature + self.gpgsig = sig.rstrip(b"\n").decode('ascii') if is_next_header: continue buf = readline().strip() - # decode the authors name + try: - self.author.name = self.author.name.decode(self.encoding) + self.author, self.authored_date, self.author_tz_offset = \ + parse_actor_and_date(author_line.decode(self.encoding)) except UnicodeDecodeError: - log.error("Failed to decode author name '%s' using encoding %s", self.author.name, self.encoding, + log.error("Failed to decode author line '%s' using encoding %s", author_line, self.encoding, exc_info=True) - # END handle author's encoding - # decode committer name try: - self.committer.name = self.committer.name.decode(self.encoding) + self.committer, self.committed_date, self.committer_tz_offset = \ + parse_actor_and_date(committer_line.decode(self.encoding)) except UnicodeDecodeError: - log.error("Failed to decode committer name '%s' using encoding %s", self.committer.name, self.encoding, + log.error("Failed to decode committer line '%s' using encoding %s", committer_line, self.encoding, exc_info=True) # END handle author's encoding @@ -488,6 +491,7 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): except UnicodeDecodeError: log.error("Failed to decode message '%s' using encoding %s", self.message, self.encoding, exc_info=True) # END exception handling + return self #} END serializable implementation -- cgit v1.2.1 From e1060a2a8c90c0730c3541811df8f906dac510a7 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 5 Jan 2015 17:59:22 +0100 Subject: test_commit works once again --- git/objects/commit.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'git/objects/commit.py') diff --git a/git/objects/commit.py b/git/objects/commit.py index f8b5c969..53af22cd 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -62,7 +62,7 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): "author", "authored_date", "author_tz_offset", "committer", "committed_date", "committer_tz_offset", "message", "parents", "encoding", "gpgsig") - _id_attribute_ = "binsha" + _id_attribute_ = "hexsha" def __init__(self, repo, binsha, tree=None, author=None, authored_date=None, author_tz_offset=None, committer=None, committed_date=None, committer_tz_offset=None, @@ -395,7 +395,7 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): write(("encoding %s\n" % self.encoding).encode('ascii')) if self.gpgsig: - write("gpgsig") + write(b"gpgsig") for sigline in self.gpgsig.rstrip("\n").split("\n"): write((" " + sigline + "\n").encode('ascii')) -- cgit v1.2.1 From e0c65d6638698f4e3a9e726efca8c0bcf466cd62 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 6 Jan 2015 15:38:20 +0100 Subject: Make flake8 happy --- git/objects/commit.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'git/objects/commit.py') diff --git a/git/objects/commit.py b/git/objects/commit.py index 53af22cd..8f93d1b9 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -382,14 +382,14 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): c = self.committer fmt = "%s %s <%s> %s %s\n" write((fmt % ("author", aname, a.email, - self.authored_date, - altz_to_utctz_str(self.author_tz_offset))).encode(self.encoding)) + self.authored_date, + altz_to_utctz_str(self.author_tz_offset))).encode(self.encoding)) # encode committer aname = c.name write((fmt % ("committer", aname, c.email, - self.committed_date, - altz_to_utctz_str(self.committer_tz_offset))).encode(self.encoding)) + self.committed_date, + altz_to_utctz_str(self.committer_tz_offset))).encode(self.encoding)) if self.encoding != self.default_encoding: write(("encoding %s\n" % self.encoding).encode('ascii')) -- cgit v1.2.1