diff options
author | Sebastian Thiel <byronimo@gmail.com> | 2010-10-15 12:34:43 +0200 |
---|---|---|
committer | Sebastian Thiel <byronimo@gmail.com> | 2010-10-15 12:40:39 +0200 |
commit | 0019d7dc8c72839d238065473a62b137c3c350f5 (patch) | |
tree | 75c5fcd85fe9e655e035bd3cbec10e49071562f1 | |
parent | 0f88fb96869b6ac3ed4dac7d23310a9327d3c89c (diff) | |
download | gitpython-0019d7dc8c72839d238065473a62b137c3c350f5.tar.gz |
Added unicode handling for author names. Names will now be properly encoded into the byte stream, as well as decoded from it.
-rw-r--r-- | doc/source/changes.rst | 4 | ||||
-rw-r--r-- | lib/git/objects/commit.py | 16 | ||||
-rw-r--r-- | test/git/test_commit.py | 29 |
3 files changed, 47 insertions, 2 deletions
diff --git a/doc/source/changes.rst b/doc/source/changes.rst index 2a7ff46b..730d5867 100644 --- a/doc/source/changes.rst +++ b/doc/source/changes.rst @@ -2,6 +2,10 @@ Changelog ========= +0.3.0 Beta 3 +============ +* Added unicode support for author names. Commit.author.name is now unicode instead of string. + 0.3.0 Beta 2 ============ * Added python 2.4 support diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index f3a6e216..c7da01e8 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -368,9 +368,14 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): write("parent %s\n" % p) a = self.author + aname = a.name + if isinstance(aname, unicode): + aname = aname.encode(self.encoding) + # END handle unicode in name + c = self.committer fmt = "%s %s <%s> %s %s\n" - write(fmt % ("author", a.name, a.email, + write(fmt % ("author", aname, a.email, self.authored_date, altz_to_utctz_str(self.author_tz_offset))) @@ -425,12 +430,19 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): readline() # END handle encoding + # decode the authors name + try: + self.author.name = self.author.name.decode(self.encoding) + except UnicodeDecodeError: + print >> sys.stderr, "Failed to decode author name: %s" % self.author.name + # END handle author's encoding + # a stream from our data simply gives us the plain message # The end of our message stream is marked with a newline that we strip self.message = stream.read() try: self.message = self.message.decode(self.encoding) - except Exception: + except UnicodeDecodeError: print >> sys.stderr, "Failed to decode message: %s" % self.message # END exception handling return self diff --git a/test/git/test_commit.py b/test/git/test_commit.py index a9ea7f98..2692938f 100644 --- a/test/git/test_commit.py +++ b/test/git/test_commit.py @@ -242,3 +242,32 @@ class TestCommit(TestBase): # create all commits of our repo assert_commit_serialization(rwrepo, '0.1.6') + def 
test_serialization_unicode_support(self): + assert Commit.default_encoding.lower() == 'utf-8' + + # create a commit with unicode in the message, and the author's name + # Verify its serialization and deserialization + cmt = self.rorepo.commit('0.1.6') + assert isinstance(cmt.message, unicode) # it automatically decodes it as such + assert isinstance(cmt.author.name, unicode) # same here + + cmt.message = "üäêèß".decode("utf-8") + assert len(cmt.message) == 5 + + cmt.author.name = "äüß".decode("utf-8") + assert len(cmt.author.name) == 3 + + cstream = StringIO() + cmt._serialize(cstream) + cstream.seek(0) + assert len(cstream.getvalue()) + + ncmt = Commit(self.rorepo, cmt.binsha) + ncmt._deserialize(cstream) + + assert cmt.author.name == ncmt.author.name + assert cmt.message == ncmt.message + # actually, it can't be printed in a shell as repr wants to have ascii only + # it appears + cmt.author.__repr__() + |