diff options
author | Sebastian Thiel <byronimo@gmail.com> | 2010-10-15 12:40:54 +0200 |
---|---|---|
committer | Sebastian Thiel <byronimo@gmail.com> | 2010-10-15 12:40:54 +0200 |
commit | 13647590f96fb5a22cb60f12c5a70e00065a7f3a (patch) | |
tree | 75c5fcd85fe9e655e035bd3cbec10e49071562f1 | |
parent | 741dfaadf732d4a2a897250c006d5ef3d3cd9f3a (diff) | |
parent | 0019d7dc8c72839d238065473a62b137c3c350f5 (diff) | |
download | gitpython-13647590f96fb5a22cb60f12c5a70e00065a7f3a.tar.gz |
Merge branch 'unicode'
-rw-r--r-- | doc/source/changes.rst | 4 | ||||
m--------- | lib/git/ext/gitdb | 0 | ||||
-rw-r--r-- | lib/git/objects/commit.py | 16 | ||||
-rw-r--r-- | test/git/performance/test_odb.py | 5 | ||||
-rw-r--r-- | test/git/test_commit.py | 38 |
5 files changed, 59 insertions, 4 deletions
diff --git a/doc/source/changes.rst b/doc/source/changes.rst index 2a7ff46b..730d5867 100644 --- a/doc/source/changes.rst +++ b/doc/source/changes.rst @@ -2,6 +2,10 @@ Changelog ========= +0.3.0 Beta 3 +============ +* Added unicode support for author names. Commit.author.name is now unicode instead of string. + 0.3.0 Beta 2 ============ * Added python 2.4 support diff --git a/lib/git/ext/gitdb b/lib/git/ext/gitdb -Subproject 425ecf04aa5038c3d46b01ca20de17c51ef6c4e +Subproject 78665b13ff4125f4ce3e5311d040c027bdc92a9 diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index f3a6e216..c7da01e8 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -368,9 +368,14 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): write("parent %s\n" % p) a = self.author + aname = a.name + if isinstance(aname, unicode): + aname = aname.encode(self.encoding) + # END handle unicode in name + c = self.committer fmt = "%s %s <%s> %s %s\n" - write(fmt % ("author", a.name, a.email, + write(fmt % ("author", aname, a.email, self.authored_date, altz_to_utctz_str(self.author_tz_offset))) @@ -425,12 +430,19 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): readline() # END handle encoding + # decode the authors name + try: + self.author.name = self.author.name.decode(self.encoding) + except UnicodeDecodeError: + print >> sys.stderr, "Failed to decode author name: %s" % self.author.name + # END handle author's encoding + # a stream from our data simply gives us the plain message # The end of our message stream is marked with a newline that we strip self.message = stream.read() try: self.message = self.message.decode(self.encoding) - except Exception: + except UnicodeDecodeError: print >> sys.stderr, "Failed to decode message: %s" % self.message # END exception handling return self diff --git a/test/git/performance/test_odb.py b/test/git/performance/test_odb.py index 23d5b98e..32b70f69 100644 --- a/test/git/performance/test_odb.py +++ b/test/git/performance/test_odb.py @@ -49,9 +49,10 @@ class TestObjDBPerformance(TestBigRepoR): st = time() nb = 0 too_many = 15000 + data_bytes = 0 for blob_list in blobs_per_commit: for blob in blob_list: - blob.data_stream.read() + data_bytes += len(blob.data_stream.read()) # END for each blobsha nb += len(blob_list) if nb > too_many: @@ -59,7 +60,7 @@ class TestObjDBPerformance(TestBigRepoR): # END for each bloblist elapsed = time() - st - print >> sys.stderr, "%s: Retrieved %i blob and their data in %g s ( %f blobs / s )" % (type(repo.odb), nb, elapsed, nb / elapsed) + print >> sys.stderr, "%s: Retrieved %i blob (%i KiB) and their data in %g s ( %f blobs / s, %f KiB / s )" % (type(repo.odb), nb, data_bytes/1000, elapsed, nb / elapsed, (data_bytes / 1000) / elapsed) results[2].append(elapsed) # END for each repo type diff --git a/test/git/test_commit.py b/test/git/test_commit.py index 31ce2c4e..2692938f 100644 --- a/test/git/test_commit.py +++ b/test/git/test_commit.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- # test_commit.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # @@ -108,6 +109,14 @@ class TestCommit(TestBase): assert commit.committer_tz_offset == 14400, commit.committer_tz_offset assert commit.message == "initial project\n" + def test_unicode_actor(self): + # assure we can parse unicode actors correctly + name = "Üäöß ÄußÉ".decode("utf-8") + assert len(name) == 9 + special = Actor._from_string(u"%s <something@this.com>" % name) + assert special.name == name + assert isinstance(special.name, unicode) + def test_traversal(self): start = self.rorepo.commit("a4d06724202afccd2b5c54f81bcf2bf26dea7fff") first = self.rorepo.commit("33ebe7acec14b25c5f84f35a664803fcab2f7781") @@ -233,3 +242,32 @@ class TestCommit(TestBase): # create all commits of our repo assert_commit_serialization(rwrepo, '0.1.6') + def test_serialization_unicode_support(self): + assert Commit.default_encoding.lower() == 'utf-8' + + # create a commit with unicode in the message, and the author's name + # Verify its serialization and deserialization + cmt = self.rorepo.commit('0.1.6') + assert isinstance(cmt.message, unicode) # it automatically decodes it as such + assert isinstance(cmt.author.name, unicode) # same here + + cmt.message = "üäêèß".decode("utf-8") + assert len(cmt.message) == 5 + + cmt.author.name = "äüß".decode("utf-8") + assert len(cmt.author.name) == 3 + + cstream = StringIO() + cmt._serialize(cstream) + cstream.seek(0) + assert len(cstream.getvalue()) + + ncmt = Commit(self.rorepo, cmt.binsha) + ncmt._deserialize(cstream) + + assert cmt.author.name == ncmt.author.name + assert cmt.message == ncmt.message + # actually, it can't be printed in a shell as repr wants to have ascii only + # it appears + cmt.author.__repr__() + |