summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- doc/source/changes.rst 4
m--------- lib/git/ext/gitdb 0
-rw-r--r-- lib/git/objects/commit.py 16
-rw-r--r-- test/git/performance/test_odb.py 5
-rw-r--r-- test/git/test_commit.py 38
5 files changed, 59 insertions, 4 deletions
diff --git a/doc/source/changes.rst b/doc/source/changes.rst
index 2a7ff46b..730d5867 100644
--- a/doc/source/changes.rst
+++ b/doc/source/changes.rst
@@ -2,6 +2,10 @@
Changelog
=========
+0.3.0 Beta 3
+============
+* Added unicode support for author names. Commit.author.name is now unicode instead of string.
+
0.3.0 Beta 2
============
* Added python 2.4 support
diff --git a/lib/git/ext/gitdb b/lib/git/ext/gitdb
-Subproject 425ecf04aa5038c3d46b01ca20de17c51ef6c4e
+Subproject 78665b13ff4125f4ce3e5311d040c027bdc92a9
diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py
index f3a6e216..c7da01e8 100644
--- a/lib/git/objects/commit.py
+++ b/lib/git/objects/commit.py
@@ -368,9 +368,14 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable):
write("parent %s\n" % p)
a = self.author
+ aname = a.name
+ if isinstance(aname, unicode):
+ aname = aname.encode(self.encoding)
+ # END handle unicode in name
+
c = self.committer
fmt = "%s %s <%s> %s %s\n"
- write(fmt % ("author", a.name, a.email,
+ write(fmt % ("author", aname, a.email,
self.authored_date,
altz_to_utctz_str(self.author_tz_offset)))
@@ -425,12 +430,19 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable):
readline()
# END handle encoding
+ # decode the author's name
+ try:
+ self.author.name = self.author.name.decode(self.encoding)
+ except UnicodeDecodeError:
+ print >> sys.stderr, "Failed to decode author name: %s" % self.author.name
+ # END handle author's encoding
+
# a stream from our data simply gives us the plain message
# The end of our message stream is marked with a newline that we strip
self.message = stream.read()
try:
self.message = self.message.decode(self.encoding)
- except Exception:
+ except UnicodeDecodeError:
print >> sys.stderr, "Failed to decode message: %s" % self.message
# END exception handling
return self
diff --git a/test/git/performance/test_odb.py b/test/git/performance/test_odb.py
index 23d5b98e..32b70f69 100644
--- a/test/git/performance/test_odb.py
+++ b/test/git/performance/test_odb.py
@@ -49,9 +49,10 @@ class TestObjDBPerformance(TestBigRepoR):
st = time()
nb = 0
too_many = 15000
+ data_bytes = 0
for blob_list in blobs_per_commit:
for blob in blob_list:
- blob.data_stream.read()
+ data_bytes += len(blob.data_stream.read())
# END for each blobsha
nb += len(blob_list)
if nb > too_many:
@@ -59,7 +60,7 @@ class TestObjDBPerformance(TestBigRepoR):
# END for each bloblist
elapsed = time() - st
- print >> sys.stderr, "%s: Retrieved %i blob and their data in %g s ( %f blobs / s )" % (type(repo.odb), nb, elapsed, nb / elapsed)
+ print >> sys.stderr, "%s: Retrieved %i blob (%i KiB) and their data in %g s ( %f blobs / s, %f KiB / s )" % (type(repo.odb), nb, data_bytes/1000, elapsed, nb / elapsed, (data_bytes / 1000) / elapsed)
results[2].append(elapsed)
# END for each repo type
diff --git a/test/git/test_commit.py b/test/git/test_commit.py
index 31ce2c4e..2692938f 100644
--- a/test/git/test_commit.py
+++ b/test/git/test_commit.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
# test_commit.py
# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
#
@@ -108,6 +109,14 @@ class TestCommit(TestBase):
assert commit.committer_tz_offset == 14400, commit.committer_tz_offset
assert commit.message == "initial project\n"
+ def test_unicode_actor(self):
+ # ensure we can parse unicode actors correctly
+ name = "Üäöß ÄußÉ".decode("utf-8")
+ assert len(name) == 9
+ special = Actor._from_string(u"%s <something@this.com>" % name)
+ assert special.name == name
+ assert isinstance(special.name, unicode)
+
def test_traversal(self):
start = self.rorepo.commit("a4d06724202afccd2b5c54f81bcf2bf26dea7fff")
first = self.rorepo.commit("33ebe7acec14b25c5f84f35a664803fcab2f7781")
@@ -233,3 +242,32 @@ class TestCommit(TestBase):
# create all commits of our repo
assert_commit_serialization(rwrepo, '0.1.6')
+ def test_serialization_unicode_support(self):
+ assert Commit.default_encoding.lower() == 'utf-8'
+
+ # create a commit with unicode in the message, and the author's name
+ # Verify its serialization and deserialization
+ cmt = self.rorepo.commit('0.1.6')
+ assert isinstance(cmt.message, unicode) # it automatically decodes it as such
+ assert isinstance(cmt.author.name, unicode) # same here
+
+ cmt.message = "üäêèß".decode("utf-8")
+ assert len(cmt.message) == 5
+
+ cmt.author.name = "äüß".decode("utf-8")
+ assert len(cmt.author.name) == 3
+
+ cstream = StringIO()
+ cmt._serialize(cstream)
+ cstream.seek(0)
+ assert len(cstream.getvalue())
+
+ ncmt = Commit(self.rorepo, cmt.binsha)
+ ncmt._deserialize(cstream)
+
+ assert cmt.author.name == ncmt.author.name
+ assert cmt.message == ncmt.message
+ # Note: the result apparently cannot be printed in a shell, as repr seems to
+ # want ASCII only; here we merely call it.
+ cmt.author.__repr__()
+