summary | refs | log | tree | commit | diff
path: root/lib/git/objects
diff options
context:
space:
mode:
author Sebastian Thiel <byronimo@gmail.com> 2010-08-13 14:04:11 +0200
committer Sebastian Thiel <byronimo@gmail.com> 2010-08-13 14:21:18 +0200
commit 394ed7006ee5dc8bddfd132b64001d5dfc0ffdd3 (patch)
tree ba71450a7f5d95bc524d87056e81efa4b6ac6c9e /lib/git/objects
parent 192472f9673b18c91ce618e64e935f91769c50e7 (diff)
download gitpython-394ed7006ee5dc8bddfd132b64001d5dfc0ffdd3.tar.gz
Unicode handling in messages and trees was improved. Messages are now written according to the encoding of the commit object and decoded using that information as well. Trees will encode and decode their names with UTF-8.
Diffstat (limited to 'lib/git/objects')
-rw-r--r-- lib/git/objects/commit.py 14
-rw-r--r-- lib/git/objects/fun.py 7
2 files changed, 20 insertions, 1 deletions
diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py
index 132d794b..f3a6e216 100644
--- a/lib/git/objects/commit.py
+++ b/lib/git/objects/commit.py
@@ -31,6 +31,7 @@ from time import (
altzone
)
import os
+import sys
__all__ = ('Commit', )
@@ -381,7 +382,13 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable):
write("encoding %s\n" % self.encoding)
write("\n")
- write(self.message)
+
+ # write plain bytes, be sure its encoded according to our encoding
+ if isinstance(self.message, unicode):
+ write(self.message.encode(self.encoding))
+ else:
+ write(self.message)
+ # END handle encoding
return self
def _deserialize(self, stream):
@@ -421,6 +428,11 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable):
# a stream from our data simply gives us the plain message
# The end of our message stream is marked with a newline that we strip
self.message = stream.read()
+ try:
+ self.message = self.message.decode(self.encoding)
+ except Exception:
+ print >> sys.stderr, "Failed to decode message: %s" % self.message
+ # END exception handling
return self
#} END serializable implementation
diff --git a/lib/git/objects/fun.py b/lib/git/objects/fun.py
index e73e93b0..9b0a377c 100644
--- a/lib/git/objects/fun.py
+++ b/lib/git/objects/fun.py
@@ -66,7 +66,14 @@ def tree_entries_from_data(data):
while data[i] != '\0':
i += 1
# END while not reached NULL
+
+ # default encoding for strings in git is utf8
+ # Only use the respective unicode object if the byte stream was encoded
name = data[ns:i]
+ name_enc = name.decode("utf-8")
+ if len(name) > len(name_enc):
+ name = name_enc
+ # END handle encoding
# byte is NULL, get next 20
i += 1