diff options
author | Sebastian Thiel <byronimo@gmail.com> | 2010-08-13 14:04:11 +0200 |
---|---|---|
committer | Sebastian Thiel <byronimo@gmail.com> | 2010-08-13 14:21:18 +0200 |
commit | 394ed7006ee5dc8bddfd132b64001d5dfc0ffdd3 (patch) | |
tree | ba71450a7f5d95bc524d87056e81efa4b6ac6c9e | |
parent | 192472f9673b18c91ce618e64e935f91769c50e7 (diff) | |
download | gitpython-394ed7006ee5dc8bddfd132b64001d5dfc0ffdd3.tar.gz |
Unicode handling in messages and trees was improved. Messages are now written according to the encoding of the commit object, and decoded using that information as well. Trees will encode and decode their names with UTF-8.
m--------- | lib/git/ext/gitdb | 0 | ||||
-rw-r--r-- | lib/git/objects/commit.py | 14 | ||||
-rw-r--r-- | lib/git/objects/fun.py | 7 |
3 files changed, 20 insertions, 1 deletions
diff --git a/lib/git/ext/gitdb b/lib/git/ext/gitdb -Subproject 18152febd428e67b86bb4fb68ec1691d4de75a9 +Subproject 425ecf04aa5038c3d46b01ca20de17c51ef6c4e diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index 132d794b..f3a6e216 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -31,6 +31,7 @@ from time import ( altzone ) import os +import sys __all__ = ('Commit', ) @@ -381,7 +382,13 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): write("encoding %s\n" % self.encoding) write("\n") - write(self.message) + + # write plain bytes, be sure its encoded according to our encoding + if isinstance(self.message, unicode): + write(self.message.encode(self.encoding)) + else: + write(self.message) + # END handle encoding return self def _deserialize(self, stream): @@ -421,6 +428,11 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): # a stream from our data simply gives us the plain message # The end of our message stream is marked with a newline that we strip self.message = stream.read() + try: + self.message = self.message.decode(self.encoding) + except Exception: + print >> sys.stderr, "Failed to decode message: %s" % self.message + # END exception handling return self #} END serializable implementation diff --git a/lib/git/objects/fun.py b/lib/git/objects/fun.py index e73e93b0..9b0a377c 100644 --- a/lib/git/objects/fun.py +++ b/lib/git/objects/fun.py @@ -66,7 +66,14 @@ def tree_entries_from_data(data): while data[i] != '\0': i += 1 # END while not reached NULL + + # default encoding for strings in git is utf8 + # Only use the respective unicode object if the byte stream was encoded name = data[ns:i] + name_enc = name.decode("utf-8") + if len(name) > len(name_enc): + name = name_enc + # END handle encoding # byte is NULL, get next 20 i += 1 |