diff options
author | Sebastian Thiel <byronimo@gmail.com> | 2015-07-20 08:51:41 +0200 |
---|---|---|
committer | Sebastian Thiel <byronimo@gmail.com> | 2015-07-20 09:20:00 +0200 |
commit | 9c272abea2c837e4725c37f5c0467f83f3700cd5 (patch) | |
tree | 1f55edfb7cfe0464b22808bf80990e1aea712101 | |
parent | af44258fa472a14ff25b4715f1ab934d177bf1fa (diff) | |
download | gitpython-9c272abea2c837e4725c37f5c0467f83f3700cd5.tar.gz |
fix(encoding): in untracked_files() and index
* untracked_files could, if there were spaces in the path returned,
re-encode the previously decoded unicode string thanks to a
`decode("string_escape")` call. Now the result is decoded back to unicode using `defenc` afterwards
- added a test to ensure this actually works
* IndexFile.add() didn't handle unicode correctly and would write
broken index files. The solution was to compute the path length after
encoding it into utf-8 bytes, not before.
Closes #320
-rw-r--r-- | git/index/base.py | 4 | ||||
-rw-r--r-- | git/index/fun.py | 3 | ||||
-rw-r--r-- | git/repo/base.py | 2 | ||||
-rw-r--r-- | git/test/test_repo.py | 28 |
4 files changed, 20 insertions, 17 deletions
diff --git a/git/index/base.py b/git/index/base.py index b955dae4..4317d46a 100644 --- a/git/index/base.py +++ b/git/index/base.py @@ -583,7 +583,7 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable): stream = None if S_ISLNK(st.st_mode): # in PY3, readlink is string, but we need bytes. In PY2, it's just OS encoded bytes, we assume UTF-8 - stream = BytesIO(force_bytes(os.readlink(filepath), encoding='utf-8')) + stream = BytesIO(force_bytes(os.readlink(filepath), encoding=defenc)) else: stream = open(filepath, 'rb') # END handle stream @@ -610,7 +610,7 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable): blob = Blob(self.repo, Blob.NULL_BIN_SHA, stat_mode_to_index_mode(os.stat(abspath).st_mode), - to_native_path_linux(gitrelative_path)) + to_native_path_linux(gitrelative_path), encoding=defenc) # TODO: variable undefined entries.append(BaseIndexEntry.from_blob(blob)) # END for each path diff --git a/git/index/fun.py b/git/index/fun.py index c1188ccb..9ae46861 100644 --- a/git/index/fun.py +++ b/git/index/fun.py @@ -124,12 +124,13 @@ def write_cache(entries, stream, extension_data=None, ShaStreamCls=IndexFileSHA1 write(entry[4]) # ctime write(entry[5]) # mtime path = entry[3] + path = path.encode(defenc) plen = len(path) & CE_NAMEMASK # path length assert plen == len(path), "Path %s too long to fit into index" % entry[3] flags = plen | (entry[2] & CE_NAMEMASK_INV) # clear possible previous values write(pack(">LLLLLL20sH", entry[6], entry[7], entry[0], entry[8], entry[9], entry[10], entry[1], flags)) - write(path.encode(defenc)) + write(path) real_size = ((tell() - beginoffset + 8) & ~7) write(b"\0" * ((beginoffset + real_size) - tell())) # END for each entry diff --git a/git/repo/base.py b/git/repo/base.py index 8b8f8db8..cea88f39 100644 --- a/git/repo/base.py +++ b/git/repo/base.py @@ -625,7 +625,7 @@ class Repo(object): filename = line[len(prefix):].rstrip('\n') # Special characters are escaped if filename[0] == filename[-1] == '"': - filename = 
filename[1:-1].decode('string_escape') + filename = filename[1:-1].decode('string_escape').decode(defenc) untracked_files.append(filename) finalize_process(proc) return untracked_files diff --git a/git/test/test_repo.py b/git/test/test_repo.py index 667ede74..9c08e2e4 100644 --- a/git/test/test_repo.py +++ b/git/test/test_repo.py @@ -1,3 +1,4 @@ +#-*-coding:utf-8-*- # test_repo.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # @@ -324,17 +325,21 @@ class TestRepo(TestBase): assert len(res) == 1 assert len(res[0][1]) == 83, "Unexpected amount of parsed blame lines" - def test_untracked_files(self): - base = self.rorepo.working_tree_dir - files = (join_path_native(base, "__test_myfile"), - join_path_native(base, "__test_other_file")) - num_recently_untracked = 0 - try: + @with_rw_repo('HEAD', bare=False) + def test_untracked_files(self, rwrepo): + for (run, repo_add) in enumerate((rwrepo.index.add, rwrepo.git.add)): + base = rwrepo.working_tree_dir + files = (join_path_native(base, u"%i_test _myfile" % run), + join_path_native(base, "%i_test_other_file" % run), + join_path_native(base, u"%i__çava verböten" % run), + join_path_native(base, u"%i_çava-----verböten" % run)) + + num_recently_untracked = 0 for fpath in files: fd = open(fpath, "wb") fd.close() # END for each filename - untracked_files = self.rorepo.untracked_files + untracked_files = rwrepo.untracked_files num_recently_untracked = len(untracked_files) # assure we have all names - they are relative to the git-dir @@ -342,13 +347,10 @@ class TestRepo(TestBase): for utfile in untracked_files: num_test_untracked += join_path_native(base, utfile) in files assert len(files) == num_test_untracked - finally: - for fpath in files: - if os.path.isfile(fpath): - os.remove(fpath) - # END handle files - assert len(self.rorepo.untracked_files) == (num_recently_untracked - len(files)) + repo_add(untracked_files) + assert len(rwrepo.untracked_files) == (num_recently_untracked - len(files)) + 
# end for each run def test_config_reader(self): reader = self.rorepo.config_reader() # all config files |