diff options
Diffstat (limited to 'git/objects/fun.py')
-rw-r--r-- | git/objects/fun.py | 364 |
1 files changed, 182 insertions, 182 deletions
diff --git a/git/objects/fun.py b/git/objects/fun.py index 9b0a377c..f73be542 100644 --- a/git/objects/fun.py +++ b/git/objects/fun.py @@ -2,198 +2,198 @@ from stat import S_ISDIR __all__ = ('tree_to_stream', 'tree_entries_from_data', 'traverse_trees_recursive', - 'traverse_tree_recursive') + 'traverse_tree_recursive') - + def tree_to_stream(entries, write): - """Write the give list of entries into a stream using its write method - :param entries: **sorted** list of tuples with (binsha, mode, name) - :param write: write method which takes a data string""" - ord_zero = ord('0') - bit_mask = 7 # 3 bits set - - for binsha, mode, name in entries: - mode_str = '' - for i in xrange(6): - mode_str = chr(((mode >> (i*3)) & bit_mask) + ord_zero) + mode_str - # END for each 8 octal value - - # git slices away the first octal if its zero - if mode_str[0] == '0': - mode_str = mode_str[1:] - # END save a byte + """Write the give list of entries into a stream using its write method + :param entries: **sorted** list of tuples with (binsha, mode, name) + :param write: write method which takes a data string""" + ord_zero = ord('0') + bit_mask = 7 # 3 bits set + + for binsha, mode, name in entries: + mode_str = '' + for i in xrange(6): + mode_str = chr(((mode >> (i*3)) & bit_mask) + ord_zero) + mode_str + # END for each 8 octal value + + # git slices away the first octal if its zero + if mode_str[0] == '0': + mode_str = mode_str[1:] + # END save a byte - # here it comes: if the name is actually unicode, the replacement below - # will not work as the binsha is not part of the ascii unicode encoding - - # hence we must convert to an utf8 string for it to work properly. - # According to my tests, this is exactly what git does, that is it just - # takes the input literally, which appears to be utf8 on linux. - if isinstance(name, unicode): - name = name.encode("utf8") - write("%s %s\0%s" % (mode_str, name, binsha)) - # END for each item + # here it comes: if the name is actually unicode, the replacement below + # will not work as the binsha is not part of the ascii unicode encoding - + # hence we must convert to an utf8 string for it to work properly. + # According to my tests, this is exactly what git does, that is it just + # takes the input literally, which appears to be utf8 on linux. + if isinstance(name, unicode): + name = name.encode("utf8") + write("%s %s\0%s" % (mode_str, name, binsha)) + # END for each item def tree_entries_from_data(data): - """Reads the binary representation of a tree and returns tuples of Tree items - :param data: data block with tree data - :return: list(tuple(binsha, mode, tree_relative_path), ...)""" - ord_zero = ord('0') - len_data = len(data) - i = 0 - out = list() - while i < len_data: - mode = 0 - - # read mode - # Some git versions truncate the leading 0, some don't - # The type will be extracted from the mode later - while data[i] != ' ': - # move existing mode integer up one level being 3 bits - # and add the actual ordinal value of the character - mode = (mode << 3) + (ord(data[i]) - ord_zero) - i += 1 - # END while reading mode - - # byte is space now, skip it - i += 1 - - # parse name, it is NULL separated - - ns = i - while data[i] != '\0': - i += 1 - # END while not reached NULL - - # default encoding for strings in git is utf8 - # Only use the respective unicode object if the byte stream was encoded - name = data[ns:i] - name_enc = name.decode("utf-8") - if len(name) > len(name_enc): - name = name_enc - # END handle encoding - - # byte is NULL, get next 20 - i += 1 - sha = data[i:i+20] - i = i + 20 - out.append((sha, mode, name)) - # END for each byte in data stream - return out - - + """Reads the binary representation of a tree and returns tuples of Tree items + :param data: data block with tree data + :return: list(tuple(binsha, mode, tree_relative_path), ...)""" + ord_zero = ord('0') + len_data = len(data) + i = 0 + out = list() + while i < len_data: + mode = 0 + + # read mode + # Some git versions truncate the leading 0, some don't + # The type will be extracted from the mode later + while data[i] != ' ': + # move existing mode integer up one level being 3 bits + # and add the actual ordinal value of the character + mode = (mode << 3) + (ord(data[i]) - ord_zero) + i += 1 + # END while reading mode + + # byte is space now, skip it + i += 1 + + # parse name, it is NULL separated + + ns = i + while data[i] != '\0': + i += 1 + # END while not reached NULL + + # default encoding for strings in git is utf8 + # Only use the respective unicode object if the byte stream was encoded + name = data[ns:i] + name_enc = name.decode("utf-8") + if len(name) > len(name_enc): + name = name_enc + # END handle encoding + + # byte is NULL, get next 20 + i += 1 + sha = data[i:i+20] + i = i + 20 + out.append((sha, mode, name)) + # END for each byte in data stream + return out + + def _find_by_name(tree_data, name, is_dir, start_at): - """return data entry matching the given name and tree mode - or None. - Before the item is returned, the respective data item is set - None in the tree_data list to mark it done""" - try: - item = tree_data[start_at] - if item and item[2] == name and S_ISDIR(item[1]) == is_dir: - tree_data[start_at] = None - return item - except IndexError: - pass - # END exception handling - for index, item in enumerate(tree_data): - if item and item[2] == name and S_ISDIR(item[1]) == is_dir: - tree_data[index] = None - return item - # END if item matches - # END for each item - return None + """return data entry matching the given name and tree mode + or None. + Before the item is returned, the respective data item is set + None in the tree_data list to mark it done""" + try: + item = tree_data[start_at] + if item and item[2] == name and S_ISDIR(item[1]) == is_dir: + tree_data[start_at] = None + return item + except IndexError: + pass + # END exception handling + for index, item in enumerate(tree_data): + if item and item[2] == name and S_ISDIR(item[1]) == is_dir: + tree_data[index] = None + return item + # END if item matches + # END for each item + return None def _to_full_path(item, path_prefix): - """Rebuild entry with given path prefix""" - if not item: - return item - return (item[0], item[1], path_prefix+item[2]) - + """Rebuild entry with given path prefix""" + if not item: + return item + return (item[0], item[1], path_prefix+item[2]) + def traverse_trees_recursive(odb, tree_shas, path_prefix): - """ - :return: list with entries according to the given binary tree-shas. - The result is encoded in a list - of n tuple|None per blob/commit, (n == len(tree_shas)), where - * [0] == 20 byte sha - * [1] == mode as int - * [2] == path relative to working tree root - The entry tuple is None if the respective blob/commit did not - exist in the given tree. - :param tree_shas: iterable of shas pointing to trees. All trees must - be on the same level. A tree-sha may be None in which case None - :param path_prefix: a prefix to be added to the returned paths on this level, - set it '' for the first iteration - :note: The ordering of the returned items will be partially lost""" - trees_data = list() - nt = len(tree_shas) - for tree_sha in tree_shas: - if tree_sha is None: - data = list() - else: - data = tree_entries_from_data(odb.stream(tree_sha).read()) - # END handle muted trees - trees_data.append(data) - # END for each sha to get data for - - out = list() - out_append = out.append - - # find all matching entries and recursively process them together if the match - # is a tree. If the match is a non-tree item, put it into the result. - # Processed items will be set None - for ti, tree_data in enumerate(trees_data): - for ii, item in enumerate(tree_data): - if not item: - continue - # END skip already done items - entries = [ None for n in range(nt) ] - entries[ti] = item - sha, mode, name = item # its faster to unpack - is_dir = S_ISDIR(mode) # type mode bits - - # find this item in all other tree data items - # wrap around, but stop one before our current index, hence - # ti+nt, not ti+1+nt - for tio in range(ti+1, ti+nt): - tio = tio % nt - entries[tio] = _find_by_name(trees_data[tio], name, is_dir, ii) - # END for each other item data - - # if we are a directory, enter recursion - if is_dir: - out.extend(traverse_trees_recursive(odb, [((ei and ei[0]) or None) for ei in entries], path_prefix+name+'/')) - else: - out_append(tuple(_to_full_path(e, path_prefix) for e in entries)) - # END handle recursion - - # finally mark it done - tree_data[ii] = None - # END for each item - - # we are done with one tree, set all its data empty - del(tree_data[:]) - # END for each tree_data chunk - return out - + """ + :return: list with entries according to the given binary tree-shas. + The result is encoded in a list + of n tuple|None per blob/commit, (n == len(tree_shas)), where + * [0] == 20 byte sha + * [1] == mode as int + * [2] == path relative to working tree root + The entry tuple is None if the respective blob/commit did not + exist in the given tree. + :param tree_shas: iterable of shas pointing to trees. All trees must + be on the same level. A tree-sha may be None in which case None + :param path_prefix: a prefix to be added to the returned paths on this level, + set it '' for the first iteration + :note: The ordering of the returned items will be partially lost""" + trees_data = list() + nt = len(tree_shas) + for tree_sha in tree_shas: + if tree_sha is None: + data = list() + else: + data = tree_entries_from_data(odb.stream(tree_sha).read()) + # END handle muted trees + trees_data.append(data) + # END for each sha to get data for + + out = list() + out_append = out.append + + # find all matching entries and recursively process them together if the match + # is a tree. If the match is a non-tree item, put it into the result. + # Processed items will be set None + for ti, tree_data in enumerate(trees_data): + for ii, item in enumerate(tree_data): + if not item: + continue + # END skip already done items + entries = [ None for n in range(nt) ] + entries[ti] = item + sha, mode, name = item # its faster to unpack + is_dir = S_ISDIR(mode) # type mode bits + + # find this item in all other tree data items + # wrap around, but stop one before our current index, hence + # ti+nt, not ti+1+nt + for tio in range(ti+1, ti+nt): + tio = tio % nt + entries[tio] = _find_by_name(trees_data[tio], name, is_dir, ii) + # END for each other item data + + # if we are a directory, enter recursion + if is_dir: + out.extend(traverse_trees_recursive(odb, [((ei and ei[0]) or None) for ei in entries], path_prefix+name+'/')) + else: + out_append(tuple(_to_full_path(e, path_prefix) for e in entries)) + # END handle recursion + + # finally mark it done + tree_data[ii] = None + # END for each item + + # we are done with one tree, set all its data empty + del(tree_data[:]) + # END for each tree_data chunk + return out + def traverse_tree_recursive(odb, tree_sha, path_prefix): - """ - :return: list of entries of the tree pointed to by the binary tree_sha. An entry - has the following format: - * [0] 20 byte sha - * [1] mode as int - * [2] path relative to the repository - :param path_prefix: prefix to prepend to the front of all returned paths""" - entries = list() - data = tree_entries_from_data(odb.stream(tree_sha).read()) - - # unpacking/packing is faster than accessing individual items - for sha, mode, name in data: - if S_ISDIR(mode): - entries.extend(traverse_tree_recursive(odb, sha, path_prefix+name+'/')) - else: - entries.append((sha, mode, path_prefix+name)) - # END for each item - - return entries + """ + :return: list of entries of the tree pointed to by the binary tree_sha. An entry + has the following format: + * [0] 20 byte sha + * [1] mode as int + * [2] path relative to the repository + :param path_prefix: prefix to prepend to the front of all returned paths""" + entries = list() + data = tree_entries_from_data(odb.stream(tree_sha).read()) + + # unpacking/packing is faster than accessing individual items + for sha, mode, name in data: + if S_ISDIR(mode): + entries.extend(traverse_tree_recursive(odb, sha, path_prefix+name+'/')) + else: + entries.append((sha, mode, path_prefix+name)) + # END for each item + + return entries |