summaryrefslogtreecommitdiff
path: root/lib/git/index.py
diff options
context:
space:
mode:
Diffstat (limited to 'lib/git/index.py')
-rw-r--r--lib/git/index.py510
1 files changed, 510 insertions, 0 deletions
diff --git a/lib/git/index.py b/lib/git/index.py
new file mode 100644
index 00000000..9a55da15
--- /dev/null
+++ b/lib/git/index.py
@@ -0,0 +1,510 @@
+# index.py
+# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
+#
+# This module is part of GitPython and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""
+Module containing Index implementation, allowing to perform all kinds of index
+manipulations such as querying and merging.
+"""
+import struct
+import binascii
+import mmap
+import objects
+import tempfile
+import os
+import stat
+from git.objects import Blob, Tree
+from git.utils import SHA1Writer
+
+class _TemporaryFileSwap(object):
+ """
+ Utility class moving a file to a temporary location within the same directory
+ and moving it back on to where on object deletion.
+ """
+ __slots__ = ("file_path", "tmp_file_path")
+
+ def __init__(self, file_path):
+ self.file_path = file_path
+ self.tmp_file_path = self.file_path + tempfile.mktemp('','','')
+ os.rename(self.file_path, self.tmp_file_path)
+
+ def __del__(self):
+ if os.path.isfile(self.tmp_file_path):
+ os.rename(self.tmp_file_path, self.file_path)
+
+
class IndexEntry(tuple):
    """
    Allows convenient access to IndexEntry data without completely unpacking it.

    Attributes usually accessed often are cached in the tuple whereas others are
    unpacked on demand.

    See the properties for a mapping between names and tuple indices.
    """
    @property
    def ctime(self):
        """
        Returns
            Tuple(int_time_seconds_since_epoch, int_nano_seconds) of the
            file's creation time
        """
        # self[0] stores the raw 8 bytes as read from the index file
        return struct.unpack(">LL", self[0])

    @property
    def mtime(self):
        """
        See ctime property, but returns modification time
        """
        return struct.unpack(">LL", self[1])

    @property
    def dev(self):
        """
        Device ID
        """
        return self[2]

    @property
    def inode(self):
        """
        Inode ID
        """
        return self[3]

    @property
    def mode(self):
        """
        File Mode, compatible to stat module constants
        """
        return self[4]

    @property
    def uid(self):
        """
        User ID
        """
        return self[5]

    @property
    def gid(self):
        """
        Group ID
        """
        return self[6]

    @property
    def size(self):
        """
        Uncompressed size of the blob

        Note
            Will be 0 if the stage is not 0 ( hence it is an unmerged entry )
        """
        return self[7]

    @property
    def sha(self):
        """
        hex sha of the blob
        """
        return self[8]

    @property
    def stage(self):
        """
        Stage of the entry, either:
            0 = default stage
            1 = stage before a merge or common ancestor entry in case of a 3 way merge
            2 = stage of entries from the 'left' side of the merge
            3 = stage of entries from the right side of the merge
        Note:
            For more information, see http://www.kernel.org/pub/software/scm/git/docs/git-read-tree.html
        """
        return self[9]

    @property
    def path(self):
        """
        Path of the entry, relative to the repository root
        """
        return self[10]

    @classmethod
    def from_blob(cls, blob):
        """
        Returns
            Minimal entry resembling the given blob object, at stage 0 with
            zeroed timestamps and stat information
        """
        time = struct.pack(">LL", 0, 0)
        return IndexEntry((time, time, 0, 0, blob.mode, 0, 0, blob.size, blob.id, 0, blob.path))
+
+
class Index(object):
    """
    Implements an Index that can be manipulated using a native implementation in
    order to save git command function calls wherever possible.

    It provides custom merging facilities allowing to merge without actually changing
    your index or your working tree. This way you can perform your own test-merges based
    on the index only without having to deal with the working copy. This is useful
    in case of partial working trees.

    ``Entries``
        The index contains an entries dict whose keys are (path, stage) tuples
        and whose values are IndexEntry instances, to facilitate access.
    """
    __slots__ = ( "repo", "version", "entries", "_extension_data" )
    _VERSION = 2                # latest index file version we support
    S_IFGITLINK = 0o160000      # mode of submodule (gitlink) entries

    def __init__(self, repo, stream = None):
        """
        Initialize this Index instance, optionally from the given ``stream``

        ``repo``
            Repository the index belongs to

        ``stream``
            Optional file-like object, opened in binary mode, positioned at the
            start of index data
        """
        self.repo = repo
        self.entries = dict()
        self.version = self._VERSION
        self._extension_data = ''
        if stream is not None:
            self._read_from_stream(stream)

    @classmethod
    def _read_entry(cls, stream):
        """Return: One IndexEntry read from the current position of the given stream"""
        beginoffset = stream.tell()
        # timestamps are kept as raw 8-byte strings; IndexEntry unpacks on demand
        ctime = struct.unpack(">8s", stream.read(8))[0]
        mtime = struct.unpack(">8s", stream.read(8))[0]
        (dev, ino, mode, uid, gid, size, sha, flags) = \
            struct.unpack(">LLLLLL20sH", stream.read(20 + 4 * 6 + 2))
        # the lower 12 bits of the flags field hold the path length
        path_size = flags & 0x0fff
        path = stream.read(path_size)

        # entries are NUL-padded to the next 8-byte boundary - skip the padding
        real_size = ((stream.tell() - beginoffset + 8) & ~7)
        stream.read((beginoffset + real_size) - stream.tell())
        return IndexEntry((ctime, mtime, dev, ino, mode, uid, gid, size,
                            binascii.hexlify(sha), flags >> 12, path))

    @classmethod
    def _read_header(cls, stream):
        """Return tuple(version_long, num_entries) from the given stream"""
        type_id = stream.read(4)
        # b"DIRC" equals "DIRC" on python 2 and matches the bytes returned by
        # a stream opened in binary mode
        if type_id != b"DIRC":
            raise AssertionError("Invalid index file header: %r" % type_id)
        version, num_entries = struct.unpack(">LL", stream.read(4 * 2))
        assert version in (1, 2)
        return version, num_entries

    def _read_from_stream(self, stream):
        """
        Initialize this instance with index values read from the given stream

        Note
            We explicitly do not clear the entries dict here to allow for reading
            multiple chunks from multiple streams into the same Index instance
        """
        self.version, num_entries = self._read_header(stream)
        for _ in range(num_entries):
            entry = self._read_entry(stream)
            self.entries[(entry.path, entry.stage)] = entry
        # END for each entry

        # the footer contains extension data and a sha on the content so far
        # Keep the extension footer, and verify we have a sha in the end
        self._extension_data = stream.read(~0)
        assert len(self._extension_data) > 19, "Index Footer was not at least a sha on content as it was only %i bytes in size" % len(self._extension_data)

        # truncate the sha in the end as we will dynamically create it anyway
        # NOTE(review): the trailing sha is not verified against the content read
        self._extension_data = self._extension_data[:-20]


    @classmethod
    def from_file(cls, repo, file_path):
        """
        Returns
            Index instance as recreated from the given stream.

        ``repo``
            Repository the index is related to

        ``file_path``
            File path pointing to git index file

        Note
            Reading is based on the dulwich project.
        """
        # the index is a binary file - opening it in text mode would corrupt
        # the data on platforms performing newline translation
        fp = open(file_path, "rb")

        # try memory map for speed
        stream = fp
        try:
            stream = mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ)
        except Exception:
            pass
        # END memory mapping

        try:
            return cls(repo, stream)
        finally:
            fp.close()


    @classmethod
    def to_file(cls, index, file_path):
        """
        Write the index data to the given file path.

        ``index``
            Index you wish to write.

        ``file_path``
            Path at which to write the index data. Please note that missing directories
            will lead to an exception to be thrown.

        Raise
            IOError if the file could not be written
        """
        # binary mode - see from_file
        fp = open(file_path, "wb")
        try:
            return index.write(fp)
        finally:
            fp.close()
        # END exception handling


    @classmethod
    def _write_cache_entry(cls, stream, entry):
        """
        Write an IndexEntry to a stream
        """
        beginoffset = stream.tell()
        stream.write(entry[0])      # ctime
        stream.write(entry[1])      # mtime
        path = entry[10]
        plen = len(path) & 0x0fff   # path length
        assert plen == len(path), "Path %s too long to fit into index" % entry[10]
        flags = plen | (entry[9] << 12)     # stage and path length are 2 byte flags
        stream.write(struct.pack(">LLLLLL20sH", entry[2], entry[3], entry[4],
                                entry[5], entry[6], entry[7], binascii.unhexlify(entry[8]), flags))
        stream.write(path)
        # pad the entry with NULs to the next 8-byte boundary
        real_size = ((stream.tell() - beginoffset + 8) & ~7)
        stream.write(b"\0" * ((beginoffset + real_size) - stream.tell()))

    def write(self, stream):
        """
        Write the current state to the given stream

        ``stream``
            File-like object opened in binary mode

        Returns
            self

        Note
            Index writing based on the dulwich implementation
        """
        stream = SHA1Writer(stream)

        # header
        stream.write(b"DIRC")
        stream.write(struct.pack(">LL", self.version, len(self.entries)))

        # body - entries must be ordered by path, then by stage
        entries_sorted = sorted(self.entries.values(), key=lambda e: (e[10], e[9]))
        for entry in entries_sorted:
            self._write_cache_entry(stream, entry)
        # END for each entry

        # write previously cached extensions data
        stream.write(self._extension_data)

        # write the sha over the content
        stream.write_sha()
        return self


    @classmethod
    def from_tree(cls, repo, *treeish, **kwargs):
        """
        Merge the given treeish revisions into a new index which is returned.
        The original index will remain unaltered

        ``repo``
            The repository treeish are located in.

        ``*treeish``
            One, two or three Tree Objects or Commits. The result changes according to the
            amount of trees.
            If 1 Tree is given, it will just be read into a new index
            If 2 Trees are given, they will be merged into a new index using a
            two way merge algorithm. Tree 1 is the 'current' tree, tree 2 is the 'other'
            one. It behaves like a fast-forward.
            If 3 Trees are given, a 3-way merge will be performed with the first tree
            being the common ancestor of tree 2 and tree 3. Tree 2 is the 'current' tree,
            tree 3 is the 'other' one

        ``**kwargs``
            Additional arguments passed to git-read-tree

        Returns
            New Index instance as read from the temporary index file written
            by git-read-tree

        Note:
            In the three-way merge case, --aggressive will be specified to automatically
            resolve more cases in a commonly correct manner. Specify trivial=True as kwarg
            to override that.

            As the underlying git-read-tree command takes into account the current index,
            it will be temporarily moved out of the way to assure there are no unsuspected
            interferences.
        """
        if len(treeish) == 0 or len(treeish) > 3:
            raise ValueError("Please specify between 1 and 3 treeish, got %i" % len(treeish))

        arg_list = list()
        # ignore that working tree and index possibly are out of date
        if len(treeish) > 1:
            # drop unmerged entries when reading our index and merging
            arg_list.append("--reset")
            # handle non-trivial cases the way a real merge does
            arg_list.append("--aggressive")
        # END merge handling

        # tmp file created in git home directory to be sure renaming
        # works - /tmp/ dirs could be on another device
        tmp_index = tempfile.mktemp('', '', repo.path)
        arg_list.append("--index-output=%s" % tmp_index)
        arg_list.extend(treeish)

        # move current index out of the way - otherwise the merge may fail
        # as it considers existing entries. moving it essentially clears the index.
        # Unfortunately there is no 'soft' way to do it.
        # The handler is kept alive until the end of this method so its deletion
        # puts the original file back in place.
        index_handler = _TemporaryFileSwap(os.path.join(repo.path, 'index'))
        try:
            repo.git.read_tree(*arg_list, **kwargs)
            index = cls.from_file(repo, tmp_index)
        finally:
            if os.path.exists(tmp_index):
                os.remove(tmp_index)
        # END index merge handling

        return index

    @classmethod
    def _index_mode_to_tree_index_mode(cls, index_mode):
        """
        Cleanup an index_mode value.
        This will return an index_mode that can be stored in a tree object.

        ``index_mode``
            Index_mode to clean up.
        """
        if stat.S_ISLNK(index_mode):
            return stat.S_IFLNK
        elif stat.S_ISDIR(index_mode):
            return stat.S_IFDIR
        elif stat.S_IFMT(index_mode) == cls.S_IFGITLINK:
            return cls.S_IFGITLINK
        # regular file: base permissions 0644 plus whatever executable bits
        # were set on the original mode
        ret = stat.S_IFREG | 0o644
        ret |= (index_mode & 0o111)
        return ret

    def iter_blobs(self, predicate = lambda t: True):
        """
        Returns
            Iterator yielding tuples of Blob objects and stages, tuple(stage, Blob)

        ``predicate``
            Function(t) returning True if tuple(stage, Blob) should be yielded by the
            iterator
        """
        for entry in self.entries.values():
            mode = self._index_mode_to_tree_index_mode(entry.mode)
            blob = Blob(self.repo, entry.sha, mode, entry.path)
            blob.size = entry.size
            output = (entry.stage, blob)
            if predicate(output):
                yield output
        # END for each entry

    def unmerged_blobs(self):
        """
        Returns
            Iterator yielding dict(path : list( tuple( stage, Blob, ...))), being
            a dictionary associating a path in the index with a list containing
            stage/blob pairs

        Note:
            Blobs that have been removed in one side simply do not exist in the
            given stage. I.e. a file removed on the 'other' branch whose entries
            are at stage 3 will not have a stage 3 entry.
        """
        # unmerged entries are exactly those at a non-null stage
        is_unmerged_blob = lambda t: t[0] != 0
        path_map = dict()
        for stage, blob in self.iter_blobs(is_unmerged_blob):
            path_map.setdefault(blob.path, list()).append((stage, blob))
        # END for each unmerged blob

        return path_map

    def resolve_blobs(self, iter_blobs):
        """
        Resolve the blobs given in blob iterator. This will effectively remove the
        index entries of the respective path at all non-null stages and add the given
        blob as new stage null blob.

        For each path there may only be one blob, otherwise a ValueError will be raised
        claiming the path is already at stage 0.

        Raise
            ValueError if one of the blobs already existed at stage 0

        Returns:
            self
        """
        for blob in iter_blobs:
            stage_null_key = (blob.path, 0)
            if stage_null_key in self.entries:
                raise ValueError( "Blob %r already at stage 0" % blob )
            # END assert blob is not stage 0 already

            # delete all possible stages
            for stage in (1, 2, 3):
                try:
                    del( self.entries[(blob.path, stage)] )
                except KeyError:
                    pass
                # END ignore key errors
            # END for each possible stage

            self.entries[stage_null_key] = IndexEntry.from_blob(blob)
        # END for each blob

        return self

    def write_tree(self):
        """
        Writes the Index in self to a corresponding Tree file into the repository
        object database and returns it as corresponding Tree object.

        Returns
            Tree object representing this index
        """
        index_path = os.path.join(self.repo.path, "index")
        # move the repository's own index aside; it is put back when the
        # handler is garbage-collected at the end of this method
        tmp_index_mover = _TemporaryFileSwap(index_path)

        self.to_file(self, index_path)

        try:
            tree_sha = self.repo.git.write_tree()
        finally:
            # remove our index file so that the original index can move back into place
            # On linux it will silently overwrite, on windows it won't
            if os.path.isfile(index_path):
                os.remove(index_path)
            # END remove our own index file beforehand
        # END write tree handling
        return Tree(self.repo, tree_sha, 0, '')
+
+