summaryrefslogtreecommitdiff
path: root/lib/git/objects
diff options
context:
space:
mode:
Diffstat (limited to 'lib/git/objects')
-rw-r--r-- lib/git/objects/base.py 43
-rw-r--r-- lib/git/objects/commit.py 91
-rw-r--r-- lib/git/objects/tag.py 7
-rw-r--r-- lib/git/objects/tree.py 252
-rw-r--r-- lib/git/objects/utils.py (renamed from lib/git/objects/util.py) 0
5 files changed, 268 insertions, 125 deletions
diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py
index 43aa8dd1..07538ada 100644
--- a/lib/git/objects/base.py
+++ b/lib/git/objects/base.py
@@ -4,32 +4,10 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
import os
-
-class LazyMixin(object):
- lazy_properties = []
- __slots__ = tuple()
+from git.utils import LazyMixin
- def __getattr__(self, attr):
- """
- Whenever an attribute is requested that we do not know, we allow it
- to be created and set. Next time the same attribute is reqeusted, it is simply
- returned from our dict/slots.
- """
- self._set_cache_(attr)
- # will raise in case the cache was not created
- return object.__getattribute__(self, attr)
+_assertion_msg_format = "Created object %r whose actual git object type %r disagrees with its python type %r"
- def _set_cache_(self, attr):
- """ This method should be overridden in the derived class.
- It should check whether the attribute named by attr can be created
- and cached. Do nothing if you do not know the attribute or call your subclass
-
- The derived class may create as many additional attributes as it deems
- necessary in case a git command returns more information than represented
- in the single attribute."""
- pass
-
-
class Object(LazyMixin):
"""
Implements an Object which may be Blobs, Trees, Commits and Tags
@@ -71,9 +49,13 @@ class Object(LazyMixin):
Retrieve object information
"""
if attr == "size":
- self.size = int(self.repo.git.cat_file(self.id, s=True).rstrip())
+ hexsha, typename, self.size = self.repo.git.get_object_header(self.id)
+ assert typename == self.type, _assertion_msg_format % (self.id, typename, self.type)
elif attr == "data":
- self.data = self.repo.git.cat_file(self.id, p=True, with_raw_output=True)
+ hexsha, typename, self.size, self.data = self.repo.git.get_object_data(self.id)
+ assert typename == self.type, _assertion_msg_format % (self.id, typename, self.type)
+ else:
+ super(Object,self)._set_cache_(attr)
def __eq__(self, other):
"""
@@ -143,8 +125,15 @@ class IndexObject(Object):
if isinstance(mode, basestring):
self.mode = self._mode_str_to_int(mode)
+ def _set_cache_(self, attr):
+ if attr in IndexObject.__slots__:
+ # these attributes cannot be retrieved later on (not without searching for them)
+ raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ )
+ else:
+ super(IndexObject, self)._set_cache_(attr)
+
@classmethod
- def _mode_str_to_int( cls, modestr ):
+ def _mode_str_to_int(cls, modestr):
"""
``modestr``
string like 755 or 644 or 100644 - only the last 3 chars will be used
diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py
index c3e97bf9..101014ab 100644
--- a/lib/git/objects/commit.py
+++ b/lib/git/objects/commit.py
@@ -6,14 +6,14 @@
import re
import time
-
+from git.utils import Iterable
from git.actor import Actor
-from tree import Tree
import git.diff as diff
import git.stats as stats
+from tree import Tree
import base
-class Commit(base.Object):
+class Commit(base.Object, Iterable):
"""
Wraps a git Commit object.
@@ -37,7 +37,7 @@ class Commit(base.Object):
The parameter documentation indicates the type of the argument after a colon ':'.
``id``
- is the sha id of the commit
+ is the sha id of the commit or a ref
``parents`` : tuple( Commit, ... )
is a tuple of commit ids or actual Commits
@@ -71,7 +71,7 @@ class Commit(base.Object):
# END for each parent to convert
if self.id and tree is not None:
- self.tree = Tree(repo, id=tree)
+ self.tree = Tree(repo, id=tree, path='')
# END id to tree conversion
def _set_cache_(self, attr):
@@ -80,8 +80,11 @@ class Commit(base.Object):
to be set.
We set all values at once.
"""
- if attr in self.__slots__:
- temp = Commit.find_all(self.repo, self.id, max_count=1)[0]
+ if attr in Commit.__slots__:
+ # prepare our data lines to match rev-list
+ data_lines = self.data.splitlines()
+ data_lines.insert(0, "commit %s" % self.id)
+ temp = self._iter_from_process_or_stream(self.repo, iter(data_lines)).next()
self.parents = temp.parents
self.tree = temp.tree
self.author = temp.author
@@ -120,7 +123,7 @@ class Commit(base.Object):
return len(repo.git.rev_list(ref, '--', path).strip().splitlines())
@classmethod
- def find_all(cls, repo, ref, path='', **kwargs):
+ def iter_items(cls, repo, ref, path='', **kwargs):
"""
Find all commits matching the given criteria.
@@ -128,7 +131,7 @@ class Commit(base.Object):
is the Repo
``ref``
- is the ref from which to begin (SHA1 or name)
+ is the ref from which to begin (SHA1, Head or name)
``path``
is an optinal path, if set only Commits that include the path
@@ -140,55 +143,67 @@ class Commit(base.Object):
``skip`` is the number of commits to skip
Returns
- git.Commit[]
+ iterator yielding Commit items
"""
- options = {'pretty': 'raw'}
+ options = {'pretty': 'raw', 'as_process' : True }
options.update(kwargs)
- output = repo.git.rev_list(ref, '--', path, **options)
- return cls._list_from_string(repo, output)
+ # the test system might confront us with string values -
+ proc = repo.git.rev_list(ref, '--', path, **options)
+ return cls._iter_from_process_or_stream(repo, proc)
@classmethod
- def _list_from_string(cls, repo, text):
+ def _iter_from_process_or_stream(cls, repo, proc_or_stream):
"""
Parse out commit information into a list of Commit objects
``repo``
is the Repo
- ``text``
- is the text output from the git-rev-list command (raw format)
+ ``proc_or_stream``
+ git-rev-list process instance (raw format), or an iterator over its output lines
Returns
- git.Commit[]
+ iterator returning Commit objects
"""
- lines =text.splitlines(False)
- commits = []
-
- while lines:
- id = lines.pop(0).split()[1]
- tree = lines.pop(0).split()[1]
+ stream = proc_or_stream
+ if not hasattr(stream,'next'):
+ stream = proc_or_stream.stdout
+
+ for line in stream:
+ id = line.split()[1]
+ assert line.split()[0] == "commit"
+ tree = stream.next().split()[1]
parents = []
- while lines and lines[0].startswith('parent'):
- parents.append(lines.pop(0).split()[-1])
- # END while there are parent lines
- author, authored_date = cls._actor(lines.pop(0))
- committer, committed_date = cls._actor(lines.pop(0))
+ next_line = None
+ for parent_line in stream:
+ if not parent_line.startswith('parent'):
+ next_line = parent_line
+ break
+ # END abort reading parents
+ parents.append(parent_line.split()[-1])
+ # END for each parent line
+
+ author, authored_date = cls._actor(next_line)
+ committer, committed_date = cls._actor(stream.next())
- # free line
- lines.pop(0)
+ # empty line
+ stream.next()
message_lines = []
- while lines and not lines[0].startswith('commit'):
- message_lines.append(lines.pop(0).strip())
+ next_line = None
+ for msg_line in stream:
+ if not msg_line.startswith(' '):
+ break
+ # END abort message reading
+ message_lines.append(msg_line.strip())
# END while there are message lines
- message = '\n'.join(message_lines[:-1]) # last line is empty
-
- commits.append(Commit(repo, id=id, parents=parents, tree=tree, author=author, authored_date=authored_date,
- committer=committer, committed_date=committed_date, message=message))
- # END while lines
- return commits
+ message = '\n'.join(message_lines)
+
+ yield Commit(repo, id=id, parents=parents, tree=tree, author=author, authored_date=authored_date,
+ committer=committer, committed_date=committed_date, message=message)
+ # END for each line in stream
@classmethod
def diff(cls, repo, a, b=None, paths=None):
diff --git a/lib/git/objects/tag.py b/lib/git/objects/tag.py
index af1022f0..ecf6349d 100644
--- a/lib/git/objects/tag.py
+++ b/lib/git/objects/tag.py
@@ -8,7 +8,7 @@ Module containing all object based types.
"""
import base
import commit
-from util import get_object_type_by_name
+from utils import get_object_type_by_name
class TagObject(base.Object):
"""
@@ -48,9 +48,8 @@ class TagObject(base.Object):
"""
Cache all our attributes at once
"""
- if attr in self.__slots__:
- output = self.repo.git.cat_file(self.type,self.id)
- lines = output.split("\n")
+ if attr in TagObject.__slots__:
+ lines = self.data.splitlines()
obj, hexsha = lines[0].split(" ") # object <hexsha>
type_token, type_name = lines[1].split(" ") # type <type_name>
diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py
index 273384a3..abfa9622 100644
--- a/lib/git/objects/tree.py
+++ b/lib/git/objects/tree.py
@@ -7,53 +7,125 @@
import os
import blob
import base
+import binascii
+
+def sha_to_hex(sha):
+    """Takes a binary sha string and returns its hexadecimal representation (40 characters)"""
+    hexsha = binascii.hexlify(sha)
+    assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % len(hexsha)
+    return hexsha
class Tree(base.IndexObject):
+ """
+ Trees represent an ordered list of Blobs and other Trees. Hence a Tree can be
+ accessed like a list.
+
+ Trees will cache their contents after first retrieval to improve efficiency.
+
+ ``Tree as a list``::
+
+ Access a specific blob using the
+ tree['filename'] notation.
+
+ You may as well access by index
+ blob = tree[0]
+
+
+ """
type = "tree"
- __slots__ = "_contents"
+ __slots__ = "_cache"
+
+ # using ascii codes for comparison
+ ascii_commit_id = (0x31 << 4) + 0x36
+ ascii_blob_id = (0x31 << 4) + 0x30
+ ascii_tree_id = (0x34 << 4) + 0x30
- def __init__(self, repo, id, mode=None, path=None):
+
+ def __init__(self, repo, id, mode=0, path=None):
super(Tree, self).__init__(repo, id, mode, path)
def _set_cache_(self, attr):
- if attr == "_contents":
- # Read the tree contents.
- self._contents = {}
- for line in self.repo.git.ls_tree(self.id).splitlines():
- obj = self.content__from_string(self.repo, line)
- if obj is not None:
- self._contents[obj.path] = obj
+ if attr == "_cache":
+ # Set the data when we need it
+ self._cache = self._get_tree_cache()
else:
super(Tree, self)._set_cache_(attr)
- @staticmethod
- def content__from_string(repo, text):
+ def _get_tree_cache(self):
"""
- Parse a content item and create the appropriate object
-
- ``repo``
- is the Repo
-
- ``text``
- is the single line containing the items data in `git ls-tree` format
-
+ Return
+ list(object_instance, ...)
+
+ ``Note``
+ takes no arguments - the entries are parsed from this tree's own data
+ """
+ out = list()
+ for obj in self._iter_from_data():
+ if obj is not None:
+ out.append(obj)
+ # END if object was handled
+ # END for each object parsed from the tree data
+ return out
+
+
+ def _iter_from_data(self):
+ """
+ Reads the binary non-pretty printed representation of a tree and converts
+ it into Blob, Tree or Commit objects.
+
+ Note: This method was inspired by the parse_tree method in dulwich.
+
Returns
- ``git.Blob`` or ``git.Tree``
+ iterator yielding Blob or Tree objects, and None for entries of commit type
"""
- try:
- mode, typ, id, path = text.expandtabs(1).split(" ", 3)
- except:
- return None
+ ord_zero = ord('0')
+ data = self.data
+ len_data = len(data)
+ i = 0
+ while i < len_data:
+ mode = 0
+ mode_boundary = i + 6
+
+ # keep it ascii - we compare against the respective values
+ type_id = (ord(data[i])<<4) + ord(data[i+1])
+ i += 2
+
+ while data[i] != ' ':
+ # move existing mode integer up one level being 3 bits
+ # and add the actual ordinal value of the character
+ mode = (mode << 3) + (ord(data[i]) - ord_zero)
+ i += 1
+ # END while reading mode
+
+ # byte is space now, skip it
+ i += 1
+
+ # parse name, it is NULL separated
+
+ ns = i
+ while data[i] != '\0':
+ i += 1
+ # END while not reached NULL
+ name = data[ns:i]
+
+ # byte is NULL, get next 20
+ i += 1
+ sha = data[i:i+20]
+ i = i + 20
+
+ hexsha = sha_to_hex(sha)
+ if type_id == self.ascii_blob_id:
+ yield blob.Blob(self.repo, hexsha, mode, name)
+ elif type_id == self.ascii_tree_id:
+ yield Tree(self.repo, hexsha, mode, name)
+ elif type_id == self.ascii_commit_id:
+ # todo
+ yield None
+ else:
+ raise TypeError( "Unknown type found in tree data: %i" % type_id )
+ # END for each byte in data stream
- if typ == "tree":
- return Tree(repo, id, mode, path)
- elif typ == "blob":
- return blob.Blob(repo, id, mode, path)
- elif typ == "commit":
- return None
- else:
- raise(TypeError, "Invalid type: %s" % typ)
def __div__(self, file):
"""
@@ -67,36 +139,104 @@ class Tree(base.IndexObject):
<git.Blob "8b1e02c0fb554eed2ce2ef737a68bb369d7527df">
Returns
- ``git.Blob`` or ``git.Tree`` or ``None`` if not found
+ ``git.Blob`` or ``git.Tree``
+
+ Raise
+ KeyError if given file or tree does not exist in tree
"""
- return self.get(file)
+ return self[file]
def __repr__(self):
return '<git.Tree "%s">' % self.id
+
+ @classmethod
+ def _iter_recursive(cls, repo, tree, cur_depth, max_depth, predicate ):
+
+ for obj in tree:
+ # adjust path to be complete - NOTE(review): this rewrites obj.path in place on the object handed out by the tree; confirm repeated traversals do not prefix it twice
+ obj.path = os.path.join(tree.path, obj.path)
+ if not predicate(obj):
+ continue
+ yield obj
+ if obj.type == "tree" and ( max_depth < 0 or cur_depth+1 <= max_depth ):
+ for recursive_obj in cls._iter_recursive( repo, obj, cur_depth+1, max_depth, predicate ):
+ yield recursive_obj
+ # END for each recursive object
+ # END if we may enter recursion
+ # END for each object
+
+ def traverse(self, max_depth=-1, predicate = lambda i: True):
+ """
+ Returns
+ Iterator to traverse the tree recursively up to the given level.
+ The iterator returns Blob and Tree objects
+
+ ``max_depth``
+
+ if -1, the whole tree will be traversed
+ if 0, only the first level will be traversed which is the same as
+ the default non-recursive iterator
+
+ ``predicate``
+
+ If predicate(item) returns True, item will be returned by iterator; rejected items are skipped and rejected trees are not descended into
+ """
+ return self._iter_recursive( self.repo, self, 0, max_depth, predicate )
+
+ @property
+ def trees(self):
+ """
+ Returns
+ list(Tree, ...) list of trees directly below this tree, in the order of the cached entries
+ """
+ return [ i for i in self if i.type == "tree" ]
+
+ @property
+ def blobs(self):
+ """
+ Returns
+ list(Blob, ...) list of blobs directly below this tree, in the order of the cached entries
+ """
+ return [ i for i in self if i.type == "blob" ]
- # Implement the basics of the dict protocol:
- # directories/trees can be seen as object dicts.
- def __getitem__(self, key):
- return self._contents[key]
+ # List protocol
+ def __getslice__(self,i,j):
+ return self._cache[i:j]
+
def __iter__(self):
- return iter(self._contents)
-
+ return iter(self._cache)
+
def __len__(self):
- return len(self._contents)
-
- def __contains__(self, key):
- return key in self._contents
-
- def get(self, key):
- return self._contents.get(key)
-
- def items(self):
- return self._contents.items()
-
- def keys(self):
- return self._contents.keys()
-
- def values(self):
- return self._contents.values()
+ return len(self._cache)
+
+ def __getitem__(self,item):
+ if isinstance(item, int):
+ return self._cache[item]
+
+ if isinstance(item, basestring):
+ # compatibility: look the entry up by its path, like the former dict-style access
+ for obj in self._cache:
+ if obj.path == item:
+ return obj
+ # END for each obj
+ raise KeyError( "Blob or Tree named %s not found" % item )
+ # END index is basestring
+
+ raise TypeError( "Invalid index type: %r" % item )
+
+
+ def __contains__(self,item):
+ if isinstance(item, base.IndexObject):
+ return item in self._cache
+
+ # compatibility: also accept a path string and compare it to the entries' paths
+ for obj in self._cache:
+ if item == obj.path:
+ return True
+ # END for each item
+ return False
+
+ def __reversed__(self):
+ return reversed(self._cache)
diff --git a/lib/git/objects/util.py b/lib/git/objects/utils.py
index 15c1d114..15c1d114 100644
--- a/lib/git/objects/util.py
+++ b/lib/git/objects/utils.py