summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CHANGES34
-rw-r--r--lib/git/cmd.py120
-rw-r--r--lib/git/objects/base.py43
-rw-r--r--lib/git/objects/commit.py91
-rw-r--r--lib/git/objects/tag.py7
-rw-r--r--lib/git/objects/tree.py252
-rw-r--r--lib/git/objects/utils.py (renamed from lib/git/objects/util.py)0
-rw-r--r--lib/git/refs.py137
-rw-r--r--lib/git/repo.py99
-rw-r--r--lib/git/utils.py63
-rw-r--r--test/git/test_base.py23
-rw-r--r--test/git/test_blob.py49
-rw-r--r--test/git/test_commit.py29
-rw-r--r--test/git/test_git.py35
-rw-r--r--test/git/test_performance.py38
-rw-r--r--test/git/test_repo.py48
-rw-r--r--test/git/test_tree.py159
-rw-r--r--test/testlib/__init__.py1
-rw-r--r--test/testlib/helper.py11
19 files changed, 741 insertions, 498 deletions
diff --git a/CHANGES b/CHANGES
index 78d02b9a..c4ea13b0 100644
--- a/CHANGES
+++ b/CHANGES
@@ -2,9 +2,8 @@
CHANGES
=======
-0.1.X
+0.2
=====
-( Future Release )
General
-------
* file mode in Tree, Blob and Diff objects now is an int compatible to definitions
@@ -19,7 +18,16 @@ General
* from_string and list_from_string methods are now private and were renamed to
_from_string and _list_from_string respectively. As part of the private API, they
may change without prior notice.
-
+* Renamed all find_all methods to list_items - this method is part of the Iterable interface
+ that also provides a more efficient and more responsive iter_items method
+
+Item Iteration
+--------------
+* Previously one would return and process multiple items as a list only, which can
+ hurt performance and memory consumption and reduce response times.
+ The iter_items method provides an iterator that will return items on demand as parsed
+ from a stream. This way any number of objects can be handled.
+
objects Package
----------------
* blob, tree, tag and commit module have been moved to new objects package. This should
@@ -29,6 +37,13 @@ objects Package
Repo
----
* Moved blame method from Blob to repo as it appeared to belong there much more.
+* active_branch method now returns a Head object instead of a string with the name
+ of the active branch.
+* tree method now requires a Ref instance as input and defaults to the active_branch
+ instead of master
+* Removed 'log' method as it is effectively the same as the 'commits' method
+* 'commits' method has no max-count of returned commits anymore, it now behaves
+ like git-rev-list
Diff
----
@@ -43,7 +58,18 @@ Blob
Tree
----
* former 'name' member renamed to path as it suits the actual data better
-
+* added traverse method allowing to recursively traverse tree items
+* deleted blob method
+* added blobs and trees properties allowing to query the respective items in the
+ tree
+* now mimics behaviour of a read-only list instead of a dict to maintain order.
+* content_from_string method is now private and not part of the public API anymore
+
+Refs
+----
+* Will dynamically retrieve their object at the time of query to assure the information
+ is current. Previously objects would be cached, hence ref objects could not be safely kept
+ persistent.
0.1.6
=====
diff --git a/lib/git/cmd.py b/lib/git/cmd.py
index 940e35d1..2965eb8b 100644
--- a/lib/git/cmd.py
+++ b/lib/git/cmd.py
@@ -13,7 +13,7 @@ from errors import GitCommandError
GIT_PYTHON_TRACE = os.environ.get("GIT_PYTHON_TRACE", False)
execute_kwargs = ('istream', 'with_keep_cwd', 'with_extended_output',
- 'with_exceptions', 'with_raw_output')
+ 'with_exceptions', 'with_raw_output', 'as_process')
extra = {}
if sys.platform == 'win32':
@@ -34,6 +34,34 @@ class Git(object):
of the command to stdout.
Set its value to 'full' to see details about the returned values.
"""
+ class AutoInterrupt(object):
+ """
+ Kill/Interrupt the stored process instance once this instance goes out of scope. It is
+ used to prevent processes piling up in case iterators stop reading.
+ Besides all attributes are wired through to the contained process object
+ """
+ __slots__= "proc"
+
+ def __init__(self, proc ):
+ self.proc = proc
+
+ def __del__(self):
+ # did the process finish already so we have a return code ?
+ if self.proc.poll() is not None:
+ return
+
+ # try to kill it
+ try:
+ os.kill(self.proc.pid, 2) # interrupt signal
+ except AttributeError:
+ # try windows
+ subprocess.call(("TASKKILL", "/T", "/PID", self.proc.pid))
+ # END exception handling
+
+ def __getattr__(self, attr):
+ return getattr(self.proc, attr)
+
+
def __init__(self, git_dir=None):
"""
Initialize this instance with:
@@ -44,6 +72,10 @@ class Git(object):
"""
super(Git, self).__init__()
self.git_dir = git_dir
+
+ # cached command slots
+ self.cat_file_header = None
+ self.cat_file_all = None
def __getattr__(self, name):
"""
@@ -70,6 +102,7 @@ class Git(object):
with_extended_output=False,
with_exceptions=True,
with_raw_output=False,
+ as_process=False
):
"""
Handles executing the command on the shell and consumes and returns
@@ -96,6 +129,16 @@ class Git(object):
``with_raw_output``
Whether to avoid stripping off trailing whitespace.
+
+ ``as_process``
+ Whether to return the created process instance directly from which
+ streams can be read on demand. This will render with_extended_output,
+ with_exceptions and with_raw_output ineffective - the caller will have
+ to deal with the details himself.
+ It is important to note that the process will be placed into an AutoInterrupt
+ wrapper that will interrupt the process once it goes out of scope. If you
+ use the command in iterators, you should pass the whole process instance
+ instead of a single stream.
Returns::
@@ -127,7 +170,11 @@ class Git(object):
**extra
)
+ if as_process:
+ return self.AutoInterrupt(proc)
+
# Wait for the process to return
+ status = 0
try:
stdout_value = proc.stdout.read()
stderr_value = proc.stderr.read()
@@ -218,3 +265,74 @@ class Git(object):
call.extend(args)
return self.execute(call, **_kwargs)
+
+ def _parse_object_header(self, header_line):
+ """
+ ``header_line``
+ <hex_sha> type_string size_as_int
+
+ Returns
+ (hex_sha, type_string, size_as_int)
+
+ Raises
+ ValueError if the header contains indication for an error due to incorrect
+ input sha
+ """
+ tokens = header_line.split()
+ if len(tokens) != 3:
+ raise ValueError( "SHA named %s could not be resolved" % tokens[0] )
+
+ return (tokens[0], tokens[1], int(tokens[2]))
+
+ def __prepare_ref(self, ref):
+ # required for command to separate refs on stdin
+ refstr = str(ref) # could be ref-object
+ if refstr.endswith("\n"):
+ return refstr
+ return refstr + "\n"
+
+ def __get_persistent_cmd(self, attr_name, cmd_name, *args,**kwargs):
+ cur_val = getattr(self, attr_name)
+ if cur_val is not None:
+ return cur_val
+
+ options = { "istream" : subprocess.PIPE, "as_process" : True }
+ options.update( kwargs )
+
+ cmd = self._call_process( cmd_name, *args, **options )
+ setattr(self, attr_name, cmd )
+ return cmd
+
+ def __get_object_header(self, cmd, ref):
+ cmd.stdin.write(self.__prepare_ref(ref))
+ cmd.stdin.flush()
+ return self._parse_object_header(cmd.stdout.readline())
+
+ def get_object_header(self, ref):
+ """
+ Use this method to quickly examine the type and size of the object behind
+ the given ref.
+
+ NOTE
+ The method will only suffer from the costs of command invocation
+ once and reuses the command in subsequent calls.
+
+ Return:
+ (hexsha, type_string, size_as_int)
+ """
+ cmd = self.__get_persistent_cmd("cat_file_header", "cat_file", batch_check=True)
+ return self.__get_object_header(cmd, ref)
+
+ def get_object_data(self, ref):
+ """
+ As get_object_header, but returns object data as well
+
+ Return:
+ (hexsha, type_string, size_as_int,data_string)
+ """
+ cmd = self.__get_persistent_cmd("cat_file_all", "cat_file", batch=True)
+ hexsha, typename, size = self.__get_object_header(cmd, ref)
+ data = cmd.stdout.read(size)
+ cmd.stdout.read(1) # finishing newlines
+
+ return (hexsha, typename, size, data)
diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py
index 43aa8dd1..07538ada 100644
--- a/lib/git/objects/base.py
+++ b/lib/git/objects/base.py
@@ -4,32 +4,10 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
import os
-
-class LazyMixin(object):
- lazy_properties = []
- __slots__ = tuple()
+from git.utils import LazyMixin
- def __getattr__(self, attr):
- """
- Whenever an attribute is requested that we do not know, we allow it
- to be created and set. Next time the same attribute is reqeusted, it is simply
- returned from our dict/slots.
- """
- self._set_cache_(attr)
- # will raise in case the cache was not created
- return object.__getattribute__(self, attr)
+_assertion_msg_format = "Created object %r whose python type %r disagrees with the acutal git object type %r"
- def _set_cache_(self, attr):
- """ This method should be overridden in the derived class.
- It should check whether the attribute named by attr can be created
- and cached. Do nothing if you do not know the attribute or call your subclass
-
- The derived class may create as many additional attributes as it deems
- necessary in case a git command returns more information than represented
- in the single attribute."""
- pass
-
-
class Object(LazyMixin):
"""
Implements an Object which may be Blobs, Trees, Commits and Tags
@@ -71,9 +49,13 @@ class Object(LazyMixin):
Retrieve object information
"""
if attr == "size":
- self.size = int(self.repo.git.cat_file(self.id, s=True).rstrip())
+ hexsha, typename, self.size = self.repo.git.get_object_header(self.id)
+ assert typename == self.type, _assertion_msg_format % (self.id, typename, self.type)
elif attr == "data":
- self.data = self.repo.git.cat_file(self.id, p=True, with_raw_output=True)
+ hexsha, typename, self.size, self.data = self.repo.git.get_object_data(self.id)
+ assert typename == self.type, _assertion_msg_format % (self.id, typename, self.type)
+ else:
+ super(Object,self)._set_cache_(attr)
def __eq__(self, other):
"""
@@ -143,8 +125,15 @@ class IndexObject(Object):
if isinstance(mode, basestring):
self.mode = self._mode_str_to_int(mode)
+ def _set_cache_(self, attr):
+ if attr in IndexObject.__slots__:
+ # they cannot be retrieved later on ( not without searching for them )
+ raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ )
+ else:
+ super(IndexObject, self)._set_cache_(attr)
+
@classmethod
- def _mode_str_to_int( cls, modestr ):
+ def _mode_str_to_int(cls, modestr):
"""
``modestr``
string like 755 or 644 or 100644 - only the last 3 chars will be used
diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py
index c3e97bf9..101014ab 100644
--- a/lib/git/objects/commit.py
+++ b/lib/git/objects/commit.py
@@ -6,14 +6,14 @@
import re
import time
-
+from git.utils import Iterable
from git.actor import Actor
-from tree import Tree
import git.diff as diff
import git.stats as stats
+from tree import Tree
import base
-class Commit(base.Object):
+class Commit(base.Object, Iterable):
"""
Wraps a git Commit object.
@@ -37,7 +37,7 @@ class Commit(base.Object):
The parameter documentation indicates the type of the argument after a colon ':'.
``id``
- is the sha id of the commit
+ is the sha id of the commit or a ref
``parents`` : tuple( Commit, ... )
is a tuple of commit ids or actual Commits
@@ -71,7 +71,7 @@ class Commit(base.Object):
# END for each parent to convert
if self.id and tree is not None:
- self.tree = Tree(repo, id=tree)
+ self.tree = Tree(repo, id=tree, path='')
# END id to tree conversion
def _set_cache_(self, attr):
@@ -80,8 +80,11 @@ class Commit(base.Object):
to be set.
We set all values at once.
"""
- if attr in self.__slots__:
- temp = Commit.find_all(self.repo, self.id, max_count=1)[0]
+ if attr in Commit.__slots__:
+ # prepare our data lines to match rev-list
+ data_lines = self.data.splitlines()
+ data_lines.insert(0, "commit %s" % self.id)
+ temp = self._iter_from_process_or_stream(self.repo, iter(data_lines)).next()
self.parents = temp.parents
self.tree = temp.tree
self.author = temp.author
@@ -120,7 +123,7 @@ class Commit(base.Object):
return len(repo.git.rev_list(ref, '--', path).strip().splitlines())
@classmethod
- def find_all(cls, repo, ref, path='', **kwargs):
+ def iter_items(cls, repo, ref, path='', **kwargs):
"""
Find all commits matching the given criteria.
@@ -128,7 +131,7 @@ class Commit(base.Object):
is the Repo
``ref``
- is the ref from which to begin (SHA1 or name)
+ is the ref from which to begin (SHA1, Head or name)
``path``
is an optional path, if set only Commits that include the path
@@ -140,55 +143,67 @@ class Commit(base.Object):
``skip`` is the number of commits to skip
Returns
- git.Commit[]
+ iterator yielding Commit items
"""
- options = {'pretty': 'raw'}
+ options = {'pretty': 'raw', 'as_process' : True }
options.update(kwargs)
- output = repo.git.rev_list(ref, '--', path, **options)
- return cls._list_from_string(repo, output)
+ # the test system might confront us with string values -
+ proc = repo.git.rev_list(ref, '--', path, **options)
+ return cls._iter_from_process_or_stream(repo, proc)
@classmethod
- def _list_from_string(cls, repo, text):
+ def _iter_from_process_or_stream(cls, repo, proc_or_stream):
"""
Parse out commit information into a list of Commit objects
``repo``
is the Repo
- ``text``
- is the text output from the git-rev-list command (raw format)
+ ``proc``
+ git-rev-list process instance (raw format)
Returns
- git.Commit[]
+ iterator returning Commit objects
"""
- lines =text.splitlines(False)
- commits = []
-
- while lines:
- id = lines.pop(0).split()[1]
- tree = lines.pop(0).split()[1]
+ stream = proc_or_stream
+ if not hasattr(stream,'next'):
+ stream = proc_or_stream.stdout
+
+ for line in stream:
+ id = line.split()[1]
+ assert line.split()[0] == "commit"
+ tree = stream.next().split()[1]
parents = []
- while lines and lines[0].startswith('parent'):
- parents.append(lines.pop(0).split()[-1])
- # END while there are parent lines
- author, authored_date = cls._actor(lines.pop(0))
- committer, committed_date = cls._actor(lines.pop(0))
+ next_line = None
+ for parent_line in stream:
+ if not parent_line.startswith('parent'):
+ next_line = parent_line
+ break
+ # END abort reading parents
+ parents.append(parent_line.split()[-1])
+ # END for each parent line
+
+ author, authored_date = cls._actor(next_line)
+ committer, committed_date = cls._actor(stream.next())
- # free line
- lines.pop(0)
+ # empty line
+ stream.next()
message_lines = []
- while lines and not lines[0].startswith('commit'):
- message_lines.append(lines.pop(0).strip())
+ next_line = None
+ for msg_line in stream:
+ if not msg_line.startswith(' '):
+ break
+ # END abort message reading
+ message_lines.append(msg_line.strip())
# END while there are message lines
- message = '\n'.join(message_lines[:-1]) # last line is empty
-
- commits.append(Commit(repo, id=id, parents=parents, tree=tree, author=author, authored_date=authored_date,
- committer=committer, committed_date=committed_date, message=message))
- # END while lines
- return commits
+ message = '\n'.join(message_lines)
+
+ yield Commit(repo, id=id, parents=parents, tree=tree, author=author, authored_date=authored_date,
+ committer=committer, committed_date=committed_date, message=message)
+ # END for each line in stream
@classmethod
def diff(cls, repo, a, b=None, paths=None):
diff --git a/lib/git/objects/tag.py b/lib/git/objects/tag.py
index af1022f0..ecf6349d 100644
--- a/lib/git/objects/tag.py
+++ b/lib/git/objects/tag.py
@@ -8,7 +8,7 @@ Module containing all object based types.
"""
import base
import commit
-from util import get_object_type_by_name
+from utils import get_object_type_by_name
class TagObject(base.Object):
"""
@@ -48,9 +48,8 @@ class TagObject(base.Object):
"""
Cache all our attributes at once
"""
- if attr in self.__slots__:
- output = self.repo.git.cat_file(self.type,self.id)
- lines = output.split("\n")
+ if attr in TagObject.__slots__:
+ lines = self.data.splitlines()
obj, hexsha = lines[0].split(" ") # object <hexsha>
type_token, type_name = lines[1].split(" ") # type <type_name>
diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py
index 273384a3..abfa9622 100644
--- a/lib/git/objects/tree.py
+++ b/lib/git/objects/tree.py
@@ -7,53 +7,125 @@
import os
import blob
import base
+import binascii
+
+def sha_to_hex(sha):
+ """Takes a string and returns the hex of the sha within"""
+ hexsha = binascii.hexlify(sha)
+ assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % hexsha
+ return hexsha
class Tree(base.IndexObject):
+ """
+ Trees represent an ordered list of Blobs and other Trees. Hence they can be
+ accessed like a list.
+
+ Tree's will cache their contents after first retrieval to improve efficiency.
+
+ ``Tree as a list``::
+
+ Access a specific blob using the
+ tree['filename'] notation.
+
+ You may as well access by index
+ blob = tree[0]
+
+
+ """
type = "tree"
- __slots__ = "_contents"
+ __slots__ = "_cache"
+
+ # using ascii codes for comparison
+ ascii_commit_id = (0x31 << 4) + 0x36
+ ascii_blob_id = (0x31 << 4) + 0x30
+ ascii_tree_id = (0x34 << 4) + 0x30
- def __init__(self, repo, id, mode=None, path=None):
+
+ def __init__(self, repo, id, mode=0, path=None):
super(Tree, self).__init__(repo, id, mode, path)
def _set_cache_(self, attr):
- if attr == "_contents":
- # Read the tree contents.
- self._contents = {}
- for line in self.repo.git.ls_tree(self.id).splitlines():
- obj = self.content__from_string(self.repo, line)
- if obj is not None:
- self._contents[obj.path] = obj
+ if attr == "_cache":
+ # Set the data when we need it
+ self._cache = self._get_tree_cache()
else:
super(Tree, self)._set_cache_(attr)
- @staticmethod
- def content__from_string(repo, text):
+ def _get_tree_cache(self):
"""
- Parse a content item and create the appropriate object
-
- ``repo``
- is the Repo
-
- ``text``
- is the single line containing the items data in `git ls-tree` format
-
+ Return
+ list(object_instance, ...)
+
+ ``treeish``
+ sha or ref identifying a tree
+ """
+ out = list()
+ for obj in self._iter_from_data():
+ if obj is not None:
+ out.append(obj)
+ # END if object was handled
+ # END for each line from ls-tree
+ return out
+
+
+ def _iter_from_data(self):
+ """
+ Reads the binary non-pretty printed representation of a tree and converts
+ it into Blob, Tree or Commit objects.
+
+ Note: This method was inspired by the parse_tree method in dulwich.
+
Returns
- ``git.Blob`` or ``git.Tree``
+ list(IndexObject, ...)
"""
- try:
- mode, typ, id, path = text.expandtabs(1).split(" ", 3)
- except:
- return None
+ ord_zero = ord('0')
+ data = self.data
+ len_data = len(data)
+ i = 0
+ while i < len_data:
+ mode = 0
+ mode_boundary = i + 6
+
+ # keep it ascii - we compare against the respective values
+ type_id = (ord(data[i])<<4) + ord(data[i+1])
+ i += 2
+
+ while data[i] != ' ':
+ # move existing mode integer up one level being 3 bits
+ # and add the actual ordinal value of the character
+ mode = (mode << 3) + (ord(data[i]) - ord_zero)
+ i += 1
+ # END while reading mode
+
+ # byte is space now, skip it
+ i += 1
+
+ # parse name, it is NULL separated
+
+ ns = i
+ while data[i] != '\0':
+ i += 1
+ # END while not reached NULL
+ name = data[ns:i]
+
+ # byte is NULL, get next 20
+ i += 1
+ sha = data[i:i+20]
+ i = i + 20
+
+ hexsha = sha_to_hex(sha)
+ if type_id == self.ascii_blob_id:
+ yield blob.Blob(self.repo, hexsha, mode, name)
+ elif type_id == self.ascii_tree_id:
+ yield Tree(self.repo, hexsha, mode, name)
+ elif type_id == self.ascii_commit_id:
+ # todo
+ yield None
+ else:
+ raise TypeError( "Unknown type found in tree data: %i" % type_id )
+ # END for each byte in data stream
- if typ == "tree":
- return Tree(repo, id, mode, path)
- elif typ == "blob":
- return blob.Blob(repo, id, mode, path)
- elif typ == "commit":
- return None
- else:
- raise(TypeError, "Invalid type: %s" % typ)
def __div__(self, file):
"""
@@ -67,36 +139,104 @@ class Tree(base.IndexObject):
<git.Blob "8b1e02c0fb554eed2ce2ef737a68bb369d7527df">
Returns
- ``git.Blob`` or ``git.Tree`` or ``None`` if not found
+ ``git.Blob`` or ``git.Tree``
+
+ Raise
+ KeyError if given file or tree does not exist in tree
"""
- return self.get(file)
+ return self[file]
def __repr__(self):
return '<git.Tree "%s">' % self.id
+
+ @classmethod
+ def _iter_recursive(cls, repo, tree, cur_depth, max_depth, predicate ):
+
+ for obj in tree:
+ # adjust path to be complete
+ obj.path = os.path.join(tree.path, obj.path)
+ if not predicate(obj):
+ continue
+ yield obj
+ if obj.type == "tree" and ( max_depth < 0 or cur_depth+1 <= max_depth ):
+ for recursive_obj in cls._iter_recursive( repo, obj, cur_depth+1, max_depth, predicate ):
+ yield recursive_obj
+ # END for each recursive object
+ # END if we may enter recursion
+ # END for each object
+
+ def traverse(self, max_depth=-1, predicate = lambda i: True):
+ """
+ Returns
+ Iterator to traverse the tree recursively up to the given level.
+ The iterator returns Blob and Tree objects
+
+ ``max_depth``
+
+ if -1, the whole tree will be traversed
+ if 0, only the first level will be traversed which is the same as
+ the default non-recursive iterator
+
+ ``predicate``
+
+ If predicate(item) returns True, item will be returned by iterator
+ """
+ return self._iter_recursive( self.repo, self, 0, max_depth, predicate )
+
+ @property
+ def trees(self):
+ """
+ Returns
+ list(Tree, ...) list of trees directly below this tree
+ """
+ return [ i for i in self if i.type == "tree" ]
+
+ @property
+ def blobs(self):
+ """
+ Returns
+ list(Blob, ...) list of blobs directly below this tree
+ """
+ return [ i for i in self if i.type == "blob" ]
- # Implement the basics of the dict protocol:
- # directories/trees can be seen as object dicts.
- def __getitem__(self, key):
- return self._contents[key]
+ # List protocol
+ def __getslice__(self,i,j):
+ return self._cache[i:j]
+
def __iter__(self):
- return iter(self._contents)
-
+ return iter(self._cache)
+
def __len__(self):
- return len(self._contents)
-
- def __contains__(self, key):
- return key in self._contents
-
- def get(self, key):
- return self._contents.get(key)
-
- def items(self):
- return self._contents.items()
-
- def keys(self):
- return self._contents.keys()
-
- def values(self):
- return self._contents.values()
+ return len(self._cache)
+
+ def __getitem__(self,item):
+ if isinstance(item, int):
+ return self._cache[item]
+
+ if isinstance(item, basestring):
+ # compatibility
+ for obj in self._cache:
+ if obj.path == item:
+ return obj
+ # END for each obj
+ raise KeyError( "Blob or Tree named %s not found" % item )
+ # END index is basestring
+
+ raise TypeError( "Invalid index type: %r" % item )
+
+
+ def __contains__(self,item):
+ if isinstance(item, base.IndexObject):
+ return item in self._cache
+
+ # compatibility
+ for obj in self._cache:
+ if item == obj.path:
+ return True
+ # END for each item
+ return False
+
+ def __reversed__(self):
+ return reversed(self._cache)
diff --git a/lib/git/objects/util.py b/lib/git/objects/utils.py
index 15c1d114..15c1d114 100644
--- a/lib/git/objects/util.py
+++ b/lib/git/objects/utils.py
diff --git a/lib/git/refs.py b/lib/git/refs.py
index 820150d3..3c9eb817 100644
--- a/lib/git/refs.py
+++ b/lib/git/refs.py
@@ -7,17 +7,20 @@
Module containing all ref based objects
"""
from objects.base import Object
-from objects.util import get_object_type_by_name
+from objects.utils import get_object_type_by_name
+from utils import LazyMixin, Iterable
-class Ref(object):
+class Ref(LazyMixin, Iterable):
"""
Represents a named reference to any object
"""
- __slots__ = ("path", "object")
+ __slots__ = ("repo", "path")
- def __init__(self, path, object = None):
+ def __init__(self, repo, path, object = None):
"""
Initialize this instance
+ ``repo``
+ Our parent repository
``path``
Path relative to the .git/ directory pointing to the ref in question, i.e.
@@ -26,8 +29,10 @@ class Ref(object):
``object``
Object instance, will be retrieved on demand if None
"""
+ self.repo = repo
self.path = path
- self.object = object
+ if object is not None:
+ self.object = object
def __str__(self):
return self.name
@@ -57,9 +62,20 @@ class Ref(object):
return self.path # could be refs/HEAD
return '/'.join(tokens[2:])
-
+
+ @property
+ def object(self):
+ """
+ Returns
+ The object our ref currently refers to. Refs can be cached, they will
+ always point to the actual object as it gets re-created on each query
+ """
+ # have to be dynamic here as we may be a tag which can point to anything
+ hexsha, typename, size = self.repo.git.get_object_header(self.path)
+ return get_object_type_by_name(typename)(self.repo, hexsha)
+
@classmethod
- def find_all(cls, repo, common_path = "refs", **kwargs):
+ def iter_items(cls, repo, common_path = "refs", **kwargs):
"""
Find all refs in the repository
@@ -88,54 +104,38 @@ class Ref(object):
options.update(kwargs)
output = repo.git.for_each_ref(common_path, **options)
- return cls._list_from_string(repo, output)
+ return cls._iter_from_stream(repo, iter(output.splitlines()))
@classmethod
- def _list_from_string(cls, repo, text):
- """
- Parse out ref information into a list of Ref compatible objects
-
- ``repo``
- is the Repo
- ``text``
- is the text output from the git-for-each-ref command
-
- Returns
- git.Ref[]
-
- list of Ref objects
- """
+ def _iter_from_stream(cls, repo, stream):
+ """ Parse out ref information into a list of Ref compatible objects
+ Returns git.Ref[] list of Ref objects """
heads = []
- for line in text.splitlines():
+ for line in stream:
heads.append(cls._from_string(repo, line))
return heads
@classmethod
def _from_string(cls, repo, line):
- """
- Create a new Ref instance from the given string.
-
- ``repo``
- is the Repo
-
- ``line``
- is the formatted ref information
-
- Format::
-
+ """ Create a new Ref instance from the given string.
+ Format
name: [a-zA-Z_/]+
<null byte>
id: [0-9A-Fa-f]{40}
-
- Returns
- git.Head
- """
+ Returns git.Head """
full_path, hexsha, type_name, object_size = line.split("\x00")
- obj = get_object_type_by_name(type_name)(repo, hexsha)
- obj.size = object_size
- return cls(full_path, obj)
+
+ # No, we keep the object dynamic by allowing it to be retrieved by
+ # our path on demand - due to persistent commands it is fast.
+ # This reduces the risk that the object does not match
+ # the changed ref anymore in case it changes in the meanwhile
+ return cls(repo, full_path)
+
+ # obj = get_object_type_by_name(type_name)(repo, hexsha)
+ # obj.size = object_size
+ # return cls(repo, full_path, obj)
class Head(Ref):
@@ -167,14 +167,14 @@ class Head(Ref):
return self.object
@classmethod
- def find_all(cls, repo, common_path = "refs/heads", **kwargs):
+ def iter_items(cls, repo, common_path = "refs/heads", **kwargs):
"""
Returns
- git.Head[]
+ Iterator yielding Head items
- For more documentation, please refer to git.base.Ref.find_all
+ For more documentation, please refer to git.base.Ref.list_items
"""
- return super(Head,cls).find_all(repo, common_path, **kwargs)
+ return super(Head,cls).iter_items(repo, common_path, **kwargs)
def __repr__(self):
return '<git.Head "%s">' % self.name
@@ -190,30 +190,13 @@ class TagRef(Ref):
This tag object will always point to a commit object, but may carry additional
information in a tag object::
- tagref = TagRef.find_all(repo)[0]
+ tagref = TagRef.list_items(repo)[0]
print tagref.commit.message
if tagref.tag is not None:
print tagref.tag.message
"""
- __slots__ = "tag"
-
- def __init__(self, path, commit_or_tag):
- """
- Initialize a newly instantiated Tag
-
- ``path``
- is the full path to the tag
-
- ``commit_or_tag``
- is the Commit or TagObject that this tag ref points to
- """
- super(TagRef, self).__init__(path, commit_or_tag)
- self.tag = None
-
- if commit_or_tag.type == "tag":
- self.tag = commit_or_tag
- # END tag object handling
+ __slots__ = tuple()
@property
def commit(self):
@@ -223,18 +206,32 @@ class TagRef(Ref):
"""
if self.object.type == "commit":
return self.object
- # it is a tag object
- return self.object.object
+ elif self.object.type == "tag":
+ # it is a tag object which carries the commit as an object - we can point to anything
+ return self.object.object
+ else:
+ raise ValueError( "Tag %s points to a Blob or Tree - have never seen that before" % self )
+
+ @property
+ def tag(self):
+ """
+ Returns
+ Tag object this tag ref points to or None in case
+ we are a light weight tag
+ """
+ if self.object.type == "tag":
+ return self.object
+ return None
@classmethod
- def find_all(cls, repo, common_path = "refs/tags", **kwargs):
+ def iter_items(cls, repo, common_path = "refs/tags", **kwargs):
"""
Returns
- git.Tag[]
+ Iterator yielding commit items
- For more documentation, please refer to git.base.Ref.find_all
+ For more documentation, please refer to git.base.Ref.list_items
"""
- return super(TagRef,cls).find_all(repo, common_path, **kwargs)
+ return super(TagRef,cls).iter_items(repo, common_path, **kwargs)
# provide an alias
diff --git a/lib/git/repo.py b/lib/git/repo.py
index dd5acfc3..c74c7e8d 100644
--- a/lib/git/repo.py
+++ b/lib/git/repo.py
@@ -102,7 +102,7 @@ class Repo(object):
Returns
``git.Head[]``
"""
- return Head.find_all(self)
+ return Head.list_items(self)
# alias heads
branches = heads
@@ -115,7 +115,7 @@ class Repo(object):
Returns
``git.Tag[]``
"""
- return Tag.find_all(self)
+ return Tag.list_items(self)
def blame(self, commit, file):
"""
@@ -197,7 +197,7 @@ class Repo(object):
# END distinguish hexsha vs other information
return blames
- def commits(self, start='master', path='', max_count=10, skip=0):
+ def commits(self, start='master', path='', max_count=None, skip=0):
"""
A list of Commit objects representing the history of a given ref/commit
@@ -209,7 +209,7 @@ class Repo(object):
Commits that do not contain that path will not be returned.
``max_count``
- is the maximum number of commits to return (default 10)
+ is the maximum number of commits to return (default None)
``skip``
is the number of commits to skip (default 0) which will effectively
@@ -220,8 +220,11 @@ class Repo(object):
"""
options = {'max_count': max_count,
'skip': skip}
-
- return Commit.find_all(self, start, path, **options)
+
+ if max_count is None:
+ options.pop('max_count')
+
+ return Commit.list_items(self, start, path, **options)
def commits_between(self, frm, to):
"""
@@ -237,7 +240,7 @@ class Repo(object):
Returns
``git.Commit[]``
"""
- return reversed(Commit.find_all(self, "%s..%s" % (frm, to)))
+ return reversed(Commit.list_items(self, "%s..%s" % (frm, to)))
def commits_since(self, start='master', path='', since='1970-01-01'):
"""
@@ -259,7 +262,7 @@ class Repo(object):
"""
options = {'since': since}
- return Commit.find_all(self, start, path, **options)
+ return Commit.list_items(self, start, path, **options)
def commit_count(self, start='master', path=''):
"""
@@ -277,12 +280,14 @@ class Repo(object):
"""
return Commit.count(self, start, path)
- def commit(self, id, path = ''):
+ def commit(self, id=None, path = ''):
"""
The Commit object for the specified id
``id``
- is the SHA1 identifier of the commit
+ is the SHA1 identifier of the commit or a ref or a ref name
+ if None, it defaults to the active branch
+
``path``
is an optional path, if set the returned commit must contain the path.
@@ -290,9 +295,11 @@ class Repo(object):
Returns
``git.Commit``
"""
+ if id is None:
+ id = self.active_branch
options = {'max_count': 1}
- commits = Commit.find_all(self, id, path, **options)
+ commits = Commit.list_items(self, id, path, **options)
if not commits:
raise ValueError, "Invalid identifier %s, or given path '%s' too restrictive" % ( id, path )
@@ -309,55 +316,47 @@ class Repo(object):
other_repo_refs = other_repo.git.rev_list(other_ref, '--').strip().splitlines()
diff_refs = list(set(other_repo_refs) - set(repo_refs))
- return map(lambda ref: Commit.find_all(other_repo, ref, max_count=1)[0], diff_refs)
+ return map(lambda ref: Commit.list_items(other_repo, ref, max_count=1)[0], diff_refs)
- def tree(self, treeish='master'):
+ def tree(self, treeish=None):
"""
The Tree object for the given treeish reference
``treeish``
- is the reference (default 'master')
+ is a Ref instance defaulting to the active_branch if None.
Examples::
- repo.tree('master')
-
+ repo.tree(repo.heads[0])
Returns
``git.Tree``
+
+ NOTE
+ A ref is required here to assure you point to a commit or tag. Otherwise
+ it is not guaranteed that you point to the root-level tree.
+
+ If you need a non-root level tree, find it by iterating the root tree.
"""
- return Tree(self, id=treeish)
-
- def blob(self, id):
- """
- The Blob object for the given id
-
- ``id``
- is the SHA1 id of the blob
-
- Returns
- ``git.Blob``
- """
- return Blob(self, id=id)
-
- def log(self, commit='master', path=None, **kwargs):
- """
- The Commit for a treeish, and all commits leading to it.
+ if treeish is None:
+ treeish = self.active_branch
+ if not isinstance(treeish, Ref):
+ raise ValueError( "Treeish reference required, got %r" % treeish )
- ``kwargs``
- keyword arguments specifying flags to be used in git-log command,
- i.e.: max_count=1 to limit the amount of commits returned
+
+ # As we are directly reading object information, we must make sure
+ # we truly point to a tree object. We resolve the ref to a sha in all cases
+ # to assure the returned tree can be compared properly. Except for
+ # heads, ids should always be hexshas
+ hexsha, typename, size = self.git.get_object_header( treeish )
+ if typename != "tree":
+ hexsha, typename, size = self.git.get_object_header( str(treeish)+'^{tree}' )
+ # END tree handling
+ treeish = hexsha
+
+ # the root has an empty relative path and the default mode
+ return Tree(self, treeish, 0, '')
- Returns
- ``git.Commit[]``
- """
- options = {'pretty': 'raw'}
- options.update(kwargs)
- arg = [commit, '--']
- if path:
- arg.append(path)
- commits = self.git.log(*arg, **options)
- return Commit._list_from_string(self, commits)
def diff(self, a, b, *paths):
"""
@@ -588,13 +587,9 @@ class Repo(object):
The name of the currently active branch.
Returns
- str (the branch name)
+ Head to the active branch
"""
- branch = self.git.symbolic_ref('HEAD').strip()
- if branch.startswith('refs/heads/'):
- branch = branch[len('refs/heads/'):]
-
- return branch
+ return Head( self, self.git.symbolic_ref('HEAD').strip() )
def __repr__(self):
return '<git.Repo "%s">' % self.path
diff --git a/lib/git/utils.py b/lib/git/utils.py
index c204c432..f84c247d 100644
--- a/lib/git/utils.py
+++ b/lib/git/utils.py
@@ -24,3 +24,66 @@ def is_git_dir(d):
(os.path.islink(headref) and
os.readlink(headref).startswith('refs'))
return False
+
+
+class LazyMixin(object):
+ """
+ Base class providing an interface to lazily retrieve attribute values upon
+ first access. If slots are used, memory will only be reserved once the attribute
+ is actually accessed and retrieved the first time. All future accesses will
+ return the cached value as stored in the Instance's dict or slot.
+ """
+ __slots__ = tuple()
+
+ def __getattr__(self, attr):
+ """
+ Whenever an attribute is requested that we do not know, we allow it
+ to be created and set. Next time the same attribute is requested, it is simply
+ returned from our dict/slots.
+ """
+ self._set_cache_(attr)
+ # will raise in case the cache was not created
+ return object.__getattribute__(self, attr)
+
+ def _set_cache_(self, attr):
+ """ This method should be overridden in the derived class.
+ It should check whether the attribute named by attr can be created
+ and cached. Do nothing if you do not know the attribute or call your subclass
+
+ The derived class may create as many additional attributes as it deems
+ necessary in case a git command returns more information than represented
+ in the single attribute."""
+ pass
+
+
+class Iterable(object):
+ """
+ Defines an interface for iterable items which is to assure a uniform
+ way to retrieve and iterate items within the git repository
+ """
+ __slots__ = tuple()
+
+ @classmethod
+ def list_items(cls, repo, *args, **kwargs):
+ """
+ Find all items of this type - subclasses can specify args and kwargs differently.
+ If no args are given, subclasses are obliged to return all items if no additional
+ arguments are given.
+
+ Note: Favor the iter_items method as it will return items on demand instead of building the full list
+
+ Returns:
+ list(Item,...) list of item instances
+ """
+ return list(cls.iter_items(repo, *args, **kwargs))
+
+
+ @classmethod
+ def iter_items(cls, repo, *args, **kwargs):
+ """
+ For more information about the arguments, see list_items
+ Return:
+ iterator yielding Items
+ """
+ raise NotImplementedError("To be implemented by Subclass")
+
diff --git a/test/git/test_base.py b/test/git/test_base.py
index a153eb83..402cdba3 100644
--- a/test/git/test_base.py
+++ b/test/git/test_base.py
@@ -10,7 +10,7 @@ from git import *
import git.objects.base as base
import git.refs as refs
from itertools import chain
-from git.objects.util import get_object_type_by_name
+from git.objects.utils import get_object_type_by_name
class TestBase(object):
@@ -24,14 +24,14 @@ class TestBase(object):
def test_base_object(self):
# test interface of base object classes
- fcreators = (self.repo.blob, self.repo.tree, self.repo.commit, lambda id: TagObject(self.repo,id) )
- assert len(fcreators) == len(self.type_tuples)
+ types = (Blob, Tree, Commit, TagObject)
+ assert len(types) == len(self.type_tuples)
s = set()
num_objs = 0
num_index_objs = 0
- for fcreator, (typename, hexsha) in zip(fcreators, self.type_tuples):
- item = fcreator(hexsha)
+ for obj_type, (typename, hexsha) in zip(types, self.type_tuples):
+ item = obj_type(self.repo,hexsha)
num_objs += 1
assert item.id == hexsha
assert item.type == typename
@@ -53,6 +53,7 @@ class TestBase(object):
# each has a unique sha
assert len(s) == num_objs
+ assert len(s|s) == num_objs
assert num_index_objs == 2
@@ -70,6 +71,18 @@ class TestBase(object):
s.add(ref)
# END for each ref
assert len(s) == ref_count
+ assert len(s|s) == ref_count
+
+ def test_heads(self):
+ # see how it dynamically updates its object
+ for head in self.repo.heads:
+ head.name
+ head.path
+ prev_object = head.object
+ cur_object = head.object
+ assert prev_object == cur_object # represent the same git object
+ assert prev_object is not cur_object # but are different instances
+ # END for each head
def test_get_object_type_by_name(self):
for tname in base.Object.TYPES:
diff --git a/test/git/test_blob.py b/test/git/test_blob.py
index ebb53d0c..266f3a23 100644
--- a/test/git/test_blob.py
+++ b/test/git/test_blob.py
@@ -12,51 +12,14 @@ class TestBlob(object):
def setup(self):
self.repo = Repo(GIT_REPO)
- @patch_object(Git, '_call_process')
- def test_should_return_blob_contents(self, git):
- git.return_value = fixture('cat_file_blob')
- blob = Blob(self.repo, **{'id': 'abc'})
- assert_equal("Hello world", blob.data)
- assert_true(git.called)
- assert_equal(git.call_args, (('cat_file', 'abc'), {'p': True, 'with_raw_output': True}))
-
- @patch_object(Git, '_call_process')
- def test_should_return_blob_contents_with_newline(self, git):
- git.return_value = fixture('cat_file_blob_nl')
- blob = Blob(self.repo, **{'id': 'abc'})
- assert_equal("Hello world\n", blob.data)
- assert_true(git.called)
- assert_equal(git.call_args, (('cat_file', 'abc'), {'p': True, 'with_raw_output': True}))
-
- @patch_object(Git, '_call_process')
- def test_should_cache_data(self, git):
- git.return_value = fixture('cat_file_blob')
- bid = '787b92b63f629398f3d2ceb20f7f0c2578259e84'
+ def test_should_cache_data(self):
+ bid = 'a802c139d4767c89dcad79d836d05f7004d39aac'
blob = Blob(self.repo, bid)
blob.data
- blob.data
- assert_true(git.called)
- assert_equal(git.call_count, 1)
- assert_equal(git.call_args, (('cat_file', bid), {'p': True, 'with_raw_output': True}))
-
- @patch_object(Git, '_call_process')
- def test_should_return_file_size(self, git):
- git.return_value = fixture('cat_file_blob_size')
- blob = Blob(self.repo, **{'id': 'abc'})
- assert_equal(11, blob.size)
- assert_true(git.called)
- assert_equal(git.call_args, (('cat_file', 'abc'), {'s': True}))
-
- @patch_object(Git, '_call_process')
- def test_should_cache_file_size(self, git):
- git.return_value = fixture('cat_file_blob_size')
- blob = Blob(self.repo, **{'id': 'abc'})
- assert_equal(11, blob.size)
- assert_equal(11, blob.size)
- assert_true(git.called)
- assert_equal(git.call_count, 1)
- assert_equal(git.call_args, (('cat_file', 'abc'), {'s': True}))
-
+ assert blob.data
+ blob.size
+ blob.size
+
def test_mime_type_should_return_mime_type_for_known_types(self):
blob = Blob(self.repo, **{'id': 'abc', 'path': 'foo.png'})
assert_equal("image/png", blob.mime_type)
diff --git a/test/git/test_commit.py b/test/git/test_commit.py
index fa49821d..a95fb675 100644
--- a/test/git/test_commit.py
+++ b/test/git/test_commit.py
@@ -11,18 +11,13 @@ class TestCommit(object):
def setup(self):
self.repo = Repo(GIT_REPO)
- @patch_object(Git, '_call_process')
- def test_bake(self, git):
- git.return_value = fixture('rev_list_single')
+ def test_bake(self):
- commit = Commit(self.repo, **{'id': '4c8124ffcf4039d292442eeccabdeca5af5c5017'})
+ commit = Commit(self.repo, **{'id': '2454ae89983a4496a445ce347d7a41c0bb0ea7ae'})
commit.author # bake
- assert_equal("Tom Preston-Werner", commit.author.name)
- assert_equal("tom@mojombo.com", commit.author.email)
-
- assert_true(git.called)
- assert_equal(git.call_args, (('rev_list', '4c8124ffcf4039d292442eeccabdeca5af5c5017', '--', ''), {'pretty': 'raw', 'max_count': 1}))
+ assert_equal("Sebastian Thiel", commit.author.name)
+ assert_equal("byronimo@gmail.com", commit.author.email)
@patch_object(Git, '_call_process')
@@ -159,17 +154,10 @@ class TestCommit(object):
assert diff.deleted_file and isinstance(diff.deleted_file, bool)
# END for each diff in initial import commit
- @patch_object(Git, '_call_process')
- def test_diffs_on_initial_import_with_empty_commit(self, git):
- git.return_value = fixture('show_empty_commit')
-
- commit = Commit(self.repo, id='634396b2f541a9f2d58b00be1a07f0c358b999b3')
+ def test_diffs_on_initial_import_without_parents(self):
+ commit = Commit(self.repo, id='33ebe7acec14b25c5f84f35a664803fcab2f7781')
diffs = commit.diffs
-
- assert_equal([], diffs)
-
- assert_true(git.called)
- assert_equal(git.call_args, (('show', '634396b2f541a9f2d58b00be1a07f0c358b999b3', '-M'), {'full_index': True, 'pretty': 'raw'}))
+ assert diffs
def test_diffs_with_mode_only_change(self):
commit = Commit(self.repo, id='ccde80b7a3037a004a7807a6b79916ce2a1e9729')
@@ -216,7 +204,7 @@ class TestCommit(object):
bisect_all=True)
assert_true(git.called)
- commits = Commit._list_from_string(self.repo, revs)
+ commits = Commit._iter_from_process_or_stream(self.repo, ListProcessAdapter(revs))
expected_ids = (
'cf37099ea8d1d8c7fbf9b6d12d7ec0249d3acb8b',
'33ebe7acec14b25c5f84f35a664803fcab2f7781',
@@ -241,3 +229,4 @@ class TestCommit(object):
commit3 = Commit(self.repo, id='zyx')
assert_equal(commit1, commit2)
assert_not_equal(commit2, commit3)
+
diff --git a/test/git/test_git.py b/test/git/test_git.py
index c9f399cc..1f44aebc 100644
--- a/test/git/test_git.py
+++ b/test/git/test_git.py
@@ -10,8 +10,7 @@ from git import Git, GitCommandError
class TestGit(object):
def setup(self):
- base = os.path.join(os.path.dirname(__file__), "../..")
- self.git = Git(base)
+ self.git = Git(GIT_REPO)
@patch_object(Git, 'execute')
def test_call_process_calls_execute(self, git):
@@ -56,3 +55,35 @@ class TestGit(object):
# this_should_not_be_ignored=False implies it *should* be ignored
output = self.git.version(pass_this_kwarg=False)
assert_true("pass_this_kwarg" not in git.call_args[1])
+
+ def test_persistent_cat_file_command(self):
+ # read header only
+ import subprocess as sp
+ hexsha = "b2339455342180c7cc1e9bba3e9f181f7baa5167"
+ g = self.git.cat_file(batch_check=True, istream=sp.PIPE,as_process=True)
+ g.stdin.write("b2339455342180c7cc1e9bba3e9f181f7baa5167\n")
+ g.stdin.flush()
+ obj_info = g.stdout.readline()
+
+ # read header + data
+ g = self.git.cat_file(batch=True, istream=sp.PIPE,as_process=True)
+ g.stdin.write("b2339455342180c7cc1e9bba3e9f181f7baa5167\n")
+ g.stdin.flush()
+ obj_info_two = g.stdout.readline()
+ assert obj_info == obj_info_two
+
+ # read data - have to read it in one large chunk
+ size = int(obj_info.split()[2])
+ data = g.stdout.read(size)
+ terminating_newline = g.stdout.read(1)
+
+ # now we should be able to read a new object
+ g.stdin.write("b2339455342180c7cc1e9bba3e9f181f7baa5167\n")
+ g.stdin.flush()
+ assert g.stdout.readline() == obj_info
+
+
+ # same can be achieved using the respective command functions
+ hexsha, typename, size = self.git.get_object_header(hexsha)
+ hexsha, typename_two, size_two, data = self.git.get_object_data(hexsha)
+ assert typename == typename_two and size == size_two
diff --git a/test/git/test_performance.py b/test/git/test_performance.py
new file mode 100644
index 00000000..96f13a2e
--- /dev/null
+++ b/test/git/test_performance.py
@@ -0,0 +1,38 @@
+# test_performance.py
+# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
+#
+# This module is part of GitPython and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+
+from test.testlib import *
+from git import *
+from time import time
+
+class TestPerformance(object):
+ def setup(self):
+ self.repo = Repo(GIT_REPO)
+
+ def test_iteration(self):
+ num_objs = 0
+ num_commits = 0
+
+ # find the first commit containing the given path - always do a full
+ # iteration ( restricted to the path in question ), but in fact it should
+ # return quite a lot of commits, we just take one and hence abort the operation
+
+ st = time()
+ for c in self.repo.commits():
+ num_commits += 1
+ c.author
+ c.authored_date
+ c.committer
+ c.committed_date
+ c.message
+ for obj in c.tree.traverse():
+ obj.size
+ num_objs += 1
+ # END for each object
+ # END for each commit
+ elapsed_time = time() - st
+ print "Traversed %i Trees and a total of %i unchached objects in %s [s] ( %f objs/s )" % (num_commits, num_objs, elapsed_time, num_objs/elapsed_time)
+
diff --git a/test/git/test_repo.py b/test/git/test_repo.py
index 3e2fb3dc..e998ac6d 100644
--- a/test/git/test_repo.py
+++ b/test/git/test_repo.py
@@ -41,7 +41,7 @@ class TestRepo(object):
@patch_object(Git, '_call_process')
def test_commits(self, git):
- git.return_value = fixture('rev_list')
+ git.return_value = ListProcessAdapter(fixture('rev_list'))
commits = self.repo.commits('master', max_count=10)
@@ -65,7 +65,6 @@ class TestRepo(object):
assert_equal("Merge branch 'site'", c.summary)
assert_true(git.called)
- assert_equal(git.call_args, (('rev_list', 'master', '--', ''), {'skip': 0, 'pretty': 'raw', 'max_count': 10}))
@patch_object(Git, '_call_process')
def test_commit_count(self, git):
@@ -78,37 +77,14 @@ class TestRepo(object):
@patch_object(Git, '_call_process')
def test_commit(self, git):
- git.return_value = fixture('rev_list_single')
+ git.return_value = ListProcessAdapter(fixture('rev_list_single'))
commit = self.repo.commit('4c8124ffcf4039d292442eeccabdeca5af5c5017')
assert_equal("4c8124ffcf4039d292442eeccabdeca5af5c5017", commit.id)
assert_true(git.called)
- assert_equal(git.call_args, (('rev_list', '4c8124ffcf4039d292442eeccabdeca5af5c5017', '--', ''), {'pretty': 'raw', 'max_count': 1}))
-
- @patch_object(Git, '_call_process')
- def test_tree(self, git):
- git.return_value = fixture('ls_tree_a')
-
- tree = self.repo.tree('master')
-
- assert_equal(4, len([c for c in tree.values() if isinstance(c, Blob)]))
- assert_equal(3, len([c for c in tree.values() if isinstance(c, Tree)]))
-
- assert_true(git.called)
- assert_equal(git.call_args, (('ls_tree', 'master'), {}))
-
- @patch_object(Git, '_call_process')
- def test_blob(self, git):
- git.return_value = fixture('cat_file_blob')
-
- blob = self.repo.blob("abc")
- assert_equal("Hello world", blob.data)
-
- assert_true(git.called)
- assert_equal(git.call_args, (('cat_file', 'abc'), {'p': True, 'with_raw_output': True}))
-
+
@patch_object(Repo, '__init__')
@patch_object(Git, '_call_process')
def test_init_bare(self, git, repo):
@@ -218,22 +194,6 @@ class TestRepo(object):
path = os.path.join(os.path.abspath(GIT_REPO), '.git')
assert_equal('<git.Repo "%s">' % path, repr(self.repo))
- @patch_object(Git, '_call_process')
- def test_log(self, git):
- git.return_value = fixture('rev_list')
- assert_equal('4c8124ffcf4039d292442eeccabdeca5af5c5017', self.repo.log()[0].id)
- assert_equal('ab25fd8483882c3bda8a458ad2965d2248654335', self.repo.log()[-1].id)
- assert_true(git.called)
- assert_equal(git.call_count, 2)
- assert_equal(git.call_args, (('log', 'master', '--'), {'pretty': 'raw'}))
-
- @patch_object(Git, '_call_process')
- def test_log_with_path_and_options(self, git):
- git.return_value = fixture('rev_list')
- self.repo.log('master', 'file.rb', **{'max_count': 1})
- assert_true(git.called)
- assert_equal(git.call_args, (('log', 'master', '--', 'file.rb'), {'pretty': 'raw', 'max_count': 1}))
-
def test_is_dirty_with_bare_repository(self):
self.repo.bare = True
assert_false(self.repo.is_dirty)
@@ -255,7 +215,7 @@ class TestRepo(object):
@patch_object(Git, '_call_process')
def test_active_branch(self, git):
git.return_value = 'refs/heads/major-refactoring'
- assert_equal(self.repo.active_branch, 'major-refactoring')
+ assert_equal(self.repo.active_branch.name, 'major-refactoring')
assert_equal(git.call_args, (('symbolic_ref', 'HEAD'), {}))
@patch_object(Git, '_call_process')
diff --git a/test/git/test_tree.py b/test/git/test_tree.py
index cb8ebb04..dafb6f3f 100644
--- a/test/git/test_tree.py
+++ b/test/git/test_tree.py
@@ -7,143 +7,38 @@
from test.testlib import *
from git import *
-class TestTree(object):
- def setup(self):
- self.repo = Repo(GIT_REPO)
-
- @patch_object(Git, '_call_process')
- def test_contents_should_cache(self, git):
- git.return_value = fixture('ls_tree_a') + fixture('ls_tree_b')
+class TestTree(TestCase):
- tree = self.repo.tree('master')
-
- child = tree['grit']
- child.items()
- child.items()
-
- assert_true(git.called)
- assert_equal(2, git.call_count)
- assert_equal(git.call_args, (('ls_tree', '34868e6e7384cb5ee51c543a8187fdff2675b5a7'), {}))
-
- def test_content_from_string_tree_should_return_tree(self):
- text = fixture('ls_tree_a').splitlines()[-1]
- tree = Tree.content__from_string(None, text)
-
- assert_equal(Tree, tree.__class__)
- assert_equal("650fa3f0c17f1edb4ae53d8dcca4ac59d86e6c44", tree.id)
- assert_equal(0,tree.mode) # git tree objects always use this mode
- assert_equal("test", tree.path)
-
- def test_content_from_string_tree_should_return_blob(self):
- text = fixture('ls_tree_b').split("\n")[0]
-
- tree = Tree.content__from_string(None, text)
-
- assert_equal(Blob, tree.__class__)
- assert_equal("aa94e396335d2957ca92606f909e53e7beaf3fbb", tree.id)
- assert_mode_644(tree.mode)
- assert_equal("grit.rb", tree.path)
+ def setUp(self):
+ self.repo = Repo(GIT_REPO)
- def test_content_from_string_tree_should_return_commit(self):
- text = fixture('ls_tree_commit').split("\n")[1]
-
- tree = Tree.content__from_string(None, text)
- assert_none(tree)
- @raises(TypeError)
- def test_content_from_string_invalid_type_should_raise(self):
- Tree.content__from_string(None, "040000 bogus 650fa3f0c17f1edb4ae53d8dcca4ac59d86e6c44 test")
-
- @patch_object(Blob, 'size')
- @patch_object(Git, '_call_process')
- def test_slash(self, git, blob):
- git.return_value = fixture('ls_tree_a')
- blob.return_value = 1
-
- tree = self.repo.tree('master')
-
- assert_equal('aa06ba24b4e3f463b3c4a85469d0fb9e5b421cf8', (tree/'lib').id)
- assert_equal('8b1e02c0fb554eed2ce2ef737a68bb369d7527df', (tree/'README.txt').id)
-
- assert_true(git.called)
- assert_equal(git.call_args, (('ls_tree', 'master'), {}))
-
- @patch_object(Blob, 'size')
- @patch_object(Git, '_call_process')
- def test_slash_with_zero_length_file(self, git, blob):
- git.return_value = fixture('ls_tree_a')
- blob.return_value = 0
-
- tree = self.repo.tree('master')
-
- assert_not_none(tree/'README.txt')
- assert_equal('8b1e02c0fb554eed2ce2ef737a68bb369d7527df', (tree/'README.txt').id)
- assert_true(git.called)
- assert_equal(git.call_args, (('ls_tree', 'master'), {}))
+ def test_traverse(self):
+ root = self.repo.tree()
+ num_recursive = 0
+ all_items = list()
+ for obj in root.traverse():
+ if "/" in obj.path:
+ num_recursive += 1
+
+ assert isinstance(obj, (Blob, Tree))
+ all_items.append(obj)
+ # END for each object
+ # limit recursion level to 0 - should be same as default iteration
+ assert all_items
+ assert 'CHANGES' in root
+ assert len(list(root)) == len(list(root.traverse(max_depth=0)))
+
+ # only choose trees
+ trees_only = lambda i: i.type == "tree"
+ trees = list(root.traverse(predicate = trees_only))
+ assert len(trees) == len(list( i for i in root.traverse() if trees_only(i) ))
+
+ # trees and blobs
+ assert len(set(trees)|set(root.trees)) == len(trees)
+ assert len(set(b for b in root if isinstance(b, Blob)) | set(root.blobs)) == len( root.blobs )
- @patch_object(Git, '_call_process')
- def test_slash_with_commits(self, git):
- git.return_value = fixture('ls_tree_commit')
-
- tree = self.repo.tree('master')
-
- assert_none(tree/'bar')
- assert_equal('2afb47bcedf21663580d5e6d2f406f08f3f65f19', (tree/'foo').id)
- assert_equal('f623ee576a09ca491c4a27e48c0dfe04be5f4a2e', (tree/'baz').id)
-
- assert_true(git.called)
- assert_equal(git.call_args, (('ls_tree', 'master'), {}))
-
- @patch_object(Blob, 'size')
- @patch_object(Git, '_call_process')
- def test_dict(self, git, blob):
- git.return_value = fixture('ls_tree_a')
- blob.return_value = 1
-
- tree = self.repo.tree('master')
-
- assert_equal('aa06ba24b4e3f463b3c4a85469d0fb9e5b421cf8', tree['lib'].id)
- assert_equal('8b1e02c0fb554eed2ce2ef737a68bb369d7527df', tree['README.txt'].id)
-
- assert_true(git.called)
- assert_equal(git.call_args, (('ls_tree', 'master'), {}))
-
- @patch_object(Blob, 'size')
- @patch_object(Git, '_call_process')
- def test_dict_with_zero_length_file(self, git, blob):
- git.return_value = fixture('ls_tree_a')
- blob.return_value = 0
-
- tree = self.repo.tree('master')
-
- assert_not_none(tree['README.txt'])
- assert_equal('8b1e02c0fb554eed2ce2ef737a68bb369d7527df', tree['README.txt'].id)
-
- assert_true(git.called)
- assert_equal(git.call_args, (('ls_tree', 'master'), {}))
-
- @patch_object(Git, '_call_process')
- def test_dict_with_commits(self, git):
- git.return_value = fixture('ls_tree_commit')
-
- tree = self.repo.tree('master')
-
- assert_none(tree.get('bar'))
- assert_equal('2afb47bcedf21663580d5e6d2f406f08f3f65f19', tree['foo'].id)
- assert_equal('f623ee576a09ca491c4a27e48c0dfe04be5f4a2e', tree['baz'].id)
-
- assert_true(git.called)
- assert_equal(git.call_args, (('ls_tree', 'master'), {}))
-
- @patch_object(Git, '_call_process')
- @raises(KeyError)
- def test_dict_with_non_existant_file(self, git):
- git.return_value = fixture('ls_tree_commit')
-
- tree = self.repo.tree('master')
- tree['bar']
-
def test_repr(self):
tree = Tree(self.repo, id='abc')
assert_equal('<git.Tree "abc">', repr(tree))
diff --git a/test/testlib/__init__.py b/test/testlib/__init__.py
index 2133eb8c..f364171b 100644
--- a/test/testlib/__init__.py
+++ b/test/testlib/__init__.py
@@ -8,6 +8,7 @@ import inspect
from mock import *
from asserts import *
from helper import *
+from unittest import TestCase
__all__ = [ name for name, obj in locals().items()
if not (name.startswith('_') or inspect.ismodule(obj)) ]
diff --git a/test/testlib/helper.py b/test/testlib/helper.py
index 74f48447..b66d3eaa 100644
--- a/test/testlib/helper.py
+++ b/test/testlib/helper.py
@@ -17,3 +17,14 @@ def fixture(name):
def absolute_project_path():
return os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
+
+
+class ListProcessAdapter(object):
+ """Allows lists to be used like the Process object returned by subprocess.Popen.
+ It's tailored to work with the test system only"""
+
+ def __init__(self, input_list_or_string):
+ l = input_list_or_string
+ if isinstance(l,basestring):
+ l = l.splitlines()
+ self.stdout = iter(l)