diff options
-rw-r--r-- | CHANGES | 34 | ||||
-rw-r--r-- | lib/git/cmd.py | 120 | ||||
-rw-r--r-- | lib/git/objects/base.py | 43 | ||||
-rw-r--r-- | lib/git/objects/commit.py | 91 | ||||
-rw-r--r-- | lib/git/objects/tag.py | 7 | ||||
-rw-r--r-- | lib/git/objects/tree.py | 252 | ||||
-rw-r--r-- | lib/git/objects/utils.py (renamed from lib/git/objects/util.py) | 0 | ||||
-rw-r--r-- | lib/git/refs.py | 137 | ||||
-rw-r--r-- | lib/git/repo.py | 99 | ||||
-rw-r--r-- | lib/git/utils.py | 63 | ||||
-rw-r--r-- | test/git/test_base.py | 23 | ||||
-rw-r--r-- | test/git/test_blob.py | 49 | ||||
-rw-r--r-- | test/git/test_commit.py | 29 | ||||
-rw-r--r-- | test/git/test_git.py | 35 | ||||
-rw-r--r-- | test/git/test_performance.py | 38 | ||||
-rw-r--r-- | test/git/test_repo.py | 48 | ||||
-rw-r--r-- | test/git/test_tree.py | 159 | ||||
-rw-r--r-- | test/testlib/__init__.py | 1 | ||||
-rw-r--r-- | test/testlib/helper.py | 11 |
19 files changed, 741 insertions, 498 deletions
@@ -2,9 +2,8 @@ CHANGES ======= -0.1.X +0.2 ===== -( Future Release ) General ------- * file mode in Tree, Blob and Diff objects now is an int compatible to definintiions @@ -19,7 +18,16 @@ General * from_string and list_from_string methods are now private and were renamed to _from_string and _list_from_string respectively. As part of the private API, they may change without prior notice. - +* Renamed all find_all methods to list_items - this method is part of the Iterable interface + that also provides a more efficients and more responsive iter_items method + +Item Iteration +-------------- +* Previously one would return and process multiple items as list only which can + hurt performance and memory consumption and reduce response times. + iter_items method provide an iterator that will return items on demand as parsed + from a stream. This way any amount of objects can be handled. + objects Package ---------------- * blob, tree, tag and commit module have been moved to new objects package. This should @@ -29,6 +37,13 @@ objects Package Repo ---- * Moved blame method from Blob to repo as it appeared to belong there much more. +* active_branch method now returns a Head object instead of a string with the name + of the active branch. +* tree method now requires a Ref instance as input and defaults to the active_branche + instead of master +* Removed 'log' method as it as effectively the same as the 'commits' method +* 'commits' method has no max-count of returned commits anymore, it now behaves + like git-rev-list Diff ---- @@ -43,7 +58,18 @@ Blob Tree ---- * former 'name' member renamed to path as it suits the actual data better - +* added traverse method allowing to recursively traverse tree items +* deleted blob method +* added blobs and trees properties allowing to query the respective items in the + tree +* now mimics behaviour of a read-only list instead of a dict to maintain order. +* content_from_string method is now private and not part of the public API anymore + +Refs +---- +* Will dynmically retrieve their object at the time of query to assure the information + is actual. Recently objects would be cached, hence ref object not be safely kept + persistent. 0.1.6 ===== diff --git a/lib/git/cmd.py b/lib/git/cmd.py index 940e35d1..2965eb8b 100644 --- a/lib/git/cmd.py +++ b/lib/git/cmd.py @@ -13,7 +13,7 @@ from errors import GitCommandError GIT_PYTHON_TRACE = os.environ.get("GIT_PYTHON_TRACE", False) execute_kwargs = ('istream', 'with_keep_cwd', 'with_extended_output', - 'with_exceptions', 'with_raw_output') + 'with_exceptions', 'with_raw_output', 'as_process') extra = {} if sys.platform == 'win32': @@ -34,6 +34,34 @@ class Git(object): of the command to stdout. Set its value to 'full' to see details about the returned values. """ + class AutoInterrupt(object): + """ + Kill/Interrupt the stored process instance once this instance goes out of scope. It is + used to prevent processes piling up in case iterators stop reading. + Besides all attributes are wired through to the contained process object + """ + __slots__= "proc" + + def __init__(self, proc ): + self.proc = proc + + def __del__(self): + # did the process finish already so we have a return code ? + if self.proc.poll() is not None: + return + + # try to kill it + try: + os.kill(self.proc.pid, 2) # interrupt signal + except AttributeError: + # try windows + subprocess.call(("TASKKILL", "/T", "/PID", self.proc.pid)) + # END exception handling + + def __getattr__(self, attr): + return getattr(self.proc, attr) + + def __init__(self, git_dir=None): """ Initialize this instance with: @@ -44,6 +72,10 @@ class Git(object): """ super(Git, self).__init__() self.git_dir = git_dir + + # cached command slots + self.cat_file_header = None + self.cat_file_all = None def __getattr__(self, name): """ @@ -70,6 +102,7 @@ class Git(object): with_extended_output=False, with_exceptions=True, with_raw_output=False, + as_process=False ): """ Handles executing the command on the shell and consumes and returns @@ -96,6 +129,16 @@ class Git(object): ``with_raw_output`` Whether to avoid stripping off trailing whitespace. + + ``as_process`` + Whether to return the created process instance directly from which + streams can be read on demand. This will render with_extended_output, + with_exceptions and with_raw_output ineffective - the caller will have + to deal with the details himself. + It is important to note that the process will be placed into an AutoInterrupt + wrapper that will interrupt the process once it goes out of scope. If you + use the command in iterators, you should pass the whole process instance + instead of a single stream. Returns:: @@ -127,7 +170,11 @@ class Git(object): **extra ) + if as_process: + return self.AutoInterrupt(proc) + # Wait for the process to return + status = 0 try: stdout_value = proc.stdout.read() stderr_value = proc.stderr.read() @@ -218,3 +265,74 @@ class Git(object): call.extend(args) return self.execute(call, **_kwargs) + + def _parse_object_header(self, header_line): + """ + ``header_line`` + <hex_sha> type_string size_as_int + + Returns + (hex_sha, type_string, size_as_int) + + Raises + ValueError if the header contains indication for an error due to incorrect + input sha + """ + tokens = header_line.split() + if len(tokens) != 3: + raise ValueError( "SHA named %s could not be resolved" % tokens[0] ) + + return (tokens[0], tokens[1], int(tokens[2])) + + def __prepare_ref(self, ref): + # required for command to separate refs on stdin + refstr = str(ref) # could be ref-object + if refstr.endswith("\n"): + return refstr + return refstr + "\n" + + def __get_persistent_cmd(self, attr_name, cmd_name, *args,**kwargs): + cur_val = getattr(self, attr_name) + if cur_val is not None: + return cur_val + + options = { "istream" : subprocess.PIPE, "as_process" : True } + options.update( kwargs ) + + cmd = self._call_process( cmd_name, *args, **options ) + setattr(self, attr_name, cmd ) + return cmd + + def __get_object_header(self, cmd, ref): + cmd.stdin.write(self.__prepare_ref(ref)) + cmd.stdin.flush() + return self._parse_object_header(cmd.stdout.readline()) + + def get_object_header(self, ref): + """ + Use this method to quickly examine the type and size of the object behind + the given ref. + + NOTE + The method will only suffer from the costs of command invocation + once and reuses the command in subsequent calls. + + Return: + (hexsha, type_string, size_as_int) + """ + cmd = self.__get_persistent_cmd("cat_file_header", "cat_file", batch_check=True) + return self.__get_object_header(cmd, ref) + + def get_object_data(self, ref): + """ + As get_object_header, but returns object data as well + + Return: + (hexsha, type_string, size_as_int,data_string) + """ + cmd = self.__get_persistent_cmd("cat_file_all", "cat_file", batch=True) + hexsha, typename, size = self.__get_object_header(cmd, ref) + data = cmd.stdout.read(size) + cmd.stdout.read(1) # finishing newlines + + return (hexsha, typename, size, data) diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index 43aa8dd1..07538ada 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -4,32 +4,10 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php import os - -class LazyMixin(object): - lazy_properties = [] - __slots__ = tuple() +from git.utils import LazyMixin - def __getattr__(self, attr): - """ - Whenever an attribute is requested that we do not know, we allow it - to be created and set. Next time the same attribute is reqeusted, it is simply - returned from our dict/slots. - """ - self._set_cache_(attr) - # will raise in case the cache was not created - return object.__getattribute__(self, attr) +_assertion_msg_format = "Created object %r whose python type %r disagrees with the acutal git object type %r" - def _set_cache_(self, attr): - """ This method should be overridden in the derived class. - It should check whether the attribute named by attr can be created - and cached. Do nothing if you do not know the attribute or call your subclass - - The derived class may create as many additional attributes as it deems - necessary in case a git command returns more information than represented - in the single attribute.""" - pass - - class Object(LazyMixin): """ Implements an Object which may be Blobs, Trees, Commits and Tags @@ -71,9 +49,13 @@ class Object(LazyMixin): Retrieve object information """ if attr == "size": - self.size = int(self.repo.git.cat_file(self.id, s=True).rstrip()) + hexsha, typename, self.size = self.repo.git.get_object_header(self.id) + assert typename == self.type, _assertion_msg_format % (self.id, typename, self.type) elif attr == "data": - self.data = self.repo.git.cat_file(self.id, p=True, with_raw_output=True) + hexsha, typename, self.size, self.data = self.repo.git.get_object_data(self.id) + assert typename == self.type, _assertion_msg_format % (self.id, typename, self.type) + else: + super(Object,self)._set_cache_(attr) def __eq__(self, other): """ @@ -143,8 +125,15 @@ class IndexObject(Object): if isinstance(mode, basestring): self.mode = self._mode_str_to_int(mode) + def _set_cache_(self, attr): + if attr in IndexObject.__slots__: + # they cannot be retrieved lateron ( not without searching for them ) + raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ ) + else: + super(IndexObject, self)._set_cache_(attr) + @classmethod - def _mode_str_to_int( cls, modestr ): + def _mode_str_to_int(cls, modestr): """ ``modestr`` string like 755 or 644 or 100644 - only the last 3 chars will be used diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index c3e97bf9..101014ab 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -6,14 +6,14 @@ import re import time - +from git.utils import Iterable from git.actor import Actor -from tree import Tree import git.diff as diff import git.stats as stats +from tree import Tree import base -class Commit(base.Object): +class Commit(base.Object, Iterable): """ Wraps a git Commit object. @@ -37,7 +37,7 @@ class Commit(base.Object): The parameter documentation indicates the type of the argument after a colon ':'. ``id`` - is the sha id of the commit + is the sha id of the commit or a ref ``parents`` : tuple( Commit, ... ) is a tuple of commit ids or actual Commits @@ -71,7 +71,7 @@ class Commit(base.Object): # END for each parent to convert if self.id and tree is not None: - self.tree = Tree(repo, id=tree) + self.tree = Tree(repo, id=tree, path='') # END id to tree conversion def _set_cache_(self, attr): @@ -80,8 +80,11 @@ class Commit(base.Object): to be set. We set all values at once. """ - if attr in self.__slots__: - temp = Commit.find_all(self.repo, self.id, max_count=1)[0] + if attr in Commit.__slots__: + # prepare our data lines to match rev-list + data_lines = self.data.splitlines() + data_lines.insert(0, "commit %s" % self.id) + temp = self._iter_from_process_or_stream(self.repo, iter(data_lines)).next() self.parents = temp.parents self.tree = temp.tree self.author = temp.author @@ -120,7 +123,7 @@ class Commit(base.Object): return len(repo.git.rev_list(ref, '--', path).strip().splitlines()) @classmethod - def find_all(cls, repo, ref, path='', **kwargs): + def iter_items(cls, repo, ref, path='', **kwargs): """ Find all commits matching the given criteria. @@ -128,7 +131,7 @@ class Commit(base.Object): is the Repo ``ref`` - is the ref from which to begin (SHA1 or name) + is the ref from which to begin (SHA1, Head or name) ``path`` is an optinal path, if set only Commits that include the path @@ -140,55 +143,67 @@ class Commit(base.Object): ``skip`` is the number of commits to skip Returns - git.Commit[] + iterator yielding Commit items """ - options = {'pretty': 'raw'} + options = {'pretty': 'raw', 'as_process' : True } options.update(kwargs) - output = repo.git.rev_list(ref, '--', path, **options) - return cls._list_from_string(repo, output) + # the test system might confront us with string values - + proc = repo.git.rev_list(ref, '--', path, **options) + return cls._iter_from_process_or_stream(repo, proc) @classmethod - def _list_from_string(cls, repo, text): + def _iter_from_process_or_stream(cls, repo, proc_or_stream): """ Parse out commit information into a list of Commit objects ``repo`` is the Repo - ``text`` - is the text output from the git-rev-list command (raw format) + ``proc`` + git-rev-list process instance (raw format) Returns - git.Commit[] + iterator returning Commit objects """ - lines =text.splitlines(False) - commits = [] - - while lines: - id = lines.pop(0).split()[1] - tree = lines.pop(0).split()[1] + stream = proc_or_stream + if not hasattr(stream,'next'): + stream = proc_or_stream.stdout + + for line in stream: + id = line.split()[1] + assert line.split()[0] == "commit" + tree = stream.next().split()[1] parents = [] - while lines and lines[0].startswith('parent'): - parents.append(lines.pop(0).split()[-1]) - # END while there are parent lines - author, authored_date = cls._actor(lines.pop(0)) - committer, committed_date = cls._actor(lines.pop(0)) + next_line = None + for parent_line in stream: + if not parent_line.startswith('parent'): + next_line = parent_line + break + # END abort reading parents + parents.append(parent_line.split()[-1]) + # END for each parent line + + author, authored_date = cls._actor(next_line) + committer, committed_date = cls._actor(stream.next()) - # free line - lines.pop(0) + # empty line + stream.next() message_lines = [] - while lines and not lines[0].startswith('commit'): - message_lines.append(lines.pop(0).strip()) + next_line = None + for msg_line in stream: + if not msg_line.startswith(' '): + break + # END abort message reading + message_lines.append(msg_line.strip()) # END while there are message lines - message = '\n'.join(message_lines[:-1]) # last line is empty - - commits.append(Commit(repo, id=id, parents=parents, tree=tree, author=author, authored_date=authored_date, - committer=committer, committed_date=committed_date, message=message)) - # END while lines - return commits + message = '\n'.join(message_lines) + + yield Commit(repo, id=id, parents=parents, tree=tree, author=author, authored_date=authored_date, + committer=committer, committed_date=committed_date, message=message) + # END for each line in stream @classmethod def diff(cls, repo, a, b=None, paths=None): diff --git a/lib/git/objects/tag.py b/lib/git/objects/tag.py index af1022f0..ecf6349d 100644 --- a/lib/git/objects/tag.py +++ b/lib/git/objects/tag.py @@ -8,7 +8,7 @@ Module containing all object based types. """ import base import commit -from util import get_object_type_by_name +from utils import get_object_type_by_name class TagObject(base.Object): """ @@ -48,9 +48,8 @@ class TagObject(base.Object): """ Cache all our attributes at once """ - if attr in self.__slots__: - output = self.repo.git.cat_file(self.type,self.id) - lines = output.split("\n") + if attr in TagObject.__slots__: + lines = self.data.splitlines() obj, hexsha = lines[0].split(" ") # object <hexsha> type_token, type_name = lines[1].split(" ") # type <type_name> diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py index 273384a3..abfa9622 100644 --- a/lib/git/objects/tree.py +++ b/lib/git/objects/tree.py @@ -7,53 +7,125 @@ import os import blob import base +import binascii + +def sha_to_hex(sha): + """Takes a string and returns the hex of the sha within""" + hexsha = binascii.hexlify(sha) + assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % hexsha + return hexsha class Tree(base.IndexObject): + """ + Tress represent a ordered list of Blobs and other Trees. Hence it can be + accessed like a list. + + Tree's will cache their contents after first retrieval to improve efficiency. + + ``Tree as a list``:: + + Access a specific blob using the + tree['filename'] notation. + + You may as well access by index + blob = tree[0] + + + """ type = "tree" - __slots__ = "_contents" + __slots__ = "_cache" + + # using ascii codes for comparison + ascii_commit_id = (0x31 << 4) + 0x36 + ascii_blob_id = (0x31 << 4) + 0x30 + ascii_tree_id = (0x34 << 4) + 0x30 - def __init__(self, repo, id, mode=None, path=None): + + def __init__(self, repo, id, mode=0, path=None): super(Tree, self).__init__(repo, id, mode, path) def _set_cache_(self, attr): - if attr == "_contents": - # Read the tree contents. - self._contents = {} - for line in self.repo.git.ls_tree(self.id).splitlines(): - obj = self.content__from_string(self.repo, line) - if obj is not None: - self._contents[obj.path] = obj + if attr == "_cache": + # Set the data when we need it + self._cache = self._get_tree_cache() else: super(Tree, self)._set_cache_(attr) - @staticmethod - def content__from_string(repo, text): + def _get_tree_cache(self): """ - Parse a content item and create the appropriate object - - ``repo`` - is the Repo - - ``text`` - is the single line containing the items data in `git ls-tree` format - + Return + list(object_instance, ...) + + ``treeish`` + sha or ref identifying a tree + """ + out = list() + for obj in self._iter_from_data(): + if obj is not None: + out.append(obj) + # END if object was handled + # END for each line from ls-tree + return out + + + def _iter_from_data(self): + """ + Reads the binary non-pretty printed representation of a tree and converts + it into Blob, Tree or Commit objects. + + Note: This method was inspired by the parse_tree method in dulwich. + Returns - ``git.Blob`` or ``git.Tree`` + list(IndexObject, ...) """ - try: - mode, typ, id, path = text.expandtabs(1).split(" ", 3) - except: - return None + ord_zero = ord('0') + data = self.data + len_data = len(data) + i = 0 + while i < len_data: + mode = 0 + mode_boundary = i + 6 + + # keep it ascii - we compare against the respective values + type_id = (ord(data[i])<<4) + ord(data[i+1]) + i += 2 + + while data[i] != ' ': + # move existing mode integer up one level being 3 bits + # and add the actual ordinal value of the character + mode = (mode << 3) + (ord(data[i]) - ord_zero) + i += 1 + # END while reading mode + + # byte is space now, skip it + i += 1 + + # parse name, it is NULL separated + + ns = i + while data[i] != '\0': + i += 1 + # END while not reached NULL + name = data[ns:i] + + # byte is NULL, get next 20 + i += 1 + sha = data[i:i+20] + i = i + 20 + + hexsha = sha_to_hex(sha) + if type_id == self.ascii_blob_id: + yield blob.Blob(self.repo, hexsha, mode, name) + elif type_id == self.ascii_tree_id: + yield Tree(self.repo, hexsha, mode, name) + elif type_id == self.ascii_commit_id: + # todo + yield None + else: + raise TypeError( "Unknown type found in tree data: %i" % type_id ) + # END for each byte in data stream - if typ == "tree": - return Tree(repo, id, mode, path) - elif typ == "blob": - return blob.Blob(repo, id, mode, path) - elif typ == "commit": - return None - else: - raise(TypeError, "Invalid type: %s" % typ) def __div__(self, file): """ @@ -67,36 +139,104 @@ class Tree(base.IndexObject): <git.Blob "8b1e02c0fb554eed2ce2ef737a68bb369d7527df"> Returns - ``git.Blob`` or ``git.Tree`` or ``None`` if not found + ``git.Blob`` or ``git.Tree`` + + Raise + KeyError if given file or tree does not exist in tree """ - return self.get(file) + return self[file] def __repr__(self): return '<git.Tree "%s">' % self.id + + @classmethod + def _iter_recursive(cls, repo, tree, cur_depth, max_depth, predicate ): + + for obj in tree: + # adjust path to be complete + obj.path = os.path.join(tree.path, obj.path) + if not predicate(obj): + continue + yield obj + if obj.type == "tree" and ( max_depth < 0 or cur_depth+1 <= max_depth ): + for recursive_obj in cls._iter_recursive( repo, obj, cur_depth+1, max_depth, predicate ): + yield recursive_obj + # END for each recursive object + # END if we may enter recursion + # END for each object + + def traverse(self, max_depth=-1, predicate = lambda i: True): + """ + Returns + Iterator to traverse the tree recursively up to the given level. + The iterator returns Blob and Tree objects + + ``max_depth`` + + if -1, the whole tree will be traversed + if 0, only the first level will be traversed which is the same as + the default non-recursive iterator + + ``predicate`` + + If predicate(item) returns True, item will be returned by iterator + """ + return self._iter_recursive( self.repo, self, 0, max_depth, predicate ) + + @property + def trees(self): + """ + Returns + list(Tree, ...) list of trees directly below this tree + """ + return [ i for i in self if i.type == "tree" ] + + @property + def blobs(self): + """ + Returns + list(Blob, ...) list of blobs directly below this tree + """ + return [ i for i in self if i.type == "blob" ] - # Implement the basics of the dict protocol: - # directories/trees can be seen as object dicts. - def __getitem__(self, key): - return self._contents[key] + # List protocol + def __getslice__(self,i,j): + return self._cache[i:j] + def __iter__(self): - return iter(self._contents) - + return iter(self._cache) + def __len__(self): - return len(self._contents) - - def __contains__(self, key): - return key in self._contents - - def get(self, key): - return self._contents.get(key) - - def items(self): - return self._contents.items() - - def keys(self): - return self._contents.keys() - - def values(self): - return self._contents.values() + return len(self._cache) + + def __getitem__(self,item): + if isinstance(item, int): + return self._cache[item] + + if isinstance(item, basestring): + # compatability + for obj in self._cache: + if obj.path == item: + return obj + # END for each obj + raise KeyError( "Blob or Tree named %s not found" % item ) + # END index is basestring + + raise TypeError( "Invalid index type: %r" % item ) + + + def __contains__(self,item): + if isinstance(item, base.IndexObject): + return item in self._cache + + # compatability + for obj in self._cache: + if item == obj.path: + return True + # END for each item + return False + + def __reversed__(self): + return reversed(self._cache) diff --git a/lib/git/objects/util.py b/lib/git/objects/utils.py index 15c1d114..15c1d114 100644 --- a/lib/git/objects/util.py +++ b/lib/git/objects/utils.py diff --git a/lib/git/refs.py b/lib/git/refs.py index 820150d3..3c9eb817 100644 --- a/lib/git/refs.py +++ b/lib/git/refs.py @@ -7,17 +7,20 @@ Module containing all ref based objects """ from objects.base import Object -from objects.util import get_object_type_by_name +from objects.utils import get_object_type_by_name +from utils import LazyMixin, Iterable -class Ref(object): +class Ref(LazyMixin, Iterable): """ Represents a named reference to any object """ - __slots__ = ("path", "object") + __slots__ = ("repo", "path") - def __init__(self, path, object = None): + def __init__(self, repo, path, object = None): """ Initialize this instance + ``repo`` + Our parent repository ``path`` Path relative to the .git/ directory pointing to the ref in question, i.e. @@ -26,8 +29,10 @@ class Ref(object): ``object`` Object instance, will be retrieved on demand if None """ + self.repo = repo self.path = path - self.object = object + if object is not None: + self.object = object def __str__(self): return self.name @@ -57,9 +62,20 @@ class Ref(object): return self.path # could be refs/HEAD return '/'.join(tokens[2:]) - + + @property + def object(self): + """ + Returns + The object our ref currently refers to. Refs can be cached, they will + always point to the actual object as it gets re-created on each query + """ + # have to be dynamic here as we may be a tag which can point to anything + hexsha, typename, size = self.repo.git.get_object_header(self.path) + return get_object_type_by_name(typename)(self.repo, hexsha) + @classmethod - def find_all(cls, repo, common_path = "refs", **kwargs): + def iter_items(cls, repo, common_path = "refs", **kwargs): """ Find all refs in the repository @@ -88,54 +104,38 @@ class Ref(object): options.update(kwargs) output = repo.git.for_each_ref(common_path, **options) - return cls._list_from_string(repo, output) + return cls._iter_from_stream(repo, iter(output.splitlines())) @classmethod - def _list_from_string(cls, repo, text): - """ - Parse out ref information into a list of Ref compatible objects - - ``repo`` - is the Repo - ``text`` - is the text output from the git-for-each-ref command - - Returns - git.Ref[] - - list of Ref objects - """ + def _iter_from_stream(cls, repo, stream): + """ Parse out ref information into a list of Ref compatible objects + Returns git.Ref[] list of Ref objects """ heads = [] - for line in text.splitlines(): + for line in stream: heads.append(cls._from_string(repo, line)) return heads @classmethod def _from_string(cls, repo, line): - """ - Create a new Ref instance from the given string. - - ``repo`` - is the Repo - - ``line`` - is the formatted ref information - - Format:: - + """ Create a new Ref instance from the given string. + Format name: [a-zA-Z_/]+ <null byte> id: [0-9A-Fa-f]{40} - - Returns - git.Head - """ + Returns git.Head """ full_path, hexsha, type_name, object_size = line.split("\x00") - obj = get_object_type_by_name(type_name)(repo, hexsha) - obj.size = object_size - return cls(full_path, obj) + + # No, we keep the object dynamic by allowing it to be retrieved by + # our path on demand - due to perstent commands it is fast. + # This reduces the risk that the object does not match + # the changed ref anymore in case it changes in the meanwhile + return cls(repo, full_path) + + # obj = get_object_type_by_name(type_name)(repo, hexsha) + # obj.size = object_size + # return cls(repo, full_path, obj) class Head(Ref): @@ -167,14 +167,14 @@ class Head(Ref): return self.object @classmethod - def find_all(cls, repo, common_path = "refs/heads", **kwargs): + def iter_items(cls, repo, common_path = "refs/heads", **kwargs): """ Returns - git.Head[] + Iterator yielding Head items - For more documentation, please refer to git.base.Ref.find_all + For more documentation, please refer to git.base.Ref.list_items """ - return super(Head,cls).find_all(repo, common_path, **kwargs) + return super(Head,cls).iter_items(repo, common_path, **kwargs) def __repr__(self): return '<git.Head "%s">' % self.name @@ -190,30 +190,13 @@ class TagRef(Ref): This tag object will always point to a commit object, but may carray additional information in a tag object:: - tagref = TagRef.find_all(repo)[0] + tagref = TagRef.list_items(repo)[0] print tagref.commit.message if tagref.tag is not None: print tagref.tag.message """ - __slots__ = "tag" - - def __init__(self, path, commit_or_tag): - """ - Initialize a newly instantiated Tag - - ``path`` - is the full path to the tag - - ``commit_or_tag`` - is the Commit or TagObject that this tag ref points to - """ - super(TagRef, self).__init__(path, commit_or_tag) - self.tag = None - - if commit_or_tag.type == "tag": - self.tag = commit_or_tag - # END tag object handling + __slots__ = tuple() @property def commit(self): @@ -223,18 +206,32 @@ class TagRef(Ref): """ if self.object.type == "commit": return self.object - # it is a tag object - return self.object.object + elif self.object.type == "tag": + # it is a tag object which carries the commit as an object - we can point to anything + return self.object.object + else: + raise ValueError( "Tag %s points to a Blob or Tree - have never seen that before" % self ) + + @property + def tag(self): + """ + Returns + Tag object this tag ref points to or None in case + we are a light weight tag + """ + if self.object.type == "tag": + return self.object + return None @classmethod - def find_all(cls, repo, common_path = "refs/tags", **kwargs): + def iter_items(cls, repo, common_path = "refs/tags", **kwargs): """ Returns - git.Tag[] + Iterator yielding commit items - For more documentation, please refer to git.base.Ref.find_all + For more documentation, please refer to git.base.Ref.list_items """ - return super(TagRef,cls).find_all(repo, common_path, **kwargs) + return super(TagRef,cls).iter_items(repo, common_path, **kwargs) # provide an alias diff --git a/lib/git/repo.py b/lib/git/repo.py index dd5acfc3..c74c7e8d 100644 --- a/lib/git/repo.py +++ b/lib/git/repo.py @@ -102,7 +102,7 @@ class Repo(object): Returns ``git.Head[]`` """ - return Head.find_all(self) + return Head.list_items(self) # alias heads branches = heads @@ -115,7 +115,7 @@ class Repo(object): Returns ``git.Tag[]`` """ - return Tag.find_all(self) + return Tag.list_items(self) def blame(self, commit, file): """ @@ -197,7 +197,7 @@ class Repo(object): # END distinguish hexsha vs other information return blames - def commits(self, start='master', path='', max_count=10, skip=0): + def commits(self, start='master', path='', max_count=None, skip=0): """ A list of Commit objects representing the history of a given ref/commit @@ -209,7 +209,7 @@ class Repo(object): Commits that do not contain that path will not be returned. ``max_count`` - is the maximum number of commits to return (default 10) + is the maximum number of commits to return (default None) ``skip`` is the number of commits to skip (default 0) which will effectively @@ -220,8 +220,11 @@ class Repo(object): """ options = {'max_count': max_count, 'skip': skip} - - return Commit.find_all(self, start, path, **options) + + if max_count is None: + options.pop('max_count') + + return Commit.list_items(self, start, path, **options) def commits_between(self, frm, to): """ @@ -237,7 +240,7 @@ class Repo(object): Returns ``git.Commit[]`` """ - return reversed(Commit.find_all(self, "%s..%s" % (frm, to))) + return reversed(Commit.list_items(self, "%s..%s" % (frm, to))) def commits_since(self, start='master', path='', since='1970-01-01'): """ @@ -259,7 +262,7 @@ class Repo(object): """ options = {'since': since} - return Commit.find_all(self, start, path, **options) + return Commit.list_items(self, start, path, **options) def commit_count(self, start='master', path=''): """ @@ -277,12 +280,14 @@ class Repo(object): """ return Commit.count(self, start, path) - def commit(self, id, path = ''): + def commit(self, id=None, path = ''): """ The Commit object for the specified id ``id`` - is the SHA1 identifier of the commit + is the SHA1 identifier of the commit or a ref or a ref name + if None, it defaults to the active branch + ``path`` is an optional path, if set the returned commit must contain the path. @@ -290,9 +295,11 @@ class Repo(object): Returns ``git.Commit`` """ + if id is None: + id = self.active_branch options = {'max_count': 1} - commits = Commit.find_all(self, id, path, **options) + commits = Commit.list_items(self, id, path, **options) if not commits: raise ValueError, "Invalid identifier %s, or given path '%s' too restrictive" % ( id, path ) @@ -309,55 +316,47 @@ class Repo(object): other_repo_refs = other_repo.git.rev_list(other_ref, '--').strip().splitlines() diff_refs = list(set(other_repo_refs) - set(repo_refs)) - return map(lambda ref: Commit.find_all(other_repo, ref, max_count=1)[0], diff_refs) + return map(lambda ref: Commit.list_items(other_repo, ref, max_count=1)[0], diff_refs) - def tree(self, treeish='master'): + def tree(self, treeish=None): """ The Tree object for the given treeish reference ``treeish`` - is the reference (default 'master') + is a Ref instance defaulting to the active_branch if None. Examples:: - repo.tree('master') - + repo.tree(repo.heads[0]) Returns ``git.Tree`` + + NOTE + A ref is requried here to assure you point to a commit or tag. Otherwise + it is not garantueed that you point to the root-level tree. + + If you need a non-root level tree, find it by iterating the root tree. """ - return Tree(self, id=treeish) - - def blob(self, id): - """ - The Blob object for the given id - - ``id`` - is the SHA1 id of the blob - - Returns - ``git.Blob`` - """ - return Blob(self, id=id) - - def log(self, commit='master', path=None, **kwargs): - """ - The Commit for a treeish, and all commits leading to it. + if treeish is None: + treeish = self.active_branch + if not isinstance(treeish, Ref): + raise ValueError( "Treeish reference required, got %r" % treeish ) - ``kwargs`` - keyword arguments specifying flags to be used in git-log command, - i.e.: max_count=1 to limit the amount of commits returned + + # As we are directly reading object information, we must make sure + # we truly point to a tree object. We resolve the ref to a sha in all cases + # to assure the returned tree can be compared properly. Except for + # heads, ids should always be hexshas + hexsha, typename, size = self.git.get_object_header( treeish ) + if typename != "tree": + hexsha, typename, size = self.git.get_object_header( str(treeish)+'^{tree}' ) + # END tree handling + treeish = hexsha + + # the root has an empty relative path and the default mode + return Tree(self, treeish, 0, '') - Returns - ``git.Commit[]`` - """ - options = {'pretty': 'raw'} - options.update(kwargs) - arg = [commit, '--'] - if path: - arg.append(path) - commits = self.git.log(*arg, **options) - return Commit._list_from_string(self, commits) def diff(self, a, b, *paths): """ @@ -588,13 +587,9 @@ class Repo(object): The name of the currently active branch. Returns - str (the branch name) + Head to the active branch """ - branch = self.git.symbolic_ref('HEAD').strip() - if branch.startswith('refs/heads/'): - branch = branch[len('refs/heads/'):] - - return branch + return Head( self, self.git.symbolic_ref('HEAD').strip() ) def __repr__(self): return '<git.Repo "%s">' % self.path diff --git a/lib/git/utils.py b/lib/git/utils.py index c204c432..f84c247d 100644 --- a/lib/git/utils.py +++ b/lib/git/utils.py @@ -24,3 +24,66 @@ def is_git_dir(d): (os.path.islink(headref) and os.readlink(headref).startswith('refs')) return False + + +class LazyMixin(object): + """ + Base class providing an interface to lazily retrieve attribute values upon + first access. If slots are used, memory will only be reserved once the attribute + is actually accessed and retrieved the first time. All future accesses will + return the cached value as stored in the Instance's dict or slot. + """ + __slots__ = tuple() + + def __getattr__(self, attr): + """ + Whenever an attribute is requested that we do not know, we allow it + to be created and set. Next time the same attribute is reqeusted, it is simply + returned from our dict/slots. + """ + self._set_cache_(attr) + # will raise in case the cache was not created + return object.__getattribute__(self, attr) + + def _set_cache_(self, attr): + """ This method should be overridden in the derived class. + It should check whether the attribute named by attr can be created + and cached. Do nothing if you do not know the attribute or call your subclass + + The derived class may create as many additional attributes as it deems + necessary in case a git command returns more information than represented + in the single attribute.""" + pass + + +class Iterable(object): + """ + Defines an interface for iterable items which is to assure a uniform + way to retrieve and iterate items within the git repository + """ + __slots__ = tuple() + + @classmethod + def list_items(cls, repo, *args, **kwargs): + """ + Find all items of this type - subclasses can specify args and kwargs differently. + If no args are given, subclasses are obliged to return all items if no additional + arguments arg given. + + Note: Favor the iter_items method as it will + + Returns: + list(Item,...) list of item instances + """ + return list(cls.iter_items(repo, *args, **kwargs)) + + + @classmethod + def iter_items(cls, repo, *args, **kwargs): + """ + For more information about the arguments, see list_items + Return: + iterator yielding Items + """ + raise NotImplementedError("To be implemented by Subclass") + diff --git a/test/git/test_base.py b/test/git/test_base.py index a153eb83..402cdba3 100644 --- a/test/git/test_base.py +++ b/test/git/test_base.py @@ -10,7 +10,7 @@ from git import * import git.objects.base as base import git.refs as refs from itertools import chain -from git.objects.util import get_object_type_by_name +from git.objects.utils import get_object_type_by_name class TestBase(object): @@ -24,14 +24,14 @@ class TestBase(object): def test_base_object(self): # test interface of base object classes - fcreators = (self.repo.blob, self.repo.tree, self.repo.commit, lambda id: TagObject(self.repo,id) ) - assert len(fcreators) == len(self.type_tuples) + types = (Blob, Tree, Commit, TagObject) + assert len(types) == len(self.type_tuples) s = set() num_objs = 0 num_index_objs = 0 - for fcreator, (typename, hexsha) in zip(fcreators, self.type_tuples): - item = fcreator(hexsha) + for obj_type, (typename, hexsha) in zip(types, self.type_tuples): + item = obj_type(self.repo,hexsha) num_objs += 1 assert item.id == hexsha assert item.type == typename @@ -53,6 +53,7 @@ class TestBase(object): # each has a unique sha assert len(s) == num_objs + assert len(s|s) == num_objs assert num_index_objs == 2 @@ -70,6 +71,18 @@ class TestBase(object): s.add(ref) # END for each ref assert len(s) == ref_count + assert len(s|s) == ref_count + + def test_heads(self): + # see how it dynmically updates its object + for head in self.repo.heads: + head.name + head.path + prev_object = head.object + cur_object = head.object + assert prev_object == cur_object # represent the same git object + assert prev_object is not cur_object # but are different instances + # END for each head def test_get_object_type_by_name(self): for tname in base.Object.TYPES: diff --git a/test/git/test_blob.py b/test/git/test_blob.py index ebb53d0c..266f3a23 100644 --- a/test/git/test_blob.py +++ b/test/git/test_blob.py @@ -12,51 +12,14 @@ class TestBlob(object): def setup(self): self.repo = Repo(GIT_REPO) - @patch_object(Git, '_call_process') - def test_should_return_blob_contents(self, git): - git.return_value = fixture('cat_file_blob') - blob = Blob(self.repo, **{'id': 'abc'}) - assert_equal("Hello world", blob.data) - assert_true(git.called) - assert_equal(git.call_args, (('cat_file', 'abc'), {'p': True, 'with_raw_output': True})) - - @patch_object(Git, '_call_process') - def test_should_return_blob_contents_with_newline(self, git): - git.return_value = fixture('cat_file_blob_nl') - blob = Blob(self.repo, **{'id': 'abc'}) - assert_equal("Hello world\n", blob.data) - assert_true(git.called) - assert_equal(git.call_args, (('cat_file', 'abc'), {'p': True, 'with_raw_output': True})) - - @patch_object(Git, '_call_process') - def test_should_cache_data(self, git): - git.return_value = fixture('cat_file_blob') - bid = '787b92b63f629398f3d2ceb20f7f0c2578259e84' + def test_should_cache_data(self): + bid = 'a802c139d4767c89dcad79d836d05f7004d39aac' blob = Blob(self.repo, bid) blob.data - blob.data - assert_true(git.called) - assert_equal(git.call_count, 1) - assert_equal(git.call_args, (('cat_file', bid), {'p': True, 'with_raw_output': True})) - - @patch_object(Git, '_call_process') - def test_should_return_file_size(self, git): - git.return_value = fixture('cat_file_blob_size') - blob = Blob(self.repo, **{'id': 'abc'}) - assert_equal(11, blob.size) - assert_true(git.called) - assert_equal(git.call_args, (('cat_file', 'abc'), {'s': True})) - - @patch_object(Git, '_call_process') - def test_should_cache_file_size(self, git): - git.return_value = fixture('cat_file_blob_size') - blob = Blob(self.repo, **{'id': 'abc'}) - assert_equal(11, blob.size) - assert_equal(11, blob.size) - assert_true(git.called) - assert_equal(git.call_count, 1) - assert_equal(git.call_args, (('cat_file', 'abc'), {'s': True})) - + assert blob.data + blob.size + blob.size + def test_mime_type_should_return_mime_type_for_known_types(self): blob = Blob(self.repo, **{'id': 'abc', 'path': 'foo.png'}) assert_equal("image/png", blob.mime_type) diff --git a/test/git/test_commit.py b/test/git/test_commit.py index fa49821d..a95fb675 100644 --- a/test/git/test_commit.py +++ b/test/git/test_commit.py @@ -11,18 +11,13 @@ class TestCommit(object): def setup(self): self.repo = Repo(GIT_REPO) - @patch_object(Git, '_call_process') - def test_bake(self, git): - git.return_value = fixture('rev_list_single') + def test_bake(self): - commit = Commit(self.repo, **{'id': '4c8124ffcf4039d292442eeccabdeca5af5c5017'}) + commit = Commit(self.repo, **{'id': '2454ae89983a4496a445ce347d7a41c0bb0ea7ae'}) commit.author # bake - assert_equal("Tom Preston-Werner", commit.author.name) - assert_equal("tom@mojombo.com", commit.author.email) - - assert_true(git.called) - assert_equal(git.call_args, (('rev_list', '4c8124ffcf4039d292442eeccabdeca5af5c5017', '--', ''), {'pretty': 'raw', 'max_count': 1})) + assert_equal("Sebastian Thiel", commit.author.name) + assert_equal("byronimo@gmail.com", commit.author.email) @patch_object(Git, '_call_process') @@ -159,17 +154,10 @@ class TestCommit(object): assert diff.deleted_file and isinstance(diff.deleted_file, bool) # END for each diff in initial import commit - @patch_object(Git, '_call_process') - def test_diffs_on_initial_import_with_empty_commit(self, git): - git.return_value = fixture('show_empty_commit') - - commit = Commit(self.repo, id='634396b2f541a9f2d58b00be1a07f0c358b999b3') + def test_diffs_on_initial_import_without_parents(self): + commit = Commit(self.repo, id='33ebe7acec14b25c5f84f35a664803fcab2f7781') diffs = commit.diffs - - assert_equal([], diffs) - - assert_true(git.called) - assert_equal(git.call_args, (('show', '634396b2f541a9f2d58b00be1a07f0c358b999b3', '-M'), {'full_index': True, 'pretty': 'raw'})) + assert diffs def test_diffs_with_mode_only_change(self): commit = Commit(self.repo, id='ccde80b7a3037a004a7807a6b79916ce2a1e9729') @@ -216,7 +204,7 @@ class TestCommit(object): bisect_all=True) assert_true(git.called) - commits = Commit._list_from_string(self.repo, revs) + commits = Commit._iter_from_process_or_stream(self.repo, ListProcessAdapter(revs)) expected_ids = ( 'cf37099ea8d1d8c7fbf9b6d12d7ec0249d3acb8b', '33ebe7acec14b25c5f84f35a664803fcab2f7781', @@ -241,3 +229,4 @@ class TestCommit(object): commit3 = Commit(self.repo, id='zyx') assert_equal(commit1, commit2) assert_not_equal(commit2, commit3) + diff --git a/test/git/test_git.py b/test/git/test_git.py index c9f399cc..1f44aebc 100644 --- a/test/git/test_git.py +++ b/test/git/test_git.py @@ -10,8 +10,7 @@ from git import Git, GitCommandError class TestGit(object): def setup(self): - base = os.path.join(os.path.dirname(__file__), "../..") - self.git = Git(base) + self.git = Git(GIT_REPO) @patch_object(Git, 'execute') def test_call_process_calls_execute(self, git): @@ -56,3 +55,35 @@ class TestGit(object): # this_should_not_be_ignored=False implies it *should* be ignored output = self.git.version(pass_this_kwarg=False) assert_true("pass_this_kwarg" not in git.call_args[1]) + + def test_persistent_cat_file_command(self): + # read header only + import subprocess as sp + hexsha = "b2339455342180c7cc1e9bba3e9f181f7baa5167" + g = self.git.cat_file(batch_check=True, istream=sp.PIPE,as_process=True) + g.stdin.write("b2339455342180c7cc1e9bba3e9f181f7baa5167\n") + g.stdin.flush() + obj_info = g.stdout.readline() + + # read header + data + g = self.git.cat_file(batch=True, istream=sp.PIPE,as_process=True) + g.stdin.write("b2339455342180c7cc1e9bba3e9f181f7baa5167\n") + g.stdin.flush() + obj_info_two = g.stdout.readline() + assert obj_info == obj_info_two + + # read data - have to read it in one large chunk + size = int(obj_info.split()[2]) + data = g.stdout.read(size) + terminating_newline = g.stdout.read(1) + + # now we should be able to read a new object + g.stdin.write("b2339455342180c7cc1e9bba3e9f181f7baa5167\n") + g.stdin.flush() + assert g.stdout.readline() == obj_info + + + # same can be achived using the respective command functions + hexsha, typename, size = self.git.get_object_header(hexsha) + hexsha, typename_two, size_two, data = self.git.get_object_data(hexsha) + assert typename == typename_two and size == size_two diff --git a/test/git/test_performance.py b/test/git/test_performance.py new file mode 100644 index 00000000..96f13a2e --- /dev/null +++ b/test/git/test_performance.py @@ -0,0 +1,38 @@ +# test_performance.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php + +from test.testlib import * +from git import * +from time import time + +class TestPerformance(object): + def setup(self): + self.repo = Repo(GIT_REPO) + + def test_iteration(self): + num_objs = 0 + num_commits = 0 + + # find the first commit containing the given path - always do a full + # iteration ( restricted to the path in question ), but in fact it should + # return quite a lot of commits, we just take one and hence abort the operation + + st = time() + for c in self.repo.commits(): + num_commits += 1 + c.author + c.authored_date + c.committer + c.committed_date + c.message + for obj in c.tree.traverse(): + obj.size + num_objs += 1 + # END for each object + # END for each commit + elapsed_time = time() - st + print "Traversed %i Trees and a total of %i unchached objects in %s [s] ( %f objs/s )" % (num_commits, num_objs, elapsed_time, num_objs/elapsed_time) + diff --git a/test/git/test_repo.py b/test/git/test_repo.py index 3e2fb3dc..e998ac6d 100644 --- a/test/git/test_repo.py +++ b/test/git/test_repo.py @@ -41,7 +41,7 @@ class TestRepo(object): @patch_object(Git, '_call_process') def test_commits(self, git): - git.return_value = fixture('rev_list') + git.return_value = ListProcessAdapter(fixture('rev_list')) commits = self.repo.commits('master', max_count=10) @@ -65,7 +65,6 @@ class TestRepo(object): assert_equal("Merge branch 'site'", c.summary) assert_true(git.called) - assert_equal(git.call_args, (('rev_list', 'master', '--', ''), {'skip': 0, 'pretty': 'raw', 'max_count': 10})) @patch_object(Git, '_call_process') def test_commit_count(self, git): @@ -78,37 +77,14 @@ class TestRepo(object): @patch_object(Git, '_call_process') def test_commit(self, git): - git.return_value = fixture('rev_list_single') + git.return_value = ListProcessAdapter(fixture('rev_list_single')) commit = self.repo.commit('4c8124ffcf4039d292442eeccabdeca5af5c5017') assert_equal("4c8124ffcf4039d292442eeccabdeca5af5c5017", commit.id) assert_true(git.called) - assert_equal(git.call_args, (('rev_list', '4c8124ffcf4039d292442eeccabdeca5af5c5017', '--', ''), {'pretty': 'raw', 'max_count': 1})) - - @patch_object(Git, '_call_process') - def test_tree(self, git): - git.return_value = fixture('ls_tree_a') - - tree = self.repo.tree('master') - - assert_equal(4, len([c for c in tree.values() if isinstance(c, Blob)])) - assert_equal(3, len([c for c in tree.values() if isinstance(c, Tree)])) - - assert_true(git.called) - assert_equal(git.call_args, (('ls_tree', 'master'), {})) - - @patch_object(Git, '_call_process') - def test_blob(self, git): - git.return_value = fixture('cat_file_blob') - - blob = self.repo.blob("abc") - assert_equal("Hello world", blob.data) - - assert_true(git.called) - assert_equal(git.call_args, (('cat_file', 'abc'), {'p': True, 'with_raw_output': True})) - + @patch_object(Repo, '__init__') @patch_object(Git, '_call_process') def test_init_bare(self, git, repo): @@ -218,22 +194,6 @@ class TestRepo(object): path = os.path.join(os.path.abspath(GIT_REPO), '.git') assert_equal('<git.Repo "%s">' % path, repr(self.repo)) - @patch_object(Git, '_call_process') - def test_log(self, git): - git.return_value = fixture('rev_list') - assert_equal('4c8124ffcf4039d292442eeccabdeca5af5c5017', self.repo.log()[0].id) - assert_equal('ab25fd8483882c3bda8a458ad2965d2248654335', self.repo.log()[-1].id) - assert_true(git.called) - assert_equal(git.call_count, 2) - assert_equal(git.call_args, (('log', 'master', '--'), {'pretty': 'raw'})) - - @patch_object(Git, '_call_process') - def test_log_with_path_and_options(self, git): - git.return_value = fixture('rev_list') - self.repo.log('master', 'file.rb', **{'max_count': 1}) - assert_true(git.called) - assert_equal(git.call_args, (('log', 'master', '--', 'file.rb'), {'pretty': 'raw', 'max_count': 1})) - def test_is_dirty_with_bare_repository(self): self.repo.bare = True assert_false(self.repo.is_dirty) @@ -255,7 +215,7 @@ class TestRepo(object): @patch_object(Git, '_call_process') def test_active_branch(self, git): git.return_value = 'refs/heads/major-refactoring' - assert_equal(self.repo.active_branch, 'major-refactoring') + assert_equal(self.repo.active_branch.name, 'major-refactoring') assert_equal(git.call_args, (('symbolic_ref', 'HEAD'), {})) @patch_object(Git, '_call_process') diff --git a/test/git/test_tree.py b/test/git/test_tree.py index cb8ebb04..dafb6f3f 100644 --- a/test/git/test_tree.py +++ b/test/git/test_tree.py @@ -7,143 +7,38 @@ from test.testlib import * from git import * -class TestTree(object): - def setup(self): - self.repo = Repo(GIT_REPO) - - @patch_object(Git, '_call_process') - def test_contents_should_cache(self, git): - git.return_value = fixture('ls_tree_a') + fixture('ls_tree_b') +class TestTree(TestCase): - tree = self.repo.tree('master') - - child = tree['grit'] - child.items() - child.items() - - assert_true(git.called) - assert_equal(2, git.call_count) - assert_equal(git.call_args, (('ls_tree', '34868e6e7384cb5ee51c543a8187fdff2675b5a7'), {})) - - def test_content_from_string_tree_should_return_tree(self): - text = fixture('ls_tree_a').splitlines()[-1] - tree = Tree.content__from_string(None, text) - - assert_equal(Tree, tree.__class__) - assert_equal("650fa3f0c17f1edb4ae53d8dcca4ac59d86e6c44", tree.id) - assert_equal(0,tree.mode) # git tree objects always use this mode - assert_equal("test", tree.path) - - def test_content_from_string_tree_should_return_blob(self): - text = fixture('ls_tree_b').split("\n")[0] - - tree = Tree.content__from_string(None, text) - - assert_equal(Blob, tree.__class__) - assert_equal("aa94e396335d2957ca92606f909e53e7beaf3fbb", tree.id) - assert_mode_644(tree.mode) - assert_equal("grit.rb", tree.path) + def setUp(self): + self.repo = Repo(GIT_REPO) - def test_content_from_string_tree_should_return_commit(self): - text = fixture('ls_tree_commit').split("\n")[1] - - tree = Tree.content__from_string(None, text) - assert_none(tree) - @raises(TypeError) - def test_content_from_string_invalid_type_should_raise(self): - Tree.content__from_string(None, "040000 bogus 650fa3f0c17f1edb4ae53d8dcca4ac59d86e6c44 test") - - @patch_object(Blob, 'size') - @patch_object(Git, '_call_process') - def test_slash(self, git, blob): - git.return_value = fixture('ls_tree_a') - blob.return_value = 1 - - tree = self.repo.tree('master') - - assert_equal('aa06ba24b4e3f463b3c4a85469d0fb9e5b421cf8', (tree/'lib').id) - assert_equal('8b1e02c0fb554eed2ce2ef737a68bb369d7527df', (tree/'README.txt').id) - - assert_true(git.called) - assert_equal(git.call_args, (('ls_tree', 'master'), {})) - - @patch_object(Blob, 'size') - @patch_object(Git, '_call_process') - def test_slash_with_zero_length_file(self, git, blob): - git.return_value = fixture('ls_tree_a') - blob.return_value = 0 - - tree = self.repo.tree('master') - - assert_not_none(tree/'README.txt') - assert_equal('8b1e02c0fb554eed2ce2ef737a68bb369d7527df', (tree/'README.txt').id) - assert_true(git.called) - assert_equal(git.call_args, (('ls_tree', 'master'), {})) + def test_traverse(self): + root = self.repo.tree() + num_recursive = 0 + all_items = list() + for obj in root.traverse(): + if "/" in obj.path: + num_recursive += 1 + + assert isinstance(obj, (Blob, Tree)) + all_items.append(obj) + # END for each object + # limit recursion level to 0 - should be same as default iteration + assert all_items + assert 'CHANGES' in root + assert len(list(root)) == len(list(root.traverse(max_depth=0))) + + # only choose trees + trees_only = lambda i: i.type == "tree" + trees = list(root.traverse(predicate = trees_only)) + assert len(trees) == len(list( i for i in root.traverse() if trees_only(i) )) + + # trees and blobs + assert len(set(trees)|set(root.trees)) == len(trees) + assert len(set(b for b in root if isinstance(b, Blob)) | set(root.blobs)) == len( root.blobs ) - @patch_object(Git, '_call_process') - def test_slash_with_commits(self, git): - git.return_value = fixture('ls_tree_commit') - - tree = self.repo.tree('master') - - assert_none(tree/'bar') - assert_equal('2afb47bcedf21663580d5e6d2f406f08f3f65f19', (tree/'foo').id) - assert_equal('f623ee576a09ca491c4a27e48c0dfe04be5f4a2e', (tree/'baz').id) - - assert_true(git.called) - assert_equal(git.call_args, (('ls_tree', 'master'), {})) - - @patch_object(Blob, 'size') - @patch_object(Git, '_call_process') - def test_dict(self, git, blob): - git.return_value = fixture('ls_tree_a') - blob.return_value = 1 - - tree = self.repo.tree('master') - - assert_equal('aa06ba24b4e3f463b3c4a85469d0fb9e5b421cf8', tree['lib'].id) - assert_equal('8b1e02c0fb554eed2ce2ef737a68bb369d7527df', tree['README.txt'].id) - - assert_true(git.called) - assert_equal(git.call_args, (('ls_tree', 'master'), {})) - - @patch_object(Blob, 'size') - @patch_object(Git, '_call_process') - def test_dict_with_zero_length_file(self, git, blob): - git.return_value = fixture('ls_tree_a') - blob.return_value = 0 - - tree = self.repo.tree('master') - - assert_not_none(tree['README.txt']) - assert_equal('8b1e02c0fb554eed2ce2ef737a68bb369d7527df', tree['README.txt'].id) - - assert_true(git.called) - assert_equal(git.call_args, (('ls_tree', 'master'), {})) - - @patch_object(Git, '_call_process') - def test_dict_with_commits(self, git): - git.return_value = fixture('ls_tree_commit') - - tree = self.repo.tree('master') - - assert_none(tree.get('bar')) - assert_equal('2afb47bcedf21663580d5e6d2f406f08f3f65f19', tree['foo'].id) - assert_equal('f623ee576a09ca491c4a27e48c0dfe04be5f4a2e', tree['baz'].id) - - assert_true(git.called) - assert_equal(git.call_args, (('ls_tree', 'master'), {})) - - @patch_object(Git, '_call_process') - @raises(KeyError) - def test_dict_with_non_existant_file(self, git): - git.return_value = fixture('ls_tree_commit') - - tree = self.repo.tree('master') - tree['bar'] - def test_repr(self): tree = Tree(self.repo, id='abc') assert_equal('<git.Tree "abc">', repr(tree)) diff --git a/test/testlib/__init__.py b/test/testlib/__init__.py index 2133eb8c..f364171b 100644 --- a/test/testlib/__init__.py +++ b/test/testlib/__init__.py @@ -8,6 +8,7 @@ import inspect from mock import * from asserts import * from helper import * +from unittest import TestCase __all__ = [ name for name, obj in locals().items() if not (name.startswith('_') or inspect.ismodule(obj)) ] diff --git a/test/testlib/helper.py b/test/testlib/helper.py index 74f48447..b66d3eaa 100644 --- a/test/testlib/helper.py +++ b/test/testlib/helper.py @@ -17,3 +17,14 @@ def fixture(name): def absolute_project_path(): return os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) + + +class ListProcessAdapter(object): + """Allows to use lists as Process object as returned by SubProcess.Popen. + Its tailored to work with the test system only""" + + def __init__(self, input_list_or_string): + l = input_list_or_string + if isinstance(l,basestring): + l = l.splitlines() + self.stdout = iter(l) |