diff options
Diffstat (limited to 'lib/git')
-rw-r--r-- | lib/git/cmd.py | 120 | ||||
-rw-r--r-- | lib/git/objects/base.py | 43 | ||||
-rw-r--r-- | lib/git/objects/commit.py | 91 | ||||
-rw-r--r-- | lib/git/objects/tag.py | 7 | ||||
-rw-r--r-- | lib/git/objects/tree.py | 252 | ||||
-rw-r--r-- | lib/git/objects/utils.py (renamed from lib/git/objects/util.py) | 0 | ||||
-rw-r--r-- | lib/git/refs.py | 137 | ||||
-rw-r--r-- | lib/git/repo.py | 99 | ||||
-rw-r--r-- | lib/git/utils.py | 63 |
9 files changed, 564 insertions, 248 deletions
diff --git a/lib/git/cmd.py b/lib/git/cmd.py index 940e35d1..2965eb8b 100644 --- a/lib/git/cmd.py +++ b/lib/git/cmd.py @@ -13,7 +13,7 @@ from errors import GitCommandError GIT_PYTHON_TRACE = os.environ.get("GIT_PYTHON_TRACE", False) execute_kwargs = ('istream', 'with_keep_cwd', 'with_extended_output', - 'with_exceptions', 'with_raw_output') + 'with_exceptions', 'with_raw_output', 'as_process') extra = {} if sys.platform == 'win32': @@ -34,6 +34,34 @@ class Git(object): of the command to stdout. Set its value to 'full' to see details about the returned values. """ + class AutoInterrupt(object): + """ + Kill/Interrupt the stored process instance once this instance goes out of scope. It is + used to prevent processes piling up in case iterators stop reading. + Besides all attributes are wired through to the contained process object + """ + __slots__= "proc" + + def __init__(self, proc ): + self.proc = proc + + def __del__(self): + # did the process finish already so we have a return code ? + if self.proc.poll() is not None: + return + + # try to kill it + try: + os.kill(self.proc.pid, 2) # interrupt signal + except AttributeError: + # try windows + subprocess.call(("TASKKILL", "/T", "/PID", self.proc.pid)) + # END exception handling + + def __getattr__(self, attr): + return getattr(self.proc, attr) + + def __init__(self, git_dir=None): """ Initialize this instance with: @@ -44,6 +72,10 @@ class Git(object): """ super(Git, self).__init__() self.git_dir = git_dir + + # cached command slots + self.cat_file_header = None + self.cat_file_all = None def __getattr__(self, name): """ @@ -70,6 +102,7 @@ class Git(object): with_extended_output=False, with_exceptions=True, with_raw_output=False, + as_process=False ): """ Handles executing the command on the shell and consumes and returns @@ -96,6 +129,16 @@ class Git(object): ``with_raw_output`` Whether to avoid stripping off trailing whitespace. + + ``as_process`` + Whether to return the created process instance directly from which + streams can be read on demand. This will render with_extended_output, + with_exceptions and with_raw_output ineffective - the caller will have + to deal with the details himself. + It is important to note that the process will be placed into an AutoInterrupt + wrapper that will interrupt the process once it goes out of scope. If you + use the command in iterators, you should pass the whole process instance + instead of a single stream. Returns:: @@ -127,7 +170,11 @@ class Git(object): **extra ) + if as_process: + return self.AutoInterrupt(proc) + # Wait for the process to return + status = 0 try: stdout_value = proc.stdout.read() stderr_value = proc.stderr.read() @@ -218,3 +265,74 @@ class Git(object): call.extend(args) return self.execute(call, **_kwargs) + + def _parse_object_header(self, header_line): + """ + ``header_line`` + <hex_sha> type_string size_as_int + + Returns + (hex_sha, type_string, size_as_int) + + Raises + ValueError if the header contains indication for an error due to incorrect + input sha + """ + tokens = header_line.split() + if len(tokens) != 3: + raise ValueError( "SHA named %s could not be resolved" % tokens[0] ) + + return (tokens[0], tokens[1], int(tokens[2])) + + def __prepare_ref(self, ref): + # required for command to separate refs on stdin + refstr = str(ref) # could be ref-object + if refstr.endswith("\n"): + return refstr + return refstr + "\n" + + def __get_persistent_cmd(self, attr_name, cmd_name, *args,**kwargs): + cur_val = getattr(self, attr_name) + if cur_val is not None: + return cur_val + + options = { "istream" : subprocess.PIPE, "as_process" : True } + options.update( kwargs ) + + cmd = self._call_process( cmd_name, *args, **options ) + setattr(self, attr_name, cmd ) + return cmd + + def __get_object_header(self, cmd, ref): + cmd.stdin.write(self.__prepare_ref(ref)) + cmd.stdin.flush() + return self._parse_object_header(cmd.stdout.readline()) + + def get_object_header(self, ref): + """ + Use this method to quickly examine the type and size of the object behind + the given ref. + + NOTE + The method will only suffer from the costs of command invocation + once and reuses the command in subsequent calls. + + Return: + (hexsha, type_string, size_as_int) + """ + cmd = self.__get_persistent_cmd("cat_file_header", "cat_file", batch_check=True) + return self.__get_object_header(cmd, ref) + + def get_object_data(self, ref): + """ + As get_object_header, but returns object data as well + + Return: + (hexsha, type_string, size_as_int,data_string) + """ + cmd = self.__get_persistent_cmd("cat_file_all", "cat_file", batch=True) + hexsha, typename, size = self.__get_object_header(cmd, ref) + data = cmd.stdout.read(size) + cmd.stdout.read(1) # finishing newlines + + return (hexsha, typename, size, data) diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index 43aa8dd1..07538ada 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -4,32 +4,10 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php import os - -class LazyMixin(object): - lazy_properties = [] - __slots__ = tuple() +from git.utils import LazyMixin - def __getattr__(self, attr): - """ - Whenever an attribute is requested that we do not know, we allow it - to be created and set. Next time the same attribute is reqeusted, it is simply - returned from our dict/slots. - """ - self._set_cache_(attr) - # will raise in case the cache was not created - return object.__getattribute__(self, attr) +_assertion_msg_format = "Created object %r whose python type %r disagrees with the acutal git object type %r" - def _set_cache_(self, attr): - """ This method should be overridden in the derived class. - It should check whether the attribute named by attr can be created - and cached. Do nothing if you do not know the attribute or call your subclass - - The derived class may create as many additional attributes as it deems - necessary in case a git command returns more information than represented - in the single attribute.""" - pass - - class Object(LazyMixin): """ Implements an Object which may be Blobs, Trees, Commits and Tags @@ -71,9 +49,13 @@ class Object(LazyMixin): Retrieve object information """ if attr == "size": - self.size = int(self.repo.git.cat_file(self.id, s=True).rstrip()) + hexsha, typename, self.size = self.repo.git.get_object_header(self.id) + assert typename == self.type, _assertion_msg_format % (self.id, typename, self.type) elif attr == "data": - self.data = self.repo.git.cat_file(self.id, p=True, with_raw_output=True) + hexsha, typename, self.size, self.data = self.repo.git.get_object_data(self.id) + assert typename == self.type, _assertion_msg_format % (self.id, typename, self.type) + else: + super(Object,self)._set_cache_(attr) def __eq__(self, other): """ @@ -143,8 +125,15 @@ class IndexObject(Object): if isinstance(mode, basestring): self.mode = self._mode_str_to_int(mode) + def _set_cache_(self, attr): + if attr in IndexObject.__slots__: + # they cannot be retrieved lateron ( not without searching for them ) + raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ ) + else: + super(IndexObject, self)._set_cache_(attr) + @classmethod - def _mode_str_to_int( cls, modestr ): + def _mode_str_to_int(cls, modestr): """ ``modestr`` string like 755 or 644 or 100644 - only the last 3 chars will be used diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index c3e97bf9..101014ab 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -6,14 +6,14 @@ import re import time - +from git.utils import Iterable from git.actor import Actor -from tree import Tree import git.diff as diff import git.stats as stats +from tree import Tree import base -class Commit(base.Object): +class Commit(base.Object, Iterable): """ Wraps a git Commit object. @@ -37,7 +37,7 @@ class Commit(base.Object): The parameter documentation indicates the type of the argument after a colon ':'. ``id`` - is the sha id of the commit + is the sha id of the commit or a ref ``parents`` : tuple( Commit, ... ) is a tuple of commit ids or actual Commits @@ -71,7 +71,7 @@ class Commit(base.Object): # END for each parent to convert if self.id and tree is not None: - self.tree = Tree(repo, id=tree) + self.tree = Tree(repo, id=tree, path='') # END id to tree conversion def _set_cache_(self, attr): @@ -80,8 +80,11 @@ class Commit(base.Object): to be set. We set all values at once. """ - if attr in self.__slots__: - temp = Commit.find_all(self.repo, self.id, max_count=1)[0] + if attr in Commit.__slots__: + # prepare our data lines to match rev-list + data_lines = self.data.splitlines() + data_lines.insert(0, "commit %s" % self.id) + temp = self._iter_from_process_or_stream(self.repo, iter(data_lines)).next() self.parents = temp.parents self.tree = temp.tree self.author = temp.author @@ -120,7 +123,7 @@ class Commit(base.Object): return len(repo.git.rev_list(ref, '--', path).strip().splitlines()) @classmethod - def find_all(cls, repo, ref, path='', **kwargs): + def iter_items(cls, repo, ref, path='', **kwargs): """ Find all commits matching the given criteria. @@ -128,7 +131,7 @@ class Commit(base.Object): is the Repo ``ref`` - is the ref from which to begin (SHA1 or name) + is the ref from which to begin (SHA1, Head or name) ``path`` is an optinal path, if set only Commits that include the path @@ -140,55 +143,67 @@ class Commit(base.Object): ``skip`` is the number of commits to skip Returns - git.Commit[] + iterator yielding Commit items """ - options = {'pretty': 'raw'} + options = {'pretty': 'raw', 'as_process' : True } options.update(kwargs) - output = repo.git.rev_list(ref, '--', path, **options) - return cls._list_from_string(repo, output) + # the test system might confront us with string values - + proc = repo.git.rev_list(ref, '--', path, **options) + return cls._iter_from_process_or_stream(repo, proc) @classmethod - def _list_from_string(cls, repo, text): + def _iter_from_process_or_stream(cls, repo, proc_or_stream): """ Parse out commit information into a list of Commit objects ``repo`` is the Repo - ``text`` - is the text output from the git-rev-list command (raw format) + ``proc`` + git-rev-list process instance (raw format) Returns - git.Commit[] + iterator returning Commit objects """ - lines =text.splitlines(False) - commits = [] - - while lines: - id = lines.pop(0).split()[1] - tree = lines.pop(0).split()[1] + stream = proc_or_stream + if not hasattr(stream,'next'): + stream = proc_or_stream.stdout + + for line in stream: + id = line.split()[1] + assert line.split()[0] == "commit" + tree = stream.next().split()[1] parents = [] - while lines and lines[0].startswith('parent'): - parents.append(lines.pop(0).split()[-1]) - # END while there are parent lines - author, authored_date = cls._actor(lines.pop(0)) - committer, committed_date = cls._actor(lines.pop(0)) + next_line = None + for parent_line in stream: + if not parent_line.startswith('parent'): + next_line = parent_line + break + # END abort reading parents + parents.append(parent_line.split()[-1]) + # END for each parent line + + author, authored_date = cls._actor(next_line) + committer, committed_date = cls._actor(stream.next()) - # free line - lines.pop(0) + # empty line + stream.next() message_lines = [] - while lines and not lines[0].startswith('commit'): - message_lines.append(lines.pop(0).strip()) + next_line = None + for msg_line in stream: + if not msg_line.startswith(' '): + break + # END abort message reading + message_lines.append(msg_line.strip()) # END while there are message lines - message = '\n'.join(message_lines[:-1]) # last line is empty - - commits.append(Commit(repo, id=id, parents=parents, tree=tree, author=author, authored_date=authored_date, - committer=committer, committed_date=committed_date, message=message)) - # END while lines - return commits + message = '\n'.join(message_lines) + + yield Commit(repo, id=id, parents=parents, tree=tree, author=author, authored_date=authored_date, + committer=committer, committed_date=committed_date, message=message) + # END for each line in stream @classmethod def diff(cls, repo, a, b=None, paths=None): diff --git a/lib/git/objects/tag.py b/lib/git/objects/tag.py index af1022f0..ecf6349d 100644 --- a/lib/git/objects/tag.py +++ b/lib/git/objects/tag.py @@ -8,7 +8,7 @@ Module containing all object based types. """ import base import commit -from util import get_object_type_by_name +from utils import get_object_type_by_name class TagObject(base.Object): """ @@ -48,9 +48,8 @@ class TagObject(base.Object): """ Cache all our attributes at once """ - if attr in self.__slots__: - output = self.repo.git.cat_file(self.type,self.id) - lines = output.split("\n") + if attr in TagObject.__slots__: + lines = self.data.splitlines() obj, hexsha = lines[0].split(" ") # object <hexsha> type_token, type_name = lines[1].split(" ") # type <type_name> diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py index 273384a3..abfa9622 100644 --- a/lib/git/objects/tree.py +++ b/lib/git/objects/tree.py @@ -7,53 +7,125 @@ import os import blob import base +import binascii + +def sha_to_hex(sha): + """Takes a string and returns the hex of the sha within""" + hexsha = binascii.hexlify(sha) + assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % hexsha + return hexsha class Tree(base.IndexObject): + """ + Tress represent a ordered list of Blobs and other Trees. Hence it can be + accessed like a list. + + Tree's will cache their contents after first retrieval to improve efficiency. + + ``Tree as a list``:: + + Access a specific blob using the + tree['filename'] notation. + + You may as well access by index + blob = tree[0] + + + """ type = "tree" - __slots__ = "_contents" + __slots__ = "_cache" + + # using ascii codes for comparison + ascii_commit_id = (0x31 << 4) + 0x36 + ascii_blob_id = (0x31 << 4) + 0x30 + ascii_tree_id = (0x34 << 4) + 0x30 - def __init__(self, repo, id, mode=None, path=None): + + def __init__(self, repo, id, mode=0, path=None): super(Tree, self).__init__(repo, id, mode, path) def _set_cache_(self, attr): - if attr == "_contents": - # Read the tree contents. - self._contents = {} - for line in self.repo.git.ls_tree(self.id).splitlines(): - obj = self.content__from_string(self.repo, line) - if obj is not None: - self._contents[obj.path] = obj + if attr == "_cache": + # Set the data when we need it + self._cache = self._get_tree_cache() else: super(Tree, self)._set_cache_(attr) - @staticmethod - def content__from_string(repo, text): + def _get_tree_cache(self): """ - Parse a content item and create the appropriate object - - ``repo`` - is the Repo - - ``text`` - is the single line containing the items data in `git ls-tree` format - + Return + list(object_instance, ...) + + ``treeish`` + sha or ref identifying a tree + """ + out = list() + for obj in self._iter_from_data(): + if obj is not None: + out.append(obj) + # END if object was handled + # END for each line from ls-tree + return out + + + def _iter_from_data(self): + """ + Reads the binary non-pretty printed representation of a tree and converts + it into Blob, Tree or Commit objects. + + Note: This method was inspired by the parse_tree method in dulwich. + Returns - ``git.Blob`` or ``git.Tree`` + list(IndexObject, ...) """ - try: - mode, typ, id, path = text.expandtabs(1).split(" ", 3) - except: - return None + ord_zero = ord('0') + data = self.data + len_data = len(data) + i = 0 + while i < len_data: + mode = 0 + mode_boundary = i + 6 + + # keep it ascii - we compare against the respective values + type_id = (ord(data[i])<<4) + ord(data[i+1]) + i += 2 + + while data[i] != ' ': + # move existing mode integer up one level being 3 bits + # and add the actual ordinal value of the character + mode = (mode << 3) + (ord(data[i]) - ord_zero) + i += 1 + # END while reading mode + + # byte is space now, skip it + i += 1 + + # parse name, it is NULL separated + + ns = i + while data[i] != '\0': + i += 1 + # END while not reached NULL + name = data[ns:i] + + # byte is NULL, get next 20 + i += 1 + sha = data[i:i+20] + i = i + 20 + + hexsha = sha_to_hex(sha) + if type_id == self.ascii_blob_id: + yield blob.Blob(self.repo, hexsha, mode, name) + elif type_id == self.ascii_tree_id: + yield Tree(self.repo, hexsha, mode, name) + elif type_id == self.ascii_commit_id: + # todo + yield None + else: + raise TypeError( "Unknown type found in tree data: %i" % type_id ) + # END for each byte in data stream - if typ == "tree": - return Tree(repo, id, mode, path) - elif typ == "blob": - return blob.Blob(repo, id, mode, path) - elif typ == "commit": - return None - else: - raise(TypeError, "Invalid type: %s" % typ) def __div__(self, file): """ @@ -67,36 +139,104 @@ class Tree(base.IndexObject): <git.Blob "8b1e02c0fb554eed2ce2ef737a68bb369d7527df"> Returns - ``git.Blob`` or ``git.Tree`` or ``None`` if not found + ``git.Blob`` or ``git.Tree`` + + Raise + KeyError if given file or tree does not exist in tree """ - return self.get(file) + return self[file] def __repr__(self): return '<git.Tree "%s">' % self.id + + @classmethod + def _iter_recursive(cls, repo, tree, cur_depth, max_depth, predicate ): + + for obj in tree: + # adjust path to be complete + obj.path = os.path.join(tree.path, obj.path) + if not predicate(obj): + continue + yield obj + if obj.type == "tree" and ( max_depth < 0 or cur_depth+1 <= max_depth ): + for recursive_obj in cls._iter_recursive( repo, obj, cur_depth+1, max_depth, predicate ): + yield recursive_obj + # END for each recursive object + # END if we may enter recursion + # END for each object + + def traverse(self, max_depth=-1, predicate = lambda i: True): + """ + Returns + Iterator to traverse the tree recursively up to the given level. + The iterator returns Blob and Tree objects + + ``max_depth`` + + if -1, the whole tree will be traversed + if 0, only the first level will be traversed which is the same as + the default non-recursive iterator + + ``predicate`` + + If predicate(item) returns True, item will be returned by iterator + """ + return self._iter_recursive( self.repo, self, 0, max_depth, predicate ) + + @property + def trees(self): + """ + Returns + list(Tree, ...) list of trees directly below this tree + """ + return [ i for i in self if i.type == "tree" ] + + @property + def blobs(self): + """ + Returns + list(Blob, ...) list of blobs directly below this tree + """ + return [ i for i in self if i.type == "blob" ] - # Implement the basics of the dict protocol: - # directories/trees can be seen as object dicts. - def __getitem__(self, key): - return self._contents[key] + # List protocol + def __getslice__(self,i,j): + return self._cache[i:j] + def __iter__(self): - return iter(self._contents) - + return iter(self._cache) + def __len__(self): - return len(self._contents) - - def __contains__(self, key): - return key in self._contents - - def get(self, key): - return self._contents.get(key) - - def items(self): - return self._contents.items() - - def keys(self): - return self._contents.keys() - - def values(self): - return self._contents.values() + return len(self._cache) + + def __getitem__(self,item): + if isinstance(item, int): + return self._cache[item] + + if isinstance(item, basestring): + # compatability + for obj in self._cache: + if obj.path == item: + return obj + # END for each obj + raise KeyError( "Blob or Tree named %s not found" % item ) + # END index is basestring + + raise TypeError( "Invalid index type: %r" % item ) + + + def __contains__(self,item): + if isinstance(item, base.IndexObject): + return item in self._cache + + # compatability + for obj in self._cache: + if item == obj.path: + return True + # END for each item + return False + + def __reversed__(self): + return reversed(self._cache) diff --git a/lib/git/objects/util.py b/lib/git/objects/utils.py index 15c1d114..15c1d114 100644 --- a/lib/git/objects/util.py +++ b/lib/git/objects/utils.py diff --git a/lib/git/refs.py b/lib/git/refs.py index 820150d3..3c9eb817 100644 --- a/lib/git/refs.py +++ b/lib/git/refs.py @@ -7,17 +7,20 @@ Module containing all ref based objects """ from objects.base import Object -from objects.util import get_object_type_by_name +from objects.utils import get_object_type_by_name +from utils import LazyMixin, Iterable -class Ref(object): +class Ref(LazyMixin, Iterable): """ Represents a named reference to any object """ - __slots__ = ("path", "object") + __slots__ = ("repo", "path") - def __init__(self, path, object = None): + def __init__(self, repo, path, object = None): """ Initialize this instance + ``repo`` + Our parent repository ``path`` Path relative to the .git/ directory pointing to the ref in question, i.e. @@ -26,8 +29,10 @@ class Ref(object): ``object`` Object instance, will be retrieved on demand if None """ + self.repo = repo self.path = path - self.object = object + if object is not None: + self.object = object def __str__(self): return self.name @@ -57,9 +62,20 @@ class Ref(object): return self.path # could be refs/HEAD return '/'.join(tokens[2:]) - + + @property + def object(self): + """ + Returns + The object our ref currently refers to. Refs can be cached, they will + always point to the actual object as it gets re-created on each query + """ + # have to be dynamic here as we may be a tag which can point to anything + hexsha, typename, size = self.repo.git.get_object_header(self.path) + return get_object_type_by_name(typename)(self.repo, hexsha) + @classmethod - def find_all(cls, repo, common_path = "refs", **kwargs): + def iter_items(cls, repo, common_path = "refs", **kwargs): """ Find all refs in the repository @@ -88,54 +104,38 @@ class Ref(object): options.update(kwargs) output = repo.git.for_each_ref(common_path, **options) - return cls._list_from_string(repo, output) + return cls._iter_from_stream(repo, iter(output.splitlines())) @classmethod - def _list_from_string(cls, repo, text): - """ - Parse out ref information into a list of Ref compatible objects - - ``repo`` - is the Repo - ``text`` - is the text output from the git-for-each-ref command - - Returns - git.Ref[] - - list of Ref objects - """ + def _iter_from_stream(cls, repo, stream): + """ Parse out ref information into a list of Ref compatible objects + Returns git.Ref[] list of Ref objects """ heads = [] - for line in text.splitlines(): + for line in stream: heads.append(cls._from_string(repo, line)) return heads @classmethod def _from_string(cls, repo, line): - """ - Create a new Ref instance from the given string. - - ``repo`` - is the Repo - - ``line`` - is the formatted ref information - - Format:: - + """ Create a new Ref instance from the given string. + Format name: [a-zA-Z_/]+ <null byte> id: [0-9A-Fa-f]{40} - - Returns - git.Head - """ + Returns git.Head """ full_path, hexsha, type_name, object_size = line.split("\x00") - obj = get_object_type_by_name(type_name)(repo, hexsha) - obj.size = object_size - return cls(full_path, obj) + + # No, we keep the object dynamic by allowing it to be retrieved by + # our path on demand - due to perstent commands it is fast. + # This reduces the risk that the object does not match + # the changed ref anymore in case it changes in the meanwhile + return cls(repo, full_path) + + # obj = get_object_type_by_name(type_name)(repo, hexsha) + # obj.size = object_size + # return cls(repo, full_path, obj) class Head(Ref): @@ -167,14 +167,14 @@ class Head(Ref): return self.object @classmethod - def find_all(cls, repo, common_path = "refs/heads", **kwargs): + def iter_items(cls, repo, common_path = "refs/heads", **kwargs): """ Returns - git.Head[] + Iterator yielding Head items - For more documentation, please refer to git.base.Ref.find_all + For more documentation, please refer to git.base.Ref.list_items """ - return super(Head,cls).find_all(repo, common_path, **kwargs) + return super(Head,cls).iter_items(repo, common_path, **kwargs) def __repr__(self): return '<git.Head "%s">' % self.name @@ -190,30 +190,13 @@ class TagRef(Ref): This tag object will always point to a commit object, but may carray additional information in a tag object:: - tagref = TagRef.find_all(repo)[0] + tagref = TagRef.list_items(repo)[0] print tagref.commit.message if tagref.tag is not None: print tagref.tag.message """ - __slots__ = "tag" - - def __init__(self, path, commit_or_tag): - """ - Initialize a newly instantiated Tag - - ``path`` - is the full path to the tag - - ``commit_or_tag`` - is the Commit or TagObject that this tag ref points to - """ - super(TagRef, self).__init__(path, commit_or_tag) - self.tag = None - - if commit_or_tag.type == "tag": - self.tag = commit_or_tag - # END tag object handling + __slots__ = tuple() @property def commit(self): @@ -223,18 +206,32 @@ class TagRef(Ref): """ if self.object.type == "commit": return self.object - # it is a tag object - return self.object.object + elif self.object.type == "tag": + # it is a tag object which carries the commit as an object - we can point to anything + return self.object.object + else: + raise ValueError( "Tag %s points to a Blob or Tree - have never seen that before" % self ) + + @property + def tag(self): + """ + Returns + Tag object this tag ref points to or None in case + we are a light weight tag + """ + if self.object.type == "tag": + return self.object + return None @classmethod - def find_all(cls, repo, common_path = "refs/tags", **kwargs): + def iter_items(cls, repo, common_path = "refs/tags", **kwargs): """ Returns - git.Tag[] + Iterator yielding commit items - For more documentation, please refer to git.base.Ref.find_all + For more documentation, please refer to git.base.Ref.list_items """ - return super(TagRef,cls).find_all(repo, common_path, **kwargs) + return super(TagRef,cls).iter_items(repo, common_path, **kwargs) # provide an alias diff --git a/lib/git/repo.py b/lib/git/repo.py index dd5acfc3..c74c7e8d 100644 --- a/lib/git/repo.py +++ b/lib/git/repo.py @@ -102,7 +102,7 @@ class Repo(object): Returns ``git.Head[]`` """ - return Head.find_all(self) + return Head.list_items(self) # alias heads branches = heads @@ -115,7 +115,7 @@ class Repo(object): Returns ``git.Tag[]`` """ - return Tag.find_all(self) + return Tag.list_items(self) def blame(self, commit, file): """ @@ -197,7 +197,7 @@ class Repo(object): # END distinguish hexsha vs other information return blames - def commits(self, start='master', path='', max_count=10, skip=0): + def commits(self, start='master', path='', max_count=None, skip=0): """ A list of Commit objects representing the history of a given ref/commit @@ -209,7 +209,7 @@ class Repo(object): Commits that do not contain that path will not be returned. ``max_count`` - is the maximum number of commits to return (default 10) + is the maximum number of commits to return (default None) ``skip`` is the number of commits to skip (default 0) which will effectively @@ -220,8 +220,11 @@ class Repo(object): """ options = {'max_count': max_count, 'skip': skip} - - return Commit.find_all(self, start, path, **options) + + if max_count is None: + options.pop('max_count') + + return Commit.list_items(self, start, path, **options) def commits_between(self, frm, to): """ @@ -237,7 +240,7 @@ class Repo(object): Returns ``git.Commit[]`` """ - return reversed(Commit.find_all(self, "%s..%s" % (frm, to))) + return reversed(Commit.list_items(self, "%s..%s" % (frm, to))) def commits_since(self, start='master', path='', since='1970-01-01'): """ @@ -259,7 +262,7 @@ class Repo(object): """ options = {'since': since} - return Commit.find_all(self, start, path, **options) + return Commit.list_items(self, start, path, **options) def commit_count(self, start='master', path=''): """ @@ -277,12 +280,14 @@ class Repo(object): """ return Commit.count(self, start, path) - def commit(self, id, path = ''): + def commit(self, id=None, path = ''): """ The Commit object for the specified id ``id`` - is the SHA1 identifier of the commit + is the SHA1 identifier of the commit or a ref or a ref name + if None, it defaults to the active branch + ``path`` is an optional path, if set the returned commit must contain the path. @@ -290,9 +295,11 @@ class Repo(object): Returns ``git.Commit`` """ + if id is None: + id = self.active_branch options = {'max_count': 1} - commits = Commit.find_all(self, id, path, **options) + commits = Commit.list_items(self, id, path, **options) if not commits: raise ValueError, "Invalid identifier %s, or given path '%s' too restrictive" % ( id, path ) @@ -309,55 +316,47 @@ class Repo(object): other_repo_refs = other_repo.git.rev_list(other_ref, '--').strip().splitlines() diff_refs = list(set(other_repo_refs) - set(repo_refs)) - return map(lambda ref: Commit.find_all(other_repo, ref, max_count=1)[0], diff_refs) + return map(lambda ref: Commit.list_items(other_repo, ref, max_count=1)[0], diff_refs) - def tree(self, treeish='master'): + def tree(self, treeish=None): """ The Tree object for the given treeish reference ``treeish`` - is the reference (default 'master') + is a Ref instance defaulting to the active_branch if None. Examples:: - repo.tree('master') - + repo.tree(repo.heads[0]) Returns ``git.Tree`` + + NOTE + A ref is requried here to assure you point to a commit or tag. Otherwise + it is not garantueed that you point to the root-level tree. + + If you need a non-root level tree, find it by iterating the root tree. """ - return Tree(self, id=treeish) - - def blob(self, id): - """ - The Blob object for the given id - - ``id`` - is the SHA1 id of the blob - - Returns - ``git.Blob`` - """ - return Blob(self, id=id) - - def log(self, commit='master', path=None, **kwargs): - """ - The Commit for a treeish, and all commits leading to it. + if treeish is None: + treeish = self.active_branch + if not isinstance(treeish, Ref): + raise ValueError( "Treeish reference required, got %r" % treeish ) - ``kwargs`` - keyword arguments specifying flags to be used in git-log command, - i.e.: max_count=1 to limit the amount of commits returned + + # As we are directly reading object information, we must make sure + # we truly point to a tree object. We resolve the ref to a sha in all cases + # to assure the returned tree can be compared properly. Except for + # heads, ids should always be hexshas + hexsha, typename, size = self.git.get_object_header( treeish ) + if typename != "tree": + hexsha, typename, size = self.git.get_object_header( str(treeish)+'^{tree}' ) + # END tree handling + treeish = hexsha + + # the root has an empty relative path and the default mode + return Tree(self, treeish, 0, '') - Returns - ``git.Commit[]`` - """ - options = {'pretty': 'raw'} - options.update(kwargs) - arg = [commit, '--'] - if path: - arg.append(path) - commits = self.git.log(*arg, **options) - return Commit._list_from_string(self, commits) def diff(self, a, b, *paths): """ @@ -588,13 +587,9 @@ class Repo(object): The name of the currently active branch. Returns - str (the branch name) + Head to the active branch """ - branch = self.git.symbolic_ref('HEAD').strip() - if branch.startswith('refs/heads/'): - branch = branch[len('refs/heads/'):] - - return branch + return Head( self, self.git.symbolic_ref('HEAD').strip() ) def __repr__(self): return '<git.Repo "%s">' % self.path diff --git a/lib/git/utils.py b/lib/git/utils.py index c204c432..f84c247d 100644 --- a/lib/git/utils.py +++ b/lib/git/utils.py @@ -24,3 +24,66 @@ def is_git_dir(d): (os.path.islink(headref) and os.readlink(headref).startswith('refs')) return False + + +class LazyMixin(object): + """ + Base class providing an interface to lazily retrieve attribute values upon + first access. If slots are used, memory will only be reserved once the attribute + is actually accessed and retrieved the first time. All future accesses will + return the cached value as stored in the Instance's dict or slot. + """ + __slots__ = tuple() + + def __getattr__(self, attr): + """ + Whenever an attribute is requested that we do not know, we allow it + to be created and set. Next time the same attribute is reqeusted, it is simply + returned from our dict/slots. + """ + self._set_cache_(attr) + # will raise in case the cache was not created + return object.__getattribute__(self, attr) + + def _set_cache_(self, attr): + """ This method should be overridden in the derived class. + It should check whether the attribute named by attr can be created + and cached. Do nothing if you do not know the attribute or call your subclass + + The derived class may create as many additional attributes as it deems + necessary in case a git command returns more information than represented + in the single attribute.""" + pass + + +class Iterable(object): + """ + Defines an interface for iterable items which is to assure a uniform + way to retrieve and iterate items within the git repository + """ + __slots__ = tuple() + + @classmethod + def list_items(cls, repo, *args, **kwargs): + """ + Find all items of this type - subclasses can specify args and kwargs differently. + If no args are given, subclasses are obliged to return all items if no additional + arguments arg given. + + Note: Favor the iter_items method as it will + + Returns: + list(Item,...) list of item instances + """ + return list(cls.iter_items(repo, *args, **kwargs)) + + + @classmethod + def iter_items(cls, repo, *args, **kwargs): + """ + For more information about the arguments, see list_items + Return: + iterator yielding Items + """ + raise NotImplementedError("To be implemented by Subclass") + |