diff options
Diffstat (limited to 'lib/git')
-rw-r--r-- | lib/git/cmd.py | 54 | ||||
-rw-r--r-- | lib/git/objects/base.py | 15 | ||||
-rw-r--r-- | lib/git/objects/commit.py | 188 | ||||
-rw-r--r-- | lib/git/objects/tag.py | 12 | ||||
-rw-r--r-- | lib/git/objects/utils.py | 20 | ||||
-rw-r--r-- | lib/git/refs.py | 10 | ||||
-rw-r--r-- | lib/git/repo.py | 590 |
7 files changed, 437 insertions, 452 deletions
diff --git a/lib/git/cmd.py b/lib/git/cmd.py index 2965eb8b..500fcd93 100644 --- a/lib/git/cmd.py +++ b/lib/git/cmd.py @@ -13,7 +13,8 @@ from errors import GitCommandError GIT_PYTHON_TRACE = os.environ.get("GIT_PYTHON_TRACE", False) execute_kwargs = ('istream', 'with_keep_cwd', 'with_extended_output', - 'with_exceptions', 'with_raw_output', 'as_process') + 'with_exceptions', 'with_raw_output', 'as_process', + 'output_stream' ) extra = {} if sys.platform == 'win32': @@ -102,7 +103,8 @@ class Git(object): with_extended_output=False, with_exceptions=True, with_raw_output=False, - as_process=False + as_process=False, + output_stream=None ): """ Handles executing the command on the shell and consumes and returns @@ -130,16 +132,20 @@ class Git(object): ``with_raw_output`` Whether to avoid stripping off trailing whitespace. - ``as_process`` - Whether to return the created process instance directly from which - streams can be read on demand. This will render with_extended_output, - with_exceptions and with_raw_output ineffective - the caller will have - to deal with the details himself. - It is important to note that the process will be placed into an AutoInterrupt - wrapper that will interrupt the process once it goes out of scope. If you - use the command in iterators, you should pass the whole process instance - instead of a single stream. - + ``as_process`` + Whether to return the created process instance directly from which + streams can be read on demand. This will render with_extended_output, + with_exceptions and with_raw_output ineffective - the caller will have + to deal with the details himself. + It is important to note that the process will be placed into an AutoInterrupt + wrapper that will interrupt the process once it goes out of scope. If you + use the command in iterators, you should pass the whole process instance + instead of a single stream. + ``output_stream`` + If set to a file-like object, data produced by the git command will be + output to the given stream directly. + Otherwise a new file will be opened. + Returns:: str(output) # extended_output = False (Default) @@ -160,13 +166,17 @@ class Git(object): cwd = os.getcwd() else: cwd=self.git_dir + + ostream = subprocess.PIPE + if output_stream is not None: + ostream = output_stream # Start the process proc = subprocess.Popen(command, cwd=cwd, stdin=istream, stderr=subprocess.PIPE, - stdout=subprocess.PIPE, + stdout=ostream, **extra ) @@ -223,6 +233,21 @@ class Git(object): args.append("--%s=%s" % (dashify(k), v)) return args + @classmethod + def __unpack_args(cls, arg_list): + if not isinstance(arg_list, (list,tuple)): + return [ str(arg_list) ] + + outlist = list() + for arg in arg_list: + if isinstance(arg_list, (list, tuple)): + outlist.extend(cls.__unpack_args( arg )) + # END recursion + else: + outlist.append(str(arg)) + # END for each arg + return outlist + def _call_process(self, method, *args, **kwargs): """ Run the given git command with the specified arguments and return @@ -258,7 +283,8 @@ class Git(object): # Prepare the argument list opt_args = self.transform_kwargs(**kwargs) - ext_args = map(str, args) + + ext_args = self.__unpack_args(args) args = opt_args + ext_args call = ["git", dashify(method)] diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index 07538ada..3b48e066 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -5,17 +5,32 @@ # the BSD License: http://www.opensource.org/licenses/bsd-license.php import os from git.utils import LazyMixin +import utils _assertion_msg_format = "Created object %r whose python type %r disagrees with the acutal git object type %r" class Object(LazyMixin): """ Implements an Object which may be Blobs, Trees, Commits and Tags + + This Object also serves as a constructor for instances of the correct type:: + + inst = Object(repo,id) """ TYPES = ("blob", "tree", "commit", "tag") __slots__ = ("repo", "id", "size", "data" ) type = None # to be set by subclass + def __new__(cls, repo, id, *args, **kwargs): + if cls is Object: + hexsha, typename, size = repo.git.get_object_header(id) + obj_type = utils.get_object_type_by_name(typename) + inst = super(Object,cls).__new__(obj_type, repo, hexsha, *args, **kwargs) + inst.size = size + return inst + else: + return super(Object,cls).__new__(cls, repo, id, *args, **kwargs) + def __init__(self, repo, id): """ Initialize an object by identifying it by its id. All keyword arguments diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index 101014ab..847f4dec 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -4,14 +4,12 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -import re -import time from git.utils import Iterable -from git.actor import Actor import git.diff as diff import git.stats as stats from tree import Tree import base +import utils class Commit(base.Object, Iterable): """ @@ -20,8 +18,6 @@ class Commit(base.Object, Iterable): This class will act lazily on some of its attributes and will query the value on demand only if it involves calling the git binary. """ - # precompiled regex - re_actor_epoch = re.compile(r'^.+? (.*) (\d+) .*$') # object configuration type = "commit" @@ -48,14 +44,16 @@ class Commit(base.Object, Iterable): ``author`` : Actor is the author string ( will be implicitly converted into an Actor object ) - ``authored_date`` : (tm_year, tm_mon, tm_mday, tm_hour, tm_min, tm_sec, tm_wday, tm_yday, tm_isdst ) - is the authored DateTime + ``authored_date`` : int_seconds_since_epoch + is the authored DateTime - use time.gmtime() to convert it into a + different format ``committer`` : Actor is the committer string - ``committed_date`` : (tm_year, tm_mon, tm_mday, tm_hour, tm_min, tm_sec, tm_wday, tm_yday, tm_isdst) - is the committed DateTime + ``committed_date`` : int_seconds_since_epoch + is the committed DateTime - use time.gmtime() to convert it into a + different format ``message`` : string is the commit message @@ -102,45 +100,49 @@ class Commit(base.Object, Iterable): First line of the commit message. """ return self.message.split('\n', 1)[0] - + @classmethod - def count(cls, repo, ref, path=''): + def count(cls, repo, rev, paths='', **kwargs): """ - Count the number of commits reachable from this ref + Count the number of commits reachable from this revision ``repo`` is the Repo - ``ref`` - is the ref from which to begin (SHA1 or name) + ``rev`` + revision specifier, see git-rev-parse for viable options - ``path`` - is an optinal path + ``paths`` + is an optinal path or a list of paths restricting the return value + to commits actually containing the paths + ``kwargs`` + Additional options to be passed to git-rev-list Returns int """ - return len(repo.git.rev_list(ref, '--', path).strip().splitlines()) + return len(repo.git.rev_list(rev, '--', paths, **kwargs).strip().splitlines()) @classmethod - def iter_items(cls, repo, ref, path='', **kwargs): + def iter_items(cls, repo, rev, paths='', **kwargs): """ Find all commits matching the given criteria. ``repo`` is the Repo - ``ref`` - is the ref from which to begin (SHA1, Head or name) + ``rev`` + revision specifier, see git-rev-parse for viable options - ``path`` - is an optinal path, if set only Commits that include the path - will be considered + ``paths`` + is an optinal path or list of paths, if set only Commits that include the path + or paths will be considered ``kwargs`` - optional keyword arguments to git where + optional keyword arguments to git rev-list where ``max_count`` is the maximum number of commits to fetch ``skip`` is the number of commits to skip + ``since`` all commits since i.e. '1970-01-01' Returns iterator yielding Commit items @@ -149,61 +151,30 @@ class Commit(base.Object, Iterable): options.update(kwargs) # the test system might confront us with string values - - proc = repo.git.rev_list(ref, '--', path, **options) + proc = repo.git.rev_list(rev, '--', paths, **options) return cls._iter_from_process_or_stream(repo, proc) - - @classmethod - def _iter_from_process_or_stream(cls, repo, proc_or_stream): - """ - Parse out commit information into a list of Commit objects - - ``repo`` - is the Repo - - ``proc`` - git-rev-list process instance (raw format) - - Returns - iterator returning Commit objects + + def iter_parents(self, paths='', **kwargs): """ - stream = proc_or_stream - if not hasattr(stream,'next'): - stream = proc_or_stream.stdout - - for line in stream: - id = line.split()[1] - assert line.split()[0] == "commit" - tree = stream.next().split()[1] - - parents = [] - next_line = None - for parent_line in stream: - if not parent_line.startswith('parent'): - next_line = parent_line - break - # END abort reading parents - parents.append(parent_line.split()[-1]) - # END for each parent line - - author, authored_date = cls._actor(next_line) - committer, committed_date = cls._actor(stream.next()) - - # empty line - stream.next() - - message_lines = [] - next_line = None - for msg_line in stream: - if not msg_line.startswith(' '): - break - # END abort message reading - message_lines.append(msg_line.strip()) - # END while there are message lines - message = '\n'.join(message_lines) + Iterate _all_ parents of this commit. + + ``paths`` + Optional path or list of paths limiting the Commits to those that + contain at least one of the paths + + ``kwargs`` + All arguments allowed by git-rev-list - yield Commit(repo, id=id, parents=parents, tree=tree, author=author, authored_date=authored_date, - committer=committer, committed_date=committed_date, message=message) - # END for each line in stream + Return: + Iterator yielding Commit objects which are parents of self + """ + # skip ourselves + skip = kwargs.get("skip", 1) + if skip == 0: # skip ourselves + skip = 1 + kwargs['skip'] = skip + + return self.iter_items( self.repo, self, paths, **kwargs ) @classmethod def diff(cls, repo, a, b=None, paths=None): @@ -279,6 +250,60 @@ class Commit(base.Object, Iterable): text = self.repo.git.diff(self.parents[0].id, self.id, '--', numstat=True) return stats.Stats._list_from_string(self.repo, text) + @classmethod + def _iter_from_process_or_stream(cls, repo, proc_or_stream): + """ + Parse out commit information into a list of Commit objects + + ``repo`` + is the Repo + + ``proc`` + git-rev-list process instance (raw format) + + Returns + iterator returning Commit objects + """ + stream = proc_or_stream + if not hasattr(stream,'next'): + stream = proc_or_stream.stdout + + for line in stream: + id = line.split()[1] + assert line.split()[0] == "commit" + tree = stream.next().split()[1] + + parents = [] + next_line = None + for parent_line in stream: + if not parent_line.startswith('parent'): + next_line = parent_line + break + # END abort reading parents + parents.append(parent_line.split()[-1]) + # END for each parent line + + author, authored_date = utils.parse_actor_and_date(next_line) + committer, committed_date = utils.parse_actor_and_date(stream.next()) + + # empty line + stream.next() + + message_lines = [] + next_line = None + for msg_line in stream: + if not msg_line.startswith(' '): + break + # END abort message reading + message_lines.append(msg_line.strip()) + # END while there are message lines + message = '\n'.join(message_lines) + + yield Commit(repo, id=id, parents=tuple(parents), tree=tree, author=author, authored_date=authored_date, + committer=committer, committed_date=committed_date, message=message) + # END for each line in stream + + def __str__(self): """ Convert commit to string which is SHA1 """ return self.id @@ -286,14 +311,3 @@ class Commit(base.Object, Iterable): def __repr__(self): return '<git.Commit "%s">' % self.id - @classmethod - def _actor(cls, line): - """ - Parse out the actor (author or committer) info - - Returns - [Actor, gmtime(acted at time)] - """ - m = cls.re_actor_epoch.search(line) - actor, epoch = m.groups() - return (Actor._from_string(actor), time.gmtime(int(epoch))) diff --git a/lib/git/objects/tag.py b/lib/git/objects/tag.py index ecf6349d..f54d4b64 100644 --- a/lib/git/objects/tag.py +++ b/lib/git/objects/tag.py @@ -7,8 +7,7 @@ Module containing all object based types. """ import base -import commit -from utils import get_object_type_by_name +import utils class TagObject(base.Object): """ @@ -38,8 +37,9 @@ class TagObject(base.Object): ``tagger`` Actor identifying the tagger - ``tagged_date`` : (tm_year, tm_mon, tm_mday, tm_hour, tm_min, tm_sec, tm_wday, tm_yday, tm_isdst) - is the DateTime of the tag creation + ``tagged_date`` : int_seconds_since_epoch + is the DateTime of the tag creation - use time.gmtime to convert + it into a different format """ super(TagObject, self).__init__(repo, id ) self._set_self_from_args_(locals()) @@ -53,12 +53,12 @@ class TagObject(base.Object): obj, hexsha = lines[0].split(" ") # object <hexsha> type_token, type_name = lines[1].split(" ") # type <type_name> - self.object = get_object_type_by_name(type_name)(self.repo, hexsha) + self.object = utils.get_object_type_by_name(type_name)(self.repo, hexsha) self.tag = lines[2][4:] # tag <tag name> tagger_info = lines[3][7:]# tagger <actor> <date> - self.tagger, self.tagged_date = commit.Commit._actor(tagger_info) + self.tagger, self.tagged_date = utils.parse_actor_and_date(tagger_info) # line 4 empty - check git source to figure out purpose self.message = "\n".join(lines[5:]) diff --git a/lib/git/objects/utils.py b/lib/git/objects/utils.py index 15c1d114..367ed2b7 100644 --- a/lib/git/objects/utils.py +++ b/lib/git/objects/utils.py @@ -6,7 +6,8 @@ """ Module for general utility functions """ -import commit, tag, blob, tree +import re +from git.actor import Actor def get_object_type_by_name(object_type_name): """ @@ -34,3 +35,20 @@ def get_object_type_by_name(object_type_name): return tree.Tree else: raise ValueError("Cannot handle unknown object type: %s" % object_type_name) + + +# precompiled regex +_re_actor_epoch = re.compile(r'^.+? (.*) (\d+) .*$') + +def parse_actor_and_date(line): + """ + Parse out the actor (author or committer) info from a line like:: + + author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700 + + Returns + [Actor, int_seconds_since_epoch] + """ + m = _re_actor_epoch.search(line) + actor, epoch = m.groups() + return (Actor._from_string(actor), int(epoch)) diff --git a/lib/git/refs.py b/lib/git/refs.py index 3c9eb817..a4d7bbb1 100644 --- a/lib/git/refs.py +++ b/lib/git/refs.py @@ -10,7 +10,7 @@ from objects.base import Object from objects.utils import get_object_type_by_name from utils import LazyMixin, Iterable -class Ref(LazyMixin, Iterable): +class Reference(LazyMixin, Iterable): """ Represents a named reference to any object """ @@ -71,8 +71,8 @@ class Ref(LazyMixin, Iterable): always point to the actual object as it gets re-created on each query """ # have to be dynamic here as we may be a tag which can point to anything - hexsha, typename, size = self.repo.git.get_object_header(self.path) - return get_object_type_by_name(typename)(self.repo, hexsha) + # Our path will be resolved to the hexsha which will be used accordingly + return Object(self.repo, self.path) @classmethod def iter_items(cls, repo, common_path = "refs", **kwargs): @@ -138,7 +138,7 @@ class Ref(LazyMixin, Iterable): # return cls(repo, full_path, obj) -class Head(Ref): +class Head(Reference): """ A Head is a named reference to a Commit. Every Head instance contains a name and a Commit object. @@ -181,7 +181,7 @@ class Head(Ref): -class TagRef(Ref): +class TagRef(Reference): """ Class representing a lightweight tag reference which either points to a commit or to a tag object. In the latter case additional information, like the signature diff --git a/lib/git/repo.py b/lib/git/repo.py index c74c7e8d..554c10cb 100644 --- a/lib/git/repo.py +++ b/lib/git/repo.py @@ -8,7 +8,6 @@ import os import re import gzip import StringIO -import time from errors import InvalidGitRepositoryError, NoSuchPathError from utils import touch, is_git_dir @@ -117,212 +116,28 @@ class Repo(object): """ return Tag.list_items(self) - def blame(self, commit, file): + def commit(self, rev=None): """ - The blame information for the given file at the given commit + The Commit object for the specified revision - Returns - list: [git.Commit, list: [<line>]] - A list of tuples associating a Commit object with a list of lines that - changed within the given commit. The Commit objects will be given in order - of appearance. - """ - data = self.git.blame(commit, '--', file, p=True) - commits = {} - blames = [] - info = None - - for line in data.splitlines(False): - parts = self.re_whitespace.split(line, 1) - firstpart = parts[0] - if self.re_hexsha_only.search(firstpart): - # handles - # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7 - indicates blame-data start - # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2 - digits = parts[-1].split(" ") - if len(digits) == 3: - info = {'id': firstpart} - blames.append([None, []]) - # END blame data initialization - else: - m = self.re_author_committer_start.search(firstpart) - if m: - # handles: - # author Tom Preston-Werner - # author-mail <tom@mojombo.com> - # author-time 1192271832 - # author-tz -0700 - # committer Tom Preston-Werner - # committer-mail <tom@mojombo.com> - # committer-time 1192271832 - # committer-tz -0700 - IGNORED BY US - role = m.group(0) - if firstpart.endswith('-mail'): - info["%s_email" % role] = parts[-1] - elif firstpart.endswith('-time'): - info["%s_date" % role] = time.gmtime(int(parts[-1])) - elif role == firstpart: - info[role] = parts[-1] - # END distinguish mail,time,name - else: - # handle - # filename lib/grit.rb - # summary add Blob - # <and rest> - if firstpart.startswith('filename'): - info['filename'] = parts[-1] - elif firstpart.startswith('summary'): - info['summary'] = parts[-1] - elif firstpart == '': - if info: - sha = info['id'] - c = commits.get(sha) - if c is None: - c = Commit( self, id=sha, - author=Actor._from_string(info['author'] + ' ' + info['author_email']), - authored_date=info['author_date'], - committer=Actor._from_string(info['committer'] + ' ' + info['committer_email']), - committed_date=info['committer_date'], - message=info['summary']) - commits[sha] = c - # END if commit objects needs initial creation - m = self.re_tab_full_line.search(line) - text, = m.groups() - blames[-1][0] = c - blames[-1][1].append( text ) - info = None - # END if we collected commit info - # END distinguish filename,summary,rest - # END distinguish author|committer vs filename,summary,rest - # END distinguish hexsha vs other information - return blames - - def commits(self, start='master', path='', max_count=None, skip=0): - """ - A list of Commit objects representing the history of a given ref/commit - - ``start`` - is the branch/commit name (default 'master') - - ``path`` - is an optional path to limit the returned commits to - Commits that do not contain that path will not be returned. - - ``max_count`` - is the maximum number of commits to return (default None) - - ``skip`` - is the number of commits to skip (default 0) which will effectively - move your commit-window by the given number. - - Returns - ``git.Commit[]`` - """ - options = {'max_count': max_count, - 'skip': skip} - - if max_count is None: - options.pop('max_count') - - return Commit.list_items(self, start, path, **options) - - def commits_between(self, frm, to): - """ - The Commits objects that are reachable via ``to`` but not via ``frm`` - Commits are returned in chronological order. - - ``from`` - is the branch/commit name of the younger item - - ``to`` - is the branch/commit name of the older item - - Returns - ``git.Commit[]`` - """ - return reversed(Commit.list_items(self, "%s..%s" % (frm, to))) - - def commits_since(self, start='master', path='', since='1970-01-01'): - """ - The Commits objects that are newer than the specified date. - Commits are returned in chronological order. - - ``start`` - is the branch/commit name (default 'master') - - ``path`` - is an optinal path to limit the returned commits to. - - - ``since`` - is a string represeting a date/time - - Returns - ``git.Commit[]`` - """ - options = {'since': since} - - return Commit.list_items(self, start, path, **options) - - def commit_count(self, start='master', path=''): - """ - The number of commits reachable by the given branch/commit - - ``start`` - is the branch/commit name (default 'master') - - ``path`` - is an optional path - Commits that do not contain the path will not contribute to the count. - - Returns - ``int`` - """ - return Commit.count(self, start, path) - - def commit(self, id=None, path = ''): - """ - The Commit object for the specified id - - ``id`` - is the SHA1 identifier of the commit or a ref or a ref name - if None, it defaults to the active branch + ``rev`` + revision specifier, see git-rev-parse for viable options. - - ``path`` - is an optional path, if set the returned commit must contain the path. - Returns ``git.Commit`` """ - if id is None: - id = self.active_branch - options = {'max_count': 1} - - commits = Commit.list_items(self, id, path, **options) - - if not commits: - raise ValueError, "Invalid identifier %s, or given path '%s' too restrictive" % ( id, path ) - return commits[0] - - def commit_deltas_from(self, other_repo, ref='master', other_ref='master'): - """ - Returns a list of commits that is in ``other_repo`` but not in self - - Returns - git.Commit[] - """ - repo_refs = self.git.rev_list(ref, '--').strip().splitlines() - other_repo_refs = other_repo.git.rev_list(other_ref, '--').strip().splitlines() - - diff_refs = list(set(other_repo_refs) - set(repo_refs)) - return map(lambda ref: Commit.list_items(other_repo, ref, max_count=1)[0], diff_refs) + if rev is None: + rev = self.active_branch + + c = Object(self, rev) + assert c.type == "commit", "Revision %s did not point to a commit, but to %s" % (rev, c) + return c - def tree(self, treeish=None): + def tree(self, ref=None): """ The Tree object for the given treeish reference - ``treeish`` + ``ref`` is a Ref instance defaulting to the active_branch if None. Examples:: @@ -336,166 +151,56 @@ class Repo(object): A ref is requried here to assure you point to a commit or tag. Otherwise it is not garantueed that you point to the root-level tree. - If you need a non-root level tree, find it by iterating the root tree. - """ - if treeish is None: - treeish = self.active_branch - if not isinstance(treeish, Ref): - raise ValueError( "Treeish reference required, got %r" % treeish ) + If you need a non-root level tree, find it by iterating the root tree. Otherwise + it cannot know about its path relative to the repository root and subsequent + operations might have unexpected results. + """ + if ref is None: + ref = self.active_branch + if not isinstance(ref, Reference): + raise ValueError( "Reference required, got %r" % ref ) # As we are directly reading object information, we must make sure # we truly point to a tree object. We resolve the ref to a sha in all cases # to assure the returned tree can be compared properly. Except for # heads, ids should always be hexshas - hexsha, typename, size = self.git.get_object_header( treeish ) + hexsha, typename, size = self.git.get_object_header( ref ) if typename != "tree": - hexsha, typename, size = self.git.get_object_header( str(treeish)+'^{tree}' ) + # will raise if this is not a valid tree + hexsha, typename, size = self.git.get_object_header( str(ref)+'^{tree}' ) # END tree handling - treeish = hexsha + ref = hexsha # the root has an empty relative path and the default mode - return Tree(self, treeish, 0, '') - - - def diff(self, a, b, *paths): - """ - The diff from commit ``a`` to commit ``b``, optionally restricted to the given file(s) - - ``a`` - is the base commit - ``b`` - is the other commit - - ``paths`` - is an optional list of file paths on which to restrict the diff - - Returns - ``str`` - """ - return self.git.diff(a, b, '--', *paths) - - def commit_diff(self, commit): - """ - The commit diff for the given commit - ``commit`` is the commit name/id - - Returns - ``git.Diff[]`` - """ - return Commit.diff(self, commit) - - @classmethod - def init_bare(self, path, mkdir=True, **kwargs): - """ - Initialize a bare git repository at the given path - - ``path`` - is the full path to the repo (traditionally ends with /<name>.git) - - ``mkdir`` - if specified will create the repository directory if it doesn't - already exists. Creates the directory with a mode=0755. - - ``kwargs`` - keyword arguments serving as additional options to the git init command - - Examples:: - - git.Repo.init_bare('/var/git/myrepo.git') - - Returns - ``git.Repo`` (the newly created repo) - """ - - if mkdir and not os.path.exists(path): - os.makedirs(path, 0755) - - git = Git(path) - output = git.init('--bare', **kwargs) - return Repo(path) - create = init_bare - - def fork_bare(self, path, **kwargs): - """ - Fork a bare git repository from this repo - - ``path`` - is the full path of the new repo (traditionally ends with /<name>.git) - - ``kwargs`` - keyword arguments to be given to the git clone command - - Returns - ``git.Repo`` (the newly forked repo) - """ - options = {'bare': True} - options.update(kwargs) - self.git.clone(self.path, path, **options) - return Repo(path) - - def archive_tar(self, treeish='master', prefix=None): - """ - Archive the given treeish - - ``treeish`` - is the treeish name/id (default 'master') + return Tree(self, ref, 0, '') - ``prefix`` - is the optional prefix to prepend to each filename in the archive - - Examples:: - - >>> repo.archive_tar - <String containing tar archive> - - >>> repo.archive_tar('a87ff14') - <String containing tar archive for commit a87ff14> - - >>> repo.archive_tar('master', 'myproject/') - <String containing tar bytes archive, whose files are prefixed with 'myproject/'> - - Returns - str (containing bytes of tar archive) - """ - options = {} - if prefix: - options['prefix'] = prefix - return self.git.archive(treeish, **options) - - def archive_tar_gz(self, treeish='master', prefix=None): + def iter_commits(self, rev=None, paths='', **kwargs): """ - Archive and gzip the given treeish - - ``treeish`` - is the treeish name/id (default 'master') - - ``prefix`` - is the optional prefix to prepend to each filename in the archive - - Examples:: + A list of Commit objects representing the history of a given ref/commit - >>> repo.archive_tar_gz - <String containing tar.gz archive> + ``rev`` + revision specifier, see git-rev-parse for viable options. + If None, the active branch will be used. - >>> repo.archive_tar_gz('a87ff14') - <String containing tar.gz archive for commit a87ff14> + ``paths`` + is an optional path or a list of paths to limit the returned commits to + Commits that do not contain that path or the paths will not be returned. + + ``kwargs`` + Arguments to be passed to git-rev-parse - common ones are + max_count and skip - >>> repo.archive_tar_gz('master', 'myproject/') - <String containing tar.gz archive and prefixed with 'myproject/'> + Note: to receive only commits between two named revisions, use the + "revA..revB" revision specifier Returns - str (containing the bytes of tar.gz archive) + ``git.Commit[]`` """ - kwargs = {} - if prefix: - kwargs['prefix'] = prefix - resultstr = self.git.archive(treeish, **kwargs) - sio = StringIO.StringIO() - gf = gzip.GzipFile(fileobj=sio, mode ='wb') - gf.write(resultstr) - gf.close() - return sio.getvalue() + if rev is None: + rev = self.active_branch + + return Commit.iter_items(self, rev, paths, **kwargs) def _get_daemon_export(self): filename = os.path.join(self.path, self.DAEMON_EXPORT_FILE) @@ -590,6 +295,213 @@ class Repo(object): Head to the active branch """ return Head( self, self.git.symbolic_ref('HEAD').strip() ) + + + def diff(self, a, b, *paths): + """ + The diff from commit ``a`` to commit ``b``, optionally restricted to the given file(s) + + ``a`` + is the base commit + ``b`` + is the other commit + + ``paths`` + is an optional list of file paths on which to restrict the diff + + Returns + ``str`` + """ + return self.git.diff(a, b, '--', *paths) + + def commit_diff(self, commit): + """ + The commit diff for the given commit + ``commit`` is the commit name/id + + Returns + ``git.Diff[]`` + """ + return Commit.diff(self, commit) + + def blame(self, rev, file): + """ + The blame information for the given file at the given revision. + + ``rev`` + revision specifier, see git-rev-parse for viable options. + + Returns + list: [git.Commit, list: [<line>]] + A list of tuples associating a Commit object with a list of lines that + changed within the given commit. The Commit objects will be given in order + of appearance. + """ + data = self.git.blame(rev, '--', file, p=True) + commits = {} + blames = [] + info = None + + for line in data.splitlines(False): + parts = self.re_whitespace.split(line, 1) + firstpart = parts[0] + if self.re_hexsha_only.search(firstpart): + # handles + # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7 - indicates blame-data start + # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2 + digits = parts[-1].split(" ") + if len(digits) == 3: + info = {'id': firstpart} + blames.append([None, []]) + # END blame data initialization + else: + m = self.re_author_committer_start.search(firstpart) + if m: + # handles: + # author Tom Preston-Werner + # author-mail <tom@mojombo.com> + # author-time 1192271832 + # author-tz -0700 + # committer Tom Preston-Werner + # committer-mail <tom@mojombo.com> + # committer-time 1192271832 + # committer-tz -0700 - IGNORED BY US + role = m.group(0) + if firstpart.endswith('-mail'): + info["%s_email" % role] = parts[-1] + elif firstpart.endswith('-time'): + info["%s_date" % role] = int(parts[-1]) + elif role == firstpart: + info[role] = parts[-1] + # END distinguish mail,time,name + else: + # handle + # filename lib/grit.rb + # summary add Blob + # <and rest> + if firstpart.startswith('filename'): + info['filename'] = parts[-1] + elif firstpart.startswith('summary'): + info['summary'] = parts[-1] + elif firstpart == '': + if info: + sha = info['id'] + c = commits.get(sha) + if c is None: + c = Commit( self, id=sha, + author=Actor._from_string(info['author'] + ' ' + info['author_email']), + authored_date=info['author_date'], + committer=Actor._from_string(info['committer'] + ' ' + info['committer_email']), + committed_date=info['committer_date'], + message=info['summary']) + commits[sha] = c + # END if commit objects needs initial creation + m = self.re_tab_full_line.search(line) + text, = m.groups() + blames[-1][0] = c + blames[-1][1].append( text ) + info = None + # END if we collected commit info + # END distinguish filename,summary,rest + # END distinguish author|committer vs filename,summary,rest + # END distinguish hexsha vs other information + return blames + + @classmethod + def init(cls, path=None, mkdir=True, **kwargs): + """ + Initialize a git repository at the given path if specified + + ``path`` + is the full path to the repo (traditionally ends with /<name>.git) + or None in which case the repository will be created in the current + working directory + + ``mkdir`` + if specified will create the repository directory if it doesn't + already exists. Creates the directory with a mode=0755. + Only effective if a path is explicitly given + + ``kwargs`` + keyword arguments serving as additional options to the git-init command + + Examples:: + + git.Repo.init('/var/git/myrepo.git',bare=True) + + Returns + ``git.Repo`` (the newly created repo) + """ + + if mkdir and path and not os.path.exists(path): + os.makedirs(path, 0755) + + git = Git(path) + output = git.init(path, **kwargs) + return Repo(path) + + def clone(self, path, **kwargs): + """ + Create a clone from this repository. + + ``path`` + is the full path of the new repo (traditionally ends with /<name>.git) + + ``kwargs`` + keyword arguments to be given to the git-clone command + + Returns + ``git.Repo`` (the newly cloned repo) + """ + self.git.clone(self.path, path, **kwargs) + return Repo(path) + + + def archive(self, ostream, treeish=None, prefix=None, **kwargs): + """ + Archive the tree at the given revision. + ``ostream`` + file compatible stream object to which the archive will be written + + ``treeish`` + is the treeish name/id, defaults to active branch + + ``prefix`` + is the optional prefix to prepend to each filename in the archive + + ``kwargs`` + Additional arguments passed to git-archive + NOTE: Use the 'format' argument to define the kind of format. Use + specialized ostreams to write any format supported by python + + Examples:: + + >>> repo.archive(open("archive" + <String containing tar.gz archive> + + >>> repo.archive_tar_gz('a87ff14') + <String containing tar.gz archive for commit a87ff14> + + >>> repo.archive_tar_gz('master', 'myproject/') + <String containing tar.gz archive and prefixed with 'myproject/'> + + Raise + GitCommandError in case something went wrong + + """ + if treeish is None: + treeish = self.active_branch + if prefix and 'prefix' not in kwargs: + kwargs['prefix'] = prefix + kwargs['as_process'] = True + kwargs['output_stream'] = ostream + + proc = self.git.archive(treeish, **kwargs) + status = proc.wait() + if status != 0: + raise GitCommandError( "git-archive", status, proc.stderr.read() ) + + def __repr__(self): return '<git.Repo "%s">' % self.path |