summary | refs | log | tree | commit | diff
path: root/lib/git
diff options
context:
space:
mode:
author: Sebastian Thiel <byronimo@gmail.com> 2009-10-16 11:48:20 +0200
committer: Sebastian Thiel <byronimo@gmail.com> 2009-10-16 11:48:20 +0200
commit: 05d2687afcc78cd192714ee3d71fdf36a37d110f (patch)
tree: 3e3760e5b46095458cf75446330ba2fc25fa23e5 /lib/git
parent: 6226720b0e6a5f7cb9223fc50363def487831315 (diff)
parent: f2df1f56cccab13d5c92abbc6b18be725e7b4833 (diff)
download: gitpython-05d2687afcc78cd192714ee3d71fdf36a37d110f.tar.gz
Merging latest improvements including a revamped Repo interface before more changes are to be done on the way diffing is handled
Merge branch 'improvements'
* improvements:
  - Improved archive function by allowing it to directly write to an output stream - previously it would cache everything to memory and try to provide zipping functionality itself
  - repo: made init and clone methods less specific, previously they wanted to do it 'barely' only. New method names closely follow the default git command names
  - repo.commit_delta_base: removed
  - Object can now create objects of the proper type in case one attempts to create an object directly - this feature is used in several places now, allowing for additional type-checking
  - repo: removed commits_between but added a note about how this can be achieved using the iter_commits method; reorganized methods within the type as a start for more interface changes
  - Added Commit.iter_parents to iterate all parents
  - repo: removed a few methods because of redundancy or because it will be obsolete once the interface overhaul is finished. This commit is just intermediate
  - All times are not stored as time_struct, but as simple int to consume less memory
Diffstat (limited to 'lib/git')
-rw-r--r-- lib/git/cmd.py 54
-rw-r--r-- lib/git/objects/base.py 15
-rw-r--r-- lib/git/objects/commit.py 188
-rw-r--r-- lib/git/objects/tag.py 12
-rw-r--r-- lib/git/objects/utils.py 20
-rw-r--r-- lib/git/refs.py 10
-rw-r--r-- lib/git/repo.py 590
7 files changed, 437 insertions, 452 deletions
diff --git a/lib/git/cmd.py b/lib/git/cmd.py
index 2965eb8b..500fcd93 100644
--- a/lib/git/cmd.py
+++ b/lib/git/cmd.py
@@ -13,7 +13,8 @@ from errors import GitCommandError
GIT_PYTHON_TRACE = os.environ.get("GIT_PYTHON_TRACE", False)
execute_kwargs = ('istream', 'with_keep_cwd', 'with_extended_output',
- 'with_exceptions', 'with_raw_output', 'as_process')
+ 'with_exceptions', 'with_raw_output', 'as_process',
+ 'output_stream' )
extra = {}
if sys.platform == 'win32':
@@ -102,7 +103,8 @@ class Git(object):
with_extended_output=False,
with_exceptions=True,
with_raw_output=False,
- as_process=False
+ as_process=False,
+ output_stream=None
):
"""
Handles executing the command on the shell and consumes and returns
@@ -130,16 +132,20 @@ class Git(object):
``with_raw_output``
Whether to avoid stripping off trailing whitespace.
- ``as_process``
- Whether to return the created process instance directly from which
- streams can be read on demand. This will render with_extended_output,
- with_exceptions and with_raw_output ineffective - the caller will have
- to deal with the details himself.
- It is important to note that the process will be placed into an AutoInterrupt
- wrapper that will interrupt the process once it goes out of scope. If you
- use the command in iterators, you should pass the whole process instance
- instead of a single stream.
-
+ ``as_process``
+ Whether to return the created process instance directly from which
+ streams can be read on demand. This will render with_extended_output,
+ with_exceptions and with_raw_output ineffective - the caller will have
+ to deal with the details himself.
+ It is important to note that the process will be placed into an AutoInterrupt
+ wrapper that will interrupt the process once it goes out of scope. If you
+ use the command in iterators, you should pass the whole process instance
+ instead of a single stream.
+ ``output_stream``
+ If set to a file-like object, data produced by the git command will be
+ output to the given stream directly.
+ Otherwise a new file will be opened.
+
Returns::
str(output) # extended_output = False (Default)
@@ -160,13 +166,17 @@ class Git(object):
cwd = os.getcwd()
else:
cwd=self.git_dir
+
+ ostream = subprocess.PIPE
+ if output_stream is not None:
+ ostream = output_stream
# Start the process
proc = subprocess.Popen(command,
cwd=cwd,
stdin=istream,
stderr=subprocess.PIPE,
- stdout=subprocess.PIPE,
+ stdout=ostream,
**extra
)
@@ -223,6 +233,21 @@ class Git(object):
args.append("--%s=%s" % (dashify(k), v))
return args
+ @classmethod
+ def __unpack_args(cls, arg_list):
+ if not isinstance(arg_list, (list,tuple)):
+ return [ str(arg_list) ]
+
+ outlist = list()
+ for arg in arg_list:
+ if isinstance(arg_list, (list, tuple)):
+ outlist.extend(cls.__unpack_args( arg ))
+ # END recursion
+ else:
+ outlist.append(str(arg))
+ # END for each arg
+ return outlist
+
def _call_process(self, method, *args, **kwargs):
"""
Run the given git command with the specified arguments and return
@@ -258,7 +283,8 @@ class Git(object):
# Prepare the argument list
opt_args = self.transform_kwargs(**kwargs)
- ext_args = map(str, args)
+
+ ext_args = self.__unpack_args(args)
args = opt_args + ext_args
call = ["git", dashify(method)]
diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py
index 07538ada..3b48e066 100644
--- a/lib/git/objects/base.py
+++ b/lib/git/objects/base.py
@@ -5,17 +5,32 @@
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
import os
from git.utils import LazyMixin
+import utils
_assertion_msg_format = "Created object %r whose python type %r disagrees with the acutal git object type %r"
class Object(LazyMixin):
"""
Implements an Object which may be Blobs, Trees, Commits and Tags
+
+ This Object also serves as a constructor for instances of the correct type::
+
+ inst = Object(repo,id)
"""
TYPES = ("blob", "tree", "commit", "tag")
__slots__ = ("repo", "id", "size", "data" )
type = None # to be set by subclass
+ def __new__(cls, repo, id, *args, **kwargs):
+ if cls is Object:
+ hexsha, typename, size = repo.git.get_object_header(id)
+ obj_type = utils.get_object_type_by_name(typename)
+ inst = super(Object,cls).__new__(obj_type, repo, hexsha, *args, **kwargs)
+ inst.size = size
+ return inst
+ else:
+ return super(Object,cls).__new__(cls, repo, id, *args, **kwargs)
+
def __init__(self, repo, id):
"""
Initialize an object by identifying it by its id. All keyword arguments
diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py
index 101014ab..847f4dec 100644
--- a/lib/git/objects/commit.py
+++ b/lib/git/objects/commit.py
@@ -4,14 +4,12 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-import re
-import time
from git.utils import Iterable
-from git.actor import Actor
import git.diff as diff
import git.stats as stats
from tree import Tree
import base
+import utils
class Commit(base.Object, Iterable):
"""
@@ -20,8 +18,6 @@ class Commit(base.Object, Iterable):
This class will act lazily on some of its attributes and will query the
value on demand only if it involves calling the git binary.
"""
- # precompiled regex
- re_actor_epoch = re.compile(r'^.+? (.*) (\d+) .*$')
# object configuration
type = "commit"
@@ -48,14 +44,16 @@ class Commit(base.Object, Iterable):
``author`` : Actor
is the author string ( will be implicitly converted into an Actor object )
- ``authored_date`` : (tm_year, tm_mon, tm_mday, tm_hour, tm_min, tm_sec, tm_wday, tm_yday, tm_isdst )
- is the authored DateTime
+ ``authored_date`` : int_seconds_since_epoch
+ is the authored DateTime - use time.gmtime() to convert it into a
+ different format
``committer`` : Actor
is the committer string
- ``committed_date`` : (tm_year, tm_mon, tm_mday, tm_hour, tm_min, tm_sec, tm_wday, tm_yday, tm_isdst)
- is the committed DateTime
+ ``committed_date`` : int_seconds_since_epoch
+ is the committed DateTime - use time.gmtime() to convert it into a
+ different format
``message`` : string
is the commit message
@@ -102,45 +100,49 @@ class Commit(base.Object, Iterable):
First line of the commit message.
"""
return self.message.split('\n', 1)[0]
-
+
@classmethod
- def count(cls, repo, ref, path=''):
+ def count(cls, repo, rev, paths='', **kwargs):
"""
- Count the number of commits reachable from this ref
+ Count the number of commits reachable from this revision
``repo``
is the Repo
- ``ref``
- is the ref from which to begin (SHA1 or name)
+ ``rev``
+ revision specifier, see git-rev-parse for viable options
- ``path``
- is an optinal path
+ ``paths``
+ is an optinal path or a list of paths restricting the return value
+ to commits actually containing the paths
+ ``kwargs``
+ Additional options to be passed to git-rev-list
Returns
int
"""
- return len(repo.git.rev_list(ref, '--', path).strip().splitlines())
+ return len(repo.git.rev_list(rev, '--', paths, **kwargs).strip().splitlines())
@classmethod
- def iter_items(cls, repo, ref, path='', **kwargs):
+ def iter_items(cls, repo, rev, paths='', **kwargs):
"""
Find all commits matching the given criteria.
``repo``
is the Repo
- ``ref``
- is the ref from which to begin (SHA1, Head or name)
+ ``rev``
+ revision specifier, see git-rev-parse for viable options
- ``path``
- is an optinal path, if set only Commits that include the path
- will be considered
+ ``paths``
+ is an optinal path or list of paths, if set only Commits that include the path
+ or paths will be considered
``kwargs``
- optional keyword arguments to git where
+ optional keyword arguments to git rev-list where
``max_count`` is the maximum number of commits to fetch
``skip`` is the number of commits to skip
+ ``since`` all commits since i.e. '1970-01-01'
Returns
iterator yielding Commit items
@@ -149,61 +151,30 @@ class Commit(base.Object, Iterable):
options.update(kwargs)
# the test system might confront us with string values -
- proc = repo.git.rev_list(ref, '--', path, **options)
+ proc = repo.git.rev_list(rev, '--', paths, **options)
return cls._iter_from_process_or_stream(repo, proc)
-
- @classmethod
- def _iter_from_process_or_stream(cls, repo, proc_or_stream):
- """
- Parse out commit information into a list of Commit objects
-
- ``repo``
- is the Repo
-
- ``proc``
- git-rev-list process instance (raw format)
-
- Returns
- iterator returning Commit objects
+
+ def iter_parents(self, paths='', **kwargs):
"""
- stream = proc_or_stream
- if not hasattr(stream,'next'):
- stream = proc_or_stream.stdout
-
- for line in stream:
- id = line.split()[1]
- assert line.split()[0] == "commit"
- tree = stream.next().split()[1]
-
- parents = []
- next_line = None
- for parent_line in stream:
- if not parent_line.startswith('parent'):
- next_line = parent_line
- break
- # END abort reading parents
- parents.append(parent_line.split()[-1])
- # END for each parent line
-
- author, authored_date = cls._actor(next_line)
- committer, committed_date = cls._actor(stream.next())
-
- # empty line
- stream.next()
-
- message_lines = []
- next_line = None
- for msg_line in stream:
- if not msg_line.startswith(' '):
- break
- # END abort message reading
- message_lines.append(msg_line.strip())
- # END while there are message lines
- message = '\n'.join(message_lines)
+ Iterate _all_ parents of this commit.
+
+ ``paths``
+ Optional path or list of paths limiting the Commits to those that
+ contain at least one of the paths
+
+ ``kwargs``
+ All arguments allowed by git-rev-list
- yield Commit(repo, id=id, parents=parents, tree=tree, author=author, authored_date=authored_date,
- committer=committer, committed_date=committed_date, message=message)
- # END for each line in stream
+ Return:
+ Iterator yielding Commit objects which are parents of self
+ """
+ # skip ourselves
+ skip = kwargs.get("skip", 1)
+ if skip == 0: # skip ourselves
+ skip = 1
+ kwargs['skip'] = skip
+
+ return self.iter_items( self.repo, self, paths, **kwargs )
@classmethod
def diff(cls, repo, a, b=None, paths=None):
@@ -279,6 +250,60 @@ class Commit(base.Object, Iterable):
text = self.repo.git.diff(self.parents[0].id, self.id, '--', numstat=True)
return stats.Stats._list_from_string(self.repo, text)
+ @classmethod
+ def _iter_from_process_or_stream(cls, repo, proc_or_stream):
+ """
+ Parse out commit information into a list of Commit objects
+
+ ``repo``
+ is the Repo
+
+ ``proc``
+ git-rev-list process instance (raw format)
+
+ Returns
+ iterator returning Commit objects
+ """
+ stream = proc_or_stream
+ if not hasattr(stream,'next'):
+ stream = proc_or_stream.stdout
+
+ for line in stream:
+ id = line.split()[1]
+ assert line.split()[0] == "commit"
+ tree = stream.next().split()[1]
+
+ parents = []
+ next_line = None
+ for parent_line in stream:
+ if not parent_line.startswith('parent'):
+ next_line = parent_line
+ break
+ # END abort reading parents
+ parents.append(parent_line.split()[-1])
+ # END for each parent line
+
+ author, authored_date = utils.parse_actor_and_date(next_line)
+ committer, committed_date = utils.parse_actor_and_date(stream.next())
+
+ # empty line
+ stream.next()
+
+ message_lines = []
+ next_line = None
+ for msg_line in stream:
+ if not msg_line.startswith(' '):
+ break
+ # END abort message reading
+ message_lines.append(msg_line.strip())
+ # END while there are message lines
+ message = '\n'.join(message_lines)
+
+ yield Commit(repo, id=id, parents=tuple(parents), tree=tree, author=author, authored_date=authored_date,
+ committer=committer, committed_date=committed_date, message=message)
+ # END for each line in stream
+
+
def __str__(self):
""" Convert commit to string which is SHA1 """
return self.id
@@ -286,14 +311,3 @@ class Commit(base.Object, Iterable):
def __repr__(self):
return '<git.Commit "%s">' % self.id
- @classmethod
- def _actor(cls, line):
- """
- Parse out the actor (author or committer) info
-
- Returns
- [Actor, gmtime(acted at time)]
- """
- m = cls.re_actor_epoch.search(line)
- actor, epoch = m.groups()
- return (Actor._from_string(actor), time.gmtime(int(epoch)))
diff --git a/lib/git/objects/tag.py b/lib/git/objects/tag.py
index ecf6349d..f54d4b64 100644
--- a/lib/git/objects/tag.py
+++ b/lib/git/objects/tag.py
@@ -7,8 +7,7 @@
Module containing all object based types.
"""
import base
-import commit
-from utils import get_object_type_by_name
+import utils
class TagObject(base.Object):
"""
@@ -38,8 +37,9 @@ class TagObject(base.Object):
``tagger``
Actor identifying the tagger
- ``tagged_date`` : (tm_year, tm_mon, tm_mday, tm_hour, tm_min, tm_sec, tm_wday, tm_yday, tm_isdst)
- is the DateTime of the tag creation
+ ``tagged_date`` : int_seconds_since_epoch
+ is the DateTime of the tag creation - use time.gmtime to convert
+ it into a different format
"""
super(TagObject, self).__init__(repo, id )
self._set_self_from_args_(locals())
@@ -53,12 +53,12 @@ class TagObject(base.Object):
obj, hexsha = lines[0].split(" ") # object <hexsha>
type_token, type_name = lines[1].split(" ") # type <type_name>
- self.object = get_object_type_by_name(type_name)(self.repo, hexsha)
+ self.object = utils.get_object_type_by_name(type_name)(self.repo, hexsha)
self.tag = lines[2][4:] # tag <tag name>
tagger_info = lines[3][7:]# tagger <actor> <date>
- self.tagger, self.tagged_date = commit.Commit._actor(tagger_info)
+ self.tagger, self.tagged_date = utils.parse_actor_and_date(tagger_info)
# line 4 empty - check git source to figure out purpose
self.message = "\n".join(lines[5:])
diff --git a/lib/git/objects/utils.py b/lib/git/objects/utils.py
index 15c1d114..367ed2b7 100644
--- a/lib/git/objects/utils.py
+++ b/lib/git/objects/utils.py
@@ -6,7 +6,8 @@
"""
Module for general utility functions
"""
-import commit, tag, blob, tree
+import re
+from git.actor import Actor
def get_object_type_by_name(object_type_name):
"""
@@ -34,3 +35,20 @@ def get_object_type_by_name(object_type_name):
return tree.Tree
else:
raise ValueError("Cannot handle unknown object type: %s" % object_type_name)
+
+
+# precompiled regex
+_re_actor_epoch = re.compile(r'^.+? (.*) (\d+) .*$')
+
+def parse_actor_and_date(line):
+ """
+ Parse out the actor (author or committer) info from a line like::
+
+ author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700
+
+ Returns
+ [Actor, int_seconds_since_epoch]
+ """
+ m = _re_actor_epoch.search(line)
+ actor, epoch = m.groups()
+ return (Actor._from_string(actor), int(epoch))
diff --git a/lib/git/refs.py b/lib/git/refs.py
index 3c9eb817..a4d7bbb1 100644
--- a/lib/git/refs.py
+++ b/lib/git/refs.py
@@ -10,7 +10,7 @@ from objects.base import Object
from objects.utils import get_object_type_by_name
from utils import LazyMixin, Iterable
-class Ref(LazyMixin, Iterable):
+class Reference(LazyMixin, Iterable):
"""
Represents a named reference to any object
"""
@@ -71,8 +71,8 @@ class Ref(LazyMixin, Iterable):
always point to the actual object as it gets re-created on each query
"""
# have to be dynamic here as we may be a tag which can point to anything
- hexsha, typename, size = self.repo.git.get_object_header(self.path)
- return get_object_type_by_name(typename)(self.repo, hexsha)
+ # Our path will be resolved to the hexsha which will be used accordingly
+ return Object(self.repo, self.path)
@classmethod
def iter_items(cls, repo, common_path = "refs", **kwargs):
@@ -138,7 +138,7 @@ class Ref(LazyMixin, Iterable):
# return cls(repo, full_path, obj)
-class Head(Ref):
+class Head(Reference):
"""
A Head is a named reference to a Commit. Every Head instance contains a name
and a Commit object.
@@ -181,7 +181,7 @@ class Head(Ref):
-class TagRef(Ref):
+class TagRef(Reference):
"""
Class representing a lightweight tag reference which either points to a commit
or to a tag object. In the latter case additional information, like the signature
diff --git a/lib/git/repo.py b/lib/git/repo.py
index c74c7e8d..554c10cb 100644
--- a/lib/git/repo.py
+++ b/lib/git/repo.py
@@ -8,7 +8,6 @@ import os
import re
import gzip
import StringIO
-import time
from errors import InvalidGitRepositoryError, NoSuchPathError
from utils import touch, is_git_dir
@@ -117,212 +116,28 @@ class Repo(object):
"""
return Tag.list_items(self)
- def blame(self, commit, file):
+ def commit(self, rev=None):
"""
- The blame information for the given file at the given commit
+ The Commit object for the specified revision
- Returns
- list: [git.Commit, list: [<line>]]
- A list of tuples associating a Commit object with a list of lines that
- changed within the given commit. The Commit objects will be given in order
- of appearance.
- """
- data = self.git.blame(commit, '--', file, p=True)
- commits = {}
- blames = []
- info = None
-
- for line in data.splitlines(False):
- parts = self.re_whitespace.split(line, 1)
- firstpart = parts[0]
- if self.re_hexsha_only.search(firstpart):
- # handles
- # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7 - indicates blame-data start
- # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2
- digits = parts[-1].split(" ")
- if len(digits) == 3:
- info = {'id': firstpart}
- blames.append([None, []])
- # END blame data initialization
- else:
- m = self.re_author_committer_start.search(firstpart)
- if m:
- # handles:
- # author Tom Preston-Werner
- # author-mail <tom@mojombo.com>
- # author-time 1192271832
- # author-tz -0700
- # committer Tom Preston-Werner
- # committer-mail <tom@mojombo.com>
- # committer-time 1192271832
- # committer-tz -0700 - IGNORED BY US
- role = m.group(0)
- if firstpart.endswith('-mail'):
- info["%s_email" % role] = parts[-1]
- elif firstpart.endswith('-time'):
- info["%s_date" % role] = time.gmtime(int(parts[-1]))
- elif role == firstpart:
- info[role] = parts[-1]
- # END distinguish mail,time,name
- else:
- # handle
- # filename lib/grit.rb
- # summary add Blob
- # <and rest>
- if firstpart.startswith('filename'):
- info['filename'] = parts[-1]
- elif firstpart.startswith('summary'):
- info['summary'] = parts[-1]
- elif firstpart == '':
- if info:
- sha = info['id']
- c = commits.get(sha)
- if c is None:
- c = Commit( self, id=sha,
- author=Actor._from_string(info['author'] + ' ' + info['author_email']),
- authored_date=info['author_date'],
- committer=Actor._from_string(info['committer'] + ' ' + info['committer_email']),
- committed_date=info['committer_date'],
- message=info['summary'])
- commits[sha] = c
- # END if commit objects needs initial creation
- m = self.re_tab_full_line.search(line)
- text, = m.groups()
- blames[-1][0] = c
- blames[-1][1].append( text )
- info = None
- # END if we collected commit info
- # END distinguish filename,summary,rest
- # END distinguish author|committer vs filename,summary,rest
- # END distinguish hexsha vs other information
- return blames
-
- def commits(self, start='master', path='', max_count=None, skip=0):
- """
- A list of Commit objects representing the history of a given ref/commit
-
- ``start``
- is the branch/commit name (default 'master')
-
- ``path``
- is an optional path to limit the returned commits to
- Commits that do not contain that path will not be returned.
-
- ``max_count``
- is the maximum number of commits to return (default None)
-
- ``skip``
- is the number of commits to skip (default 0) which will effectively
- move your commit-window by the given number.
-
- Returns
- ``git.Commit[]``
- """
- options = {'max_count': max_count,
- 'skip': skip}
-
- if max_count is None:
- options.pop('max_count')
-
- return Commit.list_items(self, start, path, **options)
-
- def commits_between(self, frm, to):
- """
- The Commits objects that are reachable via ``to`` but not via ``frm``
- Commits are returned in chronological order.
-
- ``from``
- is the branch/commit name of the younger item
-
- ``to``
- is the branch/commit name of the older item
-
- Returns
- ``git.Commit[]``
- """
- return reversed(Commit.list_items(self, "%s..%s" % (frm, to)))
-
- def commits_since(self, start='master', path='', since='1970-01-01'):
- """
- The Commits objects that are newer than the specified date.
- Commits are returned in chronological order.
-
- ``start``
- is the branch/commit name (default 'master')
-
- ``path``
- is an optinal path to limit the returned commits to.
-
-
- ``since``
- is a string represeting a date/time
-
- Returns
- ``git.Commit[]``
- """
- options = {'since': since}
-
- return Commit.list_items(self, start, path, **options)
-
- def commit_count(self, start='master', path=''):
- """
- The number of commits reachable by the given branch/commit
-
- ``start``
- is the branch/commit name (default 'master')
-
- ``path``
- is an optional path
- Commits that do not contain the path will not contribute to the count.
-
- Returns
- ``int``
- """
- return Commit.count(self, start, path)
-
- def commit(self, id=None, path = ''):
- """
- The Commit object for the specified id
-
- ``id``
- is the SHA1 identifier of the commit or a ref or a ref name
- if None, it defaults to the active branch
+ ``rev``
+ revision specifier, see git-rev-parse for viable options.
-
- ``path``
- is an optional path, if set the returned commit must contain the path.
-
Returns
``git.Commit``
"""
- if id is None:
- id = self.active_branch
- options = {'max_count': 1}
-
- commits = Commit.list_items(self, id, path, **options)
-
- if not commits:
- raise ValueError, "Invalid identifier %s, or given path '%s' too restrictive" % ( id, path )
- return commits[0]
-
- def commit_deltas_from(self, other_repo, ref='master', other_ref='master'):
- """
- Returns a list of commits that is in ``other_repo`` but not in self
-
- Returns
- git.Commit[]
- """
- repo_refs = self.git.rev_list(ref, '--').strip().splitlines()
- other_repo_refs = other_repo.git.rev_list(other_ref, '--').strip().splitlines()
-
- diff_refs = list(set(other_repo_refs) - set(repo_refs))
- return map(lambda ref: Commit.list_items(other_repo, ref, max_count=1)[0], diff_refs)
+ if rev is None:
+ rev = self.active_branch
+
+ c = Object(self, rev)
+ assert c.type == "commit", "Revision %s did not point to a commit, but to %s" % (rev, c)
+ return c
- def tree(self, treeish=None):
+ def tree(self, ref=None):
"""
The Tree object for the given treeish reference
- ``treeish``
+ ``ref``
is a Ref instance defaulting to the active_branch if None.
Examples::
@@ -336,166 +151,56 @@ class Repo(object):
A ref is requried here to assure you point to a commit or tag. Otherwise
it is not garantueed that you point to the root-level tree.
- If you need a non-root level tree, find it by iterating the root tree.
- """
- if treeish is None:
- treeish = self.active_branch
- if not isinstance(treeish, Ref):
- raise ValueError( "Treeish reference required, got %r" % treeish )
+ If you need a non-root level tree, find it by iterating the root tree. Otherwise
+ it cannot know about its path relative to the repository root and subsequent
+ operations might have unexpected results.
+ """
+ if ref is None:
+ ref = self.active_branch
+ if not isinstance(ref, Reference):
+ raise ValueError( "Reference required, got %r" % ref )
# As we are directly reading object information, we must make sure
# we truly point to a tree object. We resolve the ref to a sha in all cases
# to assure the returned tree can be compared properly. Except for
# heads, ids should always be hexshas
- hexsha, typename, size = self.git.get_object_header( treeish )
+ hexsha, typename, size = self.git.get_object_header( ref )
if typename != "tree":
- hexsha, typename, size = self.git.get_object_header( str(treeish)+'^{tree}' )
+ # will raise if this is not a valid tree
+ hexsha, typename, size = self.git.get_object_header( str(ref)+'^{tree}' )
# END tree handling
- treeish = hexsha
+ ref = hexsha
# the root has an empty relative path and the default mode
- return Tree(self, treeish, 0, '')
-
-
- def diff(self, a, b, *paths):
- """
- The diff from commit ``a`` to commit ``b``, optionally restricted to the given file(s)
-
- ``a``
- is the base commit
- ``b``
- is the other commit
-
- ``paths``
- is an optional list of file paths on which to restrict the diff
-
- Returns
- ``str``
- """
- return self.git.diff(a, b, '--', *paths)
-
- def commit_diff(self, commit):
- """
- The commit diff for the given commit
- ``commit`` is the commit name/id
-
- Returns
- ``git.Diff[]``
- """
- return Commit.diff(self, commit)
-
- @classmethod
- def init_bare(self, path, mkdir=True, **kwargs):
- """
- Initialize a bare git repository at the given path
-
- ``path``
- is the full path to the repo (traditionally ends with /<name>.git)
-
- ``mkdir``
- if specified will create the repository directory if it doesn't
- already exists. Creates the directory with a mode=0755.
-
- ``kwargs``
- keyword arguments serving as additional options to the git init command
-
- Examples::
-
- git.Repo.init_bare('/var/git/myrepo.git')
-
- Returns
- ``git.Repo`` (the newly created repo)
- """
-
- if mkdir and not os.path.exists(path):
- os.makedirs(path, 0755)
-
- git = Git(path)
- output = git.init('--bare', **kwargs)
- return Repo(path)
- create = init_bare
-
- def fork_bare(self, path, **kwargs):
- """
- Fork a bare git repository from this repo
-
- ``path``
- is the full path of the new repo (traditionally ends with /<name>.git)
-
- ``kwargs``
- keyword arguments to be given to the git clone command
-
- Returns
- ``git.Repo`` (the newly forked repo)
- """
- options = {'bare': True}
- options.update(kwargs)
- self.git.clone(self.path, path, **options)
- return Repo(path)
-
- def archive_tar(self, treeish='master', prefix=None):
- """
- Archive the given treeish
-
- ``treeish``
- is the treeish name/id (default 'master')
+ return Tree(self, ref, 0, '')
- ``prefix``
- is the optional prefix to prepend to each filename in the archive
-
- Examples::
-
- >>> repo.archive_tar
- <String containing tar archive>
-
- >>> repo.archive_tar('a87ff14')
- <String containing tar archive for commit a87ff14>
-
- >>> repo.archive_tar('master', 'myproject/')
- <String containing tar bytes archive, whose files are prefixed with 'myproject/'>
-
- Returns
- str (containing bytes of tar archive)
- """
- options = {}
- if prefix:
- options['prefix'] = prefix
- return self.git.archive(treeish, **options)
-
- def archive_tar_gz(self, treeish='master', prefix=None):
+ def iter_commits(self, rev=None, paths='', **kwargs):
"""
- Archive and gzip the given treeish
-
- ``treeish``
- is the treeish name/id (default 'master')
-
- ``prefix``
- is the optional prefix to prepend to each filename in the archive
-
- Examples::
+ A list of Commit objects representing the history of a given ref/commit
- >>> repo.archive_tar_gz
- <String containing tar.gz archive>
+ ``rev``
+ revision specifier, see git-rev-parse for viable options.
+ If None, the active branch will be used.
- >>> repo.archive_tar_gz('a87ff14')
- <String containing tar.gz archive for commit a87ff14>
+ ``paths``
+ is an optional path or a list of paths to limit the returned commits to
+ Commits that do not contain that path or the paths will not be returned.
+
+ ``kwargs``
+ Arguments to be passed to git-rev-parse - common ones are
+ max_count and skip
- >>> repo.archive_tar_gz('master', 'myproject/')
- <String containing tar.gz archive and prefixed with 'myproject/'>
+ Note: to receive only commits between two named revisions, use the
+ "revA..revB" revision specifier
Returns
- str (containing the bytes of tar.gz archive)
+ ``git.Commit[]``
"""
- kwargs = {}
- if prefix:
- kwargs['prefix'] = prefix
- resultstr = self.git.archive(treeish, **kwargs)
- sio = StringIO.StringIO()
- gf = gzip.GzipFile(fileobj=sio, mode ='wb')
- gf.write(resultstr)
- gf.close()
- return sio.getvalue()
+ if rev is None:
+ rev = self.active_branch
+
+ return Commit.iter_items(self, rev, paths, **kwargs)
def _get_daemon_export(self):
filename = os.path.join(self.path, self.DAEMON_EXPORT_FILE)
@@ -590,6 +295,213 @@ class Repo(object):
Head to the active branch
"""
return Head( self, self.git.symbolic_ref('HEAD').strip() )
+
+
+ def diff(self, a, b, *paths):
+ """
+ The diff from commit ``a`` to commit ``b``, optionally restricted to the given file(s)
+
+ ``a``
+ is the base commit
+ ``b``
+ is the other commit
+
+ ``paths``
+ is an optional list of file paths on which to restrict the diff
+
+ Returns
+ ``str``
+ """
+ return self.git.diff(a, b, '--', *paths)
+
+ def commit_diff(self, commit):
+ """
+ The commit diff for the given commit
+ ``commit`` is the commit name/id
+
+ Returns
+ ``git.Diff[]``
+ """
+ return Commit.diff(self, commit)
+
+ def blame(self, rev, file):
+ """
+ The blame information for the given file at the given revision.
+
+ ``rev``
+ revision specifier, see git-rev-parse for viable options.
+
+ Returns
+ list: [git.Commit, list: [<line>]]
+ A list of tuples associating a Commit object with a list of lines that
+ changed within the given commit. The Commit objects will be given in order
+ of appearance.
+ """
+ data = self.git.blame(rev, '--', file, p=True)
+ commits = {}
+ blames = []
+ info = None
+
+ for line in data.splitlines(False):
+ parts = self.re_whitespace.split(line, 1)
+ firstpart = parts[0]
+ if self.re_hexsha_only.search(firstpart):
+ # handles
+ # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7 - indicates blame-data start
+ # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2
+ digits = parts[-1].split(" ")
+ if len(digits) == 3:
+ info = {'id': firstpart}
+ blames.append([None, []])
+ # END blame data initialization
+ else:
+ m = self.re_author_committer_start.search(firstpart)
+ if m:
+ # handles:
+ # author Tom Preston-Werner
+ # author-mail <tom@mojombo.com>
+ # author-time 1192271832
+ # author-tz -0700
+ # committer Tom Preston-Werner
+ # committer-mail <tom@mojombo.com>
+ # committer-time 1192271832
+ # committer-tz -0700 - IGNORED BY US
+ role = m.group(0)
+ if firstpart.endswith('-mail'):
+ info["%s_email" % role] = parts[-1]
+ elif firstpart.endswith('-time'):
+ info["%s_date" % role] = int(parts[-1])
+ elif role == firstpart:
+ info[role] = parts[-1]
+ # END distinguish mail,time,name
+ else:
+ # handle
+ # filename lib/grit.rb
+ # summary add Blob
+ # <and rest>
+ if firstpart.startswith('filename'):
+ info['filename'] = parts[-1]
+ elif firstpart.startswith('summary'):
+ info['summary'] = parts[-1]
+ elif firstpart == '':
+ if info:
+ sha = info['id']
+ c = commits.get(sha)
+ if c is None:
+ c = Commit( self, id=sha,
+ author=Actor._from_string(info['author'] + ' ' + info['author_email']),
+ authored_date=info['author_date'],
+ committer=Actor._from_string(info['committer'] + ' ' + info['committer_email']),
+ committed_date=info['committer_date'],
+ message=info['summary'])
+ commits[sha] = c
+ # END if commit objects needs initial creation
+ m = self.re_tab_full_line.search(line)
+ text, = m.groups()
+ blames[-1][0] = c
+ blames[-1][1].append( text )
+ info = None
+ # END if we collected commit info
+ # END distinguish filename,summary,rest
+ # END distinguish author|committer vs filename,summary,rest
+ # END distinguish hexsha vs other information
+ return blames
+
+ @classmethod
+ def init(cls, path=None, mkdir=True, **kwargs):
+ """
+ Initialize a git repository at the given path if specified
+
+ ``path``
+ is the full path to the repo (traditionally ends with /<name>.git)
+ or None in which case the repository will be created in the current
+ working directory
+
+ ``mkdir``
+ if specified will create the repository directory if it doesn't
+ already exists. Creates the directory with a mode=0755.
+ Only effective if a path is explicitly given
+
+ ``kwargs``
+ keyword arguments serving as additional options to the git-init command
+
+ Examples::
+
+ git.Repo.init('/var/git/myrepo.git',bare=True)
+
+ Returns
+ ``git.Repo`` (the newly created repo)
+ """
+
+ if mkdir and path and not os.path.exists(path):
+ os.makedirs(path, 0755)
+
+ git = Git(path)
+ output = git.init(path, **kwargs)
+ return Repo(path)
+
+ def clone(self, path, **kwargs):
+ """
+ Create a clone from this repository.
+
+ ``path``
+ is the full path of the new repo (traditionally ends with /<name>.git)
+
+ ``kwargs``
+ keyword arguments to be given to the git-clone command
+
+ Returns
+ ``git.Repo`` (the newly cloned repo)
+ """
+ self.git.clone(self.path, path, **kwargs)
+ return Repo(path)
+
+
+ def archive(self, ostream, treeish=None, prefix=None, **kwargs):
+ """
+ Archive the tree at the given revision.
+ ``ostream``
+ file compatible stream object to which the archive will be written
+
+ ``treeish``
+ is the treeish name/id, defaults to active branch
+
+ ``prefix``
+ is the optional prefix to prepend to each filename in the archive
+
+ ``kwargs``
+ Additional arguments passed to git-archive
+ NOTE: Use the 'format' argument to define the kind of format. Use
+ specialized ostreams to write any format supported by python
+
+ Examples::
+
+ >>> repo.archive(open("archive"
+ <String containing tar.gz archive>
+
+ >>> repo.archive_tar_gz('a87ff14')
+ <String containing tar.gz archive for commit a87ff14>
+
+ >>> repo.archive_tar_gz('master', 'myproject/')
+ <String containing tar.gz archive and prefixed with 'myproject/'>
+
+ Raise
+ GitCommandError in case something went wrong
+
+ """
+ if treeish is None:
+ treeish = self.active_branch
+ if prefix and 'prefix' not in kwargs:
+ kwargs['prefix'] = prefix
+ kwargs['as_process'] = True
+ kwargs['output_stream'] = ostream
+
+ proc = self.git.archive(treeish, **kwargs)
+ status = proc.wait()
+ if status != 0:
+ raise GitCommandError( "git-archive", status, proc.stderr.read() )
+
+
def __repr__(self):
return '<git.Repo "%s">' % self.path