# repo.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php import os import re import gzip import StringIO from errors import InvalidGitRepositoryError, NoSuchPathError from utils import touch, is_git_dir from cmd import Git from actor import Actor from refs import * from objects import * class Repo(object): """ Represents a git repository and allows you to query references, gather commit information, generate diffs, create and clone repositories query the log. """ DAEMON_EXPORT_FILE = 'git-daemon-export-ok' # precompiled regex re_whitespace = re.compile(r'\s+') re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$') re_author_committer_start = re.compile(r'^(author|committer)') re_tab_full_line = re.compile(r'^\t(.*)$') def __init__(self, path=None): """ Create a new Repo instance ``path`` is the path to either the root git directory or the bare git repo Examples:: repo = Repo("/Users/mtrier/Development/git-python") repo = Repo("/Users/mtrier/Development/git-python.git") Raises InvalidGitRepositoryError or NoSuchPathError Returns ``git.Repo`` """ epath = os.path.abspath(os.path.expanduser(path or os.getcwd())) if not os.path.exists(epath): raise NoSuchPathError(epath) self.path = None curpath = epath while curpath: if is_git_dir(curpath): self.bare = True self.path = curpath self.wd = curpath break gitpath = os.path.join(curpath, '.git') if is_git_dir(gitpath): self.bare = False self.path = gitpath self.wd = curpath break curpath, dummy = os.path.split(curpath) if not dummy: break if self.path is None: raise InvalidGitRepositoryError(epath) self.git = Git(self.wd) # Description property def _get_description(self): filename = os.path.join(self.path, 'description') return file(filename).read().rstrip() def _set_description(self, descr): filename = os.path.join(self.path, 'description') file(filename, 'w').write(descr+'\n') description = property(_get_description, _set_description, doc="the project's description") del _get_description del _set_description @property def heads(self): """ A list of ``Head`` objects representing the branch heads in this repo Returns ``git.Head[]`` """ return Head.list_items(self) # alias heads branches = heads @property def tags(self): """ A list of ``Tag`` objects that are available in this repo Returns ``git.Tag[]`` """ return Tag.list_items(self) def blame(self, rev, file): """ The blame information for the given file at the given revision. ``rev`` revision specifier, see git-rev-parse for viable options. Returns list: [git.Commit, list: []] A list of tuples associating a Commit object with a list of lines that changed within the given commit. The Commit objects will be given in order of appearance. """ data = self.git.blame(ref, '--', file, p=True) commits = {} blames = [] info = None for line in data.splitlines(False): parts = self.re_whitespace.split(line, 1) firstpart = parts[0] if self.re_hexsha_only.search(firstpart): # handles # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7 - indicates blame-data start # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2 digits = parts[-1].split(" ") if len(digits) == 3: info = {'id': firstpart} blames.append([None, []]) # END blame data initialization else: m = self.re_author_committer_start.search(firstpart) if m: # handles: # author Tom Preston-Werner # author-mail # author-time 1192271832 # author-tz -0700 # committer Tom Preston-Werner # committer-mail # committer-time 1192271832 # committer-tz -0700 - IGNORED BY US role = m.group(0) if firstpart.endswith('-mail'): info["%s_email" % role] = parts[-1] elif firstpart.endswith('-time'): info["%s_date" % role] = int(parts[-1]) elif role == firstpart: info[role] = parts[-1] # END distinguish mail,time,name else: # handle # filename lib/grit.rb # summary add Blob # if firstpart.startswith('filename'): info['filename'] = parts[-1] elif firstpart.startswith('summary'): info['summary'] = parts[-1] elif firstpart == '': if info: sha = info['id'] c = commits.get(sha) if c is None: c = Commit( self, id=sha, author=Actor._from_string(info['author'] + ' ' + info['author_email']), authored_date=info['author_date'], committer=Actor._from_string(info['committer'] + ' ' + info['committer_email']), committed_date=info['committer_date'], message=info['summary']) commits[sha] = c # END if commit objects needs initial creation m = self.re_tab_full_line.search(line) text, = m.groups() blames[-1][0] = c blames[-1][1].append( text ) info = None # END if we collected commit info # END distinguish filename,summary,rest # END distinguish author|committer vs filename,summary,rest # END distinguish hexsha vs other information return blames def iter_commits(self, rev=None, paths='', **kwargs): """ A list of Commit objects representing the history of a given ref/commit ``rev`` revision specifier, see git-rev-parse for viable options. If None, the active branch will be used. ``paths`` is an optional path or a list of paths to limit the returned commits to Commits that do not contain that path or the paths will not be returned. ``kwargs`` Arguments to be passed to git-rev-parse - common ones are max_count and skip Returns ``git.Commit[]`` """ if rev is None: rev = self.active_branch return Commit.list_items(self, rev, paths, **kwargs) def commits_between(self, frm, to, *args, **kwargs): """ The Commits objects that are reachable via ``to`` but not via ``frm`` Commits are returned in chronological order. ``from`` is the Ref/Commit name of the younger item ``to`` is the Ref/Commit name of the older item Returns ``git.Commit[]`` """ return reversed(Commit.list_items(self, "%s..%s" % (frm, to))) def commit(self, rev=None): """ The Commit object for the specified revision ``rev`` revision specifier, see git-rev-parse for viable options. Returns ``git.Commit`` """ if rev is None: rev = self.active_branch # NOTE: currently we are not checking wheter rev really points to a commit # If not, the system will barf on access of the object, but we don't do that # here to safe cycles c = Commit(self, rev) return c def tree(self, ref=None): """ The Tree object for the given treeish reference ``ref`` is a Ref instance defaulting to the active_branch if None. Examples:: repo.tree(repo.heads[0]) Returns ``git.Tree`` NOTE A ref is requried here to assure you point to a commit or tag. Otherwise it is not garantueed that you point to the root-level tree. If you need a non-root level tree, find it by iterating the root tree. Otherwise it cannot know about its path relative to the repository root and subsequent operations might have unexpected results. """ if ref is None: ref = self.active_branch if not isinstance(ref, Reference): raise ValueError( "Reference required, got %r" % ref ) # As we are directly reading object information, we must make sure # we truly point to a tree object. We resolve the ref to a sha in all cases # to assure the returned tree can be compared properly. Except for # heads, ids should always be hexshas hexsha, typename, size = self.git.get_object_header( ref ) if typename != "tree": # will raise if this is not a valid tree hexsha, typename, size = self.git.get_object_header( str(ref)+'^{tree}' ) # END tree handling ref = hexsha # the root has an empty relative path and the default mode return Tree(self, ref, 0, '') def commit_deltas_from(self, other_repo, ref='master', other_ref='master'): """ Returns a list of commits that is in ``other_repo`` but not in self Returns git.Commit[] """ repo_refs = self.git.rev_list(ref, '--').strip().splitlines() other_repo_refs = other_repo.git.rev_list(other_ref, '--').strip().splitlines() diff_refs = list(set(other_repo_refs) - set(repo_refs)) return map(lambda ref: Commit(other_repo, ref ), diff_refs) def diff(self, a, b, *paths): """ The diff from commit ``a`` to commit ``b``, optionally restricted to the given file(s) ``a`` is the base commit ``b`` is the other commit ``paths`` is an optional list of file paths on which to restrict the diff Returns ``str`` """ return self.git.diff(a, b, '--', *paths) def commit_diff(self, commit): """ The commit diff for the given commit ``commit`` is the commit name/id Returns ``git.Diff[]`` """ return Commit.diff(self, commit) @classmethod def init_bare(self, path, mkdir=True, **kwargs): """ Initialize a bare git repository at the given path ``path`` is the full path to the repo (traditionally ends with /.git) ``mkdir`` if specified will create the repository directory if it doesn't already exists. Creates the directory with a mode=0755. ``kwargs`` keyword arguments serving as additional options to the git init command Examples:: git.Repo.init_bare('/var/git/myrepo.git') Returns ``git.Repo`` (the newly created repo) """ if mkdir and not os.path.exists(path): os.makedirs(path, 0755) git = Git(path) output = git.init('--bare', **kwargs) return Repo(path) create = init_bare def fork_bare(self, path, **kwargs): """ Fork a bare git repository from this repo ``path`` is the full path of the new repo (traditionally ends with /.git) ``kwargs`` keyword arguments to be given to the git clone command Returns ``git.Repo`` (the newly forked repo) """ options = {'bare': True} options.update(kwargs) self.git.clone(self.path, path, **options) return Repo(path) def archive_tar(self, treeish='master', prefix=None): """ Archive the given treeish ``treeish`` is the treeish name/id (default 'master') ``prefix`` is the optional prefix to prepend to each filename in the archive Examples:: >>> repo.archive_tar >>> repo.archive_tar('a87ff14') >>> repo.archive_tar('master', 'myproject/') Returns str (containing bytes of tar archive) """ options = {} if prefix: options['prefix'] = prefix return self.git.archive(treeish, **options) def archive_tar_gz(self, treeish='master', prefix=None): """ Archive and gzip the given treeish ``treeish`` is the treeish name/id (default 'master') ``prefix`` is the optional prefix to prepend to each filename in the archive Examples:: >>> repo.archive_tar_gz >>> repo.archive_tar_gz('a87ff14') >>> repo.archive_tar_gz('master', 'myproject/') Returns str (containing the bytes of tar.gz archive) """ kwargs = {} if prefix: kwargs['prefix'] = prefix resultstr = self.git.archive(treeish, **kwargs) sio = StringIO.StringIO() gf = gzip.GzipFile(fileobj=sio, mode ='wb') gf.write(resultstr) gf.close() return sio.getvalue() def _get_daemon_export(self): filename = os.path.join(self.path, self.DAEMON_EXPORT_FILE) return os.path.exists(filename) def _set_daemon_export(self, value): filename = os.path.join(self.path, self.DAEMON_EXPORT_FILE) fileexists = os.path.exists(filename) if value and not fileexists: touch(filename) elif not value and fileexists: os.unlink(filename) daemon_export = property(_get_daemon_export, _set_daemon_export, doc="If True, git-daemon may export this repository") del _get_daemon_export del _set_daemon_export def _get_alternates(self): """ The list of alternates for this repo from which objects can be retrieved Returns list of strings being pathnames of alternates """ alternates_path = os.path.join(self.path, 'objects', 'info', 'alternates') if os.path.exists(alternates_path): try: f = open(alternates_path) alts = f.read() finally: f.close() return alts.strip().splitlines() else: return [] def _set_alternates(self, alts): """ Sets the alternates ``alts`` is the array of string paths representing the alternates at which git should look for objects, i.e. /home/user/repo/.git/objects Raises NoSuchPathError Returns None """ for alt in alts: if not os.path.exists(alt): raise NoSuchPathError("Could not set alternates. Alternate path %s must exist" % alt) if not alts: os.remove(os.path.join(self.path, 'objects', 'info', 'alternates')) else: try: f = open(os.path.join(self.path, 'objects', 'info', 'alternates'), 'w') f.write("\n".join(alts)) finally: f.close() alternates = property(_get_alternates, _set_alternates, doc="Retrieve a list of alternates paths or set a list paths to be used as alternates") @property def is_dirty(self): """ Return the status of the index. Returns ``True``, if the index has any uncommitted changes, otherwise ``False`` NOTE Working tree changes that have not been staged will not be detected ! """ if self.bare: # Bare repositories with no associated working directory are # always consired to be clean. return False return len(self.git.diff('HEAD', '--').strip()) > 0 @property def active_branch(self): """ The name of the currently active branch. Returns Head to the active branch """ return Head( self, self.git.symbolic_ref('HEAD').strip() ) def __repr__(self): return '' % self.path