# repo.py
# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
#
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php

import os
import re
import gzip
import StringIO
from errors import InvalidGitRepositoryError, NoSuchPathError
from utils import touch, is_git_dir
from cmd import Git
from actor import Actor
from refs import *
from objects import *


class Repo(object):
    """
    Represents a git repository and allows you to query references,
    gather commit information, generate diffs, create and clone repositories
    and query the log.
    """
    DAEMON_EXPORT_FILE = 'git-daemon-export-ok'

    # precompiled regex
    re_whitespace = re.compile(r'\s+')
    re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$')
    re_author_committer_start = re.compile(r'^(author|committer)')
    re_tab_full_line = re.compile(r'^\t(.*)$')

    def __init__(self, path=None):
        """
        Create a new Repo instance

        ``path``
            is the path to either the root git directory or the bare git repo.
            Defaults to the current working directory. The given directory and
            then each of its parents is probed until a git directory is found.

        Examples::

            repo = Repo("/Users/mtrier/Development/git-python")
            repo = Repo("/Users/mtrier/Development/git-python.git")

        Raises
            InvalidGitRepositoryError or NoSuchPathError

        Returns
            ``git.Repo``
        """
        epath = os.path.abspath(os.path.expanduser(path or os.getcwd()))

        if not os.path.exists(epath):
            raise NoSuchPathError(epath)

        self.path = None
        curpath = epath
        while curpath:
            # the directory itself is a git dir: treat it as a bare repository
            if is_git_dir(curpath):
                self.bare = True
                self.path = curpath
                self.wd = curpath
                break
            # the directory contains a '.git' dir: repository with working tree
            gitpath = os.path.join(curpath, '.git')
            if is_git_dir(gitpath):
                self.bare = False
                self.path = gitpath
                self.wd = curpath
                break
            # walk up one level; an empty tail means there is nothing left to strip
            curpath, dummy = os.path.split(curpath)
            if not dummy:
                break
        # END while curpath

        if self.path is None:
            raise InvalidGitRepositoryError(epath)

        self.git = Git(self.wd)

    # Description property
    def _get_description(self):
        filename = os.path.join(self.path, 'description')
        # open outside of the try block so a failing open is reported as such
        fp = open(filename)
        try:
            return fp.read().rstrip()
        finally:
            fp.close()

    def _set_description(self, descr):
        filename = os.path.join(self.path, 'description')
        fp = open(filename, 'w')
        try:
            fp.write(descr + '\n')
        finally:
            fp.close()

    description = property(_get_description, _set_description,
                           doc="the project's description")
    del _get_description
    del _set_description

    @property
    def heads(self):
        """
        A list of ``Head`` objects representing the branch heads in
        this repo

        Returns
            ``git.Head[]``
        """
        return Head.list_items(self)

    # alias heads
    branches = heads

    @property
    def tags(self):
        """
        A list of ``Tag`` objects that are available in this repo

        Returns
            ``git.Tag[]``
        """
        return Tag.list_items(self)

    def blame(self, ref, file):
        """
        The blame information for the given file at the given ref.

        ``ref``
            Ref object or Commit

        ``file``
            path of the file to blame, handed to ``git blame`` after ``--``

        Returns
            list: [[git.Commit, list of line strings], ...]
            A list of two-element lists associating a Commit object with a
            list of lines that changed within the given commit. The Commit
            objects will be given in order of appearance.
        """
        data = self.git.blame(ref, '--', file, p=True)
        commits = {}    # sha -> Commit, so each commit is only created once
        blames = []
        info = None     # data collected for the blame group being parsed

        for line in data.splitlines(False):
            parts = self.re_whitespace.split(line, 1)
            firstpart = parts[0]
            if self.re_hexsha_only.search(firstpart):
                # handles
                # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7    - indicates blame-data start
                # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2
                digits = parts[-1].split(" ")
                if len(digits) == 3:
                    info = {'id': firstpart}
                    blames.append([None, []])
                # END blame data initialization
            else:
                m = self.re_author_committer_start.search(firstpart)
                if m:
                    # handles:
                    # author Tom Preston-Werner
                    # author-mail
                    # author-time 1192271832
                    # author-tz -0700
                    # committer Tom Preston-Werner
                    # committer-mail
                    # committer-time 1192271832
                    # committer-tz -0700  - IGNORED BY US
                    role = m.group(0)
                    if firstpart.endswith('-mail'):
                        info["%s_email" % role] = parts[-1]
                    elif firstpart.endswith('-time'):
                        info["%s_date" % role] = int(parts[-1])
                    elif role == firstpart:
                        info[role] = parts[-1]
                    # END distinguish mail,time,name
                else:
                    # handle
                    # filename lib/grit.rb
                    # summary add Blob
                    #
                    if firstpart.startswith('filename'):
                        info['filename'] = parts[-1]
                    elif firstpart.startswith('summary'):
                        info['summary'] = parts[-1]
                    elif firstpart == '':
                        # NOTE(review): a text line that arrives while ``info``
                        # is None is not recorded - confirm this is intended
                        if info:
                            sha = info['id']
                            c = commits.get(sha)
                            if c is None:
                                c = Commit(self, id=sha,
                                           author=Actor._from_string(info['author'] + ' ' + info['author_email']),
                                           authored_date=info['author_date'],
                                           committer=Actor._from_string(info['committer'] + ' ' + info['committer_email']),
                                           committed_date=info['committer_date'],
                                           message=info['summary'])
                                commits[sha] = c
                            # END if commit objects needs initial creation
                            m = self.re_tab_full_line.search(line)
                            text, = m.groups()
                            blames[-1][0] = c
                            blames[-1][1].append(text)
                            info = None
                        # END if we collected commit info
                    # END distinguish filename,summary,rest
                # END distinguish author|committer vs filename,summary,rest
            # END distinguish hexsha vs other information
        return blames

    def commits(self, start=None, paths='', max_count=None, skip=0):
        """
        A list of Commit objects representing the history of a given ref/commit

        ``start``
            is a Ref or Commit to start the commits from. If start is None,
            the active branch will be used

        ``paths``
            is an optional path or a list of paths to limit the returned commits to.
            Commits that do not contain that path or the paths will not be returned.

        ``max_count``
            is the maximum number of commits to return (default None)

        ``skip``
            is the number of commits to skip (default 0) which will effectively
            move your commit-window by the given number.

        Returns
            ``git.Commit[]``
        """
        options = {'skip': skip}
        # only pass max_count on if the caller actually limited the result
        if max_count is not None:
            options['max_count'] = max_count
        if start is None:
            start = self.active_branch

        return Commit.list_items(self, start, paths, **options)

    def commits_between(self, frm, to, *args, **kwargs):
        """
        The Commits objects that are reachable via ``to`` but not via ``frm``
        Commits are returned in chronological order.

        ``frm``
            is the Ref/Commit name of the younger item

        ``to``
            is the Ref/Commit name of the older item

        Additional positional and keyword arguments are accepted but unused.

        Returns
            reverse iterator over the ``git.Commit`` items of the range
        """
        return reversed(Commit.list_items(self, "%s..%s" % (frm, to)))

    def commit(self, id=None, paths=''):
        """
        The Commit object for the specified id

        ``id``
            is the SHA1 identifier of the commit or a ref or a ref name
            if None, it defaults to the active branch

        ``paths``
            is an optional path or a list of paths,
            if set the returned commit must contain the path or paths

        Raises
            ValueError if no commit matches the given id and paths

        Returns
            ``git.Commit``
        """
        if id is None:
            id = self.active_branch

        commits = Commit.list_items(self, id, paths, max_count=1)
        if not commits:
            # report the ``paths`` argument; the undefined name ``path`` used
            # here before turned this error into a NameError
            raise ValueError("Invalid identifier %s, or given path '%s' too restrictive" % (id, paths))
        return commits[0]

    def commit_deltas_from(self, other_repo, ref='master', other_ref='master'):
        """
        Returns a list of commits that is in ``other_repo`` but not in self

        ``other_repo``
            is the Repo to compare against

        ``ref``
            is the ref whose history is listed in this repo

        ``other_ref``
            is the ref whose history is listed in ``other_repo``

        Returns
            git.Commit[]
        """
        repo_refs = self.git.rev_list(ref, '--').strip().splitlines()
        other_repo_refs = other_repo.git.rev_list(other_ref, '--').strip().splitlines()

        diff_refs = set(other_repo_refs) - set(repo_refs)
        return [Commit(other_repo, sha) for sha in diff_refs]

    def tree(self, treeish=None):
        """
        The Tree object for the given treeish reference

        ``treeish``
            is a Ref instance defaulting to the active_branch if None.

        Examples::

            repo.tree(repo.heads[0])

        Raises
            ValueError if ``treeish`` is not a Ref instance

        Returns
            ``git.Tree``

        NOTE
            A ref is required here to assure you point to a commit or tag.
            Otherwise it is not guaranteed that you point to the root-level tree.

            If you need a non-root level tree, find it by iterating the root tree.
        """
        if treeish is None:
            treeish = self.active_branch
        if not isinstance(treeish, Ref):
            raise ValueError("Treeish reference required, got %r" % treeish)

        # As we are directly reading object information, we must make sure
        # we truly point to a tree object. We resolve the ref to a sha in all cases
        # to assure the returned tree can be compared properly. Except for
        # heads, ids should always be hexshas
        hexsha, typename, size = self.git.get_object_header(treeish)
        if typename != "tree":
            hexsha, typename, size = self.git.get_object_header(str(treeish) + '^{tree}')
        # END tree handling
        treeish = hexsha

        # the root has an empty relative path and the default mode
        return Tree(self, treeish, 0, '')

    def diff(self, a, b, *paths):
        """
        The diff from commit ``a`` to commit ``b``, optionally restricted to the given file(s)

        ``a``
            is the base commit

        ``b``
            is the other commit

        ``paths``
            is an optional list of file paths on which to restrict the diff

        Returns
            ``str``
        """
        return self.git.diff(a, b, '--', *paths)

    def commit_diff(self, commit):
        """
        The commit diff for the given commit

        ``commit``
            is the commit name/id

        Returns
            ``git.Diff[]``
        """
        return Commit.diff(self, commit)

    @classmethod
    def init_bare(cls, path, mkdir=True, **kwargs):
        """
        Initialize a bare git repository at the given path

        ``path``
            is the full path to the repo (traditionally ends with /.git)

        ``mkdir``
            if specified will create the repository directory if it doesn't
            already exist. Creates the directory with a mode=0755.

        ``kwargs``
            keyword arguments serving as additional options to the git init command

        Examples::

            git.Repo.init_bare('/var/git/myrepo.git')

        Returns
            ``git.Repo`` (the newly created repo)
        """
        if mkdir and not os.path.exists(path):
            # octal 0755, spelled so that it parses on Python 2 and 3 alike
            os.makedirs(path, int('755', 8))

        git = Git(path)
        git.init('--bare', **kwargs)
        return Repo(path)

    create = init_bare

    def fork_bare(self, path, **kwargs):
        """
        Fork a bare git repository from this repo

        ``path``
            is the full path of the new repo (traditionally ends with /.git)

        ``kwargs``
            keyword arguments to be given to the git clone command

        Returns
            ``git.Repo`` (the newly forked repo)
        """
        options = {'bare': True}
        options.update(kwargs)
        self.git.clone(self.path, path, **options)
        return Repo(path)

    def archive_tar(self, treeish='master', prefix=None):
        """
        Archive the given treeish

        ``treeish``
            is the treeish name/id (default 'master')

        ``prefix``
            is the optional prefix to prepend to each filename in the archive

        Examples::

            repo.archive_tar()
            repo.archive_tar('a87ff14')
            repo.archive_tar('master', 'myproject/')

        Returns
            str (containing bytes of tar archive)
        """
        options = {}
        if prefix:
            options['prefix'] = prefix
        return self.git.archive(treeish, **options)

    def archive_tar_gz(self, treeish='master', prefix=None):
        """
        Archive and gzip the given treeish

        ``treeish``
            is the treeish name/id (default 'master')

        ``prefix``
            is the optional prefix to prepend to each filename in the archive

        Examples::

            repo.archive_tar_gz()
            repo.archive_tar_gz('a87ff14')
            repo.archive_tar_gz('master', 'myproject/')

        Returns
            str (containing the bytes of tar.gz archive)
        """
        tar_bytes = self.archive_tar(treeish, prefix)

        sio = StringIO.StringIO()
        gf = gzip.GzipFile(fileobj=sio, mode='wb')
        try:
            gf.write(tar_bytes)
        finally:
            # closing flushes the gzip trailer into the buffer
            gf.close()
        return sio.getvalue()

    def _get_daemon_export(self):
        filename = os.path.join(self.path, self.DAEMON_EXPORT_FILE)
        return os.path.exists(filename)

    def _set_daemon_export(self, value):
        filename = os.path.join(self.path, self.DAEMON_EXPORT_FILE)
        fileexists = os.path.exists(filename)
        # only touch the filesystem if the requested state differs
        if value and not fileexists:
            touch(filename)
        elif not value and fileexists:
            os.unlink(filename)

    daemon_export = property(_get_daemon_export, _set_daemon_export,
                             doc="If True, git-daemon may export this repository")
    del _get_daemon_export
    del _set_daemon_export

    def _get_alternates(self):
        """
        The list of alternates for this repo from which objects can be retrieved

        Returns
            list of strings being pathnames of alternates
        """
        alternates_path = os.path.join(self.path, 'objects', 'info', 'alternates')

        if not os.path.exists(alternates_path):
            return []

        # open before entering the try block: if open() fails there is nothing
        # to close, and the original error must not be masked
        f = open(alternates_path)
        try:
            alts = f.read()
        finally:
            f.close()
        return alts.strip().splitlines()

    def _set_alternates(self, alts):
        """
        Sets the alternates

        ``alts``
            is the array of string paths representing the alternates at which
            git should look for objects, i.e. /home/user/repo/.git/objects

        Raises
            NoSuchPathError

        Returns
            None
        """
        for alt in alts:
            if not os.path.exists(alt):
                raise NoSuchPathError("Could not set alternates. Alternate path %s must exist" % alt)

        alternates_path = os.path.join(self.path, 'objects', 'info', 'alternates')
        if not alts:
            # clearing alternates that were never set is not an error
            if os.path.exists(alternates_path):
                os.remove(alternates_path)
        else:
            f = open(alternates_path, 'w')
            try:
                f.write("\n".join(alts))
            finally:
                f.close()

    alternates = property(_get_alternates, _set_alternates,
                          doc="Retrieve a list of alternates paths or set a list paths to be used as alternates")

    @property
    def is_dirty(self):
        """
        Return the status of the index.

        Returns
            ``True``, if the index has any uncommitted changes,
            otherwise ``False``

        NOTE
            Working tree changes that have not been staged will not be detected !
        """
        if self.bare:
            # Bare repositories with no associated working directory are
            # always considered to be clean.
            return False

        return len(self.git.diff('HEAD', '--').strip()) > 0

    @property
    def active_branch(self):
        """
        The currently active branch.

        Returns
            Head to the active branch
        """
        return Head(self, self.git.symbolic_ref('HEAD').strip())

    def __repr__(self):
        return '<git.Repo "%s">' % self.path