diff options
author | Sebastian Thiel <byronimo@gmail.com> | 2015-01-20 20:56:33 +0100 |
---|---|---|
committer | Sebastian Thiel <byronimo@gmail.com> | 2015-01-20 20:56:33 +0100 |
commit | 9fbae711b76a4f2fa9345f43da6d2cdedd75d6c3 (patch) | |
tree | e203a3ea4ab21dc4db72e7df3ec52237e7cd65ae /git | |
parent | ea29541e213df928d356b3c12d4d074001395d3c (diff) | |
download | gitpython-9fbae711b76a4f2fa9345f43da6d2cdedd75d6c3.tar.gz |
Greatly improved possible safety of Submodule.update(), which is used by default.
Previously, the implementation would gladly reset new commits in submodules,
and/or reset a dirty working tree.
Now the new force_reset/force flag has to be specified explicitly to get back
to the old behaviour.
All submodule tests except for one are working.
Diffstat (limited to 'git')
-rw-r--r-- | git/config.py | 4 | ||||
-rw-r--r-- | git/exc.py | 11 | ||||
-rw-r--r-- | git/objects/submodule/base.py | 71 | ||||
-rw-r--r-- | git/objects/submodule/root.py | 22 | ||||
-rw-r--r-- | git/repo/fun.py | 2 | ||||
-rw-r--r-- | git/test/test_submodule.py | 113 | ||||
-rw-r--r-- | git/util.py | 5 |
7 files changed, 170 insertions, 58 deletions
diff --git a/git/config.py b/git/config.py index ed61bf93..38dd1b44 100644 --- a/git/config.py +++ b/git/config.py @@ -546,10 +546,12 @@ class GitConfigParser(with_metaclass(MetaParserBuilder, cp.RawConfigParser, obje :param option: Name of the options whose value to set :param value: Value to set the option to. It must be a string or convertible - to a string""" + to a string + :return: this instance""" if not self.has_section(section): self.add_section(section) self.set(section, option, self._value_to_string(value)) + return self def rename_section(self, section, new_name): """rename the given section to new_name @@ -84,3 +84,14 @@ class HookExecutionError(Exception): def __str__(self): return ("'%s' hook returned with exit code %i\nstdout: '%s'\nstderr: '%s'" % (self.command, self.status, self.stdout, self.stderr)) + + +class RepositoryDirtyError(Exception): + """Thrown whenever an operation on a repository fails as it has uncommited changes that would be overwritten""" + + def __init__(self, repo, message): + self.repo = repo + self.message = message + + def __str__(self): + return "Operation cannot be performed on %r: %s" % (self.repo, self.message) diff --git a/git/objects/submodule/base.py b/git/objects/submodule/base.py index 285d2423..64773411 100644 --- a/git/objects/submodule/base.py +++ b/git/objects/submodule/base.py @@ -24,7 +24,8 @@ from git.config import ( ) from git.exc import ( InvalidGitRepositoryError, - NoSuchPathError + NoSuchPathError, + RepositoryDirtyError ) from git.compat import ( string_types, @@ -36,6 +37,7 @@ import git import os import logging +import tempfile __all__ = ["Submodule", "UpdateProgress"] @@ -424,8 +426,8 @@ class Submodule(util.IndexObject, Iterable, Traversable): return sm - def update(self, recursive=False, init=True, to_latest_revision=False, progress=None, - dry_run=False): + def update(self, recursive=False, init=True, to_latest_revision=False, progress=None, dry_run=False, + force=False): """Update the repository of this submodule to point to the checkout we point at with the binsha of this instance. @@ -440,6 +442,12 @@ class Submodule(util.IndexObject, Iterable, Traversable): :param progress: UpdateProgress instance or None of no progress should be shown :param dry_run: if True, the operation will only be simulated, but not performed. All performed operations are read-only + :param force: + If True, we may reset heads even if the repository in question is dirty. Additinoally we will be allowed + to set a tracking branch which is ahead of its remote branch back into the past or the location of the + remote branch. This will essentially 'forget' commits. + If False, local tracking branches that are in the future of their respective remote branches will simply + not be moved. :note: does nothing in bare repositories :note: method is definitely not atomic if recurisve is True :return: self""" @@ -565,23 +573,45 @@ class Submodule(util.IndexObject, Iterable, Traversable): # update the working tree # handles dry_run if mrepo is not None and mrepo.head.commit.binsha != binsha: + # We must assure that our destination sha (the one to point to) is in the future of our current head. + # Otherwise, we will reset changes that might have been done on the submodule, but were not yet pushed + # We also handle the case that history has been rewritten, leaving no merge-base. In that case + # we behave conservatively, protecting possible changes the user had done + may_reset = True + if mrepo.head.commit.binsha != self.NULL_BIN_SHA: + base_commit = mrepo.merge_base(mrepo.head.commit, hexsha) + if len(base_commit) == 0 or base_commit[0].hexsha == hexsha: + if force: + log.debug("Will force checkout or reset on local branch that is possibly in the future of" + + "the commit it will be checked out to, effectively 'forgetting' new commits") + else: + log.info("Skipping %s on branch '%s' of submodule repo '%s' as it contains un-pushed commits", + is_detached and "checkout" or "reset", mrepo.head, mrepo) + may_reset = False + # end handle force + # end handle if we are in the future + + if may_reset and not force and mrepo.is_dirty(index=True, working_tree=True, untracked_files=True): + raise RepositoryDirtyError(mrepo, "Cannot reset a dirty repository") + # end handle force and dirty state + # end handle empty repo + + # end verify future/past progress.update(BEGIN | UPDWKTREE, 0, 1, prefix + "Updating working tree at %s for submodule %r to revision %s" % (self.path, self.name, hexsha)) - if not dry_run: + + if not dry_run and may_reset: if is_detached: # NOTE: for now we force, the user is no supposed to change detached # submodules anyway. Maybe at some point this becomes an option, to # properly handle user modifications - see below for future options # regarding rebase and merge. - mrepo.git.checkout(hexsha, force=True) + mrepo.git.checkout(hexsha, force=force) else: - # TODO: allow to specify a rebase, merge, or reset - # TODO: Warn if the hexsha forces the tracking branch off the remote - # branch - this should be prevented when setting the branch option mrepo.head.reset(hexsha, index=True, working_tree=True) # END handle checkout - # END handle dry_run + # if we may reset/checkout progress.update(END | UPDWKTREE, 0, 1, prefix + "Done updating working tree for submodule %r" % self.name) # END update to new commit only if needed @@ -591,7 +621,8 @@ class Submodule(util.IndexObject, Iterable, Traversable): # in dry_run mode, the module might not exist if mrepo is not None: for submodule in self.iter_items(self.module()): - submodule.update(recursive, init, to_latest_revision, progress=progress, dry_run=dry_run) + submodule.update(recursive, init, to_latest_revision, progress=progress, dry_run=dry_run, + force=force) # END handle recursive update # END handle dry run # END for each submodule @@ -748,7 +779,7 @@ class Submodule(util.IndexObject, Iterable, Traversable): csm.remove(module, force, configuration, dry_run) del(csm) # end - if nc > 0: + if not dry_run and nc > 0: # Assure we don't leave the parent repository in a dirty state, and commit our changes # It's important for recursive, unforced, deletions to work as expected self.module().index.commit("Removed submodule '%s'" % self.name) @@ -854,8 +885,7 @@ class Submodule(util.IndexObject, Iterable, Traversable): def set_parent_commit(self, commit, check=True): """Set this instance to use the given commit whose tree is supposed to contain the .gitmodules blob. - - :param commit: Commit'ish reference pointing at the root_tree + :param commit: Commit'ish reference pointing at the root_tree, or None always point to the most recent commit :param check: if True, relatively expensive checks will be performed to verify validity of the submodule. :raise ValueError: if the commit's tree didn't contain the .gitmodules blob. @@ -939,7 +969,7 @@ class Submodule(util.IndexObject, Iterable, Traversable): pw.release() # .gitmodules - cw = self.config_writer().config + cw = self.config_writer(write=False).config cw.rename_section(sm_section(self.name), sm_section(new_name)) cw.release() @@ -948,9 +978,16 @@ class Submodule(util.IndexObject, Iterable, Traversable): # .git/modules mod = self.module() if mod.has_separate_working_tree(): - module_abspath = self._module_abspath(self.repo, self.path, new_name) - os.renames(mod.git_dir, module_abspath) - self._write_git_file_and_module_config(mod.working_tree_dir, module_abspath) + destination_module_abspath = self._module_abspath(self.repo, self.path, new_name) + source_dir = mod.git_dir + # Let's be sure the submodule name is not so obviously tied to a directory + if destination_module_abspath.startswith(mod.git_dir): + tmp_dir = self._module_abspath(self.repo, self.path, os.path.basename(tempfile.mkdtemp())) + os.renames(source_dir, tmp_dir) + source_dir = tmp_dir + # end handle self-containment + os.renames(source_dir, destination_module_abspath) + self._write_git_file_and_module_config(mod.working_tree_dir, destination_module_abspath) # end move separate git repository return self diff --git a/git/objects/submodule/root.py b/git/objects/submodule/root.py index 8c9afff1..352e0f8b 100644 --- a/git/objects/submodule/root.py +++ b/git/objects/submodule/root.py @@ -61,7 +61,7 @@ class RootModule(Submodule): #{ Interface def update(self, previous_commit=None, recursive=True, force_remove=False, init=True, - to_latest_revision=False, progress=None, dry_run=False): + to_latest_revision=False, progress=None, dry_run=False, force_reset=False): """Update the submodules of this repository to the current HEAD commit. This method behaves smartly by determining changes of the path of a submodules repository, next to changes to the to-be-checked-out commit or the branch to be @@ -80,10 +80,16 @@ class RootModule(Submodule): :param init: If we encounter a new module which would need to be initialized, then do it. :param to_latest_revision: If True, instead of checking out the revision pointed to by this submodule's sha, the checked out tracking branch will be merged with the - newest remote branch fetched from the repository's origin + latest remote branch fetched from the repository's origin. + Unless force_reset is specified, a local tracking branch will never be reset into its past, therefore + the remote branch must be in the future for this to have an effect. + :param force_reset: if True, submodules may checkout or reset their branch even if the repository has + pending changes that would be overwritten, or if the local tracking branch is in the future of the + remote tracking branch and would be reset into its past. :param progress: RootUpdateProgress instance or None if no progress should be sent :param dry_run: if True, operations will not actually be performed. Progress messages - will change accordingly to indicate the WOULD DO state of the operation.""" + will change accordingly to indicate the WOULD DO state of the operation. + :return: self""" if self.repo.bare: raise InvalidGitRepositoryError("Cannot update submodules in bare repositories") # END handle bare @@ -134,9 +140,7 @@ class RootModule(Submodule): # of previous module. Trigger the cache to be updated before that progress.update(op, i, len_rrsm, prefix + "Removing submodule %r at %s" % (rsm.name, rsm.abspath)) rsm._parent_commit = repo.head.commit - if not dry_run: - rsm.remove(configuration=False, module=True, force=force_remove) - # END handle dry-run + rsm.remove(configuration=False, module=True, force=force_remove, dry_run=dry_run) if i == len_rrsm - 1: op |= END @@ -311,7 +315,7 @@ class RootModule(Submodule): for sm in sms: # update the submodule using the default method sm.update(recursive=False, init=init, to_latest_revision=to_latest_revision, - progress=progress, dry_run=dry_run) + progress=progress, dry_run=dry_run, force=force_reset) # update recursively depth first - question is which inconsitent # state will be better in case it fails somewhere. Defective branch @@ -322,11 +326,13 @@ class RootModule(Submodule): if sm.module_exists(): type(self)(sm.module()).update(recursive=True, force_remove=force_remove, init=init, to_latest_revision=to_latest_revision, - progress=progress, dry_run=dry_run) + progress=progress, dry_run=dry_run, force_reset=force_reset) # END handle dry_run # END handle recursive # END for each submodule to update + return self + def module(self): """:return: the actual repository containing the submodules""" return self.repo diff --git a/git/repo/fun.py b/git/repo/fun.py index 1ee11ffc..2a1270be 100644 --- a/git/repo/fun.py +++ b/git/repo/fun.py @@ -296,7 +296,7 @@ def rev_parse(repo, rev): raise ValueError("Invalid token: %r" % token) # END end handle tag except (IndexError, AttributeError): - raise BadObject("Invalid Revision in %s" % rev) + raise BadName("Invalid revision spec '%s' - not enough parent commits to reach '%s%i'" % (rev, token, num)) # END exception handling # END parse loop diff --git a/git/test/test_submodule.py b/git/test/test_submodule.py index 767d8419..06036f53 100644 --- a/git/test/test_submodule.py +++ b/git/test/test_submodule.py @@ -10,12 +10,18 @@ from git.test.lib import ( with_rw_repo ) from gitdb.test.lib import with_rw_directory -from git.exc import InvalidGitRepositoryError +from git.exc import ( + InvalidGitRepositoryError, + RepositoryDirtyError +) from git.objects.submodule.base import Submodule from git.objects.submodule.root import RootModule, RootUpdateProgress from git.util import to_native_path_linux, join_path_native from git.compat import string_types -from git.repo.fun import find_git_dir +from git.repo.fun import ( + find_git_dir, + touch +) # Change the configuration if possible to prevent the underlying memory manager # to keep file handles open. On windows we get problems as they are not properly @@ -35,8 +41,8 @@ class TestRootProgress(RootUpdateProgress): """Just prints messages, for now without checking the correctness of the states""" - def update(self, op, index, max_count, message=''): - print(message) + def update(self, op, cur_count, max_count, message=''): + print(op, cur_count, max_count, message) prog = TestRootProgress() @@ -225,12 +231,14 @@ class TestSubmodule(TestBase): # END for each repo to reset # dry run does nothing - sm.update(recursive=True, dry_run=True, progress=prog) + self.failUnlessRaises(RepositoryDirtyError, sm.update, recursive=True, dry_run=True, progress=prog) + sm.update(recursive=True, dry_run=True, progress=prog, force=True) for repo in smods: assert repo.head.commit != repo.head.ref.tracking_branch().commit # END for each repo to check - sm.update(recursive=True, to_latest_revision=True) + self.failUnlessRaises(RepositoryDirtyError, sm.update, recursive=True, to_latest_revision=True) + sm.update(recursive=True, to_latest_revision=True, force=True) for repo in smods: assert repo.head.commit == repo.head.ref.tracking_branch().commit # END for each repo to check @@ -480,8 +488,8 @@ class TestSubmodule(TestBase): #================ nsmn = "newsubmodule" nsmp = "submrepo" - async_url = to_native_path_linux(join_path_native(self.rorepo.working_tree_dir, rsmsp[0], rsmsp[1])) - nsm = Submodule.add(rwrepo, nsmn, nsmp, url=async_url) + subrepo_url = to_native_path_linux(join_path_native(self.rorepo.working_tree_dir, rsmsp[0], rsmsp[1])) + nsm = Submodule.add(rwrepo, nsmn, nsmp, url=subrepo_url) csmadded = rwrepo.index.commit("Added submodule").hexsha # make sure we don't keep the repo reference nsm.set_parent_commit(csmadded) assert nsm.module_exists() @@ -507,7 +515,7 @@ class TestSubmodule(TestBase): # an update will remove the module # not in dry_run - rm.update(recursive=False, dry_run=True) + rm.update(recursive=False, dry_run=True, force_remove=True) assert os.path.isdir(smp) # when removing submodules, we may get new commits as nested submodules are auto-committing changes @@ -517,9 +525,29 @@ class TestSubmodule(TestBase): rm.update(recursive=False, force_remove=True) assert not os.path.isdir(smp) - # change url - #============= - # to the first repository, this way we have a fast checkout, and a completely different + # 'apply work' to the nested submodule and assure this is not removed/altered during updates + # Need to commit first, otherwise submodule.update wouldn't have a reason to change the head + nsm_file = os.path.join(nsm.module().working_tree_dir, 'new-file') + # We cannot expect is_dirty to even run as we wouldn't reset a head to the same location + assert nsm.module().head.commit.hexsha == nsm.hexsha + touch(nsm_file) + nsm.module().index.add([nsm]) + nsm.module().index.commit("added new file") + rm.update(recursive=False, dry_run=True, progress=prog) # would not change head, and thus doens't fail + # Everything we can do from now on will trigger the 'future' check, so no is_dirty() check will even run + # This would only run if our local branch is in the past and we have uncommitted changes + + prev_commit = nsm.module().head.commit + rm.update(recursive=False, dry_run=False, progress=prog) + assert prev_commit == nsm.module().head.commit, "head shouldn't change, as it is in future of remote branch" + + # this kills the new file + rm.update(recursive=True, progress=prog, force_reset=True) + assert prev_commit != nsm.module().head.commit, "head changed, as the remote url and its commit changed" + + # change url ... + #=============== + # ... to the first repository, this way we have a fast checkout, and a completely different # repository at the different url nsm.set_parent_commit(csmremoved) nsmurl = to_native_path_linux(join_path_native(self.rorepo.working_tree_dir, rsmsp[0])) @@ -529,15 +557,17 @@ class TestSubmodule(TestBase): csmpathchange = rwrepo.index.commit("changed url") nsm.set_parent_commit(csmpathchange) + # Now nsm head is in the future of the tracked remote branch prev_commit = nsm.module().head.commit # dry-run does nothing rm.update(recursive=False, dry_run=True, progress=prog) assert nsm.module().remotes.origin.url != nsmurl - rm.update(recursive=False, progress=prog) + rm.update(recursive=False, progress=prog, force_reset=True) assert nsm.module().remotes.origin.url == nsmurl - # head changed, as the remote url and its commit changed - assert prev_commit != nsm.module().head.commit + assert prev_commit != nsm.module().head.commit, "Should now point to gitdb" + assert len(rwrepo.submodules) == 1 + assert not rwrepo.submodules[0].children()[0].module_exists(), "nested submodule should not be checked out" # add the submodule's changed commit to the index, which is what the # user would do @@ -584,7 +614,7 @@ class TestSubmodule(TestBase): # assure we pull locally only nsmc = nsm.children()[0] writer = nsmc.config_writer() - writer.set_value('url', async_url) + writer.set_value('url', subrepo_url) writer.release() rm.update(recursive=True, progress=prog, dry_run=True) # just to run the code rm.update(recursive=True, progress=prog) @@ -652,16 +682,15 @@ class TestSubmodule(TestBase): @with_rw_directory def test_git_submodule_compatibility(self, rwdir): parent = git.Repo.init(os.path.join(rwdir, 'parent')) - empty_file = os.path.join(parent.working_tree_dir, "empty") - with open(empty_file, 'wb') as fp: - fp.close() - parent.index.add([empty_file]) - parent.index.commit("initial commit - can't yet add submodules to empty parent dir") - sm_path = 'submodules/intermediate/one' sm = parent.create_submodule('mymodules/myname', sm_path, url=self._submodule_url()) parent.index.commit("added submodule") + def assert_exists(sm, value=True): + assert sm.exists() == value + assert sm.module_exists() == value + # end + # As git is backwards compatible itself, it would still recognize what we do here ... unless we really # muss it up. That's the only reason why the test is still here ... . assert len(parent.git.submodule().splitlines()) == 1 @@ -681,25 +710,47 @@ class TestSubmodule(TestBase): new_sm_path = 'submodules/one' sm.set_parent_commit(parent.commit()) sm.move(new_sm_path) - assert sm.exists() - assert sm.module_exists() + assert_exists(sm) # Add additional submodule level - sm.module().create_submodule('nested-submodule', 'nested-submodule/working-tree', - url=self._submodule_url()) + csm = sm.module().create_submodule('nested-submodule', 'nested-submodule/working-tree', + url=self._submodule_url()) sm.module().index.commit("added nested submodule") + sm_head_commit = sm.module().commit() + csm.set_parent_commit(sm_head_commit) + assert_exists(csm) + # Fails because there are new commits, compared to the remote we cloned from self.failUnlessRaises(InvalidGitRepositoryError, sm.remove, dry_run=True) - - # TODO: rename nested submodule + assert_exists(sm) + assert sm.module().commit() == sm_head_commit + assert_exists(csm) + + # rename nested submodule + # This name would move itself one level deeper - needs special handling internally + new_name = csm.name + '/mine' + assert csm.rename(new_name).name == new_name + assert_exists(csm) + + # keep_going evaluation + rsm = parent.submodule_update() + assert_exists(sm) + assert_exists(csm) + csm_writer = csm.config_writer().set_value('url', 'foo') + csm_writer.release() + csm.repo.index.commit("Have to commit submodule change for algorithm to pick it up") + csm.set_parent_commit(csm.repo.commit()) + assert csm.url == 'foo' + + self.failUnlessRaises(Exception, rsm.update, recursive=True, to_latest_revision=True, progress=prog) + assert_exists(csm) # remove sm_module_path = sm.module().git_dir for dry_run in (True, False): sm.remove(dry_run=dry_run, force=True) - assert sm.exists() == dry_run - assert sm.module_exists() == dry_run + assert_exists(sm, value=dry_run) assert os.path.isdir(sm_module_path) == dry_run # end for each dry-run mode @@ -712,12 +763,14 @@ class TestSubmodule(TestBase): assert sm._parent_commit is not None assert sm.rename(sm_name) is sm and sm.name == sm_name + assert not sm.repo.is_dirty(index=True, working_tree=False, untracked_files=False) new_path = 'renamed/myname' assert sm.move(new_path).name == new_path new_sm_name = "shortname" assert sm.rename(new_sm_name) is sm + assert sm.repo.is_dirty(index=True, working_tree=False, untracked_files=False) assert sm.exists() sm_mod = sm.module() diff --git a/git/util.py b/git/util.py index 5385455a..8cab0b99 100644 --- a/git/util.py +++ b/git/util.py @@ -251,7 +251,10 @@ class RemoteProgress(object): message = message[:-len(done_token)] # END end message handling - self.update(op_code, cur_count, max_count, message) + self.update(op_code, + cur_count and float(cur_count), + max_count and float(max_count), + message) # END for each sub line return failed_lines |