diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-08-05 16:22:51 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-08-05 16:22:51 +0000 |
commit | cf46733632c7279a9fd0fe6ce26f9185a4ae82a9 (patch) | |
tree | da27775a2161723ef342e91af41a8b51fedef405 /tools/client-side/svn-vendor.py | |
parent | bb0ef45f7c46b0ae221b26265ef98a768c33f820 (diff) | |
download | subversion-tarball-master.tar.gz |
subversion-1.9.7HEADsubversion-1.9.7master
Diffstat (limited to 'tools/client-side/svn-vendor.py')
-rwxr-xr-x | tools/client-side/svn-vendor.py | 1065 |
1 files changed, 1065 insertions, 0 deletions
diff --git a/tools/client-side/svn-vendor.py b/tools/client-side/svn-vendor.py new file mode 100755 index 0000000..d0c862c --- /dev/null +++ b/tools/client-side/svn-vendor.py @@ -0,0 +1,1065 @@ +#!/usr/bin/python3 +# vim: set sw=4 expandtab : +# ==================================================================== +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ==================================================================== +# +############################################################################## +# svn-vendor.py +# +# Overview +# -------- +# Replacement for svn_load_dirs.pl (included as a 'contributed utility' in +# Subversion sources). Main difference is some heuristics in detection of +# the renames. Note that this script does not attempt to automate remote +# SVN operations (check-out, check-in and tagging), so it is possible to +# review the state of sources that are about to be checked in. Another +# difference is an ability to save the detected renames, review/re-apply +# them. +# +# This script requires Python 3.3.x or higher. Sorry, I was too lazy +# to write shell quoting routines that are already available in recent +# Python versions. 
+# +# Using this script +# ----------------- +# First, it is necessary to check out the working copy from the URL that +# will host the imported sources. E.g., if the versions of FOO are being +# imported into svn://example.com/vendor/FOO/current: +# +# svn co svn://example.com/vendor/FOO/current wc +# +# Then, unpack the sources of the version to be imported: +# +# tar xzf foo-1.1.tar.gz +# +# Examples below assume the command above created a `foo-1.1' directory. +# After that, there are three different modes of operation: +# +# 1. Fully automatic +# +# svn-vendor.py --auto wc foo-1.1 +# svn st wc +# svn ci wc +# +# In this mode, the script fully relies on its heuristics for detecting +# renames. In many cases, it "just works". There can be spurious moves +# detected in this mode, though. For example, consider a deleted header +# that consists of 50 lines of GPL text, 1 line of copyright, and +# 3 lines of declarations, and a similar unrelated header in the imported +# sources. From the script's point of view, the files are nearly identical +# (4 lines removed, 4 lines added, 50 lines unchanged). +# +# After the script completes, examine the working copy by doing 'svn diff' +# and/or 'svn status', paying particular attention to renames. If all the +# moves are detected correctly, check in the changes in the working copy. +# +# 2. Semi-automatic +# +# svn-vendor.py --detect moves-foo-1.1.txt wc foo-1.1 +# vi moves-foo-1.1.txt +# svn-vendor.py --apply moves-foo-1.1.txt wc foo-1.1 +# svn ci wc +# +# If the fully automatic mode detected some spurious moves, or did not +# detect some renames you want to be performed, it is still possible to +# leverage what the script has detected automatically. The first command +# above does the automatic detection, just as it does in fully automatic +# mode, but stops short of performing any modification of the working copy. +# The list of detected copies and renames is saved into a text file, +# `moves-foo-1.1.txt'. 
#
# That file can be inspected after the script finishes. Spurious moves can
# be deleted from the file, and new copies/renames can be added. Then the
# changes can be applied to the working copy.
#
# 3. Manual
#
#    svn-vendor.py wc foo-1.1
#    (svn-vendor) detect
#    (svn-vendor) move x.c y.c
#    (svn-vendor) move include/1.h include/2.h
#    (svn-vendor) copy include/3.h include/3-copy.h
#    (svn-vendor) lsprep
#    (svn-vendor) save /tmp/renames-to-be-applied.txt
#    (svn-vendor) apply
#
# If the automatic detection does not help, it is possible to do the renames
# manually (similarly to svn_load_dirs.pl). Use the 'help' command to get
# the list of supported commands and their description. Feel free to play
# around - since the script does not perform any remote SVN operation,
# there is no chance to commit the changes accidentally.
#
# Notes
# -----
# I. The time for rename detection is O(Fs*Fd) + O(Ds*Dd), where Fs is
# the number of files removed from the current directory, Fd is the number
# of files added in the imported sources, and Ds/Dd are the same for
# directories. That is, the running time may become an issue if the numbers
# of added/removed files go into the thousands (e.g. if updating Linux
# kernel 2.6.35 to 3.10). As a workaround, import interim releases first so
# that the number of renames remains sane at each step. That also makes
# reviewing the renames performed by the script much easier.
#
# Enjoy!
#
##############################################################################

import argparse
import cmd
import difflib
import filecmp
import os
import shlex
import shutil
import subprocess
import sys

try:
    # Imported only for its side effect: it enables line editing/history
    # for the interactive prompt. It is not available on every platform,
    # and the script works without it.
    import readline
except ImportError:
    readline = None


def name_similarity(n1, n2):
    '''
    Function to be used as a key for sorting dirs/files by name matching.
    Returns 0.0 for identical names and values approaching 1.0 for
    completely dissimilar ones, so the best matches sort first.
    '''
    sm = difflib.SequenceMatcher(a=n1, b=n2)
    return 1.0 - sm.ratio()


def filename_sort_key(s):
    '''
    Function to sort filenames so that parent directory is always followed
    by its children. Without it, [ "/a", "/a-b", "/a/b", "/a-b/c" ] would
    not be sorted correctly.
    '''
    return s.replace('/', '\001')


def descendant_or_self(path, ancestor):
    '''
    Check if path is somewhere in hierarchy under ancestor.
    '''
    return path == ancestor or path.startswith(ancestor + os.sep)


def path_rebase(path, old_base, new_base):
    '''
    Return a path name that has the same relative path to new_base as path
    had to old_base. Assumes path is a descendant of old_base.
    '''
    if path == old_base:
        return new_base
    return os.path.normpath(os.path.join(new_base,
            os.path.relpath(path, old_base)))


def for_all_parents(path, func):
    '''
    Invoke func for each parent path, from the closest parent upwards.
    Exceptions raised by func propagate to the caller (this is used to
    stop the walk early).
    '''
    d = os.path.dirname(path)
    while d != "":
        func(d)
        parent = os.path.dirname(d)
        if parent == d:
            # Reached a filesystem root (dirname("/") == "/"); without this
            # check an absolute path would loop forever.
            break
        d = parent


class InvalidUsageException(Exception):
    '''
    Raised if command line arguments are invalid. `cmd' is the name of the
    offending interactive command (or None if not tied to one).
    '''
    def __init__(self, cmd, msg):
        Exception.__init__(self, msg)
        self.cmd = cmd


class NotImplementedException(Exception):
    '''
    Raised if some code path is not implemented
    '''
    pass


# Indexes into FSO.state
S_WC = 0
S_IM = 1


class FSO(object):
    '''
    File system object (file/dir either in imported dir or in WC)
    '''
    def __init__(self):
        # Path this object originally had in the working copy (before any
        # recorded copy/move), or None if it never existed there.
        self.wc_path = None
        self.state = [ "-", "-" ] # '-': absent, 'F': file, 'D': dir

    def status(self):
        'Two-character status: working copy kind, then imported kind'
        return "[%s%s]" % (self.state[S_WC], self.state[S_IM])

    def orig_reference(self, curpath):
        'Describe the original WC path if it differs from curpath'
        if self.wc_path and self.wc_path != curpath:
            return " (original: %s)" % shlex.quote(self.wc_path)
        return ""


class FSOCollection(dict):
    '''
    Collection of FSOs, keyed by normalized relative path
    '''
    def print(self):
        'Print all known objects with their status'
        print(" / Status in working copy (-:absent, F:file, D:dir)")
        print(" |/ Status in imported sources (-:absent, F:file, D:dir)")
        for k in sorted(self.keys(), key=filename_sort_key):
            e = self[k]
            print("%s %s%s" % (e.status(), shlex.quote(k),
                e.orig_reference(k)))

    def get(self, path):
        'Get existing FSO or create a new one'
        if path in self:
            return self[path]
        e = FSO()
        self[path] = e
        return e

    def add(self, path, where, kind):
        'Adding entries during initial scan'
        path = os.path.normpath(path)
        e = self.get(path)
        e.state[where] = kind
        if where == S_WC:
            e.wc_path = path

    def wc_copy(self, src, dst):
        'Handle copy in a working copy (src and everything below it)'
        # Iterate over a snapshot: self.get() below may add new keys.
        keys = list(self.keys())
        for k in keys:
            if descendant_or_self(k, src):
                esrc = self[k]
                if esrc.state[S_WC] == "-":
                    continue
                kn = path_rebase(k, src, dst)
                edst = self.get(kn)
                if edst.state[S_WC] != "-":
                    # Copying into existing destination.
                    # Caller should've checked this.
                    raise NotImplementedException
                edst.wc_path = esrc.wc_path
                edst.state[S_WC] = esrc.state[S_WC]

    def wc_remove(self, path):
        'Handle removal in a working copy (path and everything below it)'
        keys = list(self.keys())
        for k in keys:
            if descendant_or_self(k, path):
                self[k].state[S_WC] = "-"


class ConfigOpt(object):
    'Helper class - single option (string)'
    def __init__(self, value, helpmsg):
        self.value = value
        self.helpmsg = helpmsg

    def set(self, new_value):
        self.value = new_value

    def __str__(self):
        return "<none>" if self.value is None else "`%s'" % self.value


class ConfigOptInt(ConfigOpt):
    'Helper class - single option (integer)'
    def set(self, new_value):
        try:
            self.value = int(new_value)
        except ValueError:
            raise InvalidUsageException(None, "Value must be integer")

    def __str__(self):
        return "%d" % self.value


class Config(dict):
    '''
    Store configuration options.
    '''
    def add_option(self, name, cfgopt):
        self[name] = cfgopt

    def set(self, name, value):
        'Set a known option; raises InvalidUsageException if unknown'
        if name not in self:
            raise InvalidUsageException(None,
                    "Unknown config variable '%s'" % name)
        self[name].set(value)

    def get(self, name):
        'Return the value of a known option'
        if name not in self:
            raise NotImplementedException()
        return self[name].value

    def print(self):
        for k in sorted(self):
            o = self[k]
            print("# %s" % o.helpmsg)
            print("%-20s: %s" % (k, str(o)))
            print()


class SvnVndImport(cmd.Cmd):
    '''
    Main driving class.
    '''
    intro = "Welcome to SVN vendor import helper. " + \
            "Type help or ? to list commands.\n"
    prompt = "(svn-vendor) "
    # Kept for backward compatibility; every instance gets its own list in
    # __init__ so that recorded operations are not shared between instances.
    prepare_ops = []

    def __init__(self, wcdir, importdir, svninfo):
        cmd.Cmd.__init__(self)
        self.wcdir = wcdir
        self.importdir = importdir
        self.svninfo = svninfo
        # Recorded preparatory operations: ("mv"|"cp", src, dst) or
        # ("rm", path) tuples, in the order they must be applied.
        self.prepare_ops = []
        self.config = Config()
        self.config.add_option('save-diff-copied',
                ConfigOpt(None, "Save 'svn diff' output on the " +
                    "moved/copied files and directories to this " +
                    "file as part of 'apply'"))
        self.config.add_option('dir-similarity',
                ConfigOptInt(600, "Similarity between dirs to assume " +
                    "a copy/move [0..1000]"))
        self.config.add_option('file-similarity',
                ConfigOptInt(600, "Similarity between files to assume a " +
                    "copy/move [0..1000]"))
        self.config.add_option('file-min-lines',
                ConfigOptInt(10, "Minimal number of lines in a file for " +
                    "meaningful comparison"))
        self.config.add_option('verbose',
                ConfigOptInt(3, "Verbosity of the output [0..5]"))
        # Falls back to an arbitrary width of 150 when not running in a
        # terminal (e.g. output redirected to a file).
        self.termwidth = shutil.get_terminal_size((150, 24)).columns

    def info(self, level, msg):
        'Print message with specified verbosity'
        if level <= self.config.get('verbose'):
            print(msg, flush=True)

    def scan(self):
        'Build the FSO collection from the working copy and imported dir'
        self.items = FSOCollection()
        self.info(1, "Scanning working copy directory...")
        self.get_lists(self.wcdir, S_WC)
        self.info(1, "Scanning imported directory...")
        self.get_lists(self.importdir, S_IM)

    def get_lists(self, top, where):
        'Record all dirs/files under top into self.items (slot `where`)'
        for d, dn, fn in os.walk(top, followlinks=True):
            dr = os.path.relpath(d, top)
            # If under .svn directory at the top (SVN 1.7+) or has .svn
            # in the path (older SVN), ignore
            if descendant_or_self(dr, '.svn') or \
                    os.path.basename(dr) == '.svn' or \
                    (os.sep + '.svn' + os.sep) in dr:
                continue
            if dr != '.':
                self.items.add(dr, where, "D")
            for f in fn:
                fr = os.path.normpath(os.path.join(dr, f))
                self.items.add(fr, where, "F")

    def onecmd(self, str):
        'Override for checking number of arguments'
        try:
            return cmd.Cmd.onecmd(self, str)
        except InvalidUsageException as e:
            if e.cmd is not None:
                print("!!! Invalid usage of `%s' command: %s" % (e.cmd, e))
                print()
                self.onecmd("help " + e.cmd)
            else:
                print("!!! %s" % e)

    def parse_args(self, line, nargs, cmd):
        'Parse arguments for a command; require exactly nargs of them'
        args = shlex.split(line)
        if len(args) != nargs:
            raise InvalidUsageException(cmd, "expect %d arguments" % nargs)
        return args

    def run_svn(self, args_fixed, args_split=None):
        '''
        Run SVN command(s), potentially splitting long argument lists.
        args_fixed is passed to every invocation; args_split is fed in
        batches. Returns (success, combined stdout of successful runs).
        '''
        if args_split is None:
            args_split = []
        rv = True
        pos = 0
        atatime = 100
        output = ""
        # Run at least once even if there is nothing to split.
        while pos < len(args_split) or (pos == 0 and len(args_split) == 0):
            svnargs = ['svn'] + args_fixed + args_split[pos : pos + atatime]
            pos += atatime
            self.info(5, "Running: " + " ".join(map(shlex.quote, svnargs)))
            p = subprocess.Popen(args=svnargs, stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE, cwd=self.wcdir)
            so, se = p.communicate()
            if p.returncode != 0:
                print("`%s' exited with %d status:" %
                        (" ".join(map(shlex.quote, svnargs)), p.returncode))
                print(se.decode())
                rv = False
            else:
                output += so.decode()
        return rv, output

    def copy_or_move(self, op, src, dst):
        'Handle copy or move operation'
        if src not in self.items or self.items[src].state[S_WC] == "-":
            raise InvalidUsageException(None,
                    "Nothing known about `%s'" % src)
        if dst in self.items and self.items[dst].state[S_WC] != "-":
            raise InvalidUsageException(None,
                    "Destination path `%s' already exists" % dst)
        # Check that we're not creating dst under a file (not a dir)
        new_dirs = []
        def check_parent(d):
            if d not in self.items or self.items[d].state[S_WC] == "-":
                new_dirs.append(d)
            elif self.items[d].state[S_WC] == "F":
                # Parentheses are required: '%' binds tighter than '+'.
                raise InvalidUsageException(None,
                        ("Destination path `%s' created under `%s' " +
                         "which is a file") % (dst, d))
        for_all_parents(dst, check_parent)
        # All ok, record new directories that may be created
        for d in new_dirs:
            self.items.get(d).state[S_WC] = "D"
        # Record the operation and update the FSO collection
        self.prepare_ops.append((op, src, dst))
        self.items.wc_copy(src, dst)
        if op == "mv":
            self.items.wc_remove(src)

    def remove(self, path):
        'Record removal of a path from the working copy'
        if path not in self.items or self.items[path].state[S_WC] == "-":
            raise InvalidUsageException(None,
                    "Nothing known about `%s'" % path)
        self.prepare_ops.append(("rm", path))
        self.items.wc_remove(path)

    def similarity_file(self, src, dst, threshold, lst_removal):
        'Compare two files, return similarity ratio on 0..1000 scale'
        if self.items[src].state[S_WC] != "F":
            return 0
        # Source is in working copy
        fn1 = os.path.join(self.wcdir, self.items[src].wc_path)
        # Destination is in imported dir
        fn2 = os.path.join(self.importdir, dst)
        minlines = self.config.get('file-min-lines')
        try:
            with open(fn1, 'r') as f1:
                l1 = f1.readlines()
            if len(l1) < minlines:
                return 0
            with open(fn2, 'r') as f2:
                l2 = f2.readlines()
            if len(l2) < minlines:
                return 0
            sm = difflib.SequenceMatcher(a=l1, b=l2)
            return int(1000 * sm.quick_ratio())
        except UnicodeDecodeError:
            # Oops, file seems to be binary. Fall back to comparing whole
            # file contents.
            if filecmp.cmp(fn1, fn2, shallow=False):
                return 1000
            return 0

    def _similarity_dir(self, src, dst, get_file_similarity, lst_removal):
        'Iterate over FSOs, using callback to compare file entries'
        common = 0
        total = 0
        for xsrc in self.items:
            if xsrc.startswith(src + os.sep):
                esrc = self.items[xsrc]
                if esrc.state[S_WC] == "-":
                    # Source not in WC - ignore for similarity calculation
                    continue
                skip = False
                if lst_removal is not None:
                    for i in lst_removal:
                        if descendant_or_self(xsrc, i):
                            skip = True
                if skip:
                    # Moved to another place, do not consider in score
                    continue
                total += 1000
                xdst = path_rebase(xsrc, src, dst)
                if xdst not in self.items:
                    # Destination not in imported sources - non-similar item
                    continue
                edst = self.items[xdst]
                if edst.state[S_IM] == esrc.state[S_WC]:
                    if esrc.state[S_WC] == "D":
                        common += 1000
                    else:
                        common += get_file_similarity(xsrc, xdst)
        if total == 0:
            # No files/subdirs in source directory - avoid copying empty dirs
            return 0
        return 1000 * common / total

    def similarity_dir(self, src, dst, threshold, lst_removal):
        '''
        Compare two dirs recursively, return similarity ratio on
        0..1000 scale.
        '''
        # Quickly estimate upper boundary by comparing file names. Only
        # concern ourselves with files in source directory. I.e., if
        # files were added after the move in the destination directory,
        # it's ok. If most of the files from the source directory were
        # removed, the directory is not considered similar - instead,
        # file move detection would move files one by one.
        upper = self._similarity_dir(src, dst, lambda s, d: 1000, lst_removal)
        if upper <= threshold:
            # Even the best estimate is worse than current cut-off
            return 0
        # Okay, looks roughly similar. Now redo the above procedure, but also
        # compare the file content.
        return self._similarity_dir(src, dst,
                lambda s, d: self.similarity_file(s, d, 0, lst_removal),
                lst_removal)

    def similar(self, src, dst, threshold=0, lst_removal=None):
        '''
        Compare two FSOs, source in WC and destination in imported dir.
        Returns similarity on 0..1000 scale, or None (after printing a
        message) if either path is unknown.
        '''
        if src not in self.items:
            print("Source `%s' not in the working copy" % src)
            return
        xsrc = self.items[src]
        if xsrc.state[S_WC] == "-":
            print("Source `%s' not in the working copy" % src)
            return
        if dst not in self.items:
            print("Destination `%s' not in imported sources" % dst)
            return
        xdst = self.items[dst]
        if xdst.state[S_IM] == "-":
            print("Destination `%s' not in imported sources" % dst)
            return
        if xsrc.state[S_WC] != xdst.state[S_IM]:
            # Different kinds - definitely not the same object
            return 0
        if xsrc.state[S_WC] == "D":
            return self.similarity_dir(src, dst, threshold, lst_removal)
        else:
            return self.similarity_file(src, dst, threshold, lst_removal)

    def handle_op(self, op_tuple):
        '''
        Handle one SVN operation, recorded as a tuple (or list):
        ("cp", src, dst), ("mv", src, dst) or ("rm", path).
        Returns False if the operation is unknown or has a wrong number
        of arguments, True otherwise.
        '''
        def x_mv(src, dst):
            self.info(2, " Move `%s' to `%s'" % (src, dst))
            self.copy_or_move("mv", src, dst)
        def x_cp(src, dst):
            self.info(2, " Copy `%s' to `%s'" % (src, dst))
            self.copy_or_move("cp", src, dst)
        def x_rm(path):
            self.info(2, " Remove `%s'" % path)
            self.remove(path)
        known_ops = {
                # key: (nargs, handler)
                'cp' : (3, x_cp),
                'mv' : (3, x_mv),
                'rm' : (2, x_rm),
                }
        if len(op_tuple) == 0:
            raise InvalidUsageException(None, "Empty operation")
        op = op_tuple[0]
        if op not in known_ops:
            return False
        nargs, func = known_ops[op]
        if nargs != len(op_tuple):
            return False
        func(*op_tuple[1:])
        return True

    def detect(self, thresholds):
        '''
        Helper for finding copy/move destinations. thresholds maps node
        kind ("D"/"F") to the minimal similarity to accept. Detected
        operations are recorded via handle_op().
        '''
        ilst = []
        wlst = {}
        ilst_map = {}
        for p in self.items:
            e = self.items[p]
            if e.state[S_WC] != "-" and e.state[S_IM] == "-":
                wlst[p] = [] # wlst hash stores copy destinations
            elif e.state[S_WC] == "-" and e.state[S_IM] != "-":
                # ilst just lists destination paths as tuples with node kind
                ilst.append((e.state[S_IM], p))
        iteration = 0
        # Do not apply operations immediately - we'll need to post-process
        # them to account for files/dirs moved inside a moved parent dir.
        ops = []
        to_be_removed = []
        def get_renamed_name(path, rename_ops):
            '''
            Check if path was renamed/removed in the recorded operations,
            return new name.
            '''
            for op_tuple in rename_ops:
                # Since copies do not remove the source file, ignore them.
                # We push no 'rm' ops in this function
                if op_tuple[0] == "mv":
                    src = op_tuple[1]
                    dst = op_tuple[2]
                    if descendant_or_self(path, src):
                        path = path_rebase(path, src, dst)
            return path

        while len(wlst):
            iteration += 1
            self.info(2, ("Iteration %d: Possible sources: %d, " +
                "possible destinations: %d") %
                (iteration, len(wlst), len(ilst)))
            ndst = len(ilst)
            for idx, (nk, dst) in enumerate(sorted(ilst,
                    key=lambda s: filename_sort_key(s[1]))):
                class SkipDestFile(Exception):
                    pass
                # Check if moved as a part of a parent directory.
                def check_moved_parent(xdst):
                    if xdst in ilst_map:
                        src = path_rebase(dst, xdst, ilst_map[xdst])
                        # Did it exist in copied directory?
                        if src in self.items and \
                                self.items[src].state[S_WC] == nk:
                            sim = self.similar(src, dst, thresholds[nk],
                                    to_be_removed)
                            if sim > thresholds[nk]:
                                self.info(2, (" [%04d/%04d] Skipping `%s' " +
                                    "(copied as part of `%s')") %
                                    (idx, ndst, dst, xdst))
                                raise SkipDestFile
                        # Copied, not similar - search for other sources
                        raise StopIteration
                try:
                    for_all_parents(dst, check_moved_parent)
                except SkipDestFile:
                    continue
                except StopIteration:
                    pass
                self.info(2, (" [%04d/%04d] Looking for possible source " +
                    "for `%s'") % (idx, ndst, dst))
                bestsrc = None
                # Won't even consider those lower than threshold
                bestsim = thresholds[nk]
                for src in sorted(wlst.keys(),
                        key=lambda x: name_similarity(x, dst)):
                    sim = self.similar(src, dst, bestsim, to_be_removed)
                    if sim > bestsim:
                        self.info(3, " [similarity %4d] %s" % (sim, src))
                        bestsim = sim
                        bestsrc = src
                    if bestsim == 1000:
                        # No chance we're finding anything better
                        break
                if bestsrc is not None:
                    wlst[bestsrc].append(dst)
                    ilst_map[dst] = bestsrc

            # Discovered all copies/moves, now record them.
            new_wlst = {}
            for src in sorted(wlst.keys(), key=filename_sort_key):
                dlist = wlst[src]
                if len(dlist) == 0:
                    continue
                if len(dlist) == 1:
                    ops.append(("mv", src, dlist[0]))
                    to_be_removed.append(src)
                else:
                    # We don't remove the source here, it will be done when
                    # the changes are applied (it will remove all the WC files
                    # not found in imported sources). Avoiding removal here
                    # simplifies operation sorting below, since we would not
                    # be concerned with source file/dir disappearing before
                    # it is copied to its destination.
                    to_be_removed.append(src)
                    for d in dlist:
                        ops.append(("cp", src, d))
                # If we copied something - recheck parent source directories.
                # Since some source file/dir was scheduled to be removed,
                # this may have increased the similarity to some destination.
                def recheck_parent(x):
                    # NOTE(review): `len(wlst) == 0' can never be true when
                    # `x in wlst' holds, so new_wlst always stays empty and
                    # the outer loop runs exactly once. `len(wlst[x]) == 0'
                    # was presumably intended; left as is because changing it
                    # alters the detection results - confirm before fixing.
                    if x in wlst and len(wlst) == 0:
                        new_wlst[x] = []
                for_all_parents(src, recheck_parent)

            # At this point, if we're going to have the next iteration, we
            # are only concerned about directories (by the way new_wlst is
            # created above). So, filter out all files from ilst as well.
            wlst = new_wlst
            ilst = list(filter(lambda t: t[0] == 'D', ilst))

        # Finished collecting the operations - now can post-process and
        # apply them. First, sort copies/moves by destination (so that
        # parent directories are created before files/subdirs are
        # copied/renamed inside)
        ops = sorted(ops, key=lambda op: filename_sort_key(op[2]))
        for i, op_tuple in enumerate(ops):
            # For each operation, go over its precedents to see if the source
            # has been renamed. If it is, find out new name.
            op = op_tuple[0]
            src = get_renamed_name(op_tuple[1], reversed(ops[:i]))
            if src != op_tuple[2]:
                # Unless it became the same file after renames
                try:
                    # Try to remove the destination, if it existed
                    self.remove(op_tuple[2])
                except InvalidUsageException:
                    # Okay, it didn't exist
                    pass
                self.handle_op((op, src, op_tuple[2]))

    def do_detect(self, arg):
        '''
        detect : auto-detect possible moves (where source/destination name
                 is unique). If not all moves are applicable, save move list,
                 edit and load.
        '''
        self.parse_args(arg, 0, "detect")
        self.detect({ "D": self.config.get('dir-similarity'),
            "F": self.config.get('file-similarity')})

    def do_apply(self, arg):
        '''
        apply : Perform copies/renames; then copy imported sources into
                the working copy. Modifies working copy. Exits after
                completion.
        '''
        self.info(1, "Copying imported sources into working copy...")
        # Perform the recorded copies/moves/removals
        self.info(2, " Preparatory operations (copies/renames/removals)")
        to_be_diffed = []
        for o in self.prepare_ops:
            op = o[0]
            if op == "mv":
                self.run_svn(["mv", "--parents", o[1], o[2]])
                to_be_diffed.append(o[2])
            elif op == "cp":
                self.run_svn(["cp", "--parents", o[1], o[2]])
                to_be_diffed.append(o[2])
            elif op == "rm":
                # --force, as the removed path is likely created as a result
                # of previous copy/rename
                self.run_svn(["rm", "--force", o[1]])
        dirs_added = []
        dirs_removed = []
        files_added = []
        files_removed = []
        self.info(2, " Creating dirs and copying files...")
        # Plain string sort is enough here: a parent path is a prefix of
        # its children and therefore always sorts before them.
        for i in sorted(self.items.keys()):
            e = self.items[i]
            nk_wc = e.state[S_WC]
            nk_im = e.state[S_IM]
            flg = None
            if nk_wc == "-":
                # Absent in working copy
                if nk_im == "D":
                    # Directory added
                    os.mkdir(os.path.join(self.wcdir, i))
                    dirs_added.append(i)
                    flg = "(added dir)"
                elif nk_im == "F":
                    # New file added
                    shutil.copyfile(os.path.join(self.importdir, i),
                            os.path.join(self.wcdir, i))
                    files_added.append(i)
                    flg = "(added file)"
                else:
                    # Not in imported sources, not in WC (moved
                    # away/removed) - nothing to do
                    pass
            elif nk_wc == "F":
                # File in a working copy
                if nk_im == "D":
                    # File replaced with a directory. Removed immediately
                    # for the same reason as in the dir-replaced-with-file
                    # case below.
                    self.run_svn(["rm", "--force", i])
                    os.mkdir(os.path.join(self.wcdir, i))
                    dirs_added.append(i)
                    flg = "(replaced file with dir)"
                elif nk_im == "F":
                    # Was a file, is a file - just copy contents
                    shutil.copyfile(os.path.join(self.importdir, i),
                            os.path.join(self.wcdir, i))
                    flg = "(copied)"
                else:
                    # Was a file, removed
                    files_removed.append(i)
                    flg = "(removed file)"
            elif nk_wc == "D":
                # Directory in a working copy
                if nk_im == "D":
                    # Was a directory, is a directory - nothing to do
                    pass
                elif nk_im == "F":
                    # Directory replaced with file. Need to remove dir
                    # immediately, as bulk removals/additions assume new files
                    # and dirs already in place.
                    self.run_svn(["rm", "--force", i])
                    shutil.copyfile(os.path.join(self.importdir, i),
                            os.path.join(self.wcdir, i))
                    files_added.append(i)
                    flg = "(replaced dir with file)"
                else:
                    # Directory removed
                    dirs_removed.append(i)
                    flg = "(removed dir)"
            if flg is not None:
                self.info(4, " %s %s %s" % (e.status(), i, flg))
        # Filter files/directories removed as a part of parent directory.
        # Order matters: files are filtered against the unfiltered dir list.
        files_removed = [x for x in files_removed
                if os.path.dirname(x) not in dirs_removed]
        dirs_removed = [x for x in dirs_removed
                if os.path.dirname(x) not in dirs_removed]
        files_added = [x for x in files_added
                if os.path.dirname(x) not in dirs_added]
        dirs_added = [x for x in dirs_added
                if os.path.dirname(x) not in dirs_added]
        self.info(2, " Running SVN add/rm commands")
        if len(dirs_added):
            self.run_svn(["add"], dirs_added)
        if len(files_added):
            self.run_svn(["add"], files_added)
        if len(dirs_removed):
            self.run_svn(["rm"], dirs_removed)
        if len(files_removed):
            self.run_svn(["rm"], files_removed)
        # Save the diff for the copied/moved items
        diff_save = self.config.get('save-diff-copied')
        if diff_save is not None:
            self.info(2, " Saving 'svn diff' on copied files/dirs to `%s'" %
                    diff_save)
            to_be_diffed = [x for x in to_be_diffed
                    if os.path.dirname(x) not in to_be_diffed]
            if len(to_be_diffed):
                try:
                    rv, out = self.run_svn(["diff"], to_be_diffed)
                except UnicodeDecodeError:
                    # Some binary files not marked with appropriate MIME type,
                    # or broken text files
                    rv, out = (True, "WARNING: diff contained binary files\n")
            else:
                rv, out = (True, "")
            if rv:
                with open(diff_save, "w") as f:
                    f.write(out)
        # Exiting, as the resulting working copy can no longer be used
        # for move analysis
        self.info(1, "Done. Exiting; please examine the working copy " +
                "and commit.")
        return True

    def do_similarity(self, arg):
        '''
        similarity SRC DST : estimate whether SRC could be potential source
                             for DST (0=no match, 1000=perfect match)
        '''
        src, dst = self.parse_args(arg, 2, "similarity")
        sim = self.similar(src, dst)
        if sim is not None:
            print("Similarity between source `%s' and destination `%s': %4d" %
                    (src, dst, sim))

    def do_set(self, arg):
        '''
        set         : display current settings
        set CFG VAL : set a config variable
        '''
        if arg.strip() == '':
            self.config.print()
        else:
            cfg, val = self.parse_args(arg, 2, "set")
            self.config.set(cfg, val)

    def do_move(self, arg):
        '''
        move SRC DST : Perform a move from source to destination
        '''
        src, dst = self.parse_args(arg, 2, "move")
        self.copy_or_move("mv", src, dst)

    def do_copy(self, arg):
        '''
        copy SRC DST : Perform a copy from source to destination
        '''
        src, dst = self.parse_args(arg, 2, "copy")
        self.copy_or_move("cp", src, dst)

    def do_remove(self, arg):
        '''
        remove PATH : Remove a path
        '''
        path = self.parse_args(arg, 1, "remove")[0]
        self.remove(path)

    def do_lsprep(self, arg):
        '''
        lsprep : List the currently recorded moves/copies/removals
        '''
        self.parse_args(arg, 0, "lsprep")
        colsz = int((self.termwidth - 14) / 2)
        if len(self.prepare_ops):
            print("Currently recorded preparatory operations:")
            print()
            print("%5s %s %-*s %-*s" %
                    ("#", "Op", colsz, "Source", colsz, "Destination"))
            for num, o in enumerate(self.prepare_ops):
                if num % 10 == 0:
                    # Separator line every ten entries
                    print("%5s %s %*s %*s" %
                            ("-"*5, "--", colsz, "-"*colsz, colsz, "-"*colsz))
                if len(o) == 3:
                    print("%5d %s %-*s %-*s" %
                            (num, o[0], colsz, o[1], colsz, o[2]))
                else:
                    print("%5d %s %-*s" % (num, o[0], colsz, o[1]))
            print()
        else:
            print("No copies/moves/removals recorded")
            print()

    def do_save(self, arg):
        '''
        save FILENAME : Save current preparation operations to a file
        '''
        fn = self.parse_args(arg, 1, "save")[0]
        # Width of the path columns, so that the file is easy to review
        longestname = 0
        for o in self.prepare_ops:
            if len(o[1]) > longestname:
                longestname = len(o[1])
            if len(o) == 3 and len(o[2]) > longestname:
                longestname = len(o[2])
        with open(fn, 'w') as f:
            for o in self.prepare_ops:
                if len(o) == 2:
                    f.write("svn %s %-*s\n" %
                            (o[0], longestname, shlex.quote(o[1])))
                else:
                    f.write("svn %s %-*s %-*s\n" %
                            (o[0], longestname, shlex.quote(o[1]),
                                longestname, shlex.quote(o[2])))

    def do_load(self, arg):
        '''
        load FILENAME : Load/append preparation operations from a file
        '''
        fn = self.parse_args(arg, 1, "load")[0]
        self.info(1, "Performing operations from `%s'" % fn)
        with open(fn, 'r') as f:
            for l in f:
                stripped = l.strip()
                if not stripped or stripped.startswith('#'):
                    # Blank lines and comments are allowed in the file
                    continue
                args = shlex.split(l)
                try:
                    if len(args) < 2 or args[0] != 'svn':
                        raise InvalidUsageException(None, "")
                    if not self.handle_op(args[1:]):
                        # Do not silently drop lines we do not understand
                        raise InvalidUsageException(None,
                                "unknown operation or wrong number " +
                                "of arguments")
                except InvalidUsageException as e:
                    # Rethrow
                    raise InvalidUsageException(None,
                            "Invalid line in file: %s(%s)" % (l, e))

    def do_svninfo(self, arg):
        '''
        svninfo : Display SVN info on the working copy (debug)
        '''
        self.parse_args(arg, 0, "svninfo")
        print(str(self.svninfo))

    def do_printlst(self, arg):
        '''
        printlst : Print list of all known files and directories with
                   their status (debug)
        '''
        self.parse_args(arg, 0, "printlst")
        self.items.print()

    def do_help(self, arg):
        '''
        help [COMMAND] : Print the help message
        '''
        cmd.Cmd.do_help(self, arg)

    def do_EOF(self, arg):
        '''
        Quit the script
        '''
        return True

    def do_quit(self, arg):
        '''
        quit : Quit the script
        '''
        return True


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
            description="Prepare a working copy for SVN vendor import.")
    parser.add_argument('wcdir',
            help="Path to working copy (destination of import)")
    parser.add_argument('importdir',
            help="Path to imported sources (source of import)")
    grp = parser.add_mutually_exclusive_group()
    grp.add_argument('--auto', action='store_true',
            help="Automatic mode: detect moves, apply them and copy sources")
    grp.add_argument('--detect', metavar='FILE',
            help="Semi-automatic mode: detect moves and save them to FILE")
    grp.add_argument('--apply', metavar='FILE',
            help="Semi-automatic mode: apply the moves from FILE " +
            "and copy the sources")
    parser.add_argument('--save', metavar='FILE',
            help="Automatic mode: save moves to FILE after detection, " +
            "then proceed to apply the changes")
    parser.add_argument('--config', metavar=('OPT','VALUE'), action='append',
            nargs=2, help="Set configuration option OPT to VALUE")
    args = parser.parse_args()
    # Verify that wcdir really is a working copy before doing anything
    p = subprocess.Popen(args=['svn', 'info', args.wcdir],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    so, se = p.communicate()
    if p.returncode != 0:
        print("%s: does not appear to be SVN working copy." % args.wcdir)
        print("`svn info' exited with status %d and returned:" % p.returncode)
        print()
        print(se.decode())
        sys.exit(1)
    imp = SvnVndImport(args.wcdir, args.importdir, so.decode())
    if args.config:
        try:
            for o, v in args.config:
                imp.config.set(o, v)
        except InvalidUsageException as e:
            parser.error(str(e))
    imp.scan()
    if args.auto:
        imp.onecmd("detect")
        if args.save:
            imp.onecmd("save " + shlex.quote(args.save))
        imp.onecmd("apply")
    elif args.detect:
        imp.onecmd("detect")
        imp.onecmd("save " + shlex.quote(args.detect))
    elif args.apply:
        imp.onecmd("load " + shlex.quote(args.apply))
        imp.onecmd("apply")
    else:
        imp.cmdloop()