diff options
Diffstat (limited to 'sphinx/versioning.py')
| -rw-r--r-- | sphinx/versioning.py | 128 |
1 files changed, 128 insertions, 0 deletions
diff --git a/sphinx/versioning.py b/sphinx/versioning.py new file mode 100644 index 000000000..f50f80b0d --- /dev/null +++ b/sphinx/versioning.py @@ -0,0 +1,128 @@ +# -*- coding: utf-8 -*- +""" + sphinx.versioning + ~~~~~~~~~~~~~~~~~ + + Implements the low-level algorithms Sphinx uses for the versioning of + doctrees. + + :copyright: Copyright 2007-2011 by the Sphinx team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" +from uuid import uuid4 +from operator import itemgetter +from collections import defaultdict + +from sphinx.util.pycompat import product, zip_longest + + +# anything below that ratio is considered equal/changed +VERSIONING_RATIO = 65 + + +def add_uids(doctree, condition): + """Add a unique id to every node in the `doctree` which matches the + condition and yield the nodes. + + :param doctree: + A :class:`docutils.nodes.document` instance. + + :param condition: + A callable which returns either ``True`` or ``False`` for a given node. + """ + for node in doctree.traverse(condition): + node.uid = uuid4().hex + yield node + + +def merge_doctrees(old, new, condition): + """Merge the `old` doctree with the `new` one while looking at nodes + matching the `condition`. + + Each node which replaces another one or has been added to the `new` doctree + will be yielded. + + :param condition: + A callable which returns either ``True`` or ``False`` for a given node. + """ + old_iter = old.traverse(condition) + new_iter = new.traverse(condition) + old_nodes = [] + new_nodes = [] + ratios = defaultdict(list) + seen = set() + # compare the nodes each doctree in order + for old_node, new_node in zip_longest(old_iter, new_iter): + if old_node is None: + new_nodes.append(new_node) + continue + if new_node is None: + old_nodes.append(old_node) + continue + ratio = get_ratio(old_node.rawsource, new_node.rawsource) + if ratio == 0: + new_node.uid = old_node.uid + seen.add(new_node) + else: + ratios[old_node, new_node] = ratio + old_nodes.append(old_node) + new_nodes.append(new_node) + # calculate the ratios for each unequal pair of nodes, should we stumble + # on a pair which is equal we set the uid and add it to the seen ones + for old_node, new_node in product(old_nodes, new_nodes): + if new_node in seen or (old_node, new_node) in ratios: + continue + ratio = get_ratio(old_node.rawsource, new_node.rawsource) + if ratio == 0: + new_node.uid = old_node.uid + seen.add(new_node) + else: + ratios[old_node, new_node] = ratio + # choose the old node with the best ratio for each new node and set the uid + # as long as the ratio is under a certain value, in which case we consider + # them not changed but different + ratios = sorted(ratios.iteritems(), key=itemgetter(1)) + for (old_node, new_node), ratio in ratios: + if new_node in seen: + continue + else: + seen.add(new_node) + if ratio < VERSIONING_RATIO: + new_node.uid = old_node.uid + else: + new_node.uid = uuid4().hex + yield new_node + # create new uuids for any new node we left out earlier, this happens + # if one or more nodes are simply added. + for new_node in set(new_nodes) - seen: + new_node.uid = uuid4().hex + yield new_node + + +def get_ratio(old, new): + """Return a "similiarity ratio" (in percent) representing the similarity + between the two strings where 0 is equal and anything above less than equal. + """ + if not all([old, new]): + return VERSIONING_RATIO + return levenshtein_distance(old, new) / (len(old) / 100.0) + + +def levenshtein_distance(a, b): + """Return the Levenshtein edit distance between two strings *a* and *b*.""" + if a == b: + return 0 + if len(a) < len(b): + a, b = b, a + if not a: + return len(b) + previous_row = xrange(len(b) + 1) + for i, column1 in enumerate(a): + current_row = [i + 1] + for j, column2 in enumerate(b): + insertions = previous_row[j + 1] + 1 + deletions = current_row[j] + 1 + substitutions = previous_row[j] + (column1 != column2) + current_row.append(min(insertions, deletions, substitutions)) + previous_row = current_row + return previous_row[-1] |
