diff options
Diffstat (limited to 'sphinx/versioning.py')
-rw-r--r-- | sphinx/versioning.py | 148 |
1 files changed, 148 insertions, 0 deletions
diff --git a/sphinx/versioning.py b/sphinx/versioning.py new file mode 100644 index 000000000..d0ea18a77 --- /dev/null +++ b/sphinx/versioning.py @@ -0,0 +1,148 @@ +# -*- coding: utf-8 -*- +""" + sphinx.versioning + ~~~~~~~~~~~~~~~~~ + + Implements the low-level algorithms Sphinx uses for the versioning of + doctrees. + + :copyright: Copyright 2007-2010 by the Sphinx team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" +from uuid import uuid4 +from itertools import product +try: + from itertools import izip_longest as zip_longest +except ImportError: + from itertools import zip_longest +from difflib import SequenceMatcher + +from sphinx.util import PeekableIterator + +def add_uids(doctree, condition): + """ + Adds a unique id to every node in the `doctree` which matches the condition + and yields it. + + :param doctree: + A :class:`docutils.nodes.document` instance. + + :param condition: + A callable which returns either ``True`` or ``False`` for a given node. + """ + for node in doctree.traverse(condition): + node.uid = uuid4().hex + yield node + +def merge_node(old, new): + """ + Merges the `old` node with the `new` one, if it's successful the `new` node + get's the unique identifier of the `new` one and ``True`` is returned. If + the merge is unsuccesful ``False`` is returned. + """ + equals, changed, replaced = make_diff(old.rawsource, + new.rawsource) + if equals or changed: + new.uid = old.uid + return True + return False + +def merge_doctrees(old, new, condition): + """ + Merges the `old` doctree with the `new` one while looking at nodes matching + the `condition`. + + Each node which replaces another one or has been added to the `new` doctree + will be yielded. + + :param condition: + A callable which returns either ``True`` or ``False`` for a given node. + """ + old_iter = PeekableIterator(old.traverse(condition)) + new_iter = PeekableIterator(new.traverse(condition)) + old_nodes = [] + new_nodes = [] + for old_node, new_node in zip_longest(old_iter, new_iter): + if old_node is None: + new_nodes.append(new_node) + continue + if new_node is None: + old_nodes.append(old_node) + continue + if not merge_node(old_node, new_node): + if old_nodes: + for i, very_old_node in enumerate(old_nodes): + if merge_node(very_old_node, new_node): + del old_nodes[i] + # If the last identified node which has not matched the + # unidentified node matches the current one, we have to + # assume that the last unidentified one has been + # inserted. + # + # As the required time multiplies with each insert, we + # want to avoid that by checking if the next + # unidentified node matches the current identified one + # and if so we make a shift. + if i == len(old_nodes): + next_new_node = new_iter.next() + if not merge_node(old_node, next_new_node): + new_iter.push(next_new_node) + break + else: + old_nodes.append(old_node) + new_nodes.append(new_node) + for (i, new_node), (j, old_node) in product(enumerate(new_nodes), + enumerate(old_nodes)): + if merge_node(old_node, new_node): + del new_nodes[i] + del old_nodes[j] + for node in new_nodes: + node.uid = uuid4().hex + # Yielding the new nodes here makes it possible to use this generator + # like add_uids + yield node + +def make_diff(old, new): + """ + Takes two strings `old` and `new` and returns a :class:`tuple` of boolean + values ``(equals, changed, replaced)``. + + equals + + ``True`` if the `old` string and the `new` one are equal. + + changed + + ``True`` if the `new` string is a changed version of the `old` one. + + replaced + + ``True`` if the `new` string and the `old` string are totally + different. + + .. note:: This assumes the two strings are human readable text or at least + something very similar to that, otherwise it can not detect if + the string has been changed or replaced. In any case the + detection should not be considered reliable. + """ + if old == new: + return True, False, False + if new in old or levenshtein_distance(old, new) / (len(old) / 100.0) < 70: + return False, True, False + return False, False, True + +def levenshtein_distance(a, b): + if len(a) < len(b): + a, b = b, a + if not a: + return len(b) + previous_row = xrange(len(b) + 1) + for i, column1 in enumerate(a): + current_row = [i + 1] + for j, column2 in enumerate(b): + insertions = previous_row[j + 1] + 1 + deletions = current_row[j] + 1 + substitutions = previous_row[j] + (column1 != column2) + current_row.append(min(insertions, deletions, substitutions)) + previous_row = current_row + return previous_row[-1] |