summaryrefslogtreecommitdiff
path: root/sphinx/versioning.py
diff options
context:
space:
mode:
Diffstat (limited to 'sphinx/versioning.py')
-rw-r--r--sphinx/versioning.py128
1 files changed, 128 insertions, 0 deletions
diff --git a/sphinx/versioning.py b/sphinx/versioning.py
new file mode 100644
index 000000000..f50f80b0d
--- /dev/null
+++ b/sphinx/versioning.py
@@ -0,0 +1,128 @@
+# -*- coding: utf-8 -*-
+"""
+ sphinx.versioning
+ ~~~~~~~~~~~~~~~~~
+
+ Implements the low-level algorithms Sphinx uses for the versioning of
+ doctrees.
+
+ :copyright: Copyright 2007-2011 by the Sphinx team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+from uuid import uuid4
+from operator import itemgetter
+from collections import defaultdict
+
+from sphinx.util.pycompat import product, zip_longest
+
+
+# anything below that ratio is considered equal/changed
+VERSIONING_RATIO = 65
+
+
+def add_uids(doctree, condition):
+ """Add a unique id to every node in the `doctree` which matches the
+ condition and yield the nodes.
+
+ :param doctree:
+ A :class:`docutils.nodes.document` instance.
+
+ :param condition:
+ A callable which returns either ``True`` or ``False`` for a given node.
+ """
+ for node in doctree.traverse(condition):
+ node.uid = uuid4().hex
+ yield node
+
+
+def merge_doctrees(old, new, condition):
+ """Merge the `old` doctree with the `new` one while looking at nodes
+ matching the `condition`.
+
+ Each node which replaces another one or has been added to the `new` doctree
+ will be yielded.
+
+ :param condition:
+ A callable which returns either ``True`` or ``False`` for a given node.
+ """
+ old_iter = old.traverse(condition)
+ new_iter = new.traverse(condition)
+ old_nodes = []
+ new_nodes = []
+ ratios = defaultdict(list)
+ seen = set()
+ # compare the nodes each doctree in order
+ for old_node, new_node in zip_longest(old_iter, new_iter):
+ if old_node is None:
+ new_nodes.append(new_node)
+ continue
+ if new_node is None:
+ old_nodes.append(old_node)
+ continue
+ ratio = get_ratio(old_node.rawsource, new_node.rawsource)
+ if ratio == 0:
+ new_node.uid = old_node.uid
+ seen.add(new_node)
+ else:
+ ratios[old_node, new_node] = ratio
+ old_nodes.append(old_node)
+ new_nodes.append(new_node)
+ # calculate the ratios for each unequal pair of nodes, should we stumble
+ # on a pair which is equal we set the uid and add it to the seen ones
+ for old_node, new_node in product(old_nodes, new_nodes):
+ if new_node in seen or (old_node, new_node) in ratios:
+ continue
+ ratio = get_ratio(old_node.rawsource, new_node.rawsource)
+ if ratio == 0:
+ new_node.uid = old_node.uid
+ seen.add(new_node)
+ else:
+ ratios[old_node, new_node] = ratio
+ # choose the old node with the best ratio for each new node and set the uid
+ # as long as the ratio is under a certain value, in which case we consider
+ # them not changed but different
+ ratios = sorted(ratios.iteritems(), key=itemgetter(1))
+ for (old_node, new_node), ratio in ratios:
+ if new_node in seen:
+ continue
+ else:
+ seen.add(new_node)
+ if ratio < VERSIONING_RATIO:
+ new_node.uid = old_node.uid
+ else:
+ new_node.uid = uuid4().hex
+ yield new_node
+ # create new uuids for any new node we left out earlier, this happens
+ # if one or more nodes are simply added.
+ for new_node in set(new_nodes) - seen:
+ new_node.uid = uuid4().hex
+ yield new_node
+
+
+def get_ratio(old, new):
+ """Return a "similiarity ratio" (in percent) representing the similarity
+ between the two strings where 0 is equal and anything above less than equal.
+ """
+ if not all([old, new]):
+ return VERSIONING_RATIO
+ return levenshtein_distance(old, new) / (len(old) / 100.0)
+
+
+def levenshtein_distance(a, b):
+ """Return the Levenshtein edit distance between two strings *a* and *b*."""
+ if a == b:
+ return 0
+ if len(a) < len(b):
+ a, b = b, a
+ if not a:
+ return len(b)
+ previous_row = xrange(len(b) + 1)
+ for i, column1 in enumerate(a):
+ current_row = [i + 1]
+ for j, column2 in enumerate(b):
+ insertions = previous_row[j + 1] + 1
+ deletions = current_row[j] + 1
+ substitutions = previous_row[j] + (column1 != column2)
+ current_row.append(min(insertions, deletions, substitutions))
+ previous_row = current_row
+ return previous_row[-1]