summaryrefslogtreecommitdiff
path: root/lib/sqlalchemy/util/topological.py
blob: 5f516d67e4e4f35c6ad3ba1b505f5278bf7f560b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# util/topological.py
# Copyright (C) 2005-2018 the SQLAlchemy authors and contributors
# <see AUTHORS file>
#
# This module is part of SQLAlchemy and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php

"""Topological sorting algorithms."""

from ..exc import CircularDependencyError
from .. import util

__all__ = ['sort', 'sort_as_subsets', 'find_cycles']


def sort_as_subsets(tuples, allitems, deterministic_order=False):

    edges = util.defaultdict(set)
    for parent, child in tuples:
        edges[child].add(parent)

    Set = util.OrderedSet if deterministic_order else set

    todo = Set(allitems)

    while todo:
        output = Set()
        for node in todo:
            if todo.isdisjoint(edges[node]):
                output.add(node)

        if not output:
            raise CircularDependencyError(
                "Circular dependency detected.",
                find_cycles(tuples, allitems),
                _gen_edges(edges)
            )

        todo.difference_update(output)
        yield output


def sort(tuples, allitems, deterministic_order=False):
    """sort the given list of items by dependency.

    'tuples' is a list of tuples representing a partial ordering.
    'deterministic_order' keeps items within a dependency tier in list order.
    """

    for set_ in sort_as_subsets(tuples, allitems, deterministic_order):
        for s in set_:
            yield s


def find_cycles(tuples, allitems):
    # adapted from:
    # http://neopythonic.blogspot.com/2009/01/detecting-cycles-in-directed-graph.html

    edges = util.defaultdict(set)
    for parent, child in tuples:
        edges[parent].add(child)
    nodes_to_test = set(edges)

    output = set()

    # we'd like to find all nodes that are
    # involved in cycles, so we do the full
    # pass through the whole thing for each
    # node in the original list.

    # we can go just through parent edge nodes.
    # if a node is only a child and never a parent,
    # by definition it can't be part of a cycle.  same
    # if it's not in the edges at all.
    for node in nodes_to_test:
        stack = [node]
        todo = nodes_to_test.difference(stack)
        while stack:
            top = stack[-1]
            for node in edges[top]:
                if node in stack:
                    cyc = stack[stack.index(node):]
                    todo.difference_update(cyc)
                    output.update(cyc)

                if node in todo:
                    stack.append(node)
                    todo.remove(node)
                    break
            else:
                node = stack.pop()
    return output


def _gen_edges(edges):
    return set([
        (right, left)
        for left in edges
        for right in edges[left]
    ])