# test/test_graph/test_diff.py

from dataclasses import dataclass, field
from test.utils import (
    COLLAPSED_BNODE,
    BNodeHandling,
    GHQuad,
    GHTriple,
    GraphHelper,
    MarksType,
    MarkType,
)
from typing import TYPE_CHECKING, Collection, Set, Tuple, Type, Union, cast

import pytest
from _pytest.mark.structures import ParameterSet

import rdflib
from rdflib import Graph
from rdflib.compare import graph_diff
from rdflib.graph import ConjunctiveGraph, Dataset
from rdflib.namespace import FOAF, RDF, Namespace
from rdflib.term import BNode, Literal

if TYPE_CHECKING:
    from rdflib.graph import _TripleType

"""Test for graph_diff - much more extensive testing
would certainly be possible"""

# Set of triples. The element type is written as a string (forward reference)
# because `_TripleType` is only imported under `TYPE_CHECKING` and therefore
# does not exist at runtime.
_TripleSetType = Set["_TripleType"]


class TestDiff:
    """Smoke and regression tests for ``graph_diff``.

    ``test_a`` and ``test_b`` diff graphs that contain a non-ASCII literal
    (issue 151); ``test_subsets`` diffs a graph against a superset of itself.
    """

    def test_a(self):
        """Diff a graph with itself when the subject is a blank node."""
        graph = rdflib.Graph()
        subject = rdflib.BNode()
        predicate = rdflib.URIRef("urn:p")
        graph.add((subject, predicate, rdflib.Literal("\xe9")))

        # No assertion: the test only requires that the call does not raise.
        graph_diff(graph, graph)

    def test_b(self):
        """Diff a graph with itself when the subject is a URI.

        Curiously, this variant passed even before the fix for issue 151.
        """
        graph = rdflib.Graph()
        subject = rdflib.URIRef("urn:a")
        predicate = rdflib.URIRef("urn:p")
        graph.add((subject, predicate, rdflib.Literal("\xe9")))

        # No assertion: the test only requires that the call does not raise.
        graph_diff(graph, graph)

    @pytest.mark.xfail()
    def test_subsets(self) -> None:
        """
        Check the result of `graph_diff` for two graphs, `g0` and `g1`,
        where the triples of `g0` are a subset of the triples of `g1`.

        Expected outcome:

        * nothing is reported as being only in `g0`;
        * some triples are reported as being in both graphs;
        * some triples are reported as being only in `g1`.
        """
        # First person: described by the triples shared by both graphs.
        first_person = BNode()
        subset_triples: _TripleSetType = {
            (first_person, FOAF.name, Literal("Golan Trevize")),
            (first_person, RDF.type, FOAF.Person),
        }
        g0 = Graph()
        g0 += subset_triples

        # Second person: described by triples that exist in `g1` only.
        second_person = BNode()
        superset_triples: _TripleSetType = {
            *subset_triples,
            (second_person, FOAF.name, Literal("Janov Pelorat")),
            (second_person, RDF.type, FOAF.Person),
        }
        g1 = Graph()
        g1 += superset_triples

        in_both, in_first, in_second = GraphHelper.triple_sets(graph_diff(g0, g1))
        assert in_first == set()
        assert len(in_second) > 0
        assert len(in_both) > 0


# A collection of `GHTriple` elements or a collection of `GHQuad` elements.
_ElementSetType = Union[Collection[GHTriple], Collection[GHQuad]]

# Either an element set, or a string holding serialized graph data that
# `GraphDiffCase.as_element_set` parses into an element set.
_ElementSetTypeOrStr = Union[_ElementSetType, str]


@dataclass
class GraphDiffCase:
    """A single parametrized scenario for testing ``graph_diff``."""

    # Graph class that is instantiated whenever serialized data is parsed.
    graph_type: Type[Graph]
    # Format name passed to ``parse`` for every serialized string of the case.
    format: str
    # Serialized content of the left-hand and right-hand graphs.
    lhs: str
    rhs: str
    # Expected ``(in both, only in lhs, only in rhs)`` parts; each part is
    # either a ready-made element set or serialized data in ``format``.
    expected_result: Tuple[
        _ElementSetTypeOrStr, _ElementSetTypeOrStr, _ElementSetTypeOrStr
    ]
    # Pytest marks that ``as_params`` attaches to the generated parameter.
    marks: MarkType = field(default_factory=lambda: cast(MarksType, []))

    def as_element_set(self, value: _ElementSetTypeOrStr) -> _ElementSetType:
        """Return ``value`` as an element set.

        A non-string value is returned unchanged. A string is parsed with
        ``graph_type`` and ``format``; the result is a quad set when the
        parsed graph is a ``ConjunctiveGraph`` and a triple set otherwise,
        in both cases built with ``BNodeHandling.COLLAPSE``.
        """
        if not isinstance(value, str):
            return value

        parsed = self.graph_type()
        parsed.parse(data=value, format=self.format)
        if isinstance(parsed, ConjunctiveGraph):
            return GraphHelper.quad_set(parsed, BNodeHandling.COLLAPSE)
        return GraphHelper.triple_set(parsed, BNodeHandling.COLLAPSE)

    def expected_in_both_set(self) -> _ElementSetType:
        """Return the elements expected to be in both graphs."""
        in_both, _, _ = self.expected_result
        return self.as_element_set(in_both)

    def expected_in_lhs_set(self) -> _ElementSetType:
        """Return the elements expected to be only in the left-hand graph."""
        _, in_lhs, _ = self.expected_result
        return self.as_element_set(in_lhs)

    def expected_in_rhs_set(self) -> _ElementSetType:
        """Return the elements expected to be only in the right-hand graph."""
        _, _, in_rhs = self.expected_result
        return self.as_element_set(in_rhs)

    def as_params(self) -> ParameterSet:
        """Wrap this case in a ``pytest.param`` that carries ``marks``."""
        return pytest.param(self, marks=self.marks)


# Namespace for the `example:` IRI scheme, i.e. the same prefix that the
# Turtle/TriG fixtures in this module bind to `eg:`.
EGSCHEME = Namespace("example:")


@pytest.mark.parametrize(
    "test_case",
    [
        GraphDiffCase(
            Graph,
            format="turtle",
            lhs="""
            @prefix eg: <example:> .
            _:a _:b _:c .
            eg:o0 eg:p0 eg:s0 .
            eg:o1 eg:p1 eg:s1 .
            """,
            rhs="""
            @prefix eg: <example:> .
            eg:o0 eg:p0 eg:s0 .
            eg:o1 eg:p1 eg:s1 .
            """,
            expected_result=(
                """
            @prefix eg: <example:> .
            eg:o0 eg:p0 eg:s0 .
            eg:o1 eg:p1 eg:s1 .
            """,
                {(COLLAPSED_BNODE, COLLAPSED_BNODE, COLLAPSED_BNODE)},
                "",
            ),
        ).as_params(),
        GraphDiffCase(
            Graph,
            format="turtle",
            lhs="""
            @prefix eg: <example:> .
            eg:o0 eg:p0 eg:s0 .
            eg:o1 eg:p1 eg:s1 .
            """,
            rhs="""
            @prefix eg: <example:> .
            eg:o0 eg:p0 eg:s0 .
            eg:o1 eg:p1 eg:s1 .
            """,
            expected_result=(
                """
            @prefix eg: <example:> .
            eg:o0 eg:p0 eg:s0 .
            eg:o1 eg:p1 eg:s1 .
            """,
                "",
                "",
            ),
        ).as_params(),
        GraphDiffCase(
            Dataset,
            format="trig",
            lhs="""
            @prefix eg: <example:> .
            eg:o0 eg:p0 eg:s0 .
            eg:o1 eg:p1 eg:s1 .
            """,
            rhs="""
            @prefix eg: <example:> .
            eg:o0 eg:p0 eg:s0 .
            eg:o1 eg:p1 eg:s1 .
            """,
            expected_result=(
                """
            @prefix eg: <example:> .
            eg:o0 eg:p0 eg:s0 .
            eg:o1 eg:p1 eg:s1 .
            """,
                "",
                "",
            ),
            marks=pytest.mark.xfail(
                reason="quads are not supported", raises=ValueError
            ),
        ).as_params(),
    ],
)
def test_assert_sets_equal(test_case: GraphDiffCase):
    """
    Despite its name, this test exercises `graph_diff`.

    Both inputs of the case are parsed and diffed, and the three returned
    graphs ("in both", "only in lhs", "only in rhs") are compared with the
    case's expectations after conversion with `BNodeHandling.COLLAPSE`.

    The diff is then repeated with the arguments swapped: the shared part
    must be unchanged and the two "only in" parts must trade places.

    :param test_case: Inputs, expected diff parts and pytest marks.
    """
    lhs_graph: Graph = test_case.graph_type()
    lhs_graph.parse(data=test_case.lhs, format=test_case.format)

    rhs_graph: Graph = test_case.graph_type()
    rhs_graph.parse(data=test_case.rhs, format=test_case.format)

    in_both_set, in_lhs_set, in_rhs_set = (
        GraphHelper.triple_or_quad_set(part, BNodeHandling.COLLAPSE)
        for part in graph_diff(lhs_graph, rhs_graph)
    )

    # Resolve each expectation once and reuse it for both directions. This is
    # done only after the actual diff has been computed, so that the order in
    # which a failing case raises is the same as when the expectations were
    # resolved inside the assertions.
    expected_in_both = test_case.expected_in_both_set()
    expected_in_lhs = test_case.expected_in_lhs_set()
    expected_in_rhs = test_case.expected_in_rhs_set()

    assert expected_in_both == in_both_set
    assert expected_in_lhs == in_lhs_set
    assert expected_in_rhs == in_rhs_set

    # The diff should be symmetric: with the arguments swapped, the shared
    # part stays the same and the lhs-only/rhs-only parts swap roles.
    in_rboth_set, in_rlhs_set, in_rrhs_set = (
        GraphHelper.triple_or_quad_set(part, BNodeHandling.COLLAPSE)
        for part in graph_diff(rhs_graph, lhs_graph)
    )

    assert expected_in_both == in_rboth_set
    assert expected_in_rhs == in_rlhs_set
    assert expected_in_lhs == in_rrhs_set