1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
|
""" This module provides the functions for node classification problem.
The functions in this module are not imported
into the top level `networkx` namespace.
You can access these functions by importing
the `networkx.algorithms.node_classification` modules,
then accessing the functions as attributes of `node_classification`.
For example:
>>> from networkx.algorithms import node_classification
>>> G = nx.path_graph(4)
>>> G.edges()
EdgeView([(0, 1), (1, 2), (2, 3)])
>>> G.nodes[0]["label"] = "A"
>>> G.nodes[3]["label"] = "B"
>>> node_classification.harmonic_function(G)
['A', 'A', 'B', 'B']
References
----------
Zhu, X., Ghahramani, Z., & Lafferty, J. (2003, August).
Semi-supervised learning using gaussian fields and harmonic functions.
In ICML (Vol. 3, pp. 912-919).
"""
import networkx as nx
__all__ = ["harmonic_function", "local_and_global_consistency"]
@nx.utils.not_implemented_for("directed")
def harmonic_function(G, max_iter=30, label_name="label"):
"""Node classification by Harmonic function
Function for computing Harmonic function algorithm by Zhu et al.
Parameters
----------
G : NetworkX Graph
max_iter : int
maximum number of iterations allowed
label_name : string
name of target labels to predict
Returns
-------
predicted : list
List of length ``len(G)`` with the predicted labels for each node.
Raises
------
NetworkXError
If no nodes in `G` have attribute `label_name`.
Examples
--------
>>> from networkx.algorithms import node_classification
>>> G = nx.path_graph(4)
>>> G.nodes[0]["label"] = "A"
>>> G.nodes[3]["label"] = "B"
>>> G.nodes(data=True)
NodeDataView({0: {'label': 'A'}, 1: {}, 2: {}, 3: {'label': 'B'}})
>>> G.edges()
EdgeView([(0, 1), (1, 2), (2, 3)])
>>> predicted = node_classification.harmonic_function(G)
>>> predicted
['A', 'A', 'B', 'B']
References
----------
Zhu, X., Ghahramani, Z., & Lafferty, J. (2003, August).
Semi-supervised learning using gaussian fields and harmonic functions.
In ICML (Vol. 3, pp. 912-919).
"""
import numpy as np
import scipy as sp
import scipy.sparse # call as sp.sparse
X = nx.to_scipy_sparse_array(G) # adjacency matrix
labels, label_dict = _get_label_info(G, label_name)
if labels.shape[0] == 0:
raise nx.NetworkXError(
f"No node on the input graph is labeled by '{label_name}'."
)
n_samples = X.shape[0]
n_classes = label_dict.shape[0]
F = np.zeros((n_samples, n_classes))
# Build propagation matrix
degrees = X.sum(axis=0)
degrees[degrees == 0] = 1 # Avoid division by 0
# TODO: csr_array
D = sp.sparse.csr_array(sp.sparse.diags((1.0 / degrees), offsets=0))
P = (D @ X).tolil()
P[labels[:, 0]] = 0 # labels[:, 0] indicates IDs of labeled nodes
# Build base matrix
B = np.zeros((n_samples, n_classes))
B[labels[:, 0], labels[:, 1]] = 1
for _ in range(max_iter):
F = (P @ F) + B
return label_dict[np.argmax(F, axis=1)].tolist()
@nx.utils.not_implemented_for("directed")
def local_and_global_consistency(G, alpha=0.99, max_iter=30, label_name="label"):
"""Node classification by Local and Global Consistency
Function for computing Local and global consistency algorithm by Zhou et al.
Parameters
----------
G : NetworkX Graph
alpha : float
Clamping factor
max_iter : int
Maximum number of iterations allowed
label_name : string
Name of target labels to predict
Returns
-------
predicted : list
List of length ``len(G)`` with the predicted labels for each node.
Raises
------
NetworkXError
If no nodes in `G` have attribute `label_name`.
Examples
--------
>>> from networkx.algorithms import node_classification
>>> G = nx.path_graph(4)
>>> G.nodes[0]["label"] = "A"
>>> G.nodes[3]["label"] = "B"
>>> G.nodes(data=True)
NodeDataView({0: {'label': 'A'}, 1: {}, 2: {}, 3: {'label': 'B'}})
>>> G.edges()
EdgeView([(0, 1), (1, 2), (2, 3)])
>>> predicted = node_classification.local_and_global_consistency(G)
>>> predicted
['A', 'A', 'B', 'B']
References
----------
Zhou, D., Bousquet, O., Lal, T. N., Weston, J., & Schölkopf, B. (2004).
Learning with local and global consistency.
Advances in neural information processing systems, 16(16), 321-328.
"""
import numpy as np
import scipy as sp
import scipy.sparse # call as sp.sparse
X = nx.to_scipy_sparse_array(G) # adjacency matrix
labels, label_dict = _get_label_info(G, label_name)
if labels.shape[0] == 0:
raise nx.NetworkXError(
f"No node on the input graph is labeled by '{label_name}'."
)
n_samples = X.shape[0]
n_classes = label_dict.shape[0]
F = np.zeros((n_samples, n_classes))
# Build propagation matrix
degrees = X.sum(axis=0)
degrees[degrees == 0] = 1 # Avoid division by 0
# TODO: csr_array
D2 = np.sqrt(sp.sparse.csr_array(sp.sparse.diags((1.0 / degrees), offsets=0)))
P = alpha * ((D2 @ X) @ D2)
# Build base matrix
B = np.zeros((n_samples, n_classes))
B[labels[:, 0], labels[:, 1]] = 1 - alpha
for _ in range(max_iter):
F = (P @ F) + B
return label_dict[np.argmax(F, axis=1)].tolist()
def _get_label_info(G, label_name):
"""Get and return information of labels from the input graph
Parameters
----------
G : Network X graph
label_name : string
Name of the target label
Returns
----------
labels : numpy array, shape = [n_labeled_samples, 2]
Array of pairs of labeled node ID and label ID
label_dict : numpy array, shape = [n_classes]
Array of labels
i-th element contains the label corresponding label ID `i`
"""
import numpy as np
labels = []
label_to_id = {}
lid = 0
for i, n in enumerate(G.nodes(data=True)):
if label_name in n[1]:
label = n[1][label_name]
if label not in label_to_id:
label_to_id[label] = lid
lid += 1
labels.append([i, label_to_id[label]])
labels = np.array(labels)
label_dict = np.array(
[label for label, _ in sorted(label_to_id.items(), key=lambda x: x[1])]
)
return (labels, label_dict)
|