"""Tests for video_processor.utils.visualization module."""
|
2
|
|
|
3
|
import pytest |
|
4
|
|
|
5
|
nx = pytest.importorskip("networkx", reason="networkx not installed") |
|
6
|
|
|
7
|
from video_processor.utils.visualization import ( # noqa: E402 |
|
8
|
compute_graph_stats, |
|
9
|
filter_graph, |
|
10
|
generate_mermaid, |
|
11
|
graph_to_d3_json, |
|
12
|
graph_to_dot, |
|
13
|
graph_to_networkx, |
|
14
|
) |
|
15
|
|
|
16
|
|
|
17
|
@pytest.fixture
def sample_kg_data():
    """Return mock knowledge-graph data in the ``to_dict()`` layout.

    The payload contains five entities (two people, one technology, one
    organization, one concept) under ``"nodes"`` and five directed
    relationships under ``"relationships"``.
    """

    def entity(name, kind, description, occurrences=None):
        # Every sample entity uses its name as its id and has one description.
        return {
            "id": name,
            "name": name,
            "type": kind,
            "descriptions": [description],
            "occurrences": [] if occurrences is None else occurrences,
        }

    def relation(source, target, kind, content_source, timestamp):
        return {
            "source": source,
            "target": target,
            "type": kind,
            "content_source": content_source,
            "timestamp": timestamp,
        }

    nodes = [
        entity(
            "Alice",
            "person",
            "Project lead",
            occurrences=[{"source": "transcript_batch_0", "timestamp": 0.0}],
        ),
        entity("Bob", "person", "Developer"),
        entity("Python", "technology", "Programming language"),
        entity("Acme Corp", "organization", "The company"),
        entity("Microservices", "concept", "Architecture pattern"),
    ]
    relationships = [
        relation("Alice", "Python", "uses", "transcript_batch_0", 1.5),
        relation("Bob", "Python", "uses", "transcript_batch_0", 2.0),
        relation("Alice", "Bob", "works_with", "transcript_batch_0", 3.0),
        relation("Alice", "Acme Corp", "employed_by", "transcript_batch_1", 10.0),
        relation("Acme Corp", "Microservices", "adopts", "transcript_batch_1", 12.0),
    ]
    return {"nodes": nodes, "relationships": relationships}
|
96
|
|
|
97
|
|
|
98
|
@pytest.fixture
def sample_graph(sample_kg_data):
    """Graph built by ``graph_to_networkx`` from the sample payload."""
    graph = graph_to_networkx(sample_kg_data)
    return graph
|
102
|
|
|
103
|
|
|
104
|
class TestGraphToNetworkx:
    """Checks for ``graph_to_networkx`` on full, partial and empty payloads."""

    def test_node_count(self, sample_graph):
        """The sample payload yields five nodes."""
        node_total = sample_graph.number_of_nodes()
        assert node_total == 5

    def test_edge_count(self, sample_graph):
        """The sample payload yields five edges."""
        edge_total = sample_graph.number_of_edges()
        assert edge_total == 5

    def test_node_attributes(self, sample_graph):
        """Entity type and descriptions are stored as node attributes."""
        attrs = sample_graph.nodes["Alice"]
        assert attrs["type"] == "person"
        assert attrs["descriptions"] == ["Project lead"]

    def test_edge_attributes(self, sample_graph):
        """Relationship fields are stored as edge attributes."""
        attrs = sample_graph.edges["Alice", "Python"]
        assert attrs["type"] == "uses"
        assert attrs["content_source"] == "transcript_batch_0"
        assert attrs["timestamp"] == 1.5

    def test_empty_data(self):
        """An empty dict produces a graph with no nodes and no edges."""
        graph = graph_to_networkx({})
        assert graph.number_of_nodes() == 0
        assert graph.number_of_edges() == 0

    def test_nodes_only(self):
        """A payload without relationships produces nodes but no edges."""
        payload = {"nodes": [{"name": "X", "type": "concept"}]}
        graph = graph_to_networkx(payload)
        assert graph.number_of_nodes() == 1
        assert graph.number_of_edges() == 0

    def test_skips_empty_names(self):
        """An entity whose name is the empty string is not added."""
        unnamed = {"name": "", "type": "concept"}
        named = {"name": "A"}
        graph = graph_to_networkx({"nodes": [unnamed, named]})
        assert graph.number_of_nodes() == 1

    def test_skips_empty_relationship_endpoints(self):
        """A relationship with an empty source does not create an edge."""
        dangling = {"source": "", "target": "A", "type": "x"}
        payload = {
            "nodes": [{"name": "A"}],
            "relationships": [dangling],
        }
        graph = graph_to_networkx(payload)
        assert graph.number_of_edges() == 0
|
145
|
|
|
146
|
|
|
147
|
class TestComputeGraphStats:
    """Checks for the summary dictionary returned by ``compute_graph_stats``."""

    def test_basic_counts(self, sample_graph):
        """Node and edge counts match the five/five sample graph."""
        stats = compute_graph_stats(sample_graph)
        assert stats["node_count"] == 5
        assert stats["edge_count"] == 5

    def test_density_range(self, sample_graph):
        """Density is reported as a value within [0, 1]."""
        stats = compute_graph_stats(sample_graph)
        assert 0.0 <= stats["density"] <= 1.0

    def test_connected_components(self, sample_graph):
        """The sample graph is reported as a single component."""
        stats = compute_graph_stats(sample_graph)
        assert stats["connected_components"] == 1

    def test_type_breakdown(self, sample_graph):
        """Per-type counts match the sample entities."""
        stats = compute_graph_stats(sample_graph)
        assert stats["type_breakdown"]["person"] == 2
        assert stats["type_breakdown"]["technology"] == 1
        assert stats["type_breakdown"]["organization"] == 1
        assert stats["type_breakdown"]["concept"] == 1

    def test_top_entities(self, sample_graph):
        """At most ten entities are listed and Alice comes first."""
        stats = compute_graph_stats(sample_graph)
        top = stats["top_entities"]
        assert len(top) <= 10
        # In the sample data Alice has three outgoing edges (Python, Bob,
        # Acme Corp) and none incoming, so her in+out degree is 3; every other
        # node has degree 2 or less (e.g. Python: in=2, out=0).
        # NOTE(review): this presumes top_entities is ordered by degree.
        assert top[0]["name"] == "Alice"

    def test_empty_graph(self):
        """An empty graph yields zero counts and no top entities."""
        # ``nx`` is the module-level handle returned by pytest.importorskip,
        # so no local import is needed.
        empty = nx.DiGraph()
        stats = compute_graph_stats(empty)
        assert stats["node_count"] == 0
        assert stats["connected_components"] == 0
        assert stats["top_entities"] == []
|
185
|
|
|
186
|
|
|
187
|
class TestFilterGraph:
    """Checks for ``filter_graph`` with entity-type and degree criteria."""

    def test_filter_by_type(self, sample_graph):
        """Filtering on "person" leaves two nodes, all typed "person"."""
        people = filter_graph(sample_graph, entity_types=["person"])
        assert people.number_of_nodes() == 2
        assert all(attrs["type"] == "person" for _, attrs in people.nodes(data=True))

    def test_filter_by_min_degree(self, sample_graph):
        """Alice survives a minimum-degree threshold of 3."""
        # Alice has degree 3 (3 out-edges), Python has degree 2 (2 in-edges)
        result = filter_graph(sample_graph, min_degree=3)
        assert "Alice" in result.nodes
        assert result.number_of_nodes() >= 1

    def test_filter_combined(self, sample_graph):
        """Type and degree criteria together still yield only persons."""
        result = filter_graph(sample_graph, entity_types=["person"], min_degree=1)
        for name in result.nodes:
            assert result.nodes[name]["type"] == "person"

    def test_filter_no_criteria(self, sample_graph):
        """With no criteria the node count is unchanged."""
        result = filter_graph(sample_graph)
        expected = sample_graph.number_of_nodes()
        assert result.number_of_nodes() == expected

    def test_filter_nonexistent_type(self, sample_graph):
        """An entity type absent from the graph yields no nodes."""
        result = filter_graph(sample_graph, entity_types=["alien"])
        assert result.number_of_nodes() == 0

    def test_filter_preserves_edges(self, sample_graph):
        """Edges between surviving nodes are kept."""
        people = filter_graph(sample_graph, entity_types=["person"])
        # Alice -> Bob connects two persons, so it must still be present.
        assert people.has_edge("Alice", "Bob")

    def test_filter_returns_copy(self, sample_graph):
        """Mutating the filtered graph leaves the input graph untouched."""
        people = filter_graph(sample_graph, entity_types=["person"])
        # Add a node to the result only; the original must not see it.
        people.add_node("NewNode")
        assert "NewNode" not in sample_graph
|
222
|
|
|
223
|
|
|
224
|
class TestGenerateMermaid:
    """Checks for the Mermaid text produced by ``generate_mermaid``."""

    def test_output_starts_with_graph(self, sample_graph):
        """Without a layout argument the output begins with "graph LR"."""
        mermaid = generate_mermaid(sample_graph)
        assert mermaid.startswith("graph LR")

    def test_custom_layout(self, sample_graph):
        """The ``layout`` argument is reflected in the header."""
        mermaid = generate_mermaid(sample_graph, layout="TD")
        assert mermaid.startswith("graph TD")

    def test_contains_nodes(self, sample_graph):
        """Node names appear in the output."""
        mermaid = generate_mermaid(sample_graph)
        assert "Alice" in mermaid
        assert "Python" in mermaid

    def test_contains_edges(self, sample_graph):
        """Relationship types appear in the output."""
        mermaid = generate_mermaid(sample_graph)
        assert "uses" in mermaid

    def test_contains_class_defs(self, sample_graph):
        """Class definitions for entity types are emitted."""
        mermaid = generate_mermaid(sample_graph)
        assert "classDef person" in mermaid
        assert "classDef concept" in mermaid

    def test_max_nodes_limit(self, sample_graph):
        """``max_nodes`` caps the number of node declarations."""
        mermaid = generate_mermaid(sample_graph, max_nodes=2)
        # Should only have top-2 nodes by degree; lines containing '["' are
        # taken to be the node declarations.
        lines = [ln for ln in mermaid.split("\n") if '["' in ln]
        assert len(lines) <= 2

    def test_empty_graph(self):
        """An empty graph still produces the "graph LR" header."""
        # ``nx`` is the module-level handle returned by pytest.importorskip,
        # so no local import is needed.
        G = nx.DiGraph()
        mermaid = generate_mermaid(G)
        assert "graph LR" in mermaid

    def test_sanitizes_special_chars(self):
        """Spaces and slashes are replaced in IDs but kept in labels."""
        G = nx.DiGraph()
        G.add_node("foo bar/baz", type="concept")
        mermaid = generate_mermaid(G)
        # Node ID should be sanitized but label preserved
        assert "foo_bar_baz" in mermaid
        assert "foo bar/baz" in mermaid
|
269
|
|
|
270
|
|
|
271
|
class TestGraphToD3Json:
    """Checks for the node/link dictionary returned by ``graph_to_d3_json``."""

    def test_structure(self, sample_graph):
        """The result has both "nodes" and "links" keys."""
        d3 = graph_to_d3_json(sample_graph)
        assert "nodes" in d3
        assert "links" in d3

    def test_node_format(self, sample_graph):
        """Nodes carry an "id" and a "group" equal to the entity type."""
        d3 = graph_to_d3_json(sample_graph)
        node_ids = {n["id"] for n in d3["nodes"]}
        assert "Alice" in node_ids
        alice = next(n for n in d3["nodes"] if n["id"] == "Alice")
        assert alice["group"] == "person"

    def test_link_format(self, sample_graph):
        """Each of the five links exposes source, target and type."""
        d3 = graph_to_d3_json(sample_graph)
        assert len(d3["links"]) == 5
        link = d3["links"][0]
        assert "source" in link
        assert "target" in link
        assert "type" in link

    def test_empty_graph(self):
        """An empty graph maps to empty node and link lists."""
        # ``nx`` is the module-level handle returned by pytest.importorskip,
        # so no local import is needed.
        G = nx.DiGraph()
        d3 = graph_to_d3_json(G)
        assert d3 == {"nodes": [], "links": []}
|
298
|
|
|
299
|
|
|
300
|
class TestGraphToDot:
    """Checks for the Graphviz DOT text produced by ``graph_to_dot``."""

    def test_starts_with_digraph(self, sample_graph):
        """The output opens with the ``digraph KnowledgeGraph {`` header."""
        dot = graph_to_dot(sample_graph)
        assert dot.startswith("digraph KnowledgeGraph {")

    def test_ends_with_closing_brace(self, sample_graph):
        """Ignoring surrounding whitespace, the output ends with ``}``."""
        dot = graph_to_dot(sample_graph)
        assert dot.strip().endswith("}")

    def test_contains_nodes(self, sample_graph):
        """Node names appear double-quoted in the output."""
        dot = graph_to_dot(sample_graph)
        assert '"Alice"' in dot
        assert '"Python"' in dot

    def test_contains_edges(self, sample_graph):
        """Edges are written as ``"source" -> "target"``."""
        dot = graph_to_dot(sample_graph)
        assert '"Alice" -> "Python"' in dot

    def test_edge_labels(self, sample_graph):
        """The relationship type is emitted as the edge label."""
        dot = graph_to_dot(sample_graph)
        assert 'label="uses"' in dot

    def test_node_colors(self, sample_graph):
        """Person nodes are given the expected fill color."""
        dot = graph_to_dot(sample_graph)
        assert 'fillcolor="#f9d5e5"' in dot  # person color for Alice

    def test_empty_graph(self):
        """An empty graph still produces a digraph declaration."""
        # ``nx`` is the module-level handle returned by pytest.importorskip,
        # so no local import is needed.
        G = nx.DiGraph()
        dot = graph_to_dot(G)
        assert "digraph" in dot

    def test_special_chars_escaped(self):
        """Double quotes inside a node name are backslash-escaped."""
        G = nx.DiGraph()
        G.add_node('He said "hello"', type="person")
        dot = graph_to_dot(G)
        assert 'He said \\"hello\\"' in dot
|
340
|
|