PlanOpticon

feat(viz): implement visualization.py with NetworkX graph utilities

- graph_to_networkx, compute_graph_stats, filter_graph
- generate_mermaid, graph_to_d3_json, graph_to_dot exports
- Tests for all visualization functions

lmata 2026-03-07 22:09 trunk
Commit c169ccfa8df51038bb3ac8dd3f62b6a3855a2dd115e4db8f31ab95e8b81505b2
--- a/tests/test_visualization.py
+++ b/tests/test_visualization.py
@@ -0,0 +1,281 @@
1
+"""Tests for video_processor.utils.visualization module."""
2
+
3
+import pytest
4
+
5
+from video_processor.utils.visualization import (
6
+ ple_graph):
7
+ 402
8
+ compute_graph_stats,
9
+ filter_graph,
10
+ generate_mermaid,
11
+ graph_to_d3_json,
12
+ graph_to_dot,
13
+ graph_to_networkx,
14
+)
15
+
16
+
17
+@pytest.fixture
18
+def sample_kg_data():
19
+ """Mock knowledge graph data matching to_dict() format."""
20
+ return {
21
+ "nodes": [
22
+ {
23
+ "id": "Alice",
24
+ "name": "Alice",
25
+ "type": "person",
26
+ "descriptions": ["Project lead"],
27
+ "occurrences": [{"source": "transcript_batch_0", "timestamp": 0.0}],
28
+ },
29
+ {
30
+ "id": "Bob",
31
+ "name": "Bob",
32
+ "type": "person",
33
+ "descriptions": ["Developer"],
34
+ "occurrences": [],
35
+ },
36
+ {
37
+ "id": "Python",
38
+ "name": "Python",
39
+ "type": "technology",
40
+ "descriptions": ["Programming language"],
41
+ "occurrences": [],
42
+ },
43
+ {
44
+ "id": "Acme Corp",
45
+ "name": "Acme Corp",
46
+ "type": "organization",
47
+ "descriptions": ["The company"],
48
+ "occurrences": [],
49
+ },
50
+ {
51
+ "id": "Microservices",
52
+ "name": "Microservices",
53
+ "type": "concept",
54
+ "descriptions": ["Architecture pattern"],
55
+ "occurrences": [],
56
+ },
57
+ ],
58
+ "relationships": [
59
+ {
60
+ "source": "Alice",
61
+ "target": "Python",
62
+ "type": "uses",
63
+ "content_source": "transcript_batch_0",
64
+ "timestamp": 1.5,
65
+ },
66
+ {
67
+ "source": "Bob",
68
+ "target": "Python",
69
+ "type": "uses",
70
+ "content_source": "transcript_batch_0",
71
+ "timestamp": 2.0,
72
+ },
73
+ {
74
+ "source": "Alice",
75
+ "target": "Bob",
76
+ "type": "works_with",
77
+ "content_source": "transcript_batch_0",
78
+ "timestamp": 3.0,
79
+ },
80
+ {
81
+ "source": "Alice",
82
+ "target": "Acme Corp",
83
+ "type": "employed_by",
84
+ "content_source": "transcript_batch_1",
85
+ "timestamp": 10.0,
86
+ },
87
+ {
88
+ "source": "Acme Corp",
89
+ "target": "Microservices",
90
+ "type": "adopts",
91
+ "content_source": "transcript_batch_1",
92
+ "timestamp": 12.0,
93
+ },
94
+ ],
95
+ }
96
+
97
+
98
+@pytest.fixture
99
+def sample_graph(sample_kg_data):
100
+ """Pre-built NetworkX graph from sample data."""
101
+ return graph_to_networkx(sample_kg_data)
102
+
103
+
104
+class TestGraphToNetworkx:
105
+ def test_node_count(self, sample_graph):
106
+ assert sample_graph.number_of_nodes() == 5
107
+
108
+ def test_edge_count(self, sample_graph):
109
+ assert sample_graph.number_of_edges() == 5
110
+
111
+ def test_node_attributes(self, sample_graph):
112
+ alice = sample_graph.nodes["Alice"]
113
+ assert alice["type"] == "person"
114
+ assert alice["descriptions"] == ["Project lead"]
115
+
116
+ def test_edge_attributes(self, sample_graph):
117
+ edge = sample_graph.edges["Alice", "Python"]
118
+ assert edge["type"] == "uses"
119
+ assert edge["content_source"] == "transcript_batch_0"
120
+ assert edge["timestamp"] == 1.5
121
+
122
+ def test_empty_data(self):
123
+ G = graph_to_networkx({})
124
+ assert G.number_of_nodes() == 0
125
+ assert G.number_of_edges() == 0
126
+
127
+ def test_nodes_only(self):
128
+ data = {"nodes": [{"name": "X", "type": "concept"}]}
129
+ G = graph_to_networkx(data)
130
+ assert G.number_of_nodes() == 1
131
+ assert G.number_of_edges() == 0
132
+
133
+ def test_skips_empty_names(self):
134
+ data = {"nodes": [{"name": "", "type": "concept"}, {"name": "A"}]}
135
+ G = graph_to_networkx(data)
136
+ assert G.number_of_nodes() == 1
137
+
138
+ def test_skips_empty_relationship_endpoints(self):
139
+ data = {
140
+ "nodes": [{"name": "A"}],
141
+ "relationships": [{"source": "", "target": "A", "type": "x"}],
142
+ }
143
+ G = graph_to_networkx(data)
144
+ assert G.number_of_edges() == 0
145
+
146
+
147
+class TestComputeGraphStats:
148
+ def test_basic_counts(self, sample_graph):
149
+ stats = compute_graph_stats(sample_graph)
150
+ assert stats["node_count"] == 5
151
+ assert stats["edge_count"] == 5
152
+
153
+ def test_density_range(self, sample_graph):
154
+ stats = compute_graph_stats(sample_graph)
155
+ assert 0.0 <= stats["density"] <= 1.0
156
+
157
+ def test_connected_components(self, sample_graph):
158
+ stats = compute_graph_stats(sample_graph)
159
+ assert stats["connected_components"] == 1
160
+
161
+ def test_type_breakdown(self, sample_graph):
162
+ stats = compute_graph_stats(sample_graph)
163
+ assert stats["type_breakdown"]["person"] == 2
164
+ assert stats["type_breakdown"]["technology"] == 1
165
+ assert stats["type_breakdown"]["organization"] == 1
166
+ assert stats["type_breakdown"]["concept"] == 1
167
+
168
+ def test_top_entities(self, sample_graph):
169
+ stats = compute_graph_stats(sample_graph)
170
+ top = stats["top_entities"]
171
+ assert len(top) <= 10
172
+ # In a DiGraph, degree = in-degree + out-degree.
173
+ # Alice: out=3 (Python, Bob, Acme Corp), in=0 => 3, the highest degree in the sample graph.
174
+ # Python: in=2, out=0 => 2
175
+ assert top[0]["name"] == "Alice"
176
+
177
+ def test_empty_graph(self):
178
+ import networkx as nx
179
+
180
+ G = nx.DiGraph()
181
+ stats = compute_graph_stats(G)
182
+ assert stats["node_count"] == 0
183
+ assert stats["connected_components"] == 0
184
+ assert stats["top_entities"] == []
185
+
186
+
187
+class TestFilterGraph:
188
+ def test_filter_by_type(self, sample_graph):
189
+ filtered = filter_graph(sample_graph, entity_types=["person"])
190
+ assert filtered.number_of_nodes() == 2
191
+ for _, data in filtered.nodes(data=True):
192
+ assert data["type"] == "person"
193
+
194
+ def test_filter_by_min_degree(self, sample_graph):
195
+ # Alice has degree 3 (3 out-edges), Python has degree 2 (2 in-edges)
196
+ filtered = filter_graph(sample_graph, min_degree=3)
197
+ assert "Alice" in filtered.nodes
198
+ assert filtered.number_of_nodes() >= 1
199
+
200
+ def test_filter_combined(self, sample_graph):
201
+ filtered = filter_graph(sample_graph, entity_types=["person"], min_degree=1)
202
+ assert all(filtered.nodes[n]["type"] == "person" for n in filtered.nodes)
203
+
204
+ def test_filter_no_criteria(self, sample_graph):
205
+ filtered = filter_graph(sample_graph)
206
+ assert filtered.number_of_nodes() == sample_graph.number_of_nodes()
207
+
208
+ def test_filter_nonexistent_type(self, sample_graph):
209
+ filtered = filter_graph(sample_graph, entity_types=["alien"])
210
+ assert filtered.number_of_nodes() == 0
211
+
212
+ def test_filter_preserves_edges(self, sample_graph):
213
+ filtered = filter_graph(sample_graph, entity_types=["person"])
214
+ # Alice -> Bob edge should be preserved
215
+ assert filtered.has_edge("Alice", "Bob")
216
+
217
+ def test_filter_returns_copy(self, sample_graph):
218
+ filtered = filter_graph(sample_graph, entity_types=["person"])
219
+ # Modifying filtered should not affect original
220
+ filtered.add_node("NewNode")
221
+ assert "NewNode" not in sample_graph
222
+
223
+
224
+class TestGenerateMermaid:
225
+ def test_output_starts_with_graph(self, sample_graph):
226
+ mermaid = generate_mermaid(sample_graph)
227
+ assert mermaid.startswith("graph LR")
228
+
229
+ def test_custom_layout(self, sample_graph):
230
+ mermaid = generate_mermaid(sample_graph, layout="TD")
231
+ assert mermaid.startswith("graph TD")
232
+
233
+ def test_contains_nodes(self, sample_graph):
234
+ mermaid = generate_mermaid(sample_graph)
235
+ assert "Alice" in mermaid
236
+ assert "Python" in mermaid
237
+
238
+ def test_contains_edges(self, sample_graph):
239
+ mermaid = generate_mermaid(sample_graph)
240
+ assert "uses" in mermaid
241
+
242
+ def test_contains_class_defs(self, sample_graph):
243
+ mermaid = generate_mermaid(sample_graph)
244
+ assert "classDef person" in mermaid
245
+ assert "classDef concept" in mermaid
246
+
247
+ def test_max_nodes_limit(self, sample_graph):
248
+ mermaid = generate_mermaid(sample_graph, max_nodes=2)
249
+ # Should only have top-2 nodes by degree
250
+ lines = [ln for ln in mermaid.split("\n") if '["' in ln]
251
+ assert len(lines) <= 2
252
+
253
+ def test_empty_graph(self):
254
+ import networkx as nx
255
+
256
+ G = nx.DiGraph()
257
+ mermaid = generate_mermaid(G)
258
+ assert "graph LR" in mermaid
259
+
260
+ def test_sanitizes_special_chars(self):
261
+ import networkx as nx
262
+
263
+ G = nx.DiGraph()
264
+ G.add_node("foo bar/baz", type="concept")
265
+ mermaid = generate_mermaid(G)
266
+ # Node ID should be sanitized but label preserved
267
+ assert "foo_bar_baz" in mermaid
268
+ assert "foo bar/baz" in mermaid
269
+
270
+
271
+class TestGraphToD3Json:
272
+ def test_structure(self, sample_graph):
273
+ d3 = graph_to_d3_json(sample_graph)
274
+ assert "nodes" in d3
275
+ assert "links" in d3
276
+
277
+ def test_node_format(self, sample_graph):
278
+ d3 = graph_to_d3_json(sample_graph)
279
+ node_ids = {n["id"] for n in d3["nodes"]}
280
+ assert "Alice" in node_ids
281
+ alice = next(n for n in d3["nodes"] i
--- a/tests/test_visualization.py
+++ b/tests/test_visualization.py
@@ -0,0 +1,281 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
--- a/tests/test_visualization.py
+++ b/tests/test_visualization.py
@@ -0,0 +1,281 @@
1 """Tests for video_processor.utils.visualization module."""
2
3 import pytest
4
5 from video_processor.utils.visualization import (
6 ple_graph):
7 402
8 compute_graph_stats,
9 filter_graph,
10 generate_mermaid,
11 graph_to_d3_json,
12 graph_to_dot,
13 graph_to_networkx,
14 )
15
16
17 @pytest.fixture
18 def sample_kg_data():
19 """Mock knowledge graph data matching to_dict() format."""
20 return {
21 "nodes": [
22 {
23 "id": "Alice",
24 "name": "Alice",
25 "type": "person",
26 "descriptions": ["Project lead"],
27 "occurrences": [{"source": "transcript_batch_0", "timestamp": 0.0}],
28 },
29 {
30 "id": "Bob",
31 "name": "Bob",
32 "type": "person",
33 "descriptions": ["Developer"],
34 "occurrences": [],
35 },
36 {
37 "id": "Python",
38 "name": "Python",
39 "type": "technology",
40 "descriptions": ["Programming language"],
41 "occurrences": [],
42 },
43 {
44 "id": "Acme Corp",
45 "name": "Acme Corp",
46 "type": "organization",
47 "descriptions": ["The company"],
48 "occurrences": [],
49 },
50 {
51 "id": "Microservices",
52 "name": "Microservices",
53 "type": "concept",
54 "descriptions": ["Architecture pattern"],
55 "occurrences": [],
56 },
57 ],
58 "relationships": [
59 {
60 "source": "Alice",
61 "target": "Python",
62 "type": "uses",
63 "content_source": "transcript_batch_0",
64 "timestamp": 1.5,
65 },
66 {
67 "source": "Bob",
68 "target": "Python",
69 "type": "uses",
70 "content_source": "transcript_batch_0",
71 "timestamp": 2.0,
72 },
73 {
74 "source": "Alice",
75 "target": "Bob",
76 "type": "works_with",
77 "content_source": "transcript_batch_0",
78 "timestamp": 3.0,
79 },
80 {
81 "source": "Alice",
82 "target": "Acme Corp",
83 "type": "employed_by",
84 "content_source": "transcript_batch_1",
85 "timestamp": 10.0,
86 },
87 {
88 "source": "Acme Corp",
89 "target": "Microservices",
90 "type": "adopts",
91 "content_source": "transcript_batch_1",
92 "timestamp": 12.0,
93 },
94 ],
95 }
96
97
98 @pytest.fixture
99 def sample_graph(sample_kg_data):
100 """Pre-built NetworkX graph from sample data."""
101 return graph_to_networkx(sample_kg_data)
102
103
104 class TestGraphToNetworkx:
105 def test_node_count(self, sample_graph):
106 assert sample_graph.number_of_nodes() == 5
107
108 def test_edge_count(self, sample_graph):
109 assert sample_graph.number_of_edges() == 5
110
111 def test_node_attributes(self, sample_graph):
112 alice = sample_graph.nodes["Alice"]
113 assert alice["type"] == "person"
114 assert alice["descriptions"] == ["Project lead"]
115
116 def test_edge_attributes(self, sample_graph):
117 edge = sample_graph.edges["Alice", "Python"]
118 assert edge["type"] == "uses"
119 assert edge["content_source"] == "transcript_batch_0"
120 assert edge["timestamp"] == 1.5
121
122 def test_empty_data(self):
123 G = graph_to_networkx({})
124 assert G.number_of_nodes() == 0
125 assert G.number_of_edges() == 0
126
127 def test_nodes_only(self):
128 data = {"nodes": [{"name": "X", "type": "concept"}]}
129 G = graph_to_networkx(data)
130 assert G.number_of_nodes() == 1
131 assert G.number_of_edges() == 0
132
133 def test_skips_empty_names(self):
134 data = {"nodes": [{"name": "", "type": "concept"}, {"name": "A"}]}
135 G = graph_to_networkx(data)
136 assert G.number_of_nodes() == 1
137
138 def test_skips_empty_relationship_endpoints(self):
139 data = {
140 "nodes": [{"name": "A"}],
141 "relationships": [{"source": "", "target": "A", "type": "x"}],
142 }
143 G = graph_to_networkx(data)
144 assert G.number_of_edges() == 0
145
146
147 class TestComputeGraphStats:
148 def test_basic_counts(self, sample_graph):
149 stats = compute_graph_stats(sample_graph)
150 assert stats["node_count"] == 5
151 assert stats["edge_count"] == 5
152
153 def test_density_range(self, sample_graph):
154 stats = compute_graph_stats(sample_graph)
155 assert 0.0 <= stats["density"] <= 1.0
156
157 def test_connected_components(self, sample_graph):
158 stats = compute_graph_stats(sample_graph)
159 assert stats["connected_components"] == 1
160
161 def test_type_breakdown(self, sample_graph):
162 stats = compute_graph_stats(sample_graph)
163 assert stats["type_breakdown"]["person"] == 2
164 assert stats["type_breakdown"]["technology"] == 1
165 assert stats["type_breakdown"]["organization"] == 1
166 assert stats["type_breakdown"]["concept"] == 1
167
168 def test_top_entities(self, sample_graph):
169 stats = compute_graph_stats(sample_graph)
170 top = stats["top_entities"]
171 assert len(top) <= 10
172 # In a DiGraph, degree = in-degree + out-degree.
173 # Alice: out=3 (Python, Bob, Acme Corp), in=0 => 3, the highest degree in the sample graph.
174 # Python: in=2, out=0 => 2
175 assert top[0]["name"] == "Alice"
176
177 def test_empty_graph(self):
178 import networkx as nx
179
180 G = nx.DiGraph()
181 stats = compute_graph_stats(G)
182 assert stats["node_count"] == 0
183 assert stats["connected_components"] == 0
184 assert stats["top_entities"] == []
185
186
187 class TestFilterGraph:
188 def test_filter_by_type(self, sample_graph):
189 filtered = filter_graph(sample_graph, entity_types=["person"])
190 assert filtered.number_of_nodes() == 2
191 for _, data in filtered.nodes(data=True):
192 assert data["type"] == "person"
193
194 def test_filter_by_min_degree(self, sample_graph):
195 # Alice has degree 3 (3 out-edges), Python has degree 2 (2 in-edges)
196 filtered = filter_graph(sample_graph, min_degree=3)
197 assert "Alice" in filtered.nodes
198 assert filtered.number_of_nodes() >= 1
199
200 def test_filter_combined(self, sample_graph):
201 filtered = filter_graph(sample_graph, entity_types=["person"], min_degree=1)
202 assert all(filtered.nodes[n]["type"] == "person" for n in filtered.nodes)
203
204 def test_filter_no_criteria(self, sample_graph):
205 filtered = filter_graph(sample_graph)
206 assert filtered.number_of_nodes() == sample_graph.number_of_nodes()
207
208 def test_filter_nonexistent_type(self, sample_graph):
209 filtered = filter_graph(sample_graph, entity_types=["alien"])
210 assert filtered.number_of_nodes() == 0
211
212 def test_filter_preserves_edges(self, sample_graph):
213 filtered = filter_graph(sample_graph, entity_types=["person"])
214 # Alice -> Bob edge should be preserved
215 assert filtered.has_edge("Alice", "Bob")
216
217 def test_filter_returns_copy(self, sample_graph):
218 filtered = filter_graph(sample_graph, entity_types=["person"])
219 # Modifying filtered should not affect original
220 filtered.add_node("NewNode")
221 assert "NewNode" not in sample_graph
222
223
224 class TestGenerateMermaid:
225 def test_output_starts_with_graph(self, sample_graph):
226 mermaid = generate_mermaid(sample_graph)
227 assert mermaid.startswith("graph LR")
228
229 def test_custom_layout(self, sample_graph):
230 mermaid = generate_mermaid(sample_graph, layout="TD")
231 assert mermaid.startswith("graph TD")
232
233 def test_contains_nodes(self, sample_graph):
234 mermaid = generate_mermaid(sample_graph)
235 assert "Alice" in mermaid
236 assert "Python" in mermaid
237
238 def test_contains_edges(self, sample_graph):
239 mermaid = generate_mermaid(sample_graph)
240 assert "uses" in mermaid
241
242 def test_contains_class_defs(self, sample_graph):
243 mermaid = generate_mermaid(sample_graph)
244 assert "classDef person" in mermaid
245 assert "classDef concept" in mermaid
246
247 def test_max_nodes_limit(self, sample_graph):
248 mermaid = generate_mermaid(sample_graph, max_nodes=2)
249 # Should only have top-2 nodes by degree
250 lines = [ln for ln in mermaid.split("\n") if '["' in ln]
251 assert len(lines) <= 2
252
253 def test_empty_graph(self):
254 import networkx as nx
255
256 G = nx.DiGraph()
257 mermaid = generate_mermaid(G)
258 assert "graph LR" in mermaid
259
260 def test_sanitizes_special_chars(self):
261 import networkx as nx
262
263 G = nx.DiGraph()
264 G.add_node("foo bar/baz", type="concept")
265 mermaid = generate_mermaid(G)
266 # Node ID should be sanitized but label preserved
267 assert "foo_bar_baz" in mermaid
268 assert "foo bar/baz" in mermaid
269
270
271 class TestGraphToD3Json:
272 def test_structure(self, sample_graph):
273 d3 = graph_to_d3_json(sample_graph)
274 assert "nodes" in d3
275 assert "links" in d3
276
277 def test_node_format(self, sample_graph):
278 d3 = graph_to_d3_json(sample_graph)
279 node_ids = {n["id"] for n in d3["nodes"]}
280 assert "Alice" in node_ids
281 alice = next(n for n in d3["nodes"] i
--- video_processor/utils/visualization.py
+++ video_processor/utils/visualization.py
@@ -0,0 +1,194 @@
1
+"""Graph visualization and analysis utilities using NetworkX."""
2
+
3
+from typing import Dict, List, Optional
4
+
5
+try:
6
+ import networkx as nx
7
+except ImportError:
8
+ raise ImportError(
9
+ "networkx is required for graph visualization. Install it with: pip install networkx"
10
+ )
11
+
12
+
13
+def graph_to_networkx(kg_data: dict) -> "nx.DiGraph":
14
+ """Convert knowledge graph dict (from to_dict()) to NetworkX directed graph.
15
+
16
+ Nodes get attributes: type, descriptions, source, occurrences
17
+ Edges get attributes: type, content_source, timestamp
18
+ """
19
+ G = nx.DiGraph()
20
+
21
+ for node in kg_data.get("nodes", []):
22
+ name = node.get("name", node.get("id", ""))
23
+ if not name:
24
+ continue
25
+ G.add_node(
26
+ name,
27
+ type=node.get("type", "concept"),
28
+ descriptions=node.get("descriptions", []),
29
+ source=node.get("source"),
30
+ occurrences=node.get("occurrences", []),
31
+ )
32
+
33
+ for rel in kg_data.get("relationships", []):
34
+ src = rel.get("source", "")
35
+ tgt = rel.get("target", "")
36
+ if not src or not tgt:
37
+ continue
38
+ G.add_edge(
39
+ src,
40
+ tgt,
41
+ type=rel.get("type", "related_to"),
42
+ content_source=rel.get("content_source"),
43
+ timestamp=rel.get("timestamp"),
44
+ )
45
+
46
+ return G
47
+
48
+
49
+def compute_graph_stats(G: "nx.DiGraph") -> dict:
50
+ """Return graph statistics.
51
+
52
+ Keys: node_count, edge_count, density, connected_components,
53
+ type_breakdown, top_entities (by degree, top 10).
54
+ """
55
+ undirected = G.to_undirected()
56
+ components = nx.number_connected_components(undirected) if len(G) > 0 else 0
57
+
58
+ type_breakdown: Dict[str, int] = {}
59
+ for _, data in G.nodes(data=True):
60
+ ntype = data.get("type", "concept")
61
+ type_breakdown[ntype] = type_breakdown.get(ntype, 0) + 1
62
+
63
+ degree_list = sorted(G.degree(), key=lambda x: x[1], reverse=True)
64
+ top_entities = [{"name": name, "degree": deg} for name, deg in degree_list[:10]]
65
+
66
+ return {
67
+ "node_count": G.number_of_nodes(),
68
+ "edge_count": G.number_of_edges(),
69
+ "density": nx.density(G),
70
+ "connected_components": components,
71
+ "type_breakdown": type_breakdown,
72
+ "top_entities": top_entities,
73
+ }
74
+
75
+
76
+def filter_graph(
77
+ G: "nx.DiGraph",
78
+ entity_types: Optional[List[str]] = None,
79
+ min_degree: Optional[int] = None,
80
+) -> "nx.DiGraph":
81
+ """Return subgraph filtered by entity type list and/or minimum degree."""
82
+ nodes = set(G.nodes())
83
+
84
+ if entity_types is not None:
85
+ types_set = set(entity_types)
86
+ nodes = {n for n in nodes if G.nodes[n].get("type", "concept") in types_set}
87
+
88
+ if min_degree is not None:
89
+ nodes = {n for n in nodes if G.degree(n) >= min_degree}
90
+
91
+ return G.subgraph(nodes).copy()
92
+
93
+
94
+def _sanitize_id(name: str) -> str:
95
+ """Create a Mermaid-safe identifier from a node name."""
96
+ return "".join(c if c.isalnum() or c == "_" else "_" for c in name)
97
+
98
+
99
+def generate_mermaid(G: "nx.DiGraph", max_nodes: int = 30, layout: str = "LR") -> str:
100
+ """Generate Mermaid diagram from NetworkX graph.
101
+
102
+ Selects top nodes by degree. Layout can be LR, TD, etc.
103
+ """
104
+ degree_sorted = sorted(G.degree(), key=lambda x: x[1], reverse=True)
105
+ top_nodes = {name for name, _ in degree_sorted[:max_nodes]}
106
+
107
+ lines = [f"graph {layout}"]
108
+
109
+ for name in top_nodes:
110
+ data = G.nodes[name]
111
+ ntype = data.get("type", "concept")
112
+ safe_id = _sanitize_id(name)
113
+ safe_name = name.replace('"', "'")
114
+ lines.append(f' {safe_id}["{safe_name}"]:::{ntype}')
115
+
116
+ added = set()
117
+ for src, tgt, data in G.edges(data=True):
118
+ if src in top_nodes and tgt in top_nodes:
119
+ rtype = data.get("type", "related_to")
120
+ key = (src, tgt, rtype)
121
+ if key not in added:
122
+ lines.append(f' {_sanitize_id(src)} -- "{rtype}" --> {_sanitize_id(tgt)}')
123
+ added.add(key)
124
+
125
+ lines.append(" classDef person fill:#f9d5e5,stroke:#333,stroke-width:1px")
126
+ lines.append(" classDef concept fill:#eeeeee,stroke:#333,stroke-width:1px")
127
+ lines.append(" classDef technology fill:#d5e5f9,stroke:#333,stroke-width:1px")
128
+ lines.append(" classDef organization fill:#f9f5d5,stroke:#333,stroke-width:1px")
129
+ lines.append(" classDef diagram fill:#d5f9e5,stroke:#333,stroke-width:1px")
130
+ lines.append(" classDef time fill:#e5d5f9,stroke:#333,stroke-width:1px")
131
+
132
+ return "\n".join(lines)
133
+
134
+
135
+def graph_to_d3_json(G: "nx.DiGraph") -> dict:
136
+ """Export to D3-compatible format.
137
+
138
+ Returns {"nodes": [{"id": ..., "group": ...}], "links": [...]}.
139
+ """
140
+ nodes = []
141
+ for name, data in G.nodes(data=True):
142
+ nodes.append(
143
+ {
144
+ "id": name,
145
+ "group": data.get("type", "concept"),
146
+ "descriptions": data.get("descriptions", []),
147
+ }
148
+ )
149
+
150
+ links = []
151
+ for src, tgt, data in G.edges(data=True):
152
+ links.append(
153
+ {
154
+ "source": src,
155
+ "target": tgt,
156
+ "type": data.get("type", "related_to"),
157
+ }
158
+ )
159
+
160
+ return {"nodes": nodes, "links": links}
161
+
162
+
163
+def graph_to_dot(G: "nx.DiGraph") -> str:
164
+ """Export to Graphviz DOT format."""
165
+ lines = ["digraph KnowledgeGraph {"]
166
+ lines.append(" rankdir=LR;")
167
+ lines.append(' node [shape=box, style="rounded,filled", fontname="Helvetica"];')
168
+ lines.append("")
169
+
170
+ type_colors = {
171
+ "person": "#f9d5e5",
172
+ "concept": "#eeeeee",
173
+ "technology": "#d5e5f9",
174
+ "organization": "#f9f5d5",
175
+ "diagram": "#d5f9e5",
176
+ "time": "#e5d5f9",
177
+ }
178
+
179
+ for name, data in G.nodes(data=True):
180
+ ntype = data.get("type", "concept")
181
+ color = type_colors.get(ntype, "#eeeeee")
182
+ escaped = name.replace('"', '\\"')
183
+ lines.append(f' "{escaped}" [fillcolor="{color}", label="{escaped}"];')
184
+
185
+ lines.append("")
186
+ for src, tgt, data in G.edges(data=True):
187
+ rtype = data.get("type", "related_to")
188
+ escaped_src = src.replace('"', '\\"')
189
+ escaped_tgt = tgt.replace('"', '\\"')
190
+ escaped_type = rtype.replace('"', '\\"')
191
+ lines.append(f' "{escaped_src}" -> "{escaped_tgt}" [label="{escaped_type}"];')
192
+
193
+ lines.append("}")
194
+ return "\n".join(lines)
0195
--- video_processor/utils/visualization.py
+++ video_processor/utils/visualization.py
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0
--- video_processor/utils/visualization.py
+++ video_processor/utils/visualization.py
@@ -0,0 +1,194 @@
1 """Graph visualization and analysis utilities using NetworkX."""
2
3 from typing import Dict, List, Optional
4
5 try:
6 import networkx as nx
7 except ImportError:
8 raise ImportError(
9 "networkx is required for graph visualization. Install it with: pip install networkx"
10 )
11
12
13 def graph_to_networkx(kg_data: dict) -> "nx.DiGraph":
14 """Convert knowledge graph dict (from to_dict()) to NetworkX directed graph.
15
16 Nodes get attributes: type, descriptions, source, occurrences
17 Edges get attributes: type, content_source, timestamp
18 """
19 G = nx.DiGraph()
20
21 for node in kg_data.get("nodes", []):
22 name = node.get("name", node.get("id", ""))
23 if not name:
24 continue
25 G.add_node(
26 name,
27 type=node.get("type", "concept"),
28 descriptions=node.get("descriptions", []),
29 source=node.get("source"),
30 occurrences=node.get("occurrences", []),
31 )
32
33 for rel in kg_data.get("relationships", []):
34 src = rel.get("source", "")
35 tgt = rel.get("target", "")
36 if not src or not tgt:
37 continue
38 G.add_edge(
39 src,
40 tgt,
41 type=rel.get("type", "related_to"),
42 content_source=rel.get("content_source"),
43 timestamp=rel.get("timestamp"),
44 )
45
46 return G
47
48
49 def compute_graph_stats(G: "nx.DiGraph") -> dict:
50 """Return graph statistics.
51
52 Keys: node_count, edge_count, density, connected_components,
53 type_breakdown, top_entities (by degree, top 10).
54 """
55 undirected = G.to_undirected()
56 components = nx.number_connected_components(undirected) if len(G) > 0 else 0
57
58 type_breakdown: Dict[str, int] = {}
59 for _, data in G.nodes(data=True):
60 ntype = data.get("type", "concept")
61 type_breakdown[ntype] = type_breakdown.get(ntype, 0) + 1
62
63 degree_list = sorted(G.degree(), key=lambda x: x[1], reverse=True)
64 top_entities = [{"name": name, "degree": deg} for name, deg in degree_list[:10]]
65
66 return {
67 "node_count": G.number_of_nodes(),
68 "edge_count": G.number_of_edges(),
69 "density": nx.density(G),
70 "connected_components": components,
71 "type_breakdown": type_breakdown,
72 "top_entities": top_entities,
73 }
74
75
76 def filter_graph(
77 G: "nx.DiGraph",
78 entity_types: Optional[List[str]] = None,
79 min_degree: Optional[int] = None,
80 ) -> "nx.DiGraph":
81 """Return subgraph filtered by entity type list and/or minimum degree."""
82 nodes = set(G.nodes())
83
84 if entity_types is not None:
85 types_set = set(entity_types)
86 nodes = {n for n in nodes if G.nodes[n].get("type", "concept") in types_set}
87
88 if min_degree is not None:
89 nodes = {n for n in nodes if G.degree(n) >= min_degree}
90
91 return G.subgraph(nodes).copy()
92
93
94 def _sanitize_id(name: str) -> str:
95 """Create a Mermaid-safe identifier from a node name."""
96 return "".join(c if c.isalnum() or c == "_" else "_" for c in name)
97
98
99 def generate_mermaid(G: "nx.DiGraph", max_nodes: int = 30, layout: str = "LR") -> str:
100 """Generate Mermaid diagram from NetworkX graph.
101
102 Selects top nodes by degree. Layout can be LR, TD, etc.
103 """
104 degree_sorted = sorted(G.degree(), key=lambda x: x[1], reverse=True)
105 top_nodes = {name for name, _ in degree_sorted[:max_nodes]}
106
107 lines = [f"graph {layout}"]
108
109 for name in top_nodes:
110 data = G.nodes[name]
111 ntype = data.get("type", "concept")
112 safe_id = _sanitize_id(name)
113 safe_name = name.replace('"', "'")
114 lines.append(f' {safe_id}["{safe_name}"]:::{ntype}')
115
116 added = set()
117 for src, tgt, data in G.edges(data=True):
118 if src in top_nodes and tgt in top_nodes:
119 rtype = data.get("type", "related_to")
120 key = (src, tgt, rtype)
121 if key not in added:
122 lines.append(f' {_sanitize_id(src)} -- "{rtype}" --> {_sanitize_id(tgt)}')
123 added.add(key)
124
125 lines.append(" classDef person fill:#f9d5e5,stroke:#333,stroke-width:1px")
126 lines.append(" classDef concept fill:#eeeeee,stroke:#333,stroke-width:1px")
127 lines.append(" classDef technology fill:#d5e5f9,stroke:#333,stroke-width:1px")
128 lines.append(" classDef organization fill:#f9f5d5,stroke:#333,stroke-width:1px")
129 lines.append(" classDef diagram fill:#d5f9e5,stroke:#333,stroke-width:1px")
130 lines.append(" classDef time fill:#e5d5f9,stroke:#333,stroke-width:1px")
131
132 return "\n".join(lines)
133
134
135 def graph_to_d3_json(G: "nx.DiGraph") -> dict:
136 """Export to D3-compatible format.
137
138 Returns {"nodes": [{"id": ..., "group": ...}], "links": [...]}.
139 """
140 nodes = []
141 for name, data in G.nodes(data=True):
142 nodes.append(
143 {
144 "id": name,
145 "group": data.get("type", "concept"),
146 "descriptions": data.get("descriptions", []),
147 }
148 )
149
150 links = []
151 for src, tgt, data in G.edges(data=True):
152 links.append(
153 {
154 "source": src,
155 "target": tgt,
156 "type": data.get("type", "related_to"),
157 }
158 )
159
160 return {"nodes": nodes, "links": links}
161
162
163 def graph_to_dot(G: "nx.DiGraph") -> str:
164 """Export to Graphviz DOT format."""
165 lines = ["digraph KnowledgeGraph {"]
166 lines.append(" rankdir=LR;")
167 lines.append(' node [shape=box, style="rounded,filled", fontname="Helvetica"];')
168 lines.append("")
169
170 type_colors = {
171 "person": "#f9d5e5",
172 "concept": "#eeeeee",
173 "technology": "#d5e5f9",
174 "organization": "#f9f5d5",
175 "diagram": "#d5f9e5",
176 "time": "#e5d5f9",
177 }
178
179 for name, data in G.nodes(data=True):
180 ntype = data.get("type", "concept")
181 color = type_colors.get(ntype, "#eeeeee")
182 escaped = name.replace('"', '\\"')
183 lines.append(f' "{escaped}" [fillcolor="{color}", label="{escaped}"];')
184
185 lines.append("")
186 for src, tgt, data in G.edges(data=True):
187 rtype = data.get("type", "related_to")
188 escaped_src = src.replace('"', '\\"')
189 escaped_tgt = tgt.replace('"', '\\"')
190 escaped_type = rtype.replace('"', '\\"')
191 lines.append(f' "{escaped_src}" -> "{escaped_tgt}" [label="{escaped_type}"];')
192
193 lines.append("}")
194 return "\n".join(lines)
195

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button