PlanOpticon

feat(viz): implement visualization.py with NetworkX graph utilities - graph_to_networkx, compute_graph_stats, filter_graph - generate_mermaid, graph_to_d3_json, graph_to_dot exports - Tests for all visualization functions

lmata 2026-03-07 22:09 trunk

Commit c169ccfa8df51038bb3ac8dd3f62b6a3855a2dd115e4db8f31ab95e8b81505b2

Parent 26bc92c6a4c3a57…

2 files changed +281 +194

+ tests/test_visualization.py ~ video_processor/utils/visualization.py

A tests/test_visualization.py

+281

		--- a/tests/test_visualization.py
		+++ b/tests/test_visualization.py
		@@ -0,0 +1,281 @@
	1	+"""Tests for video_processor.utils.visualization module."""
	2	+
	3	+import pytest
	4	+
	5	+from video_processor.utils.visualization import (
	6	+ ple_graph):
	7	+ 402
	8	+ compute_graph_stats,
	9	+ filter_graph,
	10	+ generate_mermaid,
	11	+ graph_to_d3_json,
	12	+ graph_to_dot,
	13	+ graph_to_networkx,
	14	+)
	15	+
	16	+
	17	+@pytest.fixture
	18	+def sample_kg_data():
	19	+ """Mock knowledge graph data matching to_dict() format."""
	20	+ return {
	21	+ "nodes": [
	22	+ {
	23	+ "id": "Alice",
	24	+ "name": "Alice",
	25	+ "type": "person",
	26	+ "descriptions": ["Project lead"],
	27	+ "occurrences": [{"source": "transcript_batch_0", "timestamp": 0.0}],
	28	+ },
	29	+ {
	30	+ "id": "Bob",
	31	+ "name": "Bob",
	32	+ "type": "person",
	33	+ "descriptions": ["Developer"],
	34	+ "occurrences": [],
	35	+ },
	36	+ {
	37	+ "id": "Python",
	38	+ "name": "Python",
	39	+ "type": "technology",
	40	+ "descriptions": ["Programming language"],
	41	+ "occurrences": [],
	42	+ },
	43	+ {
	44	+ "id": "Acme Corp",
	45	+ "name": "Acme Corp",
	46	+ "type": "organization",
	47	+ "descriptions": ["The company"],
	48	+ "occurrences": [],
	49	+ },
	50	+ {
	51	+ "id": "Microservices",
	52	+ "name": "Microservices",
	53	+ "type": "concept",
	54	+ "descriptions": ["Architecture pattern"],
	55	+ "occurrences": [],
	56	+ },
	57	+ ],
	58	+ "relationships": [
	59	+ {
	60	+ "source": "Alice",
	61	+ "target": "Python",
	62	+ "type": "uses",
	63	+ "content_source": "transcript_batch_0",
	64	+ "timestamp": 1.5,
	65	+ },
	66	+ {
	67	+ "source": "Bob",
	68	+ "target": "Python",
	69	+ "type": "uses",
	70	+ "content_source": "transcript_batch_0",
	71	+ "timestamp": 2.0,
	72	+ },
	73	+ {
	74	+ "source": "Alice",
	75	+ "target": "Bob",
	76	+ "type": "works_with",
	77	+ "content_source": "transcript_batch_0",
	78	+ "timestamp": 3.0,
	79	+ },
	80	+ {
	81	+ "source": "Alice",
	82	+ "target": "Acme Corp",
	83	+ "type": "employed_by",
	84	+ "content_source": "transcript_batch_1",
	85	+ "timestamp": 10.0,
	86	+ },
	87	+ {
	88	+ "source": "Acme Corp",
	89	+ "target": "Microservices",
	90	+ "type": "adopts",
	91	+ "content_source": "transcript_batch_1",
	92	+ "timestamp": 12.0,
	93	+ },
	94	+ ],
	95	+ }
	96	+
	97	+
	98	+@pytest.fixture
	99	+def sample_graph(sample_kg_data):
	100	+ """Pre-built NetworkX graph from sample data."""
	101	+ return graph_to_networkx(sample_kg_data)
	102	+
	103	+
	104	+class TestGraphToNetworkx:
	105	+ def test_node_count(self, sample_graph):
	106	+ assert sample_graph.number_of_nodes() == 5
	107	+
	108	+ def test_edge_count(self, sample_graph):
	109	+ assert sample_graph.number_of_edges() == 5
	110	+
	111	+ def test_node_attributes(self, sample_graph):
	112	+ alice = sample_graph.nodes["Alice"]
	113	+ assert alice["type"] == "person"
	114	+ assert alice["descriptions"] == ["Project lead"]
	115	+
	116	+ def test_edge_attributes(self, sample_graph):
	117	+ edge = sample_graph.edges["Alice", "Python"]
	118	+ assert edge["type"] == "uses"
	119	+ assert edge["content_source"] == "transcript_batch_0"
	120	+ assert edge["timestamp"] == 1.5
	121	+
	122	+ def test_empty_data(self):
	123	+ G = graph_to_networkx({})
	124	+ assert G.number_of_nodes() == 0
	125	+ assert G.number_of_edges() == 0
	126	+
	127	+ def test_nodes_only(self):
	128	+ data = {"nodes": [{"name": "X", "type": "concept"}]}
	129	+ G = graph_to_networkx(data)
	130	+ assert G.number_of_nodes() == 1
	131	+ assert G.number_of_edges() == 0
	132	+
	133	+ def test_skips_empty_names(self):
	134	+ data = {"nodes": [{"name": "", "type": "concept"}, {"name": "A"}]}
	135	+ G = graph_to_networkx(data)
	136	+ assert G.number_of_nodes() == 1
	137	+
	138	+ def test_skips_empty_relationship_endpoints(self):
	139	+ data = {
	140	+ "nodes": [{"name": "A"}],
	141	+ "relationships": [{"source": "", "target": "A", "type": "x"}],
	142	+ }
	143	+ G = graph_to_networkx(data)
	144	+ assert G.number_of_edges() == 0
	145	+
	146	+
	147	+class TestComputeGraphStats:
	148	+ def test_basic_counts(self, sample_graph):
	149	+ stats = compute_graph_stats(sample_graph)
	150	+ assert stats["node_count"] == 5
	151	+ assert stats["edge_count"] == 5
	152	+
	153	+ def test_density_range(self, sample_graph):
	154	+ stats = compute_graph_stats(sample_graph)
	155	+ assert 0.0 <= stats["density"] <= 1.0
	156	+
	157	+ def test_connected_components(self, sample_graph):
	158	+ stats = compute_graph_stats(sample_graph)
	159	+ assert stats["connected_components"] == 1
	160	+
	161	+ def test_type_breakdown(self, sample_graph):
	162	+ stats = compute_graph_stats(sample_graph)
	163	+ assert stats["type_breakdown"]["person"] == 2
	164	+ assert stats["type_breakdown"]["technology"] == 1
	165	+ assert stats["type_breakdown"]["organization"] == 1
	166	+ assert stats["type_breakdown"]["concept"] == 1
	167	+
	168	+ def test_top_entities(self, sample_graph):
	169	+ stats = compute_graph_stats(sample_graph)
	170	+ top = stats["top_entities"]
	171	+ assert len(top) <= 10
	172	+ # Alice has degree 4 (3 out + 0 in? No: 3 out-edges, 0 in-edges = degree 3 undirected...
	173	+ # Actually in DiGraph, degree = in + out. Alice: out=3 (Python, Bob, Acme), in=0 => 3
	174	+ # Python: in=2, out=0 => 2
	175	+ assert top[0]["name"] == "Alice"
	176	+
	177	+ def test_empty_graph(self):
	178	+ import networkx as nx
	179	+
	180	+ G = nx.DiGraph()
	181	+ stats = compute_graph_stats(G)
	182	+ assert stats["node_count"] == 0
	183	+ assert stats["connected_components"] == 0
	184	+ assert stats["top_entities"] == []
	185	+
	186	+
	187	+class TestFilterGraph:
	188	+ def test_filter_by_type(self, sample_graph):
	189	+ filtered = filter_graph(sample_graph, entity_types=["person"])
	190	+ assert filtered.number_of_nodes() == 2
	191	+ for _, data in filtered.nodes(data=True):
	192	+ assert data["type"] == "person"
	193	+
	194	+ def test_filter_by_min_degree(self, sample_graph):
	195	+ # Alice has degree 3 (3 out-edges), Python has degree 2 (2 in-edges)
	196	+ filtered = filter_graph(sample_graph, min_degree=3)
	197	+ assert "Alice" in filtered.nodes
	198	+ assert filtered.number_of_nodes() >= 1
	199	+
	200	+ def test_filter_combined(self, sample_graph):
	201	+ filtered = filter_graph(sample_graph, entity_types=["person"], min_degree=1)
	202	+ assert all(filtered.nodes[n]["type"] == "person" for n in filtered.nodes)
	203	+
	204	+ def test_filter_no_criteria(self, sample_graph):
	205	+ filtered = filter_graph(sample_graph)
	206	+ assert filtered.number_of_nodes() == sample_graph.number_of_nodes()
	207	+
	208	+ def test_filter_nonexistent_type(self, sample_graph):
	209	+ filtered = filter_graph(sample_graph, entity_types=["alien"])
	210	+ assert filtered.number_of_nodes() == 0
	211	+
	212	+ def test_filter_preserves_edges(self, sample_graph):
	213	+ filtered = filter_graph(sample_graph, entity_types=["person"])
	214	+ # Alice -> Bob edge should be preserved
	215	+ assert filtered.has_edge("Alice", "Bob")
	216	+
	217	+ def test_filter_returns_copy(self, sample_graph):
	218	+ filtered = filter_graph(sample_graph, entity_types=["person"])
	219	+ # Modifying filtered should not affect original
	220	+ filtered.add_node("NewNode")
	221	+ assert "NewNode" not in sample_graph
	222	+
	223	+
	224	+class TestGenerateMermaid:
	225	+ def test_output_starts_with_graph(self, sample_graph):
	226	+ mermaid = generate_mermaid(sample_graph)
	227	+ assert mermaid.startswith("graph LR")
	228	+
	229	+ def test_custom_layout(self, sample_graph):
	230	+ mermaid = generate_mermaid(sample_graph, layout="TD")
	231	+ assert mermaid.startswith("graph TD")
	232	+
	233	+ def test_contains_nodes(self, sample_graph):
	234	+ mermaid = generate_mermaid(sample_graph)
	235	+ assert "Alice" in mermaid
	236	+ assert "Python" in mermaid
	237	+
	238	+ def test_contains_edges(self, sample_graph):
	239	+ mermaid = generate_mermaid(sample_graph)
	240	+ assert "uses" in mermaid
	241	+
	242	+ def test_contains_class_defs(self, sample_graph):
	243	+ mermaid = generate_mermaid(sample_graph)
	244	+ assert "classDef person" in mermaid
	245	+ assert "classDef concept" in mermaid
	246	+
	247	+ def test_max_nodes_limit(self, sample_graph):
	248	+ mermaid = generate_mermaid(sample_graph, max_nodes=2)
	249	+ # Should only have top-2 nodes by degree
	250	+ lines = [ln for ln in mermaid.split("\n") if '["' in ln]
	251	+ assert len(lines) <= 2
	252	+
	253	+ def test_empty_graph(self):
	254	+ import networkx as nx
	255	+
	256	+ G = nx.DiGraph()
	257	+ mermaid = generate_mermaid(G)
	258	+ assert "graph LR" in mermaid
	259	+
	260	+ def test_sanitizes_special_chars(self):
	261	+ import networkx as nx
	262	+
	263	+ G = nx.DiGraph()
	264	+ G.add_node("foo bar/baz", type="concept")
	265	+ mermaid = generate_mermaid(G)
	266	+ # Node ID should be sanitized but label preserved
	267	+ assert "foo_bar_baz" in mermaid
	268	+ assert "foo bar/baz" in mermaid
	269	+
	270	+
	271	+class TestGraphToD3Json:
	272	+ def test_structure(self, sample_graph):
	273	+ d3 = graph_to_d3_json(sample_graph)
	274	+ assert "nodes" in d3
	275	+ assert "links" in d3
	276	+
	277	+ def test_node_format(self, sample_graph):
	278	+ d3 = graph_to_d3_json(sample_graph)
	279	+ node_ids = {n["id"] for n in d3["nodes"]}
	280	+ assert "Alice" in node_ids
	281	+ alice = next(n for n in d3["nodes"] i

	--- a/tests/test_visualization.py
	+++ b/tests/test_visualization.py
	@@ -0,0 +1,281 @@

	--- a/tests/test_visualization.py
	+++ b/tests/test_visualization.py
	@@ -0,0 +1,281 @@
1	"""Tests for video_processor.utils.visualization module."""
2
3	import pytest
4
5	from video_processor.utils.visualization import (
6	ple_graph):
7	402
8	compute_graph_stats,
9	filter_graph,
10	generate_mermaid,
11	graph_to_d3_json,
12	graph_to_dot,
13	graph_to_networkx,
14	)
15
16
@pytest.fixture
def sample_kg_data():
    """Provide a small knowledge graph dict: five entities, five relationships."""

    def entity(name, kind, description, occurrences=()):
        # Every sample entity uses the same string for "id" and "name".
        return {
            "id": name,
            "name": name,
            "type": kind,
            "descriptions": [description],
            "occurrences": list(occurrences),
        }

    def relation(source, target, kind, batch, timestamp):
        return {
            "source": source,
            "target": target,
            "type": kind,
            "content_source": batch,
            "timestamp": timestamp,
        }

    entities = [
        entity(
            "Alice",
            "person",
            "Project lead",
            [{"source": "transcript_batch_0", "timestamp": 0.0}],
        ),
        entity("Bob", "person", "Developer"),
        entity("Python", "technology", "Programming language"),
        entity("Acme Corp", "organization", "The company"),
        entity("Microservices", "concept", "Architecture pattern"),
    ]
    relations = [
        relation("Alice", "Python", "uses", "transcript_batch_0", 1.5),
        relation("Bob", "Python", "uses", "transcript_batch_0", 2.0),
        relation("Alice", "Bob", "works_with", "transcript_batch_0", 3.0),
        relation("Alice", "Acme Corp", "employed_by", "transcript_batch_1", 10.0),
        relation("Acme Corp", "Microservices", "adopts", "transcript_batch_1", 12.0),
    ]
    return {"nodes": entities, "relationships": relations}
96
97
@pytest.fixture
def sample_graph(sample_kg_data):
    """Provide the sample knowledge graph already converted by graph_to_networkx."""
    graph = graph_to_networkx(sample_kg_data)
    return graph
102
103
class TestGraphToNetworkx:
    """Conversion of knowledge-graph dicts into directed graphs."""

    def test_node_count(self, sample_graph):
        """All five sample entities become nodes."""
        assert len(sample_graph.nodes) == 5

    def test_edge_count(self, sample_graph):
        """All five sample relationships become edges."""
        assert len(sample_graph.edges) == 5

    def test_node_attributes(self, sample_graph):
        """Node attributes are copied from the entity dict."""
        attrs = sample_graph.nodes["Alice"]
        assert attrs["type"] == "person"
        assert attrs["descriptions"] == ["Project lead"]

    def test_edge_attributes(self, sample_graph):
        """Edge attributes are copied from the relationship dict."""
        attrs = sample_graph.get_edge_data("Alice", "Python")
        assert attrs["type"] == "uses"
        assert attrs["content_source"] == "transcript_batch_0"
        assert attrs["timestamp"] == 1.5

    def test_empty_data(self):
        """An empty dict yields an empty graph."""
        empty = graph_to_networkx({})
        assert empty.number_of_nodes() == 0
        assert empty.number_of_edges() == 0

    def test_nodes_only(self):
        """The "relationships" key is optional."""
        graph = graph_to_networkx({"nodes": [{"name": "X", "type": "concept"}]})
        assert graph.number_of_nodes() == 1
        assert graph.number_of_edges() == 0

    def test_skips_empty_names(self):
        """Entities with an empty name are dropped."""
        payload = {"nodes": [{"name": "", "type": "concept"}, {"name": "A"}]}
        graph = graph_to_networkx(payload)
        assert graph.number_of_nodes() == 1

    def test_skips_empty_relationship_endpoints(self):
        """Relationships with an empty endpoint are dropped."""
        payload = {
            "nodes": [{"name": "A"}],
            "relationships": [{"source": "", "target": "A", "type": "x"}],
        }
        graph = graph_to_networkx(payload)
        assert graph.number_of_edges() == 0
145
146
class TestComputeGraphStats:
    """Statistics reported by compute_graph_stats for the sample graph."""

    def test_basic_counts(self, sample_graph):
        stats = compute_graph_stats(sample_graph)
        assert stats["node_count"] == 5
        assert stats["edge_count"] == 5

    def test_density_range(self, sample_graph):
        stats = compute_graph_stats(sample_graph)
        assert 0.0 <= stats["density"] <= 1.0

    def test_connected_components(self, sample_graph):
        # Ignoring edge direction, every sample node is reachable from Alice.
        stats = compute_graph_stats(sample_graph)
        assert stats["connected_components"] == 1

    def test_type_breakdown(self, sample_graph):
        stats = compute_graph_stats(sample_graph)
        assert stats["type_breakdown"]["person"] == 2
        assert stats["type_breakdown"]["technology"] == 1
        assert stats["type_breakdown"]["organization"] == 1
        assert stats["type_breakdown"]["concept"] == 1

    def test_top_entities(self, sample_graph):
        stats = compute_graph_stats(sample_graph)
        top = stats["top_entities"]
        assert len(top) <= 10
        # In a DiGraph, degree = in-degree + out-degree.
        # Alice: out=3 (Python, Bob, Acme Corp), in=0 -> 3, the unique maximum.
        # Python, Bob and Acme Corp each have degree 2; Microservices has 1.
        assert top[0]["name"] == "Alice"
        assert top[0]["degree"] == 3

    def test_empty_graph(self):
        import networkx as nx

        G = nx.DiGraph()
        stats = compute_graph_stats(G)
        assert stats["node_count"] == 0
        assert stats["connected_components"] == 0
        assert stats["top_entities"] == []
185
186
class TestFilterGraph:
    """Node filtering by entity type and/or minimum degree."""

    def test_filter_by_type(self, sample_graph):
        filtered = filter_graph(sample_graph, entity_types=["person"])
        assert filtered.number_of_nodes() == 2
        for _, data in filtered.nodes(data=True):
            assert data["type"] == "person"

    def test_filter_by_min_degree(self, sample_graph):
        # Alice is the only node with degree >= 3 (three out-edges);
        # every other sample node has degree 2 or less.
        filtered = filter_graph(sample_graph, min_degree=3)
        assert set(filtered.nodes) == {"Alice"}

    def test_filter_combined(self, sample_graph):
        filtered = filter_graph(sample_graph, entity_types=["person"], min_degree=1)
        # Both people have at least one edge, so neither is dropped; checking
        # the exact node set keeps the test from passing on an empty result.
        assert set(filtered.nodes) == {"Alice", "Bob"}
        assert all(filtered.nodes[n]["type"] == "person" for n in filtered.nodes)

    def test_filter_no_criteria(self, sample_graph):
        filtered = filter_graph(sample_graph)
        assert filtered.number_of_nodes() == sample_graph.number_of_nodes()

    def test_filter_nonexistent_type(self, sample_graph):
        filtered = filter_graph(sample_graph, entity_types=["alien"])
        assert filtered.number_of_nodes() == 0

    def test_filter_preserves_edges(self, sample_graph):
        filtered = filter_graph(sample_graph, entity_types=["person"])
        # Alice -> Bob connects two kept nodes, so it must survive.
        assert filtered.has_edge("Alice", "Bob")

    def test_filter_returns_copy(self, sample_graph):
        filtered = filter_graph(sample_graph, entity_types=["person"])
        # Modifying the filtered graph must not affect the original.
        filtered.add_node("NewNode")
        assert "NewNode" not in sample_graph
222
223
class TestGenerateMermaid:
    """Mermaid text produced by generate_mermaid."""

    def test_output_starts_with_graph(self, sample_graph):
        """The default layout header is "graph LR"."""
        diagram = generate_mermaid(sample_graph)
        assert diagram.startswith("graph LR")

    def test_custom_layout(self, sample_graph):
        """The layout argument is placed in the header."""
        diagram = generate_mermaid(sample_graph, layout="TD")
        assert diagram.startswith("graph TD")

    def test_contains_nodes(self, sample_graph):
        diagram = generate_mermaid(sample_graph)
        for expected in ("Alice", "Python"):
            assert expected in diagram

    def test_contains_edges(self, sample_graph):
        diagram = generate_mermaid(sample_graph)
        assert "uses" in diagram

    def test_contains_class_defs(self, sample_graph):
        diagram = generate_mermaid(sample_graph)
        for expected in ("classDef person", "classDef concept"):
            assert expected in diagram

    def test_max_nodes_limit(self, sample_graph):
        diagram = generate_mermaid(sample_graph, max_nodes=2)
        # Node declarations are the only lines containing '["', so counting
        # them shows that at most the top-2 nodes by degree were emitted.
        node_lines = [line for line in diagram.split("\n") if '["' in line]
        assert len(node_lines) <= 2

    def test_empty_graph(self):
        import networkx as nx

        diagram = generate_mermaid(nx.DiGraph())
        assert "graph LR" in diagram

    def test_sanitizes_special_chars(self):
        import networkx as nx

        graph = nx.DiGraph()
        graph.add_node("foo bar/baz", type="concept")
        diagram = generate_mermaid(graph)
        # The node ID is sanitized while the human-readable label is preserved.
        assert "foo_bar_baz" in diagram
        assert "foo bar/baz" in diagram
269
270
271	class TestGraphToD3Json:
272	def test_structure(self, sample_graph):
273	d3 = graph_to_d3_json(sample_graph)
274	assert "nodes" in d3
275	assert "links" in d3
276
277	def test_node_format(self, sample_graph):
278	d3 = graph_to_d3_json(sample_graph)
279	node_ids = {n["id"] for n in d3["nodes"]}
280	assert "Alice" in node_ids
281	alice = next(n for n in d3["nodes"] i

M video_processor/utils/visualization.py

+194

		--- video_processor/utils/visualization.py
		+++ video_processor/utils/visualization.py
		@@ -0,0 +1,194 @@
	1	+"""Graph visualization and analysis utilities using NetworkX."""
	2	+
	3	+from typing import Dict, List, Optional
	4	+
	5	+try:
	6	+ import networkx as nx
	7	+except ImportError:
	8	+ raise ImportError(
	9	+ "networkx is required for graph visualization. Install it with: pip install networkx"
	10	+ )
	11	+
	12	+
	13	+def graph_to_networkx(kg_data: dict) -> "nx.DiGraph":
	14	+ """Convert knowledge graph dict (from to_dict()) to NetworkX directed graph.
	15	+
	16	+ Nodes get attributes: type, descriptions, source, occurrences
	17	+ Edges get attributes: type, content_source, timestamp
	18	+ """
	19	+ G = nx.DiGraph()
	20	+
	21	+ for node in kg_data.get("nodes", []):
	22	+ name = node.get("name", node.get("id", ""))
	23	+ if not name:
	24	+ continue
	25	+ G.add_node(
	26	+ name,
	27	+ type=node.get("type", "concept"),
	28	+ descriptions=node.get("descriptions", []),
	29	+ source=node.get("source"),
	30	+ occurrences=node.get("occurrences", []),
	31	+ )
	32	+
	33	+ for rel in kg_data.get("relationships", []):
	34	+ src = rel.get("source", "")
	35	+ tgt = rel.get("target", "")
	36	+ if not src or not tgt:
	37	+ continue
	38	+ G.add_edge(
	39	+ src,
	40	+ tgt,
	41	+ type=rel.get("type", "related_to"),
	42	+ content_source=rel.get("content_source"),
	43	+ timestamp=rel.get("timestamp"),
	44	+ )
	45	+
	46	+ return G
	47	+
	48	+
	49	+def compute_graph_stats(G: "nx.DiGraph") -> dict:
	50	+ """Return graph statistics.
	51	+
	52	+ Keys: node_count, edge_count, density, connected_components,
	53	+ type_breakdown, top_entities (by degree, top 10).
	54	+ """
	55	+ undirected = G.to_undirected()
	56	+ components = nx.number_connected_components(undirected) if len(G) > 0 else 0
	57	+
	58	+ type_breakdown: Dict[str, int] = {}
	59	+ for _, data in G.nodes(data=True):
	60	+ ntype = data.get("type", "concept")
	61	+ type_breakdown[ntype] = type_breakdown.get(ntype, 0) + 1
	62	+
	63	+ degree_list = sorted(G.degree(), key=lambda x: x[1], reverse=True)
	64	+ top_entities = [{"name": name, "degree": deg} for name, deg in degree_list[:10]]
	65	+
	66	+ return {
	67	+ "node_count": G.number_of_nodes(),
	68	+ "edge_count": G.number_of_edges(),
	69	+ "density": nx.density(G),
	70	+ "connected_components": components,
	71	+ "type_breakdown": type_breakdown,
	72	+ "top_entities": top_entities,
	73	+ }
	74	+
	75	+
	76	+def filter_graph(
	77	+ G: "nx.DiGraph",
	78	+ entity_types: Optional[List[str]] = None,
	79	+ min_degree: Optional[int] = None,
	80	+) -> "nx.DiGraph":
	81	+ """Return subgraph filtered by entity type list and/or minimum degree."""
	82	+ nodes = set(G.nodes())
	83	+
	84	+ if entity_types is not None:
	85	+ types_set = set(entity_types)
	86	+ nodes = {n for n in nodes if G.nodes[n].get("type", "concept") in types_set}
	87	+
	88	+ if min_degree is not None:
	89	+ nodes = {n for n in nodes if G.degree(n) >= min_degree}
	90	+
	91	+ return G.subgraph(nodes).copy()
	92	+
	93	+
	94	+def _sanitize_id(name: str) -> str:
	95	+ """Create a Mermaid-safe identifier from a node name."""
	96	+ return "".join(c if c.isalnum() or c == "_" else "_" for c in name)
	97	+
	98	+
	99	+def generate_mermaid(G: "nx.DiGraph", max_nodes: int = 30, layout: str = "LR") -> str:
	100	+ """Generate Mermaid diagram from NetworkX graph.
	101	+
	102	+ Selects top nodes by degree. Layout can be LR, TD, etc.
	103	+ """
	104	+ degree_sorted = sorted(G.degree(), key=lambda x: x[1], reverse=True)
	105	+ top_nodes = {name for name, _ in degree_sorted[:max_nodes]}
	106	+
	107	+ lines = [f"graph {layout}"]
	108	+
	109	+ for name in top_nodes:
	110	+ data = G.nodes[name]
	111	+ ntype = data.get("type", "concept")
	112	+ safe_id = _sanitize_id(name)
	113	+ safe_name = name.replace('"', "'")
	114	+ lines.append(f' {safe_id}["{safe_name}"]:::{ntype}')
	115	+
	116	+ added = set()
	117	+ for src, tgt, data in G.edges(data=True):
	118	+ if src in top_nodes and tgt in top_nodes:
	119	+ rtype = data.get("type", "related_to")
	120	+ key = (src, tgt, rtype)
	121	+ if key not in added:
	122	+ lines.append(f' {_sanitize_id(src)} -- "{rtype}" --> {_sanitize_id(tgt)}')
	123	+ added.add(key)
	124	+
	125	+ lines.append(" classDef person fill:#f9d5e5,stroke:#333,stroke-width:1px")
	126	+ lines.append(" classDef concept fill:#eeeeee,stroke:#333,stroke-width:1px")
	127	+ lines.append(" classDef technology fill:#d5e5f9,stroke:#333,stroke-width:1px")
	128	+ lines.append(" classDef organization fill:#f9f5d5,stroke:#333,stroke-width:1px")
	129	+ lines.append(" classDef diagram fill:#d5f9e5,stroke:#333,stroke-width:1px")
	130	+ lines.append(" classDef time fill:#e5d5f9,stroke:#333,stroke-width:1px")
	131	+
	132	+ return "\n".join(lines)
	133	+
	134	+
	135	+def graph_to_d3_json(G: "nx.DiGraph") -> dict:
	136	+ """Export to D3-compatible format.
	137	+
	138	+ Returns {"nodes": [{"id": ..., "group": ...}], "links": [...]}.
	139	+ """
	140	+ nodes = []
	141	+ for name, data in G.nodes(data=True):
	142	+ nodes.append(
	143	+ {
	144	+ "id": name,
	145	+ "group": data.get("type", "concept"),
	146	+ "descriptions": data.get("descriptions", []),
	147	+ }
	148	+ )
	149	+
	150	+ links = []
	151	+ for src, tgt, data in G.edges(data=True):
	152	+ links.append(
	153	+ {
	154	+ "source": src,
	155	+ "target": tgt,
	156	+ "type": data.get("type", "related_to"),
	157	+ }
	158	+ )
	159	+
	160	+ return {"nodes": nodes, "links": links}
	161	+
	162	+
	163	+def graph_to_dot(G: "nx.DiGraph") -> str:
	164	+ """Export to Graphviz DOT format."""
	165	+ lines = ["digraph KnowledgeGraph {"]
	166	+ lines.append(" rankdir=LR;")
	167	+ lines.append(' node [shape=box, style="rounded,filled", fontname="Helvetica"];')
	168	+ lines.append("")
	169	+
	170	+ type_colors = {
	171	+ "person": "#f9d5e5",
	172	+ "concept": "#eeeeee",
	173	+ "technology": "#d5e5f9",
	174	+ "organization": "#f9f5d5",
	175	+ "diagram": "#d5f9e5",
	176	+ "time": "#e5d5f9",
	177	+ }
	178	+
	179	+ for name, data in G.nodes(data=True):
	180	+ ntype = data.get("type", "concept")
	181	+ color = type_colors.get(ntype, "#eeeeee")
	182	+ escaped = name.replace('"', '\\"')
	183	+ lines.append(f' "{escaped}" [fillcolor="{color}", label="{escaped}"];')
	184	+
	185	+ lines.append("")
	186	+ for src, tgt, data in G.edges(data=True):
	187	+ rtype = data.get("type", "related_to")
	188	+ escaped_src = src.replace('"', '\\"')
	189	+ escaped_tgt = tgt.replace('"', '\\"')
	190	+ escaped_type = rtype.replace('"', '\\"')
	191	+ lines.append(f' "{escaped_src}" -> "{escaped_tgt}" [label="{escaped_type}"];')
	192	+
	193	+ lines.append("}")
	194	+ return "\n".join(lines)
0	195

	--- video_processor/utils/visualization.py
	+++ video_processor/utils/visualization.py
	@@ -0,0 +1,194 @@


































































































































































































0

	--- video_processor/utils/visualization.py
	+++ video_processor/utils/visualization.py
	@@ -0,0 +1,194 @@
1	"""Graph visualization and analysis utilities using NetworkX."""
2
3	from typing import Dict, List, Optional
4
5	try:
6	import networkx as nx
7	except ImportError:
8	raise ImportError(
9	"networkx is required for graph visualization. Install it with: pip install networkx"
10	)
11
12
def graph_to_networkx(kg_data: dict) -> "nx.DiGraph":
    """Convert a knowledge graph dict (from to_dict()) to a NetworkX directed graph.

    Args:
        kg_data: Mapping with optional ``"nodes"`` and ``"relationships"``
            lists of dicts. A missing or ``None`` list is treated as empty.

    Returns:
        A new ``nx.DiGraph`` keyed by node name.
        Nodes get attributes: type, descriptions, source, occurrences.
        Edges get attributes: type, content_source, timestamp.

    Nodes without a usable name/id and relationships with an empty source or
    target are skipped.
    """
    G = nx.DiGraph()

    for node in kg_data.get("nodes") or []:
        # Fall back to "id" when "name" is missing *or* empty/None; a plain
        # dict.get default would only cover the "missing key" case.
        name = node.get("name") or node.get("id", "")
        if not name:
            continue
        G.add_node(
            name,
            type=node.get("type", "concept"),
            descriptions=node.get("descriptions", []),
            source=node.get("source"),
            occurrences=node.get("occurrences", []),
        )

    for rel in kg_data.get("relationships") or []:
        src = rel.get("source", "")
        tgt = rel.get("target", "")
        if not src or not tgt:
            continue
        # add_edge implicitly creates endpoints that were not listed under
        # "nodes"; such nodes carry no attributes.
        G.add_edge(
            src,
            tgt,
            type=rel.get("type", "related_to"),
            content_source=rel.get("content_source"),
            timestamp=rel.get("timestamp"),
        )

    return G
47
48
def compute_graph_stats(G: "nx.DiGraph") -> dict:
    """Return summary statistics for a graph.

    Args:
        G: Graph to analyse. Node ``type`` attributes default to "concept"
            when absent.

    Returns:
        Dict with keys: node_count, edge_count, density, connected_components
        (counted ignoring edge direction; 0 for an empty graph),
        type_breakdown (node type -> count) and top_entities (up to 10
        ``{"name", "degree"}`` dicts, highest degree first).
    """
    if len(G) == 0:
        components = 0
    elif G.is_directed():
        # Weakly connected components are the connected components of the
        # undirected view, so this avoids copying the graph via to_undirected().
        components = nx.number_weakly_connected_components(G)
    else:
        components = nx.number_connected_components(G)

    type_breakdown: Dict[str, int] = {}
    for _, data in G.nodes(data=True):
        ntype = data.get("type", "concept")
        type_breakdown[ntype] = type_breakdown.get(ntype, 0) + 1

    # sorted() is stable, so nodes with equal degree keep graph insertion order.
    degree_list = sorted(G.degree(), key=lambda x: x[1], reverse=True)
    top_entities = [{"name": name, "degree": deg} for name, deg in degree_list[:10]]

    return {
        "node_count": G.number_of_nodes(),
        "edge_count": G.number_of_edges(),
        "density": nx.density(G),
        "connected_components": components,
        "type_breakdown": type_breakdown,
        "top_entities": top_entities,
    }
74
75
def filter_graph(
    G: "nx.DiGraph",
    entity_types: Optional[List[str]] = None,
    min_degree: Optional[int] = None,
) -> "nx.DiGraph":
    """Return an independent copy of G restricted to the nodes passing all filters.

    A node is kept when its ``type`` (default "concept") is in ``entity_types``
    and its degree in the *original* graph is at least ``min_degree``.
    A filter left as ``None`` is not applied.
    """
    wanted_types = None if entity_types is None else set(entity_types)

    def keep(node) -> bool:
        if wanted_types is not None:
            if G.nodes[node].get("type", "concept") not in wanted_types:
                return False
        if min_degree is not None and G.degree(node) < min_degree:
            return False
        return True

    survivors = {node for node in G.nodes() if keep(node)}
    # subgraph() returns a view; copy() detaches the result from G.
    return G.subgraph(survivors).copy()
92
93
94	def _sanitize_id(name: str) -> str:
95	"""Create a Mermaid-safe identifier from a node name."""
96	return "".join(c if c.isalnum() or c == "_" else "_" for c in name)
97
98
def generate_mermaid(G: "nx.DiGraph", max_nodes: int = 30, layout: str = "LR") -> str:
    """Generate a Mermaid diagram from a NetworkX graph.

    Args:
        G: Graph to render. Node ``type`` defaults to "concept" and edge
            ``type`` to "related_to" when absent.
        max_nodes: Number of nodes to include, chosen by highest degree.
        layout: Mermaid graph direction placed in the header (LR, TD, etc.).

    Returns:
        Mermaid source: header, node declarations in descending-degree order,
        edges between the selected nodes, then the fixed classDef styles.
    """
    degree_sorted = sorted(G.degree(), key=lambda x: x[1], reverse=True)
    # Keep an ordered list for emission so the output is reproducible (set
    # iteration order of strings varies between interpreter runs), and a set
    # for O(1) membership tests while scanning edges.
    ordered_nodes = [name for name, _ in degree_sorted[:max_nodes]]
    top_nodes = set(ordered_nodes)

    lines = [f"graph {layout}"]

    for name in ordered_nodes:
        ntype = G.nodes[name].get("type", "concept")
        safe_id = _sanitize_id(name)
        safe_name = name.replace('"', "'")
        # The class name follows ":::" unquoted, so strip anything that is not
        # identifier-safe; the built-in type names pass through unchanged.
        safe_class = _sanitize_id(str(ntype))
        lines.append(f' {safe_id}["{safe_name}"]:::{safe_class}')

    added = set()
    for src, tgt, data in G.edges(data=True):
        if src in top_nodes and tgt in top_nodes:
            rtype = data.get("type", "related_to")
            key = (src, tgt, rtype)
            if key not in added:
                # A double quote inside the label would end the quoted edge
                # text early; mirror the node-label handling.
                safe_rtype = str(rtype).replace('"', "'")
                lines.append(f' {_sanitize_id(src)} -- "{safe_rtype}" --> {_sanitize_id(tgt)}')
                added.add(key)

    lines.append(" classDef person fill:#f9d5e5,stroke:#333,stroke-width:1px")
    lines.append(" classDef concept fill:#eeeeee,stroke:#333,stroke-width:1px")
    lines.append(" classDef technology fill:#d5e5f9,stroke:#333,stroke-width:1px")
    lines.append(" classDef organization fill:#f9f5d5,stroke:#333,stroke-width:1px")
    lines.append(" classDef diagram fill:#d5f9e5,stroke:#333,stroke-width:1px")
    lines.append(" classDef time fill:#e5d5f9,stroke:#333,stroke-width:1px")

    return "\n".join(lines)
133
134
def graph_to_d3_json(G: "nx.DiGraph") -> dict:
    """Export the graph as a node/link dict for D3.

    Returns ``{"nodes": [...], "links": [...]}`` where each node has ``id``,
    ``group`` (the node type, default "concept") and ``descriptions``, and each
    link has ``source``, ``target`` and ``type`` (default "related_to").
    """
    node_records = [
        {
            "id": node_id,
            "group": attrs.get("type", "concept"),
            "descriptions": attrs.get("descriptions", []),
        }
        for node_id, attrs in G.nodes(data=True)
    ]
    link_records = [
        {
            "source": head,
            "target": tail,
            "type": attrs.get("type", "related_to"),
        }
        for head, tail, attrs in G.edges(data=True)
    ]
    return {"nodes": node_records, "links": link_records}
161
162
def graph_to_dot(G: "nx.DiGraph") -> str:
    """Export the graph to Graphviz DOT format.

    Each node is emitted as a quoted ID with a fill colour chosen by its
    ``type`` (unknown types use the "concept" colour) and a label equal to
    its name. Each edge is labelled with its ``type`` (default "related_to").

    Returns:
        The DOT source as a single string.
    """

    def _quote(value) -> str:
        # Escape backslashes before quotes so the backslash added for '"' is
        # not doubled. Without this a trailing backslash would swallow the
        # closing quote, and sequences like "\n" would be interpreted in labels.
        return str(value).replace("\\", "\\\\").replace('"', '\\"')

    lines = ["digraph KnowledgeGraph {"]
    lines.append(" rankdir=LR;")
    lines.append(' node [shape=box, style="rounded,filled", fontname="Helvetica"];')
    lines.append("")

    type_colors = {
        "person": "#f9d5e5",
        "concept": "#eeeeee",
        "technology": "#d5e5f9",
        "organization": "#f9f5d5",
        "diagram": "#d5f9e5",
        "time": "#e5d5f9",
    }

    for name, data in G.nodes(data=True):
        ntype = data.get("type", "concept")
        color = type_colors.get(ntype, "#eeeeee")
        escaped = _quote(name)
        lines.append(f' "{escaped}" [fillcolor="{color}", label="{escaped}"];')

    lines.append("")
    for src, tgt, data in G.edges(data=True):
        rtype = data.get("type", "related_to")
        escaped_src = _quote(src)
        escaped_tgt = _quote(tgt)
        escaped_type = _quote(rtype)
        lines.append(f' "{escaped_src}" -> "{escaped_tgt}" [label="{escaped_type}"];')

    lines.append("}")
    return "\n".join(lines)
195

PlanOpticon

Keyboard Shortcuts