PlanOpticon

planopticon / tests / test_knowledge_graph.py
Blame History Raw 539 lines
1
"""Tests for the KnowledgeGraph class."""
2
3
import json
4
from unittest.mock import MagicMock, patch
5
6
import pytest
7
8
from video_processor.integrators.knowledge_graph import KnowledgeGraph
9
10
11
@pytest.fixture
def mock_pm():
    """Provide a stand-in ProviderManager whose ``chat()`` yields fixed JSON.

    The canned reply holds two entities (Python, Alice) and a single
    ``uses`` relationship going from Alice to Python.
    """
    canned_entities = [
        {"name": "Python", "type": "technology", "description": "A programming language"},
        {"name": "Alice", "type": "person", "description": "Lead developer"},
    ]
    canned_relationships = [
        {"source": "Alice", "target": "Python", "type": "uses"},
    ]
    reply = {"entities": canned_entities, "relationships": canned_relationships}

    provider = MagicMock()
    provider.chat.return_value = json.dumps(reply)
    return provider
27
28
29
@pytest.fixture
def kg_no_provider():
    """Provide a KnowledgeGraph built with default arguments, i.e. no provider manager."""
    graph = KnowledgeGraph()
    return graph
33
34
35
@pytest.fixture
def kg_with_provider(mock_pm):
    """Provide a KnowledgeGraph wired to the ``mock_pm`` provider and no ``db_path``."""
    graph = KnowledgeGraph(provider_manager=mock_pm)
    return graph
39
40
41
class TestCreation:
    """Constructing a KnowledgeGraph with each optional argument."""

    def test_create_without_db_path(self):
        # A default-constructed graph has no provider and an empty store.
        graph = KnowledgeGraph()
        assert graph.pm is None
        backing = graph._store
        assert backing.get_entity_count() == 0
        assert backing.get_relationship_count() == 0

    def test_create_with_db_path(self, tmp_path):
        # Supplying db_path should leave an empty graph and a file on disk.
        location = tmp_path / "test.db"
        graph = KnowledgeGraph(db_path=location)
        assert graph._store.get_entity_count() == 0
        assert location.exists()

    def test_create_with_provider(self, mock_pm):
        # The provider passed in is exposed unchanged as ``pm``.
        graph = KnowledgeGraph(provider_manager=mock_pm)
        assert graph.pm is mock_pm
57
58
59
class TestProcessTranscript:
    """``process_transcript`` on typical, empty, and batched transcripts."""

    def test_process_transcript_extracts_entities(self, kg_with_provider, mock_pm):
        opening = {"text": "Alice is using Python for the project", "start": 0.0, "speaker": "Alice"}
        follow_up = {"text": "It works great for data processing", "start": 5.0}
        kg_with_provider.process_transcript({"segments": [opening, follow_up]})

        # The canned provider reply names Python and Alice as entities.
        graph_nodes = kg_with_provider.nodes
        for expected in ("Python", "Alice"):
            assert expected in graph_nodes
        assert graph_nodes["Python"]["type"] == "technology"

    def test_process_transcript_registers_speakers(self, kg_with_provider):
        payload = {"segments": [{"text": "Hello everyone", "start": 0.0, "speaker": "Bob"}]}
        kg_with_provider.process_transcript(payload)
        # The segment's speaker must end up as an entity in the store.
        assert kg_with_provider._store.has_entity("Bob")

    def test_process_transcript_missing_segments(self, kg_with_provider):
        """A transcript without "segments" must not raise and must add no entities."""
        empty_transcript = {}
        kg_with_provider.process_transcript(empty_transcript)
        assert kg_with_provider._store.get_entity_count() == 0

    def test_process_transcript_empty_text_skipped(self, kg_with_provider, mock_pm):
        blank_segment = {"text": " ", "start": 0.0}
        kg_with_provider.process_transcript({"segments": [blank_segment]})
        # Whitespace-only text must never reach the provider
        # (speaker registration may still happen).
        mock_pm.chat.assert_not_called()

    def test_process_transcript_batching(self, kg_with_provider, mock_pm):
        """Five segments with batch_size=2 should result in three chat calls."""
        parts = []
        for idx in range(5):
            parts.append({"text": f"Segment {idx}", "start": float(idx)})
        kg_with_provider.process_transcript({"segments": parts}, batch_size=2)
        assert mock_pm.chat.call_count == 3
105
106
107
class TestProcessDiagrams:
    """``process_diagrams`` with and without extracted text."""

    def test_process_diagrams_with_text(self, kg_with_provider, mock_pm):
        single = {"text_content": "Architecture shows Python microservices", "frame_index": 0}
        kg_with_provider.process_diagrams([single])

        # Exactly one provider call is expected for the one text payload.
        assert mock_pm.chat.call_count == 1
        # A diagram_0 entity must be present afterwards.
        assert kg_with_provider._store.has_entity("diagram_0")

    def test_process_diagrams_without_text(self, kg_with_provider, mock_pm):
        textless = {"text_content": "", "frame_index": 5}
        kg_with_provider.process_diagrams([textless])
        # Empty text must not trigger a provider call...
        mock_pm.chat.assert_not_called()
        # ...yet the diagram entity is still recorded.
        assert kg_with_provider._store.has_entity("diagram_0")

    def test_process_multiple_diagrams(self, kg_with_provider, mock_pm):
        batch = [
            {"text_content": "Diagram A content", "frame_index": 0},
            {"text_content": "Diagram B content", "frame_index": 10},
        ]
        kg_with_provider.process_diagrams(batch)
        store = kg_with_provider._store
        # Both diagrams are expected under consecutive names.
        assert store.has_entity("diagram_0")
        assert store.has_entity("diagram_1")
137
138
139
class TestProcessScreenshots:
    """``process_screenshots`` with text, without text, and with edge-case entity lists.

    The class-level fixtures below shadow the module-level ``mock_pm`` and
    ``kg_with_provider`` so that ``chat()`` returns a flat JSON list of
    entities instead of the entities/relationships mapping.
    """

    @pytest.fixture
    def mock_pm(self):
        """Mock provider whose ``chat()`` returns a flat JSON list of two entities."""
        pm = MagicMock()
        pm.chat.return_value = json.dumps(
            [
                {"name": "Python", "type": "technology", "description": "Language"},
                {"name": "Flask", "type": "technology", "description": "Framework"},
            ]
        )
        return pm

    @pytest.fixture
    def kg_with_provider(self, mock_pm):
        """KnowledgeGraph bound to this class's ``mock_pm``."""
        return KnowledgeGraph(provider_manager=mock_pm)

    def test_process_screenshots_with_text(self, kg_with_provider, mock_pm):
        screenshots = [
            {
                "text_content": "import flask\napp = Flask(__name__)",
                "content_type": "code",
                "entities": ["Flask", "Python"],
                "frame_index": 3,
            },
        ]
        kg_with_provider.process_screenshots(screenshots)
        # Non-empty text_content should be sent to the provider for extraction.
        mock_pm.chat.assert_called()
        # Explicitly listed entities should be added.
        assert kg_with_provider._store.has_entity("Flask")
        assert kg_with_provider._store.has_entity("Python")

    def test_process_screenshots_without_text(self, kg_with_provider, mock_pm):
        screenshots = [
            {
                "text_content": "",
                "content_type": "other",
                "entities": ["Docker"],
                "frame_index": 5,
            },
        ]
        kg_with_provider.process_screenshots(screenshots)
        # No chat call for empty text.
        mock_pm.chat.assert_not_called()
        # But explicit entities are still added.
        assert kg_with_provider._store.has_entity("Docker")

    def test_process_screenshots_empty_entities(self, kg_with_provider, mock_pm):
        screenshots = [
            {
                "text_content": "",
                "content_type": "slide",
                "entities": [],
                "frame_index": 0,
            },
        ]
        # Must not raise for a screenshot that carries neither text nor entities.
        kg_with_provider.process_screenshots(screenshots)
        # Previously this test asserted nothing; at minimum, empty text must not
        # reach the provider (same expectation as the empty-text test above).
        mock_pm.chat.assert_not_called()
        # NOTE(review): the original comment said "no entities added", but the
        # entity count is left unasserted here; whether a per-screenshot node is
        # recorded could not be confirmed from this file -- TODO confirm and pin.

    def test_process_screenshots_filters_short_names(self, kg_with_provider):
        screenshots = [
            {
                "text_content": "",
                "entities": ["A", "Go", "Python"],
                "frame_index": 0,
            },
        ]
        kg_with_provider.process_screenshots(screenshots)
        # "A" is too short (< 2 chars) and is filtered out; longer names stay.
        assert not kg_with_provider._store.has_entity("A")
        assert kg_with_provider._store.has_entity("Go")
        assert kg_with_provider._store.has_entity("Python")
211
212
213
class TestToDictFromDict:
    """Serialising with ``to_dict`` and rebuilding with ``from_dict``."""

    def test_round_trip_empty(self):
        # An empty graph must survive the round trip as an empty graph.
        rebuilt = KnowledgeGraph.from_dict(KnowledgeGraph().to_dict())
        assert rebuilt._store.get_entity_count() == 0
        assert rebuilt._store.get_relationship_count() == 0

    def test_round_trip_with_entities(self, kg_with_provider, mock_pm):
        # Populate the graph through the mocked provider first.
        kg_with_provider.add_content("Alice uses Python", "test_source")
        before = kg_with_provider.to_dict()

        after = KnowledgeGraph.from_dict(before).to_dict()

        # Node and relationship counts must be unchanged.
        for section in ("nodes", "relationships"):
            assert len(after[section]) == len(before[section])

        # The same set of node names must come back.
        names_before = {node["name"] for node in before["nodes"]}
        names_after = {node["name"] for node in after["nodes"]}
        assert names_before == names_after

    def test_round_trip_with_sources(self):
        source_record = {
            "source_id": "src1",
            "source_type": "video",
            "title": "Test Video",
            "ingested_at": "2025-01-01T00:00:00",
        }
        graph = KnowledgeGraph()
        graph.register_source(source_record)

        exported = graph.to_dict()
        assert "sources" in exported
        assert exported["sources"][0]["source_id"] == "src1"

        # The registered source must still be there after rebuilding.
        restored_sources = KnowledgeGraph.from_dict(exported)._store.get_sources()
        assert len(restored_sources) == 1
        assert restored_sources[0]["source_id"] == "src1"

    def test_from_dict_with_db_path(self, tmp_path):
        serialized = {
            "nodes": [
                {"name": "TestEntity", "type": "concept", "descriptions": ["A test"]},
            ],
            "relationships": [],
        }
        target = tmp_path / "restored.db"
        graph = KnowledgeGraph.from_dict(serialized, db_path=target)
        # The node is loaded and the database file is created.
        assert graph._store.has_entity("TestEntity")
        assert target.exists()
266
267
268
class TestSave:
    """``save`` for JSON, database, suffix-less, nested, and unknown-suffix paths."""

    def test_save_json(self, tmp_path, kg_with_provider, mock_pm):
        kg_with_provider.add_content("Alice uses Python", "source1")
        destination = tmp_path / "graph.json"
        written = kg_with_provider.save(destination)

        assert written == destination
        assert destination.exists()
        # The saved file must parse as JSON carrying both top-level sections.
        parsed = json.loads(destination.read_text())
        for section in ("nodes", "relationships"):
            assert section in parsed

    def test_save_db(self, tmp_path, kg_with_provider, mock_pm):
        kg_with_provider.add_content("Alice uses Python", "source1")
        destination = tmp_path / "graph.db"
        written = kg_with_provider.save(destination)

        assert written == destination
        assert destination.exists()

    def test_save_no_suffix_defaults_to_db(self, tmp_path, kg_with_provider, mock_pm):
        kg_with_provider.add_content("Alice uses Python", "source1")
        written = kg_with_provider.save(tmp_path / "graph")
        # A path without a suffix is expected to come back as a ".db" file.
        assert written.suffix == ".db"
        assert written.exists()

    def test_save_creates_parent_dirs(self, tmp_path, kg_with_provider, mock_pm):
        kg_with_provider.add_content("Alice uses Python", "source1")
        nested = tmp_path / "nested" / "dir" / "graph.json"
        written = kg_with_provider.save(nested)
        # Missing parent directories must not prevent the file being written.
        assert written.exists()

    def test_save_unknown_suffix_falls_back_to_json(self, tmp_path):
        graph = KnowledgeGraph()
        graph._store.merge_entity("TestNode", "concept", ["test"])
        destination = tmp_path / "graph.xyz"
        written = graph.save(destination)
        assert written.exists()
        # The content must be valid JSON despite the unknown suffix.
        parsed = json.loads(destination.read_text())
        assert "nodes" in parsed
310
311
312
class TestMerge:
    """``merge`` across disjoint, overlapping, fuzzy-matching, and source-bearing graphs."""

    def test_merge_disjoint(self):
        target = KnowledgeGraph()
        target._store.merge_entity("Alice", "person", ["Developer"])

        incoming = KnowledgeGraph()
        incoming._store.merge_entity("Bob", "person", ["Manager"])

        target.merge(incoming)
        store = target._store
        # Both entities must be present, and nothing else.
        assert store.has_entity("Alice")
        assert store.has_entity("Bob")
        assert store.get_entity_count() == 2

    def test_merge_overlapping_entities_descriptions_merged(self):
        target = KnowledgeGraph()
        target._store.merge_entity("Python", "concept", ["A language"])

        incoming = KnowledgeGraph()
        incoming._store.merge_entity("Python", "technology", ["Programming language"])

        target.merge(incoming)
        merged = target._store.get_entity("Python")
        # Descriptions contributed by either graph must both survive.
        found = merged["descriptions"]
        found = list(found) if isinstance(found, set) else found
        assert "A language" in found
        assert "Programming language" in found

    def test_merge_overlapping_entities_with_sqlite(self, tmp_path):
        """Merge two database-backed graphs that share one entity name.

        Per the original note, SQLiteStore updates the type on merge_entity
        while InMemoryStore ignores it for existing entities; only existence
        and the entity count are asserted here.
        """
        target = KnowledgeGraph(db_path=tmp_path / "kg1.db")
        target._store.merge_entity("Python", "concept", ["A language"])

        incoming = KnowledgeGraph(db_path=tmp_path / "kg2.db")
        incoming._store.merge_entity("Python", "technology", ["Programming language"])

        target.merge(incoming)
        merged = target._store.get_entity("Python")
        assert merged is not None
        assert target._store.get_entity_count() == 1

    def test_merge_fuzzy_match(self):
        target = KnowledgeGraph()
        target._store.merge_entity("JavaScript", "technology", ["A language"])

        incoming = KnowledgeGraph()
        incoming._store.merge_entity("Javascript", "technology", ["Web language"])

        target.merge(incoming)
        # The near-identical names must collapse into a single entity.
        assert target._store.get_entity_count() == 1
        survivor = target._store.get_entity("JavaScript")
        assert survivor is not None

    def test_merge_relationships(self):
        target = KnowledgeGraph()
        target._store.merge_entity("Alice", "person", [])

        incoming = KnowledgeGraph()
        incoming._store.merge_entity("Bob", "person", [])
        incoming._store.add_relationship("Alice", "Bob", "collaborates_with")

        target.merge(incoming)
        # The single relationship from the incoming graph must carry over.
        carried = target._store.get_all_relationships()
        assert len(carried) == 1
        assert carried[0]["type"] == "collaborates_with"

    def test_merge_sources(self):
        source_record = {
            "source_id": "vid2",
            "source_type": "video",
            "title": "Video 2",
            "ingested_at": "2025-01-01T00:00:00",
        }
        target = KnowledgeGraph()
        incoming = KnowledgeGraph()
        incoming.register_source(source_record)

        target.merge(incoming)
        # The source registered on the incoming graph must appear on the target.
        carried = target._store.get_sources()
        assert len(carried) == 1
        assert carried[0]["source_id"] == "vid2"

    def test_merge_type_specificity_with_sqlite(self, tmp_path):
        """Merge database-backed graphs whose shared entity has differing types.

        Only existence and the entity count are asserted; the resolved type
        itself is not checked.
        """
        target = KnowledgeGraph(db_path=tmp_path / "kg1.db")
        target._store.merge_entity("React", "concept", [])

        incoming = KnowledgeGraph(db_path=tmp_path / "kg2.db")
        incoming._store.merge_entity("React", "technology", [])

        target.merge(incoming)
        merged = target._store.get_entity("React")
        assert merged is not None
        assert target._store.get_entity_count() == 1
411
412
413
class TestRegisterSource:
    """``register_source`` followed by retrieval from the store."""

    def test_register_and_retrieve(self):
        graph = KnowledgeGraph()
        graph.register_source(
            {
                "source_id": "src123",
                "source_type": "video",
                "title": "Meeting Recording",
                "path": "/tmp/meeting.mp4",
                "ingested_at": "2025-06-01T10:00:00",
            }
        )
        stored = graph._store.get_sources()
        # Exactly one source comes back, with its id and title intact.
        assert len(stored) == 1
        only = stored[0]
        assert only["source_id"] == "src123"
        assert only["title"] == "Meeting Recording"

    def test_register_multiple_sources(self):
        graph = KnowledgeGraph()
        records = [
            {
                "source_id": f"src{n}",
                "source_type": "video",
                "title": f"Video {n}",
                "ingested_at": "2025-01-01",
            }
            for n in range(3)
        ]
        for record in records:
            graph.register_source(record)
        # Each distinct source id is kept separately.
        assert len(graph._store.get_sources()) == 3
441
442
443
class TestClassifyForPlanning:
    """``classify_for_planning`` and its use of TaxonomyClassifier."""

    @patch("video_processor.integrators.knowledge_graph.TaxonomyClassifier", create=True)
    def test_classify_calls_taxonomy(self, mock_cls):
        """classify_for_planning is expected to hand off to TaxonomyClassifier."""
        classifier = MagicMock()
        classifier.classify_entities.return_value = {"goals": [], "risks": []}

        # Replace the class in the taxonomy module so instantiation yields the mock.
        taxonomy_patch = patch(
            "video_processor.integrators.taxonomy.TaxonomyClassifier",
            return_value=classifier,
        )
        with taxonomy_patch:
            graph = KnowledgeGraph()
            graph._store.merge_entity("Ship MVP", "concept", ["Launch the product"])
            graph.classify_for_planning()

        classifier.classify_entities.assert_called_once()
459
460
461
class TestExtractEntitiesAndRelationships:
    """``extract_entities_and_relationships`` for well-formed and degenerate replies."""

    def test_returns_entities_and_relationships(self, kg_with_provider):
        found_entities, found_rels = kg_with_provider.extract_entities_and_relationships(
            "Alice uses Python"
        )
        # The canned reply carries two entities and one relationship.
        assert len(found_entities) == 2
        assert len(found_rels) == 1
        assert found_entities[0].name == "Python"
        link = found_rels[0]
        assert link.source == "Alice"
        assert link.target == "Python"

    def test_no_provider_returns_empty(self, kg_no_provider):
        # Without a provider there is nothing to extract with.
        found_entities, found_rels = kg_no_provider.extract_entities_and_relationships("Some text")
        assert found_entities == []
        assert found_rels == []

    def test_handles_flat_list_response(self, mock_pm):
        """A flat list of entities from the model should still be parsed as entities."""
        flat_reply = [
            {"name": "Docker", "type": "technology", "description": "Container platform"},
        ]
        mock_pm.chat.return_value = json.dumps(flat_reply)
        graph = KnowledgeGraph(provider_manager=mock_pm)
        found_entities, found_rels = graph.extract_entities_and_relationships("Using Docker")
        assert len(found_entities) == 1
        assert found_entities[0].name == "Docker"
        assert found_rels == []

    def test_handles_malformed_json(self, mock_pm):
        # A reply that is not JSON must yield empty results rather than an error.
        mock_pm.chat.return_value = "not valid json at all"
        graph = KnowledgeGraph(provider_manager=mock_pm)
        found_entities, found_rels = graph.extract_entities_and_relationships("text")
        assert found_entities == []
        assert found_rels == []
494
495
496
class TestNodeAndRelationshipProperties:
    """Shape of the ``nodes`` and ``relationships`` properties."""

    def test_nodes_property(self, kg_with_provider, mock_pm):
        kg_with_provider.add_content("Alice uses Python", "src")
        node_map = kg_with_provider.nodes
        assert isinstance(node_map, dict)
        # Every node record must expose the three expected keys.
        for record in node_map.values():
            for key in ("name", "type", "descriptions"):
                assert key in record

    def test_relationships_property(self, kg_with_provider, mock_pm):
        kg_with_provider.add_content("Alice uses Python", "src")
        links = kg_with_provider.relationships
        assert isinstance(links, list)
        # Key checks apply only when at least one relationship is present.
        if not links:
            return
        first = links[0]
        for key in ("source", "target", "type"):
            assert key in first
514
515
516
class TestToData:
    """``to_data`` conversion into the KnowledgeGraphData model."""

    def test_to_data_returns_pydantic_model(self, kg_with_provider, mock_pm):
        kg_with_provider.add_content("Alice uses Python", "src")
        exported = kg_with_provider.to_data()
        from video_processor.models import KnowledgeGraphData

        assert isinstance(exported, KnowledgeGraphData)
        # At least one node is expected, and each must carry a ``name``.
        assert len(exported.nodes) > 0
        for node in exported.nodes:
            assert hasattr(node, "name")

    def test_to_data_includes_sources(self):
        source_record = {
            "source_id": "s1",
            "source_type": "video",
            "title": "Test",
            "ingested_at": "2025-01-01",
        }
        graph = KnowledgeGraph()
        graph.register_source(source_record)
        exported = graph.to_data()
        # The registered source must be carried into the exported model.
        assert len(exported.sources) == 1
        assert exported.sources[0].source_id == "s1"
539

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button