|
09a0b7a…
|
leo
|
1 |
"""Tests for batch processing and knowledge graph merging.""" |
|
09a0b7a…
|
leo
|
2 |
|
|
09a0b7a…
|
leo
|
3 |
import json |
|
09a0b7a…
|
leo
|
4 |
|
|
09a0b7a…
|
leo
|
5 |
from video_processor.integrators.knowledge_graph import KnowledgeGraph |
|
09a0b7a…
|
leo
|
6 |
from video_processor.integrators.plan_generator import PlanGenerator |
|
09a0b7a…
|
leo
|
7 |
from video_processor.models import ( |
|
09a0b7a…
|
leo
|
8 |
ActionItem, |
|
09a0b7a…
|
leo
|
9 |
BatchManifest, |
|
09a0b7a…
|
leo
|
10 |
BatchVideoEntry, |
|
09a0b7a…
|
leo
|
11 |
DiagramResult, |
|
09a0b7a…
|
leo
|
12 |
KeyPoint, |
|
09a0b7a…
|
leo
|
13 |
VideoManifest, |
|
09a0b7a…
|
leo
|
14 |
VideoMetadata, |
|
09a0b7a…
|
leo
|
15 |
) |
|
09a0b7a…
|
leo
|
16 |
from video_processor.output_structure import ( |
|
09a0b7a…
|
leo
|
17 |
create_batch_output_dirs, |
|
09a0b7a…
|
leo
|
18 |
read_batch_manifest, |
|
09a0b7a…
|
leo
|
19 |
write_batch_manifest, |
|
09a0b7a…
|
leo
|
20 |
) |
|
09a0b7a…
|
leo
|
21 |
|
|
09a0b7a…
|
leo
|
22 |
|
|
0ad36b7…
|
noreply
|
23 |
def _make_kg_with_entity(name, entity_type="concept", descriptions=None, occurrences=None):
    """Return a new KnowledgeGraph holding a single entity.

    The entity is registered through the graph's private ``_store`` with a
    copy of ``descriptions`` (an empty list when none are supplied).  Every
    item of ``occurrences`` is read with ``.get`` for the keys ``"source"``
    (falling back to ``""``), ``"timestamp"`` and ``"text"``, and forwarded
    to the store as one occurrence of the entity.
    """
    graph = KnowledgeGraph()

    # Copy so the store never receives the caller's own list object.
    if descriptions:
        initial_descriptions = list(descriptions)
    else:
        initial_descriptions = []
    graph._store.merge_entity(name, entity_type, initial_descriptions)

    if occurrences:
        for record in occurrences:
            source = record.get("source", "")
            timestamp = record.get("timestamp")
            text = record.get("text")
            graph._store.add_occurrence(name, source, timestamp, text)

    return graph
|
0ad36b7…
|
noreply
|
31 |
|
|
0ad36b7…
|
noreply
|
32 |
|
|
09a0b7a…
|
leo
|
33 |
class TestKnowledgeGraphMerge:
    """Checks for ``KnowledgeGraph.merge`` on graphs populated via the store."""

    def test_merge_new_nodes(self):
        """Merging a graph with a different entity leaves both names in ``nodes``."""
        target = KnowledgeGraph()
        target._store.merge_entity("Python", "concept", ["A programming language"])
        target._store.add_occurrence("Python", "video1")

        incoming = KnowledgeGraph()
        incoming._store.merge_entity("Rust", "concept", ["A systems language"])
        incoming._store.add_occurrence("Rust", "video2")

        target.merge(incoming)

        for expected_name in ("Python", "Rust"):
            assert expected_name in target.nodes
        assert len(target.nodes) == 2

    def test_merge_overlapping_nodes_case_insensitive(self):
        """Entity names that differ only by case end up in a single node."""
        base = KnowledgeGraph()
        base._store.merge_entity("Python", "concept", ["Language A"])
        base._store.add_occurrence("Python", "v1")

        lowercase_variant = KnowledgeGraph()
        lowercase_variant._store.merge_entity("python", "concept", ["Language B"])
        lowercase_variant._store.add_occurrence("python", "v2")

        base.merge(lowercase_variant)

        # The lower-case entity must fold into the existing "Python" node
        # rather than show up as a second node.
        assert len(base.nodes) == 1
        assert "Python" in base.nodes

        python_node = base.nodes["Python"]
        assert len(python_node["occurrences"]) == 2
        assert "Language B" in python_node["descriptions"]

    def test_merge_relationships(self):
        """Relationships from both graphs are present after the merge."""
        left = KnowledgeGraph()
        for entity in ("A", "B"):
            left._store.merge_entity(entity, "concept", [])
        left._store.add_relationship("A", "B", "uses")

        right = KnowledgeGraph()
        for entity in ("C", "D"):
            right._store.merge_entity(entity, "concept", [])
        right._store.add_relationship("C", "D", "calls")

        left.merge(right)

        relationship_count = len(left.relationships)
        assert relationship_count == 2

    def test_merge_empty_into_populated(self):
        """Merging an empty graph leaves the single existing node in place."""
        populated = KnowledgeGraph()
        populated._store.merge_entity("X", "concept", [])

        populated.merge(KnowledgeGraph())

        node_count = len(populated.nodes)
        assert node_count == 1
|
09a0b7a…
|
leo
|
85 |
|
|
09a0b7a…
|
leo
|
86 |
|
|
09a0b7a…
|
leo
|
87 |
class TestKnowledgeGraphFromDict:
    """Checks for ``KnowledgeGraph.from_dict`` with exported and hand-built data."""

    def test_round_trip(self):
        """A graph exported with ``to_dict`` can be rebuilt with ``from_dict``."""
        source_graph = KnowledgeGraph()
        source_graph._store.merge_entity("Alice", "person", ["Team lead"])
        source_graph._store.add_occurrence("Alice", "transcript")
        source_graph._store.merge_entity("Bob", "person", [])
        source_graph._store.add_relationship("Alice", "Bob", "manages")

        exported = source_graph.to_dict()
        restored = KnowledgeGraph.from_dict(exported)

        assert "Alice" in restored.nodes
        alice = restored.nodes["Alice"]
        assert alice["type"] == "person"
        assert len(restored.relationships) == 1

    def test_from_dict_with_list_descriptions(self):
        """A node whose ``descriptions`` is a list is loaded with its entries."""
        node = {
            "id": "X",
            "name": "X",
            "type": "concept",
            "descriptions": ["desc1", "desc2"],
            "occurrences": [],
        }
        payload = {"nodes": [node], "relationships": []}

        graph = KnowledgeGraph.from_dict(payload)

        assert "X" in graph.nodes
        loaded_descriptions = graph.nodes["X"]["descriptions"]
        assert "desc1" in loaded_descriptions

    def test_from_empty_dict(self):
        """An empty dict produces a graph with no nodes and no relationships."""
        graph = KnowledgeGraph.from_dict({})

        node_count = len(graph.nodes)
        relationship_count = len(graph.relationships)
        assert node_count == 0
        assert relationship_count == 0
|
09a0b7a…
|
leo
|
122 |
|
|
09a0b7a…
|
leo
|
123 |
|
|
09a0b7a…
|
leo
|
124 |
class TestKnowledgeGraphSave:
    """Checks for the file written by ``KnowledgeGraph.save``."""

    def test_save_as_pydantic(self, tmp_path):
        """``save`` returns an existing path whose JSON lists the stored entity first."""
        graph = KnowledgeGraph()
        graph._store.merge_entity("Test", "concept", ["A test entity"])

        destination = tmp_path / "kg.json"
        saved_path = graph.save(destination)
        assert saved_path.exists()

        # Parse what was written and inspect the first serialized node.
        raw_text = saved_path.read_text()
        payload = json.loads(raw_text)
        assert "nodes" in payload
        first_node = payload["nodes"][0]
        assert first_node["name"] == "Test"
|
09a0b7a…
|
leo
|
134 |
|
|
09a0b7a…
|
leo
|
135 |
|
|
09a0b7a…
|
leo
|
136 |
class TestBatchOutputDirs:
    """Checks for ``create_batch_output_dirs``."""

    def test_creates_video_dirs(self, tmp_path):
        """The ``"root"`` and ``"videos"`` entries of the result exist on disk."""
        batch_root = tmp_path / "batch"
        created = create_batch_output_dirs(batch_root, "test_batch")

        for key in ("root", "videos"):
            assert created[key].exists()
|
09a0b7a…
|
leo
|
141 |
|
|
09a0b7a…
|
leo
|
142 |
|
|
09a0b7a…
|
leo
|
143 |
class TestBatchManifest:
    """Checks that a batch manifest survives a write/read cycle."""

    def test_round_trip(self, tmp_path):
        """Fields written by ``write_batch_manifest`` come back from ``read_batch_manifest``."""
        completed_entry = BatchVideoEntry(
            video_name="v1",
            manifest_path="videos/v1/manifest.json",
            status="completed",
            diagrams_count=3,
        )
        failed_entry = BatchVideoEntry(
            video_name="v2",
            manifest_path="videos/v2/manifest.json",
            status="failed",
            error="Audio extraction failed",
        )
        original = BatchManifest(
            title="Test Batch",
            total_videos=2,
            completed_videos=1,
            failed_videos=1,
            videos=[completed_entry, failed_entry],
        )

        write_batch_manifest(original, tmp_path)
        loaded = read_batch_manifest(tmp_path)

        assert loaded.title == "Test Batch"
        assert loaded.total_videos == 2

        # Entries are checked by position: completed first, failed second.
        first_video, second_video = loaded.videos[0], loaded.videos[1]
        assert first_video.status == "completed"
        assert second_video.error == "Audio extraction failed"
|
09a0b7a…
|
leo
|
171 |
|
|
09a0b7a…
|
leo
|
172 |
|
|
09a0b7a…
|
leo
|
173 |
class TestBatchSummary:
    """Checks for ``PlanGenerator.generate_batch_summary``."""

    def test_generate_batch_summary(self, tmp_path):
        """The summary text mentions the title, videos and action item, and is written to disk."""
        first_meeting = VideoManifest(
            video=VideoMetadata(title="Meeting 1", duration_seconds=3600),
            key_points=[KeyPoint(point="Point 1")],
            action_items=[ActionItem(action="Do X", assignee="Alice")],
            diagrams=[DiagramResult(frame_index=0, confidence=0.9)],
        )
        second_meeting = VideoManifest(
            video=VideoMetadata(title="Meeting 2"),
            key_points=[KeyPoint(point="Point 2"), KeyPoint(point="Point 3")],
            action_items=[],
            diagrams=[],
        )
        manifests = [first_meeting, second_meeting]
        summary_file = tmp_path / "summary.md"

        generator = PlanGenerator()
        summary = generator.generate_batch_summary(
            manifests=manifests,
            title="Weekly Meetings",
            output_path=summary_file,
        )

        # NOTE(review): "2" is meant to cover the video count, but the title
        # "Meeting 2" contains the same character, so that check alone cannot
        # fail while the title check passes.
        expected_fragments = (
            "Weekly Meetings",
            "2",  # 2 videos
            "Meeting 1",
            "Meeting 2",
            "Do X",
            "Alice",
        )
        for fragment in expected_fragments:
            assert fragment in summary
        assert (tmp_path / "summary.md").exists()

    def test_batch_summary_with_kg(self, tmp_path):
        """Passing a knowledge graph adds "Knowledge Graph" and "mermaid" to the summary."""
        single_video = VideoManifest(video=VideoMetadata(title="V1"))
        manifests = [single_video]

        graph = KnowledgeGraph()
        graph._store.merge_entity("Test", "concept", [])
        # A self-referencing relationship gives the graph one relationship.
        graph._store.add_relationship("Test", "Test", "self")

        generator = PlanGenerator()
        summary = generator.generate_batch_summary(
            manifests=manifests, kg=graph, output_path=tmp_path / "s.md"
        )

        for marker in ("Knowledge Graph", "mermaid"):
            assert marker in summary