PlanOpticon

planopticon / tests / test_pipeline.py

Blame History Raw 535 lines

1	`"""Tests for the core video processing pipeline."""`
2
3	`import json`
4	`from pathlib import Path`
5	`from unittest.mock import MagicMock, patch`
6
7	`import pytest`
8
9	`from video_processor.pipeline import (`
10	`_extract_action_items,`
11	`_extract_key_points,`
12	`_format_srt_time,`
13	`process_single_video,`
14	`)`
15
16
class TestFormatSrtTime:
    """Check the ``HH:MM:SS,mmm`` strings produced by ``_format_srt_time``."""

    def test_zero(self):
        """Zero seconds renders as an all-zero timestamp."""
        rendered = _format_srt_time(0)
        assert rendered == "00:00:00,000"

    def test_seconds(self):
        """A fractional second value fills the millisecond field."""
        rendered = _format_srt_time(5.5)
        assert rendered == "00:00:05,500"

    def test_minutes(self):
        """Ninety seconds rolls over into the minutes field."""
        rendered = _format_srt_time(90.0)
        assert rendered == "00:01:30,000"

    def test_hours(self):
        """A value above one hour populates every field."""
        rendered = _format_srt_time(3661.123)
        assert rendered == "01:01:01,123"

    def test_large_value(self):
        """Exactly two hours leaves minutes, seconds and milliseconds at zero."""
        assert _format_srt_time(7200.0) == "02:00:00,000"
33
34
class TestExtractKeyPoints:
    """Exercise ``_extract_key_points`` against canned provider chat replies."""

    def test_parses_valid_response(self):
        """Two well-formed items in the reply yield two key points, in order."""
        reply_items = [
            {"point": "Main point", "topic": "Architecture", "details": "Some details"},
            {"point": "Second point", "topic": None, "details": None},
        ]
        pm = MagicMock()
        pm.chat.return_value = json.dumps(reply_items)

        result = _extract_key_points(pm, "Some transcript text here")

        assert len(result) == 2
        first, second = result[0], result[1]
        assert first.point == "Main point"
        assert first.topic == "Architecture"
        assert second.point == "Second point"

    def test_skips_invalid_items(self):
        """Items with a missing or empty ``point`` are left out of the result."""
        reply_items = [
            {"point": "Valid", "topic": None},
            {"topic": "No point field"},
            {"point": "", "topic": "Empty point"},
        ]
        pm = MagicMock()
        pm.chat.return_value = json.dumps(reply_items)

        result = _extract_key_points(pm, "text")

        assert len(result) == 1
        assert result[0].point == "Valid"

    def test_handles_error(self):
        """An exception raised by the chat call results in an empty list."""
        pm = MagicMock()
        pm.chat.side_effect = Exception("API error")

        result = _extract_key_points(pm, "text")

        assert result == []

    def test_handles_non_list_response(self):
        """A JSON object (rather than a list) reply results in an empty list."""
        pm = MagicMock()
        pm.chat.return_value = '{"not": "a list"}'

        result = _extract_key_points(pm, "text")

        assert result == []
74
75
class TestExtractActionItems:
    """Exercise ``_extract_action_items`` against canned provider chat replies."""

    def test_parses_valid_response(self):
        """A fully populated item in the reply is returned with its fields intact."""
        pm = MagicMock()
        pm.chat.return_value = json.dumps(
            [
                {
                    "action": "Deploy fix",
                    "assignee": "Bob",
                    "deadline": "Friday",
                    "priority": "high",
                    "context": "Production",
                },
            ]
        )
        result = _extract_action_items(pm, "Some transcript text")
        assert len(result) == 1
        assert result[0].action == "Deploy fix"
        assert result[0].assignee == "Bob"

    def test_skips_invalid_items(self):
        """Items with a missing or empty ``action`` are left out of the result."""
        pm = MagicMock()
        pm.chat.return_value = json.dumps(
            [
                {"action": "Valid action"},
                {"assignee": "No action field"},
                {"action": ""},
            ]
        )
        result = _extract_action_items(pm, "text")
        assert len(result) == 1
        # Pin *which* item survived, mirroring the key-point variant of this
        # test; a bare length check would also pass if the wrong item was kept.
        assert result[0].action == "Valid action"

    def test_handles_error(self):
        """An exception raised by the chat call results in an empty list."""
        pm = MagicMock()
        pm.chat.side_effect = Exception("API down")
        result = _extract_action_items(pm, "text")
        assert result == []
112
113
114	`# ---------------------------------------------------------------------------`
115	`# process_single_video tests (heavily mocked)`
116	`# ---------------------------------------------------------------------------`
117
118
def _make_mock_pm():
    """Build a mock ProviderManager with usage tracker and predictable responses.

    The returned ``MagicMock`` is configured so that:

    * ``usage.start_step`` and ``usage.end_step`` are plain stubs,
    * ``transcribe_audio`` returns a fixed two-segment transcript,
    * ``chat`` answers with canned JSON chosen from the first message's content,
    * ``get_models_used`` returns fixed mock model names.
    """
    canned_transcript = {
        "text": "Alice discussed the Python deployment strategy with Bob.",
        "segments": [
            {"start": 0.0, "end": 5.0, "text": "Alice discussed the Python deployment strategy."},
            {"start": 5.0, "end": 10.0, "text": "Bob agreed on the timeline."},
        ],
        "duration": 10.0,
        "language": "en",
        "provider": "mock",
        "model": "mock-whisper",
    }
    key_points_payload = [
        {"point": "Deployment strategy discussed", "topic": "DevOps", "details": "Python"}
    ]
    action_items_payload = [
        {"action": "Deploy to production", "assignee": "Bob", "priority": "high"}
    ]
    # Fallback reply: entity extraction for the knowledge graph.
    graph_payload = {
        "entities": [
            {"name": "Python", "type": "technology", "description": "Programming language"},
            {"name": "Alice", "type": "person", "description": "Engineer"},
        ],
        "relationships": [
            {"source": "Alice", "target": "Python", "type": "uses"},
        ],
    }
    # Checked in order, so a prompt mentioning both markers gets the key points.
    routed_payloads = (
        ("key points", key_points_payload),
        ("action items", action_items_payload),
    )

    def _canned_chat(messages, **kwargs):
        # Only the first message is inspected; an empty list falls through
        # to the knowledge-graph reply.
        prompt = messages[0]["content"] if messages else ""
        lowered = prompt.lower()
        for marker, payload in routed_payloads:
            if marker in lowered:
                return json.dumps(payload)
        return json.dumps(graph_payload)

    pm = MagicMock()

    # Usage tracker stub
    pm.usage = MagicMock()
    pm.usage.start_step = MagicMock()
    pm.usage.end_step = MagicMock()

    pm.transcribe_audio.return_value = canned_transcript
    pm.chat.side_effect = _canned_chat
    pm.get_models_used.return_value = {"chat": "mock-gpt", "transcription": "mock-whisper"}
    return pm
168
169
def _make_tqdm_passthrough(mock_tqdm):
    """Configure mock tqdm to pass through iterables while supporting .set_description() etc.

    Each call to the configured mock returns a fresh ``MagicMock`` that
    iterates over the given iterable; any other attribute access on it
    is absorbed by the mock.
    """

    def _passthrough(iterable, **kw):
        def _iterate(self):
            # Build a new iterator on every iter() call.
            return iter(iterable)

        progress = MagicMock()
        progress.__iter__ = _iterate
        return progress

    mock_tqdm.side_effect = _passthrough
179
180
def _create_fake_video(path: Path) -> Path:
    """Create a tiny file that stands in for a video (all extractors are mocked).

    Missing parent directories are created first. The file holds 64 zero
    bytes, and the same ``path`` is returned to the caller.
    """
    placeholder = b"\x00" * 64
    folder = path.parent
    folder.mkdir(parents=True, exist_ok=True)
    path.write_bytes(placeholder)
    return path
186
187
class TestProcessSingleVideo:
    """Integration-level tests for process_single_video with heavy mocking.

    Every test patches the same eight names on ``video_processor.pipeline``.
    ``patch`` decorators are applied bottom-up, so the mock for the lowest
    decorator (``tqdm``) is the first injected argument and the mock for the
    topmost decorator (``export_all_formats``) is the last one before the
    ``setup`` fixture.
    """

    @pytest.fixture
    def setup(self, tmp_path):
        """Create fake video, output dir, and mock PM."""
        video_path = _create_fake_video(tmp_path / "input" / "meeting.mp4")
        output_dir = tmp_path / "output"
        pm = _make_mock_pm()
        return video_path, output_dir, pm

    @staticmethod
    def _wire_collaborators(
        output_dir,
        mock_tqdm,
        mock_audio_extractor_cls,
        mock_diagram_analyzer_cls,
        mock_plan_gen_cls,
        mock_export,
        audio_duration=10.0,
    ):
        """Apply the mock wiring shared by every test in this class.

        Args:
            output_dir: Output directory used to build the fake audio path.
            mock_tqdm: Patched ``tqdm``; configured to pass iterables through.
            mock_audio_extractor_cls: Patched ``AudioExtractor`` class.
            mock_diagram_analyzer_cls: Patched ``DiagramAnalyzer`` class.
            mock_plan_gen_cls: Patched ``PlanGenerator`` class.
            mock_export: Patched ``export_all_formats``.
            audio_duration: Value reported under ``"duration"`` by the mocked
                ``get_audio_properties``.
        """
        _make_tqdm_passthrough(mock_tqdm)

        # Audio extractor mock (the returned path is never created on disk).
        audio_ext = MagicMock()
        audio_ext.extract_audio.return_value = output_dir / "audio" / "meeting.wav"
        audio_ext.get_audio_properties.return_value = {"duration": audio_duration}
        mock_audio_extractor_cls.return_value = audio_ext

        # Diagram analyzer mock: process_frames yields two empty lists.
        diag_analyzer = MagicMock()
        diag_analyzer.process_frames.return_value = ([], [])
        mock_diagram_analyzer_cls.return_value = diag_analyzer

        # Plan generator mock
        mock_plan_gen_cls.return_value = MagicMock()

        # export_all_formats returns the manifest it receives
        mock_export.side_effect = lambda out_dir, manifest: manifest

    @staticmethod
    def _stub_no_frames(mock_extract_frames, mock_filter_people, mock_save_frames):
        """Make the patched frame helpers report that no frames were produced."""
        mock_extract_frames.return_value = []
        mock_filter_people.return_value = ([], 0)
        mock_save_frames.return_value = []

    @patch("video_processor.pipeline.export_all_formats")
    @patch("video_processor.pipeline.PlanGenerator")
    @patch("video_processor.pipeline.DiagramAnalyzer")
    @patch("video_processor.pipeline.AudioExtractor")
    @patch("video_processor.pipeline.filter_people_frames")
    @patch("video_processor.pipeline.save_frames")
    @patch("video_processor.pipeline.extract_frames")
    @patch("video_processor.pipeline.tqdm")
    def test_returns_manifest(
        self,
        mock_tqdm,
        mock_extract_frames,
        mock_save_frames,
        mock_filter_people,
        mock_audio_extractor_cls,
        mock_diagram_analyzer_cls,
        mock_plan_gen_cls,
        mock_export,
        setup,
    ):
        """A standard-depth run returns a populated ``VideoManifest``."""
        video_path, output_dir, pm = setup

        self._wire_collaborators(
            output_dir,
            mock_tqdm,
            mock_audio_extractor_cls,
            mock_diagram_analyzer_cls,
            mock_plan_gen_cls,
            mock_export,
            audio_duration=10.0,
        )

        # Frame extraction mocks
        mock_extract_frames.return_value = [b"fake_frame_1", b"fake_frame_2"]
        mock_filter_people.return_value = ([b"fake_frame_1", b"fake_frame_2"], 0)

        # NOTE(review): these files are written into output_dir / "frames"
        # before the pipeline runs, which is the same precondition that
        # test_resume_existing_frames uses to trigger the resume path. This
        # test may therefore not exercise extract_frames at all -- confirm
        # against the pipeline implementation.
        frames_dir = output_dir / "frames"
        frames_dir.mkdir(parents=True, exist_ok=True)
        frame_paths = []
        for i in range(2):
            fp = frames_dir / f"frame_{i:04d}.jpg"
            fp.write_bytes(b"\xff")
            frame_paths.append(fp)
        mock_save_frames.return_value = frame_paths

        manifest = process_single_video(
            input_path=video_path,
            output_dir=output_dir,
            provider_manager=pm,
            depth="standard",
        )

        from video_processor.models import VideoManifest

        assert isinstance(manifest, VideoManifest)
        assert manifest.video.title == "Analysis of meeting"
        assert manifest.stats.frames_extracted == 2
        assert manifest.transcript_json == "transcript/transcript.json"
        assert manifest.knowledge_graph_json == "results/knowledge_graph.json"

    @patch("video_processor.pipeline.export_all_formats")
    @patch("video_processor.pipeline.PlanGenerator")
    @patch("video_processor.pipeline.DiagramAnalyzer")
    @patch("video_processor.pipeline.AudioExtractor")
    @patch("video_processor.pipeline.filter_people_frames")
    @patch("video_processor.pipeline.save_frames")
    @patch("video_processor.pipeline.extract_frames")
    @patch("video_processor.pipeline.tqdm")
    def test_creates_output_directories(
        self,
        mock_tqdm,
        mock_extract_frames,
        mock_save_frames,
        mock_filter_people,
        mock_audio_extractor_cls,
        mock_diagram_analyzer_cls,
        mock_plan_gen_cls,
        mock_export,
        setup,
    ):
        """A run creates the transcript, frames and results directories."""
        video_path, output_dir, pm = setup

        self._wire_collaborators(
            output_dir,
            mock_tqdm,
            mock_audio_extractor_cls,
            mock_diagram_analyzer_cls,
            mock_plan_gen_cls,
            mock_export,
            audio_duration=5.0,
        )
        self._stub_no_frames(mock_extract_frames, mock_filter_people, mock_save_frames)

        process_single_video(
            input_path=video_path,
            output_dir=output_dir,
            provider_manager=pm,
        )

        # Verify standard output directories were created
        assert (output_dir / "transcript").is_dir()
        assert (output_dir / "frames").is_dir()
        assert (output_dir / "results").is_dir()

    @patch("video_processor.pipeline.export_all_formats")
    @patch("video_processor.pipeline.PlanGenerator")
    @patch("video_processor.pipeline.DiagramAnalyzer")
    @patch("video_processor.pipeline.AudioExtractor")
    @patch("video_processor.pipeline.filter_people_frames")
    @patch("video_processor.pipeline.save_frames")
    @patch("video_processor.pipeline.extract_frames")
    @patch("video_processor.pipeline.tqdm")
    def test_resume_existing_frames(
        self,
        mock_tqdm,
        mock_extract_frames,
        mock_save_frames,
        mock_filter_people,
        mock_audio_extractor_cls,
        mock_diagram_analyzer_cls,
        mock_plan_gen_cls,
        mock_export,
        setup,
    ):
        """When frames already exist on disk, extraction should be skipped."""
        video_path, output_dir, pm = setup

        # The frame helpers are deliberately left unconfigured here: the
        # test asserts that extract_frames is never called.
        self._wire_collaborators(
            output_dir,
            mock_tqdm,
            mock_audio_extractor_cls,
            mock_diagram_analyzer_cls,
            mock_plan_gen_cls,
            mock_export,
            audio_duration=10.0,
        )

        # Pre-create frames directory with existing frames
        frames_dir = output_dir / "frames"
        frames_dir.mkdir(parents=True, exist_ok=True)
        for i in range(3):
            (frames_dir / f"frame_{i:04d}.jpg").write_bytes(b"\xff")

        manifest = process_single_video(
            input_path=video_path,
            output_dir=output_dir,
            provider_manager=pm,
        )

        # extract_frames should NOT have been called (resume path)
        mock_extract_frames.assert_not_called()
        assert manifest.stats.frames_extracted == 3

    @patch("video_processor.pipeline.export_all_formats")
    @patch("video_processor.pipeline.PlanGenerator")
    @patch("video_processor.pipeline.DiagramAnalyzer")
    @patch("video_processor.pipeline.AudioExtractor")
    @patch("video_processor.pipeline.filter_people_frames")
    @patch("video_processor.pipeline.save_frames")
    @patch("video_processor.pipeline.extract_frames")
    @patch("video_processor.pipeline.tqdm")
    def test_resume_existing_transcript(
        self,
        mock_tqdm,
        mock_extract_frames,
        mock_save_frames,
        mock_filter_people,
        mock_audio_extractor_cls,
        mock_diagram_analyzer_cls,
        mock_plan_gen_cls,
        mock_export,
        setup,
    ):
        """When transcript exists on disk, transcription should be skipped."""
        video_path, output_dir, pm = setup

        self._wire_collaborators(
            output_dir,
            mock_tqdm,
            mock_audio_extractor_cls,
            mock_diagram_analyzer_cls,
            mock_plan_gen_cls,
            mock_export,
            audio_duration=10.0,
        )
        self._stub_no_frames(mock_extract_frames, mock_filter_people, mock_save_frames)

        # Pre-create transcript file
        transcript_dir = output_dir / "transcript"
        transcript_dir.mkdir(parents=True, exist_ok=True)
        transcript_data = {
            "text": "Pre-existing transcript text.",
            "segments": [{"start": 0.0, "end": 5.0, "text": "Pre-existing transcript text."}],
            "duration": 5.0,
        }
        (transcript_dir / "transcript.json").write_text(json.dumps(transcript_data))

        process_single_video(
            input_path=video_path,
            output_dir=output_dir,
            provider_manager=pm,
        )

        # transcribe_audio should NOT have been called (resume path)
        pm.transcribe_audio.assert_not_called()

    @patch("video_processor.pipeline.export_all_formats")
    @patch("video_processor.pipeline.PlanGenerator")
    @patch("video_processor.pipeline.DiagramAnalyzer")
    @patch("video_processor.pipeline.AudioExtractor")
    @patch("video_processor.pipeline.filter_people_frames")
    @patch("video_processor.pipeline.save_frames")
    @patch("video_processor.pipeline.extract_frames")
    @patch("video_processor.pipeline.tqdm")
    def test_custom_title(
        self,
        mock_tqdm,
        mock_extract_frames,
        mock_save_frames,
        mock_filter_people,
        mock_audio_extractor_cls,
        mock_diagram_analyzer_cls,
        mock_plan_gen_cls,
        mock_export,
        setup,
    ):
        """An explicit ``title`` argument is carried into the manifest."""
        video_path, output_dir, pm = setup

        self._wire_collaborators(
            output_dir,
            mock_tqdm,
            mock_audio_extractor_cls,
            mock_diagram_analyzer_cls,
            mock_plan_gen_cls,
            mock_export,
            audio_duration=5.0,
        )
        self._stub_no_frames(mock_extract_frames, mock_filter_people, mock_save_frames)

        manifest = process_single_video(
            input_path=video_path,
            output_dir=output_dir,
            provider_manager=pm,
            title="My Custom Title",
        )

        assert manifest.video.title == "My Custom Title"

    @patch("video_processor.pipeline.export_all_formats")
    @patch("video_processor.pipeline.PlanGenerator")
    @patch("video_processor.pipeline.DiagramAnalyzer")
    @patch("video_processor.pipeline.AudioExtractor")
    @patch("video_processor.pipeline.filter_people_frames")
    @patch("video_processor.pipeline.save_frames")
    @patch("video_processor.pipeline.extract_frames")
    @patch("video_processor.pipeline.tqdm")
    def test_key_points_and_action_items_extracted(
        self,
        mock_tqdm,
        mock_extract_frames,
        mock_save_frames,
        mock_filter_people,
        mock_audio_extractor_cls,
        mock_diagram_analyzer_cls,
        mock_plan_gen_cls,
        mock_export,
        setup,
    ):
        """The canned chat replies surface as manifest key points and action items."""
        video_path, output_dir, pm = setup

        self._wire_collaborators(
            output_dir,
            mock_tqdm,
            mock_audio_extractor_cls,
            mock_diagram_analyzer_cls,
            mock_plan_gen_cls,
            mock_export,
            audio_duration=10.0,
        )
        self._stub_no_frames(mock_extract_frames, mock_filter_people, mock_save_frames)

        manifest = process_single_video(
            input_path=video_path,
            output_dir=output_dir,
            provider_manager=pm,
        )

        assert len(manifest.key_points) == 1
        assert manifest.key_points[0].point == "Deployment strategy discussed"
        assert len(manifest.action_items) == 1
        assert manifest.action_items[0].action == "Deploy to production"
535

PlanOpticon

Keyboard Shortcuts