PlanOpticon

Phase 7: Add cross-referencing, action detection, and test coverage

- ContentAnalyzer: cross-references transcript and diagram entities with LLM fuzzy matching, and enriches key points with diagram links
- ActionDetector: detects action items via LLM or regex patterns, merges them across sources, and attaches timestamps from segments
- Remove legacy API modules (llm_api, vision_api, transcription_api), now replaced by the providers layer
- Bring the test suite to 185 tests total (38 new), covering content_analyzer, action_detector, pipeline helpers, api_cache, and prompt_templates

leo 2026-02-14 22:29 trunk

Commit ccf32cc46510c70e5d783271586988dd7599f9c58e5b5abbcff24a37a90d939b

Parent 09a0b7acefdd54e…

8 files changed +252 +36 +1 +1 +63 +146 +92 +1

+ tests/test_action_detector.py + tests/test_api_cache.py + tests/test_content_analyzer.py + tests/test_pipeline.py + tests/test_prompt_templates.py + video_processor/analyzers/action_detector.py + video_processor/analyzers/content_analyzer.py + video_processor/api/__init__.py

A tests/test_action_detector.py

+252

		--- a/tests/test_action_detector.py
		+++ b/tests/test_action_detector.py
		@@ -0,0 +1,252 @@
	1	+"""Tests for enhanced action item detection."""
	2	+
	3	+import json
	4	+from unittest.mock import MagicMock
	5	+
	6	+import pytest
	7	+
	8	+from video_processor.analyzers.action_detector import ActionDetector
	9	+from video_processor.models import ActionItem, TranscriptSegment
	10	+
	11	+
	12	+class TestPatternExtract:
	13	+ def test_detects_need_to(self):
	14	+ detector = ActionDetector()
	15	+ items = detector.detect_from_transcript(""Tests for enhanced action m detection."""
	16	+
	17	+""Tests for enhanced action itehanced action item detection."""
	18	+
	19	+import json
	20	+from unittest.mock import MagicMock
	21	+
	22	+from video_processor.analyzers.action_detector import ActionDetector
	23	+from video_processor.models import ActionItem, TranscriptSegment
	24	+
	25	+
	26	+class TestPatternExtract:
	27	+ def test_detects_need_to(self):
	28	+ detector = ActionDetector()
	29	+ items = detector.detect_from_transcript(
	30	+ "We need"Action item: set up monitterns(self):
	31	+ roadmap.")
	32	+ assert len(items) >= 1
	33	+
	34	+ def test_detects_follow_up(self):
	35	+ detector = ActionDetector()
	36	+ items = detector.detect_from_transcript("Follow up with the client about requirements.")
	37	+ assert len(items) >= 1
	38	+
	39	+ def test_detects_lets(self):
	40	+ detector = ActionDetector()
	41	+ items = detector.detect_from_transcript("Let's schedule a meeting to discuss the roadmap.")
	42	+ assert len(items) >= 1
	43	+
	44	+ def test_ignores_short_sentences(self):
	45	+ detector = ActionDetector()
	46	+ items = detector.detect_from_transcript("Do it.")
	47	+ assert len(items) == 0
	48	+
	49	+ def test_no_action_patterns(self):
	50	+ detector = ActionDetec
	51	+ "con"The weather was nice to
	52	+ )
	53	+ assert len(items) == 0
	54	+
	55	+ def test_multiple_sentences(self):
	56	+ detector = Actio""Tests for enhanced actioced action item detection."""
	57	+
	58	+We need to deploy the fix. A"
	59	+ tences("The sky is blue."
	60	+ detector = ActionDetectextterns(self):
	61	+ len(items) == 2
	62	+
	63	+ def test_source_is_transcript(self):
	64	+ detector = ActionDetector()
	65	+ items = detector.detect_from_transcript("We need to fix the authentication module.")
	66	+ for item in items:
	67	+ assert item.source == "transcript"
	68	+
	69	+
	70	+class TestLLMExtract:
	71	+ def test_llm_extraction(self):
	72	+ pm = MagicMock()
	73	+ pm.chat.return_value = json.dumps([ext": Non { assert len(items) >= 1
	74	+ "assignee": "Bob","priority": "high", "Action item: set up monito}
	75	+ ])_no_match_no_context(self):
	76	+ em detection."""
	77	+
	78	+import json
	79	+from unittest.mock import MagicMock
	80	+
	81	+fro"""Tests for enhanced action item detection."""
	82	+
	83	+import json
	84	+from unittest.mock import MagicMock
	85	+
	86	+from video_processor.analyzers.action_detector import ActionDetector
	87	+from video_processor.models import ActionItem, TranscriptSegment
	88	+
	89	+
	90	+class TestPatternExtract:
	91	+ def test_detects_need_to(self):
	92	+ detector = ActionDetector()
	93	+ items = detector.detect_from_transcript(
	94	+ "We need to update the database schema before release."
	95	+ )
	96	+ assert len(items) >= 1
	97	+ assert any("database" in i.action.lower() for i in items)
	98	+
	99	+ def test_detects_should(self):
	100	+ detector = ActionDetector()
	101	+ items = detector.detect_from_transcript("Alice should review the pull request by Friday.")
	102	+ assert len(items) >= 1
	103	+
	104	+ def test_detects_action_item_keyword(self):
	105	+ detector = ActionDetector()
	106	+ items = detector.detect_from_transcript(
	107	+ "Action item: set up monitoring for the new service."
	108	+ )
	109	+ assert len(items) >= 1
	110	+
	111	+ def test_detects_follow_up(self):
	112	+ detector = ActionDetector()
	113	+ items = detector.detect_from_transcript("Follow up with the client about requirements.")
	114	+ [ext": Non"""Tests for enhanced action item dsts for enhanced action ite{"action": "", "assignee": "Bob"])_no_match_no_context(self):
	115	+ em detection."""
	116	+
	117	+import json
	118	+from unittest.mock import MagicMock
	119	+
	120	+fro"""Tests .")
	121	+ assert len(items) >= 1
	122	+
	123	+ def test_detects_lets(self):
	124	+ detector = ActionDetector()
	125	+ items = detector.detect_from_transcript("Let's schedule a meeting to discuss the roadmap.")
	126	+ assert len(items) >= 1
	127	+
	128	+ def test_ignores_short_sentences(self):
	129	+ detector = ActionDetector()
	130	+ items = detector.detect_from_transcript("Do it.")
	131	+ assert len(items) == 0
	132	+
	133	+ def test_no_action_patterns(self):
	134	+ detector = ActionDetector()
	135	+ items = detector.detect_from_transcript("The weather was nice today. We had lunch at noon.")
	136	+ assert len(items) == 0
	137	+
	138	+ def test_multiple_sentences(self):
	139	+ detector = ActionDetector()
	140	+ text = "We need to deploy the fix. Alice should test it first. The sky is blue."
	141	+ items = detector.detect_from_transcript(text)
	142	+ assert len(items) == 2
	143	+
	144	+ def test_source_is_transcript(self):
	145	+ detector = ActionDetector()
	146	+ items = detector.detect_from_transcript("We need to fix the authentication module.")
	147	+ for item in items:
	148	+ assert item.source == "transcript"
	149	+
	150	+
	151	+class TestLLMExtract:
	152	+ def test_llm_extraction(self):
	153	+ pm = MagicMock()
	154	+ pm.chat.return_value = json.dumps(
	155	+ [
	156	+ {
	157	+ "action": "Deploy new version",
	158	+ "assignee": "Bob",
	159	+ "deadline": "Friday",
	160	+ "priority": "high",
	161	+ "context": "Production release",
	162	+ }
	163	+ ]
	164	+ )
	165	+ detector = ActionDetector(provider_manager=pm)
	166	+ items = detector.detect_from_transcript("Deploy new version by Friday.")
	167	+ assert len(items) == 1
	168	+ assert items[0].action == "Deploy new version"
	169	+ assert items[0].assignee == "Bob"
	170	+ assert items[0].deadline == "Friday"
	171	+ assert items[0].priority == "high"
	172	+ assert items[0].source == "transcript"
	173	+
	174	+ def test_llm_returns_empty(self):
	175	+ pm = MagicMock()
	176	+ pm.chat.return_value = "[]"
	177	+ detector = ActionDetector(provider_manager=pm)
	178	+ items = detector.detect_from_transcript("No action items here.")
	179	+ assert items == []
	180	+
	181	+ def test_llm_error_returns_empty(self):
	182	+ pm = MagicMock()
	183	+ pm.chat.side_effect = Exception("API error")
	184	+ detector = ActionDetector(provider_manager=pm)
	185	+ items = detector.detect_from_transcript("We need to fix this.")
	186	+ assert items == []
	187	+
	188	+ def test_llm_bad_json(self):
	189	+ pm = MagicMock()
	190	+ pm.chat.return_value = "not valid json"
	191	+ detector = ActionDetector(provider_manager=pm)
	192	+ items = detector.detect_from_transcript("Update the docs.")
	193	+ assert items == []
	194	+
	195	+ def test_llm_skips_items_without_action(self):
	196	+ pm = MagicMock()
	197	+ pm.chat.return_value = json.dumps(
	198	+ [
	199	+ {"action": "Valid action", "assignee": None},
	200	+ {"assignee": "Alice"}, # No action field
	201	+ {"action": "", "assignee": "Bob"}, # Empty action
	202	+ ]
	203	+ )
	204	+ detector = ActionDetector(provider_manager=pm)
	205	+ items = detector.detect_from_transcript("Some text.")
	206	+ assert len(items) == 1
	207	+ assert items[0].action == "Valid action"
	208	+
	209	+
	210	+class TestDetectFromDiagrams:
	211	+ def test_dict_diagrams(self):
	212	+ pm = MagicMock()
	213	+ pm.chat.return_value = json.dumps(
	214	+ [
	215	+ {
	216	+ "action": "Migrate database",
	217	+ "assignee": None,
	218	+ "deadline": None,
	219	+ "priority": None,
	220	+ "context": None,
	221	+ },
	222	+ ]
	223	+ )
	224	+ detector = ActionDetector(provider_manager=pm)
	225	+ diagrams = [
	226	+ {"text_content": "Step 1: Migrate database", "elements": ["DB", "Migration"]},
	227	+ ]
	228	+ items = detector.detect_from_diagrams(diagrams)
	229	+ assert len(items) == 1
	230	+ assert items[0].source == "diagram"
	231	+
	232	+ def test_object_diagrams(self):
	233	+ pm = MagicMock()
	234	+ pm.chat.return_value = json.dumps(
	235	+ [
	236	+ {
	237	+ "action": "Update API",
	238	+ "assignee": None,
	239	+ "deadline": None,
	240	+ "priority": None,
	241	+ "context": None,
	242	+ },
	243	+ ]
	244	+ )
	245	+ detector = ActionDetector(provider_manager=pm)
	246	+
	247	+ class FakeDiagram:
	248	+ text_content = "Update API endpoints"
	249	+ elements = ["API", "Gateway"]
	250	+
	251	+ items = detector.detect_from_diagrams([FakeDiagram()])
	252	+ assert len(ite

	--- a/tests/test_action_detector.py
	+++ b/tests/test_action_detector.py
	@@ -0,0 +1,252 @@

	--- a/tests/test_action_detector.py
	+++ b/tests/test_action_detector.py
	@@ -0,0 +1,252 @@
1	"""Tests for enhanced action item detection."""
2
3	import json
4	from unittest.mock import MagicMock
5
6	import pytest
7
8	from video_processor.analyzers.action_detector import ActionDetector
9	from video_processor.models import ActionItem, TranscriptSegment
10
11
12	class TestPatternExtract:
13	def test_detects_need_to(self):
14	detector = ActionDetector()
15	items = detector.detect_from_transcript(""Tests for enhanced action m detection."""
16
17	""Tests for enhanced action itehanced action item detection."""
18
19	import json
20	from unittest.mock import MagicMock
21
22	from video_processor.analyzers.action_detector import ActionDetector
23	from video_processor.models import ActionItem, TranscriptSegment
24
25
26	class TestPatternExtract:
27	def test_detects_need_to(self):
28	detector = ActionDetector()
29	items = detector.detect_from_transcript(
30	"We need"Action item: set up monitterns(self):
31	roadmap.")
32	assert len(items) >= 1
33
34	def test_detects_follow_up(self):
35	detector = ActionDetector()
36	items = detector.detect_from_transcript("Follow up with the client about requirements.")
37	assert len(items) >= 1
38
39	def test_detects_lets(self):
40	detector = ActionDetector()
41	items = detector.detect_from_transcript("Let's schedule a meeting to discuss the roadmap.")
42	assert len(items) >= 1
43
44	def test_ignores_short_sentences(self):
45	detector = ActionDetector()
46	items = detector.detect_from_transcript("Do it.")
47	assert len(items) == 0
48
49	def test_no_action_patterns(self):
50	detector = ActionDetec
51	"con"The weather was nice to
52	)
53	assert len(items) == 0
54
55	def test_multiple_sentences(self):
56	detector = Actio""Tests for enhanced actioced action item detection."""
57
58	We need to deploy the fix. A"
59	tences("The sky is blue."
60	detector = ActionDetectextterns(self):
61	len(items) == 2
62
63	def test_source_is_transcript(self):
64	detector = ActionDetector()
65	items = detector.detect_from_transcript("We need to fix the authentication module.")
66	for item in items:
67	assert item.source == "transcript"
68
69
70	class TestLLMExtract:
71	def test_llm_extraction(self):
72	pm = MagicMock()
73	pm.chat.return_value = json.dumps([ext": Non { assert len(items) >= 1
74	"assignee": "Bob","priority": "high", "Action item: set up monito}
75	])_no_match_no_context(self):
76	em detection."""
77
78	import json
79	from unittest.mock import MagicMock
80
81	fro"""Tests for enhanced action item detection."""
82
83	import json
84	from unittest.mock import MagicMock
85
86	from video_processor.analyzers.action_detector import ActionDetector
87	from video_processor.models import ActionItem, TranscriptSegment
88
89
90	class TestPatternExtract:
91	def test_detects_need_to(self):
92	detector = ActionDetector()
93	items = detector.detect_from_transcript(
94	"We need to update the database schema before release."
95	)
96	assert len(items) >= 1
97	assert any("database" in i.action.lower() for i in items)
98
99	def test_detects_should(self):
100	detector = ActionDetector()
101	items = detector.detect_from_transcript("Alice should review the pull request by Friday.")
102	assert len(items) >= 1
103
104	def test_detects_action_item_keyword(self):
105	detector = ActionDetector()
106	items = detector.detect_from_transcript(
107	"Action item: set up monitoring for the new service."
108	)
109	assert len(items) >= 1
110
111	def test_detects_follow_up(self):
112	detector = ActionDetector()
113	items = detector.detect_from_transcript("Follow up with the client about requirements.")
114	[ext": Non"""Tests for enhanced action item dsts for enhanced action ite{"action": "", "assignee": "Bob"])_no_match_no_context(self):
115	em detection."""
116
117	import json
118	from unittest.mock import MagicMock
119
120	fro"""Tests .")
121	assert len(items) >= 1
122
123	def test_detects_lets(self):
124	detector = ActionDetector()
125	items = detector.detect_from_transcript("Let's schedule a meeting to discuss the roadmap.")
126	assert len(items) >= 1
127
128	def test_ignores_short_sentences(self):
129	detector = ActionDetector()
130	items = detector.detect_from_transcript("Do it.")
131	assert len(items) == 0
132
133	def test_no_action_patterns(self):
134	detector = ActionDetector()
135	items = detector.detect_from_transcript("The weather was nice today. We had lunch at noon.")
136	assert len(items) == 0
137
138	def test_multiple_sentences(self):
139	detector = ActionDetector()
140	text = "We need to deploy the fix. Alice should test it first. The sky is blue."
141	items = detector.detect_from_transcript(text)
142	assert len(items) == 2
143
144	def test_source_is_transcript(self):
145	detector = ActionDetector()
146	items = detector.detect_from_transcript("We need to fix the authentication module.")
147	for item in items:
148	assert item.source == "transcript"
149
150
151	class TestLLMExtract:
152	def test_llm_extraction(self):
153	pm = MagicMock()
154	pm.chat.return_value = json.dumps(
155	[
156	{
157	"action": "Deploy new version",
158	"assignee": "Bob",
159	"deadline": "Friday",
160	"priority": "high",
161	"context": "Production release",
162	}
163	]
164	)
165	detector = ActionDetector(provider_manager=pm)
166	items = detector.detect_from_transcript("Deploy new version by Friday.")
167	assert len(items) == 1
168	assert items[0].action == "Deploy new version"
169	assert items[0].assignee == "Bob"
170	assert items[0].deadline == "Friday"
171	assert items[0].priority == "high"
172	assert items[0].source == "transcript"
173
174	def test_llm_returns_empty(self):
175	pm = MagicMock()
176	pm.chat.return_value = "[]"
177	detector = ActionDetector(provider_manager=pm)
178	items = detector.detect_from_transcript("No action items here.")
179	assert items == []
180
181	def test_llm_error_returns_empty(self):
182	pm = MagicMock()
183	pm.chat.side_effect = Exception("API error")
184	detector = ActionDetector(provider_manager=pm)
185	items = detector.detect_from_transcript("We need to fix this.")
186	assert items == []
187
188	def test_llm_bad_json(self):
189	pm = MagicMock()
190	pm.chat.return_value = "not valid json"
191	detector = ActionDetector(provider_manager=pm)
192	items = detector.detect_from_transcript("Update the docs.")
193	assert items == []
194
195	def test_llm_skips_items_without_action(self):
196	pm = MagicMock()
197	pm.chat.return_value = json.dumps(
198	[
199	{"action": "Valid action", "assignee": None},
200	{"assignee": "Alice"}, # No action field
201	{"action": "", "assignee": "Bob"}, # Empty action
202	]
203	)
204	detector = ActionDetector(provider_manager=pm)
205	items = detector.detect_from_transcript("Some text.")
206	assert len(items) == 1
207	assert items[0].action == "Valid action"
208
209
210	class TestDetectFromDiagrams:
211	def test_dict_diagrams(self):
212	pm = MagicMock()
213	pm.chat.return_value = json.dumps(
214	[
215	{
216	"action": "Migrate database",
217	"assignee": None,
218	"deadline": None,
219	"priority": None,
220	"context": None,
221	},
222	]
223	)
224	detector = ActionDetector(provider_manager=pm)
225	diagrams = [
226	{"text_content": "Step 1: Migrate database", "elements": ["DB", "Migration"]},
227	]
228	items = detector.detect_from_diagrams(diagrams)
229	assert len(items) == 1
230	assert items[0].source == "diagram"
231
232	def test_object_diagrams(self):
233	pm = MagicMock()
234	pm.chat.return_value = json.dumps(
235	[
236	{
237	"action": "Update API",
238	"assignee": None,
239	"deadline": None,
240	"priority": None,
241	"context": None,
242	},
243	]
244	)
245	detector = ActionDetector(provider_manager=pm)
246
247	class FakeDiagram:
248	text_content = "Update API endpoints"
249	elements = ["API", "Gateway"]
250
251	items = detector.detect_from_diagrams([FakeDiagram()])
252	assert len(ite

A tests/test_api_cache.py

+36

		--- a/tests/test_api_cache.py
		+++ b/tests/test_api_cache.py
		@@ -0,0 +1,36 @@
	1	+"""Tests for API json
	2	+import time
	3	+
	4	+import pytestonse cache."""
	5	+
	6	+import time
	7	+
	8	+from video_processor.utils.api_cache import ApiCache
	9	+
	10	+
	11	+class TestApiCache:
	12	+ def test_set_and_get(self, tmp_path):
	13	+ cache = ApiCache(tmp_path, namespace="test")
	14	+ cache.set("key1", {"data": "value"})
	15	+ result = cache.get("key1")
	16	+ assert result == {"data": "value"}
	17	+
	18	+ def test_get_missing_key(self, tmp_path):
	19	+ cache = ApiCache(tmp_path, namespace="test")
	20	+ assert cache.get("nonexistent") is None
	21	+
	22	+ def test_ttl_expiry(self, tmp_path):
	23	+ cache = ApiCache(tmp_path, namespace="test", ttl=0)
	24	+ cache.set("key1", "value")
	25	+ # With TTL=0, any subsequent access should be expired
	26	+ time.sleep(0.01)
	27	+ assert cache.get("key1") ist(self, tmp_path):
	28	+ h):
	29	+ cache = ApiCache(tmp_path, namespace="test")
	30	+ cache.set("key1", "value")
	31	+ assert cache.get("key1") == "value"
	32	+ result = cache.invalidate("key1")
	33	+ assert result is True
	34	+ assert cache.get("key1") is None
	35	+
	36	+ def test_invalidate_missing(se

	--- a/tests/test_api_cache.py
	+++ b/tests/test_api_cache.py
	@@ -0,0 +1,36 @@

	--- a/tests/test_api_cache.py
	+++ b/tests/test_api_cache.py
	@@ -0,0 +1,36 @@
1	"""Tests for API json
2	import time
3
4	import pytestonse cache."""
5
6	import time
7
8	from video_processor.utils.api_cache import ApiCache
9
10
11	class TestApiCache:
12	def test_set_and_get(self, tmp_path):
13	cache = ApiCache(tmp_path, namespace="test")
14	cache.set("key1", {"data": "value"})
15	result = cache.get("key1")
16	assert result == {"data": "value"}
17
18	def test_get_missing_key(self, tmp_path):
19	cache = ApiCache(tmp_path, namespace="test")
20	assert cache.get("nonexistent") is None
21
22	def test_ttl_expiry(self, tmp_path):
23	cache = ApiCache(tmp_path, namespace="test", ttl=0)
24	cache.set("key1", "value")
25	# With TTL=0, any subsequent access should be expired
26	time.sleep(0.01)
27	assert cache.get("key1") ist(self, tmp_path):
28	h):
29	cache = ApiCache(tmp_path, namespace="test")
30	cache.set("key1", "value")
31	assert cache.get("key1") == "value"
32	result = cache.invalidate("key1")
33	assert result is True
34	assert cache.get("key1") is None
35
36	def test_invalidate_missing(se

A tests/test_content_analyzer.py

		--- a/tests/test_content_analyzer.py
		+++ b/tests/test_content_analyzer.py
		@@ -0,0 +1 @@
	1	+"

	--- a/tests/test_content_analyzer.py
	+++ b/tests/test_content_analyzer.py
	@@ -0,0 +1 @@

	--- a/tests/test_content_analyzer.py
	+++ b/tests/test_content_analyzer.py
	@@ -0,0 +1 @@
1	"

A tests/test_pipeline.py

		--- a/tests/test_pipeline.py
		+++ b/tests/test_pipeline.py
		@@ -0,0 +1 @@
	1	+"""Tests for the cor

	--- a/tests/test_pipeline.py
	+++ b/tests/test_pipeline.py
	@@ -0,0 +1 @@

	--- a/tests/test_pipeline.py
	+++ b/tests/test_pipeline.py
	@@ -0,0 +1 @@
1	"""Tests for the cor

A tests/test_prompt_templates.py

+63

		--- a/tests/test_prompt_templates.py
		+++ b/tests/test_prompt_templates.py
		@@ -0,0 +1,63 @@
	1	+"""Tests for prompt template management."""
	2	+
	3	+from video_processor.utils.prompt_templates import (
	4	+ DEFAULT_TEMPLATES,
	5	+ PromptTemplate,
	6	+ default_prompt_manager,
	7	+)
	8	+
	9	+
	10	+class TestPromptTemplate:
	11	+ def test_default_templates_loaded(self):
	12	+ pm = PromptTemplate(default_templates=DEFAULT_TEMPLATES)
	13	+ assert len(pm.templates) == 10
	14	+
	15	+ def test_all_expected_templates_exist(self):
	16	+ expected = [
	17	+ "content_analysis",
	18	+ "diagram_extraction",
	19	+ "action_item_detection",
	20	+ "content_summary",
	21	+ "summary_generation",
	22	+ "key_points_extraction",
	23	+ "entity_extraction",
	24	+ "relationship_extraction",
	25	+ "diagram_analysis",
	26	+ "mermaid_generation",
	27	+ ]
	28	+ for name in expected:
	29	+ assert name in DEFAULT_TEMPLATES, f"Missing template: {name}"
	30	+
	31	+ def test_get_template(self):
	32	+ pm = PromptTemplate(default_templates={"test": "Hello $name"})
	33	+ template = pm.get_template("test")
	34	+ assert template is not None
	35	+
	36	+ def test_get_missing_template(self):
	37	+ pm = PromptTemplate(default_templates={})
	38	+ assert pm.get_template("nonexistent") is None
	39	+
	40	+ def test_format_prompt(self):
	41	+ pm = PromptTemplate(default_templates={"greet": "Hello $name, welcome to $place"})
	42	+ result = pm.format_prompt("greet", name="Alice", place="Wonderland")
	43	+ assert "Alice" in result
	44	+ assert "Wonderland" in result
	45	+
	46	+ def test_format_missing_template(self):
	47	+ pm = PromptTemplate(default_templates={})
	48	+ result = pm.format_prompt("nonexistent", key="value")
	49	+ assert result is None
	50	+
	51	+ def test_safe_substitute_missing_vars(self):
	52	+ pm = PromptTemplate(default_templates={"test": "Hello $name and $other"})
	53	+ result = pm.format_prompt("test", name="Alice")
	54	+ assert "Alice" in result
	55	+ assert "$other" in result # safe_substitute keeps unresolved vars
	56	+
	57	+ def test_add_template(self):
	58	+ pm = PromptTemplate(default_templates={})
	59	+ pm.add_template("new", "New template: $var")
	60	+ result = pm.format_prompt("new", var="value")
	61	+ assert "value" in result
	62	+
	63	+ def test_save_template_no_

	--- a/tests/test_prompt_templates.py
	+++ b/tests/test_prompt_templates.py
	@@ -0,0 +1,63 @@

	--- a/tests/test_prompt_templates.py
	+++ b/tests/test_prompt_templates.py
	@@ -0,0 +1,63 @@
1	"""Tests for prompt template management."""
2
3	from video_processor.utils.prompt_templates import (
4	DEFAULT_TEMPLATES,
5	PromptTemplate,
6	default_prompt_manager,
7	)
8
9
10	class TestPromptTemplate:
11	def test_default_templates_loaded(self):
12	pm = PromptTemplate(default_templates=DEFAULT_TEMPLATES)
13	assert len(pm.templates) == 10
14
15	def test_all_expected_templates_exist(self):
16	expected = [
17	"content_analysis",
18	"diagram_extraction",
19	"action_item_detection",
20	"content_summary",
21	"summary_generation",
22	"key_points_extraction",
23	"entity_extraction",
24	"relationship_extraction",
25	"diagram_analysis",
26	"mermaid_generation",
27	]
28	for name in expected:
29	assert name in DEFAULT_TEMPLATES, f"Missing template: {name}"
30
31	def test_get_template(self):
32	pm = PromptTemplate(default_templates={"test": "Hello $name"})
33	template = pm.get_template("test")
34	assert template is not None
35
36	def test_get_missing_template(self):
37	pm = PromptTemplate(default_templates={})
38	assert pm.get_template("nonexistent") is None
39
40	def test_format_prompt(self):
41	pm = PromptTemplate(default_templates={"greet": "Hello $name, welcome to $place"})
42	result = pm.format_prompt("greet", name="Alice", place="Wonderland")
43	assert "Alice" in result
44	assert "Wonderland" in result
45
46	def test_format_missing_template(self):
47	pm = PromptTemplate(default_templates={})
48	result = pm.format_prompt("nonexistent", key="value")
49	assert result is None
50
51	def test_safe_substitute_missing_vars(self):
52	pm = PromptTemplate(default_templates={"test": "Hello $name and $other"})
53	result = pm.format_prompt("test", name="Alice")
54	assert "Alice" in result
55	assert "$other" in result # safe_substitute keeps unresolved vars
56
57	def test_add_template(self):
58	pm = PromptTemplate(default_templates={})
59	pm.add_template("new", "New template: $var")
60	result = pm.format_prompt("new", var="value")
61	assert "value" in result
62
63	def test_save_template_no_

A video_processor/analyzers/action_detector.py

+146

		--- a/video_processor/analyzers/action_detector.py
		+++ b/video_processor/analyzers/action_detector.py
		@@ -0,0 +1,146 @@
	1	+"""Enhanced action item detection from transcripts and diagrams."""
	2	+
	3	+import logging
	4	+import re
	5	+from typing import List, Optional
	6	+
	7	+from video_processor.models import ActionItem, TranscriptSegment
	8	+from video_processor.providers.manager import ProviderManager
	9	+from video_processor.utils.json_parsing import parse_json_from_response
	10	+
	11	+logger = logging.getLogger(__name__)
	12	+
	13	+# Patterns that indicate action items in natural language
	14	+_ACTION_PATTERNS = [
	15	+ re.compile(r"\b(?:need|needs)\s+to\b", re.IGNORECASE),
	16	+ re.compile(r"\b(?:should|must|shall)\s+\w+", re.IGNORECASE),
	17	+ re.compile(r"\b(?:will|going\s+to)\s+\w+", re.IGNORECASE),
	18	+ re.compile(r"\b(?:action\s+item|todo|to-do|follow[\s-]?up)\b", re.IGNORECASE),
	19	+ re.compile(r"\b(?:assigned?\s+to|responsible\s+for)\b", re.IGNORECASE),
	20	+ re.compile(r"\b(?:deadline|due\s+(?:date|by))\b", re.IGNORECASE),
	21	+ re.compile(r"\b(?:let'?s|let\s+us)\s+\w+", re.IGNORECASE),
	22	+ re.compile(r"\b(?:make\s+sure|ensure)\b", re.IGNORECASE),
	23	+ re.compile(r"\b(?:can\s+you|could\s+you|please)\s+\w+", re.IGNORECASE),
	24	+]
	25	+
	26	+
	27	+class ActionDetector:
	28	+ """Detects action items from transcripts using heuristics and LLM."""
	29	+
	30	+ def __init__(self, provider_manager: Optional[ProviderManager] = None):
	31	+ self.pm = provider_manager
	32	+
	33	+ def detect_from_transcript(
	34	+ self,
	35	+ text: str,
	36	+ segments: Optional[List[TranscriptSegment]] = None,
	37	+ ) -> List[ActionItem]:
	38	+ """
	39	+ Detect action items from transcript text.
	40	+
	41	+ Uses LLM extraction when available, falls back to pattern matching.
	42	+ Segments are used to attach timestamps.
	43	+ """
	44	+ if self.pm:
	45	+ items = self._llm_extract(text)
	46	+ else:
	47	+ items = self._pattern_extract(text)
	48	+
	49	+ # Attach timestamps from segments if available
	50	+ if segments and items:
	51	+ self._attach_timestamps(items, segments)
	52	+
	53	+ return items
	54	+
	55	+ def detect_from_diagrams(
	56	+ self,
	57	+ diagrams: list,
	58	+ ) -> List[ActionItem]:
	59	+ """
	60	+ Extract action items mentioned in diagram text content.
	61	+
	62	+ Looks for action-oriented language in diagram text/elements.
	63	+ """
	64	+ items: List[ActionItem] = []
	65	+
	66	+ for diagram in diagrams:
	67	+ text = ""
	68	+ if isinstance(diagram, dict):
	69	+ text = diagram.get("text_content", "") or ""
	70	+ elements = diagram.get("elements", [])
	71	+ else:
	72	+ text = getattr(diagram, "text_content", "") or ""
	73	+ elements = getattr(diagram, "elements", [])
	74	+
	75	+ combined = text + " " + " ".join(str(e) for e in elements)
	76	+ if not combined.strip():
	77	+ continue
	78	+
	79	+ if self.pm:
	80	+ diagram_items = self._llm_extract(combined)
	81	+ else:
	82	+ diagram_items = self._pattern_extract(combined)
	83	+
	84	+ for item in diagram_items:
	85	+ item.source = "diagram"
	86	+ items.extend(diagram_items)
	87	+
	88	+ return items
	89	+
	90	+ def merge_action_items(
	91	+ self,
	92	+ transcript_items: List[ActionItem],
	93	+ diagram_items: List[ActionItem],
	94	+ ) -> List[ActionItem]:
	95	+ """
	96	+ Merge action items from transcript and diagram sources.
	97	+
	98	+ Deduplicates by checking for similar action text.
	99	+ """
	100	+ merged: List[ActionItem] = list(transcript_items)
	101	+ existing_actions = {a.action.lower().strip() for a in merged}
	102	+
	103	+ for item in diagram_items:
	104	+ normalized = item.action.lower().strip()
	105	+ if normalized not in existing_actions:
	106	+ merged.append(item)
	107	+ existing_actions.add(normalized)
	108	+
	109	+ return merged
	110	+
	111	+ def _llm_extract(self, text: str) -> List[ActionItem]:
	112	+ """Extract action items using LLM."""
	113	+ if not self.pm:
	114	+ return []
	115	+
	116	+ prompt = (
	117	+ "Extract all action items, tasks, and commitments "
	118	+ "from the following text.\n\n"
	119	+ f"TEXT:\n{text[:8000]}\n\n"
	120	+ "Return a JSON array:\n"
	121	+ '[{"action": "...", "assignee": "...", "deadline": "...", '
	122	+ '"priority": "...", "context": "..."}]\n\n'
	123	+ "Only include clear, actionable items. "
	124	+ "Set fields to null if not mentioned.\n"
	125	+ "Return ONLY the JSON array."
	126	+ )
	127	+
	128	+ try:
	129	+ raw = self.pm.chat(
	130	+ [{"role": "user", "content": prompt}],
	131	+ temperature=0.3,
	132	+ )
	133	+ parsed = parse_json_from_response(raw)
	134	+ if isinstance(parsed, list):
	135	+ return [
	136	+ ActionItem(
	137	+ action=item.get("action", ""),
	138	+ assignee=item.get("assignee"),
	139	+ deadline=item.get("deadline"),
	140	+ priority=item.get("priority"),
	141	+ context=item.get("context"),
	142	+ source="transcript",
	143	+ )
	144	+ for item in parsed
	145	+ if isinstance(item, dict) and item.get("action")
	146	+

	--- a/video_processor/analyzers/action_detector.py
	+++ b/video_processor/analyzers/action_detector.py
	@@ -0,0 +1,146 @@

	--- a/video_processor/analyzers/action_detector.py
	+++ b/video_processor/analyzers/action_detector.py
	@@ -0,0 +1,146 @@
1	"""Enhanced action item detection from transcripts and diagrams."""
2
3	import logging
4	import re
5	from typing import List, Optional
6
7	from video_processor.models import ActionItem, TranscriptSegment
8	from video_processor.providers.manager import ProviderManager
9	from video_processor.utils.json_parsing import parse_json_from_response
10
11	logger = logging.getLogger(__name__)
12
13	# Patterns that indicate action items in natural language
14	_ACTION_PATTERNS = [
15	re.compile(r"\b(?:need|needs)\s+to\b", re.IGNORECASE),
16	re.compile(r"\b(?:should|must|shall)\s+\w+", re.IGNORECASE),
17	re.compile(r"\b(?:will|going\s+to)\s+\w+", re.IGNORECASE),
18	re.compile(r"\b(?:action\s+item|todo|to-do|follow[\s-]?up)\b", re.IGNORECASE),
19	re.compile(r"\b(?:assigned?\s+to|responsible\s+for)\b", re.IGNORECASE),
20	re.compile(r"\b(?:deadline|due\s+(?:date|by))\b", re.IGNORECASE),
21	re.compile(r"\b(?:let'?s|let\s+us)\s+\w+", re.IGNORECASE),
22	re.compile(r"\b(?:make\s+sure|ensure)\b", re.IGNORECASE),
23	re.compile(r"\b(?:can\s+you|could\s+you|please)\s+\w+", re.IGNORECASE),
24	]
25
26
27	class ActionDetector:
28	"""Detects action items from transcripts using heuristics and LLM."""
29
30	def __init__(self, provider_manager: Optional[ProviderManager] = None):
31	self.pm = provider_manager
32
def detect_from_transcript(
    self,
    text: str,
    segments: Optional[List[TranscriptSegment]] = None,
) -> List[ActionItem]:
    """
    Find action items in a transcript.

    The LLM extractor is used when a provider manager is configured;
    otherwise the pattern-based extractor runs. When both segments and
    items are non-empty, the segments are handed to the timestamp helper
    together with the items.

    Args:
        text: Transcript text to scan.
        segments: Optional transcript segments used for timestamps.

    Returns:
        The detected action items.
    """
    # Pick the extraction strategy once, then run it on the transcript.
    extractor = self._llm_extract if self.pm else self._pattern_extract
    found = extractor(text)

    # Timestamps can only be attached when there is something on both sides.
    if segments and found:
        self._attach_timestamps(found, segments)

    return found
54
def detect_from_diagrams(
    self,
    diagrams: list,
) -> List[ActionItem]:
    """
    Extract action items mentioned in diagram text content.

    For each diagram the ``text_content`` and ``elements`` values are read
    (by key for dicts, by attribute otherwise), joined into one string and
    passed to the LLM extractor when a provider manager is configured, or
    to the pattern extractor when it is not. Every item found is tagged
    with ``source = "diagram"``.

    Args:
        diagrams: Diagram dicts or objects. A missing or ``None``
            ``text_content`` / ``elements`` is treated as empty.

    Returns:
        Action items collected from all diagrams, in diagram order.
    """
    items: List[ActionItem] = []

    for diagram in diagrams:
        if isinstance(diagram, dict):
            text = diagram.get("text_content", "") or ""
            # The key may be present with a null value; fall back to an
            # empty list so the join below does not raise TypeError.
            elements = diagram.get("elements", []) or []
        else:
            text = getattr(diagram, "text_content", "") or ""
            elements = getattr(diagram, "elements", []) or []

        combined = text + " " + " ".join(str(e) for e in elements)
        if not combined.strip():
            # Nothing to analyse for this diagram.
            continue

        if self.pm:
            diagram_items = self._llm_extract(combined)
        else:
            diagram_items = self._pattern_extract(combined)

        # The extractors do not know the text came from a diagram, so the
        # source is set here.
        for item in diagram_items:
            item.source = "diagram"
        items.extend(diagram_items)

    return items
89
def merge_action_items(
    self,
    transcript_items: List[ActionItem],
    diagram_items: List[ActionItem],
) -> List[ActionItem]:
    """
    Combine transcript and diagram action items into one list.

    All transcript items are kept as given. A diagram item is appended
    only when its action text, lower-cased and stripped of surrounding
    whitespace, has not been seen yet.

    Args:
        transcript_items: Items found in the transcript.
        diagram_items: Items found in diagrams.

    Returns:
        A new list with the transcript items first, followed by the
        diagram items that were not duplicates.
    """

    def _key(entry):
        # Comparison key: case-insensitive, ignoring outer whitespace.
        return entry.action.lower().strip()

    combined: List[ActionItem] = [*transcript_items]
    seen = set(map(_key, combined))

    for candidate in diagram_items:
        key = _key(candidate)
        if key in seen:
            continue
        seen.add(key)
        combined.append(candidate)

    return combined
110
111	def _llm_extract(self, text: str) -> List[ActionItem]:
112	"""Extract action items using LLM."""
113	if not self.pm:
114	return []
115
116	prompt = (
117	"Extract all action items, tasks, and commitments "
118	"from the following text.\n\n"
119	f"TEXT:\n{text[:8000]}\n\n"
120	"Return a JSON array:\n"
121	'[{"action": "...", "assignee": "...", "deadline": "...", '
122	'"priority": "...", "context": "..."}]\n\n'
123	"Only include clear, actionable items. "
124	"Set fields to null if not mentioned.\n"
125	"Return ONLY the JSON array."
126	)
127
128	try:
129	raw = self.pm.chat(
130	[{"role": "user", "content": prompt}],
131	temperature=0.3,
132	)
133	parsed = parse_json_from_response(raw)
134	if isinstance(parsed, list):
135	return [
136	ActionItem(
137	action=item.get("action", ""),
138	assignee=item.get("assignee"),
139	deadline=item.get("deadline"),
140	priority=item.get("priority"),
141	context=item.get("context"),
142	source="transcript",
143	)
144	for item in parsed
145	if isinstance(item, dict) and item.get("action")
146

A video_processor/analyzers/content_analyzer.py

+92

		--- a/video_processor/analyzers/content_analyzer.py
		+++ b/video_processor/analyzers/content_analyzer.py
		@@ -0,0 +1,92 @@
	1	+"""Content cross-referencing between transcript and diagram entities."""
	2	+
	3	+import logging
	4	+from typing import List, Optional
	5	+
	6	+from video_processor.models import Entity, KeyPoint
	7	+from video_processor.providers.manager import ProviderManager
	8	+from video_processor.utils.json_parsing import parse_json_from_response
	9	+
	10	+logger = logging.getLogger(__name__)
	11	+
	12	+
	13	+class ContentAnalyzer:
	14	+ """Cross-references transcript and diagram entities for richer knowledge."""
	15	+
	16	+ def __init__(self, provider_manager: Optional[ProviderManager] = None):
	17	+ self.pm = provider_manager
	18	+
	19	+ def cross_reference(
	20	+ self,
	21	+ transcript_entities: List[Entity],
	22	+ diagram_entities: List[Entity],
	23	+ ) -> List[Entity]:
	24	+ """
	25	+ Merge entities from transcripts and diagrams.
	26	+
	27	+ Merges by exact name overlap first, then uses LLM for fuzzy matching
	28	+ of remaining entities. Adds source attribution.
	29	+ """
	30	+ merged: dict[str, Entity] = {}
	31	+
	32	+ # Index transcript entities
	33	+ for e in transcript_entities:
	34	+ key = e.name.lower()
	35	+ merged[key] = Entity(
	36	+ name=e.name,
	37	+ type=e.type,
	38	+ descriptions=list(e.descriptions),
	39	+ source="transcript",
	40	+ occurrences=list(e.occurrences),
	41	+ )
	42	+
	43	+ # Merge diagram entities
	44	+ for e in diagram_entities:
	45	+ key = e.name.lower()
	46	+ if key in merged:
	47	+ existing = merged[key]
	48	+ existing.source = "both"
	49	+ existing.descriptions = list(set(existing.descriptions + e.descriptions))
	50	+ existing.occurrences.extend(e.occurrences)
	51	+ else:
	52	+ merged[key] = Entity(
	53	+ name=e.name,
	54	+ type=e.type,
	55	+ descriptions=list(e.descriptions),
	56	+ source="diagram",
	57	+ occurrences=list(e.occurrences),
	58	+ )
	59	+
	60	+ # LLM fuzzy matching for unmatched entities
	61	+ if self.pm:
	62	+ unmatched_t = [
	63	+ e e
	64	+ f if e.name.lower() not in {):
	65	+ re if e.name.lo):
	66	+ }
	67	+ ]
	68	+ unmatcme,
	69	+ ty if e.name.lower() not in {):
	70	+ re"""Content crossencing between tr}
	71	+ text = (
	72	+ d.get("text_content", "") if isinstance(d, dict) else getattr(d, "text
	73	+ )
	74	+ entities = set(str(e).lower() for e in elements)
	75	+ if text:
	76	+ entities.update(word.lower() for word in text.split() if len(word) > 3)
	77	+ diagram_entities[i] = entities
	78	+
	79	+ # Match key points to diagrams
	80	+ for kp in key_points:
	81	+ kp_words = set(kp.point.lower().split())
	82	+ if kp.details:
	83	+ kp_words.update(kp.details.lower().split())
	84	+
	85	+ related = []
	86	+ for idx, d_entities in diagram_entities.items():
	87	+ overlap = kp_words & d_entities
	88	+ if len(overlap) >= 2:
	89	+ related.append(idx)
	90	+
	91	+ if related:
	92	+ kp.related_diagrams = related

	--- a/video_processor/analyzers/content_analyzer.py
	+++ b/video_processor/analyzers/content_analyzer.py
	@@ -0,0 +1,92 @@

	--- a/video_processor/analyzers/content_analyzer.py
	+++ b/video_processor/analyzers/content_analyzer.py
	@@ -0,0 +1,92 @@
1	"""Content cross-referencing between transcript and diagram entities."""
2
3	import logging
4	from typing import List, Optional
5
6	from video_processor.models import Entity, KeyPoint
7	from video_processor.providers.manager import ProviderManager
8	from video_processor.utils.json_parsing import parse_json_from_response
9
10	logger = logging.getLogger(__name__)
11
12
13	class ContentAnalyzer:
14	"""Cross-references transcript and diagram entities for richer knowledge."""
15
16	def __init__(self, provider_manager: Optional[ProviderManager] = None):
17	self.pm = provider_manager
18
19	def cross_reference(
20	self,
21	transcript_entities: List[Entity],
22	diagram_entities: List[Entity],
23	) -> List[Entity]:
24	"""
25	Merge entities from transcripts and diagrams.
26
27	Merges by exact name overlap first, then uses LLM for fuzzy matching
28	of remaining entities. Adds source attribution.
29	"""
30	merged: dict[str, Entity] = {}
31
32	# Index transcript entities
33	for e in transcript_entities:
34	key = e.name.lower()
35	merged[key] = Entity(
36	name=e.name,
37	type=e.type,
38	descriptions=list(e.descriptions),
39	source="transcript",
40	occurrences=list(e.occurrences),
41	)
42
43	# Merge diagram entities
44	for e in diagram_entities:
45	key = e.name.lower()
46	if key in merged:
47	existing = merged[key]
48	existing.source = "both"
49	existing.descriptions = list(set(existing.descriptions + e.descriptions))
50	existing.occurrences.extend(e.occurrences)
51	else:
52	merged[key] = Entity(
53	name=e.name,
54	type=e.type,
55	descriptions=list(e.descriptions),
56	source="diagram",
57	occurrences=list(e.occurrences),
58	)
59
60	# LLM fuzzy matching for unmatched entities
61	if self.pm:
62	unmatched_t = [
63	e e
64	f if e.name.lower() not in {):
65	re if e.name.lo):
66	}
67	]
68	unmatcme,
69	ty if e.name.lower() not in {):
70	re"""Content crossencing between tr}
71	text = (
72	d.get("text_content", "") if isinstance(d, dict) else getattr(d, "text
73	)
74	entities = set(str(e).lower() for e in elements)
75	if text:
76	entities.update(word.lower() for word in text.split() if len(word) > 3)
77	diagram_entities[i] = entities
78
79	# Match key points to diagrams
80	for kp in key_points:
81	kp_words = set(kp.point.lower().split())
82	if kp.details:
83	kp_words.update(kp.details.lower().split())
84
85	related = []
86	for idx, d_entities in diagram_entities.items():
87	overlap = kp_words & d_entities
88	if len(overlap) >= 2:
89	related.append(idx)
90
91	if related:
92	kp.related_diagrams = related

A video_processor/api/__init__.py

		--- a/video_processor/api/__init__.py
		+++ b/video_processor/api/__init__.py
		@@ -0,0 +1 @@
	1	+# Legacy API modules removed — use video_processor.providers instead.

	--- a/video_processor/api/__init__.py
	+++ b/video_processor/api/__init__.py
	@@ -0,0 +1 @@

	--- a/video_processor/api/__init__.py
	+++ b/video_processor/api/__init__.py
	@@ -0,0 +1 @@
1	# Legacy API modules removed — use video_processor.providers instead.

PlanOpticon

Keyboard Shortcuts