PlanOpticon

Phase 7: Add cross-referencing, action detection, and test coverage
- ContentAnalyzer: cross-references transcript and diagram entities using LLM fuzzy matching, and enriches key points with links to related diagrams.
- ActionDetector: detects action items via the LLM or regex patterns, merges items across sources, and attaches timestamps from transcript segments.
- Remove the legacy API modules (llm_api, vision_api, transcription_api), which are replaced by the providers layer.
- Add 38 new tests (185 total), covering content_analyzer, action_detector, pipeline helpers, api_cache, and prompt_templates.

leo 2026-02-14 22:29 trunk
Commit ccf32cc46510c70e5d783271586988dd7599f9c58e5b5abbcff24a37a90d939b
--- a/tests/test_action_detector.py
+++ b/tests/test_action_detector.py
@@ -0,0 +1,252 @@
1
+"""Tests for enhanced action item detection."""
2
+
3
+import json
4
+from unittest.mock import MagicMock
5
+
6
+import pytest
7
+
8
+from video_processor.analyzers.action_detector import ActionDetector
9
+from video_processor.models import ActionItem, TranscriptSegment
10
+
11
+
12
+class TestPatternExtract:
13
+ def test_detects_need_to(self):
14
+ detector = ActionDetector()
15
+ items = detector.detect_from_transcript(""Tests for enhanced action m detection."""
16
+
17
+""Tests for enhanced action itehanced action item detection."""
18
+
19
+import json
20
+from unittest.mock import MagicMock
21
+
22
+from video_processor.analyzers.action_detector import ActionDetector
23
+from video_processor.models import ActionItem, TranscriptSegment
24
+
25
+
26
+class TestPatternExtract:
27
+ def test_detects_need_to(self):
28
+ detector = ActionDetector()
29
+ items = detector.detect_from_transcript(
30
+ "We need"Action item: set up monitterns(self):
31
+ roadmap.")
32
+ assert len(items) >= 1
33
+
34
+ def test_detects_follow_up(self):
35
+ detector = ActionDetector()
36
+ items = detector.detect_from_transcript("Follow up with the client about requirements.")
37
+ assert len(items) >= 1
38
+
39
+ def test_detects_lets(self):
40
+ detector = ActionDetector()
41
+ items = detector.detect_from_transcript("Let's schedule a meeting to discuss the roadmap.")
42
+ assert len(items) >= 1
43
+
44
+ def test_ignores_short_sentences(self):
45
+ detector = ActionDetector()
46
+ items = detector.detect_from_transcript("Do it.")
47
+ assert len(items) == 0
48
+
49
+ def test_no_action_patterns(self):
50
+ detector = ActionDetec
51
+ "con"The weather was nice to
52
+ )
53
+ assert len(items) == 0
54
+
55
+ def test_multiple_sentences(self):
56
+ detector = Actio""Tests for enhanced actioced action item detection."""
57
+
58
+We need to deploy the fix. A"
59
+ tences("The sky is blue."
60
+ detector = ActionDetectextterns(self):
61
+ len(items) == 2
62
+
63
+ def test_source_is_transcript(self):
64
+ detector = ActionDetector()
65
+ items = detector.detect_from_transcript("We need to fix the authentication module.")
66
+ for item in items:
67
+ assert item.source == "transcript"
68
+
69
+
70
+class TestLLMExtract:
71
+ def test_llm_extraction(self):
72
+ pm = MagicMock()
73
+ pm.chat.return_value = json.dumps([ext": Non { assert len(items) >= 1
74
+ "assignee": "Bob","priority": "high", "Action item: set up monito}
75
+ ])_no_match_no_context(self):
76
+ em detection."""
77
+
78
+import json
79
+from unittest.mock import MagicMock
80
+
81
+fro"""Tests for enhanced action item detection."""
82
+
83
+import json
84
+from unittest.mock import MagicMock
85
+
86
+from video_processor.analyzers.action_detector import ActionDetector
87
+from video_processor.models import ActionItem, TranscriptSegment
88
+
89
+
90
+class TestPatternExtract:
91
+ def test_detects_need_to(self):
92
+ detector = ActionDetector()
93
+ items = detector.detect_from_transcript(
94
+ "We need to update the database schema before release."
95
+ )
96
+ assert len(items) >= 1
97
+ assert any("database" in i.action.lower() for i in items)
98
+
99
+ def test_detects_should(self):
100
+ detector = ActionDetector()
101
+ items = detector.detect_from_transcript("Alice should review the pull request by Friday.")
102
+ assert len(items) >= 1
103
+
104
+ def test_detects_action_item_keyword(self):
105
+ detector = ActionDetector()
106
+ items = detector.detect_from_transcript(
107
+ "Action item: set up monitoring for the new service."
108
+ )
109
+ assert len(items) >= 1
110
+
111
+ def test_detects_follow_up(self):
112
+ detector = ActionDetector()
113
+ items = detector.detect_from_transcript("Follow up with the client about requirements.")
114
+ [ext": Non"""Tests for enhanced action item dsts for enhanced action ite{"action": "", "assignee": "Bob"])_no_match_no_context(self):
115
+ em detection."""
116
+
117
+import json
118
+from unittest.mock import MagicMock
119
+
120
+fro"""Tests .")
121
+ assert len(items) >= 1
122
+
123
+ def test_detects_lets(self):
124
+ detector = ActionDetector()
125
+ items = detector.detect_from_transcript("Let's schedule a meeting to discuss the roadmap.")
126
+ assert len(items) >= 1
127
+
128
+ def test_ignores_short_sentences(self):
129
+ detector = ActionDetector()
130
+ items = detector.detect_from_transcript("Do it.")
131
+ assert len(items) == 0
132
+
133
+ def test_no_action_patterns(self):
134
+ detector = ActionDetector()
135
+ items = detector.detect_from_transcript("The weather was nice today. We had lunch at noon.")
136
+ assert len(items) == 0
137
+
138
+ def test_multiple_sentences(self):
139
+ detector = ActionDetector()
140
+ text = "We need to deploy the fix. Alice should test it first. The sky is blue."
141
+ items = detector.detect_from_transcript(text)
142
+ assert len(items) == 2
143
+
144
+ def test_source_is_transcript(self):
145
+ detector = ActionDetector()
146
+ items = detector.detect_from_transcript("We need to fix the authentication module.")
147
+ for item in items:
148
+ assert item.source == "transcript"
149
+
150
+
151
+class TestLLMExtract:
152
+ def test_llm_extraction(self):
153
+ pm = MagicMock()
154
+ pm.chat.return_value = json.dumps(
155
+ [
156
+ {
157
+ "action": "Deploy new version",
158
+ "assignee": "Bob",
159
+ "deadline": "Friday",
160
+ "priority": "high",
161
+ "context": "Production release",
162
+ }
163
+ ]
164
+ )
165
+ detector = ActionDetector(provider_manager=pm)
166
+ items = detector.detect_from_transcript("Deploy new version by Friday.")
167
+ assert len(items) == 1
168
+ assert items[0].action == "Deploy new version"
169
+ assert items[0].assignee == "Bob"
170
+ assert items[0].deadline == "Friday"
171
+ assert items[0].priority == "high"
172
+ assert items[0].source == "transcript"
173
+
174
+ def test_llm_returns_empty(self):
175
+ pm = MagicMock()
176
+ pm.chat.return_value = "[]"
177
+ detector = ActionDetector(provider_manager=pm)
178
+ items = detector.detect_from_transcript("No action items here.")
179
+ assert items == []
180
+
181
+ def test_llm_error_returns_empty(self):
182
+ pm = MagicMock()
183
+ pm.chat.side_effect = Exception("API error")
184
+ detector = ActionDetector(provider_manager=pm)
185
+ items = detector.detect_from_transcript("We need to fix this.")
186
+ assert items == []
187
+
188
+ def test_llm_bad_json(self):
189
+ pm = MagicMock()
190
+ pm.chat.return_value = "not valid json"
191
+ detector = ActionDetector(provider_manager=pm)
192
+ items = detector.detect_from_transcript("Update the docs.")
193
+ assert items == []
194
+
195
+ def test_llm_skips_items_without_action(self):
196
+ pm = MagicMock()
197
+ pm.chat.return_value = json.dumps(
198
+ [
199
+ {"action": "Valid action", "assignee": None},
200
+ {"assignee": "Alice"}, # No action field
201
+ {"action": "", "assignee": "Bob"}, # Empty action
202
+ ]
203
+ )
204
+ detector = ActionDetector(provider_manager=pm)
205
+ items = detector.detect_from_transcript("Some text.")
206
+ assert len(items) == 1
207
+ assert items[0].action == "Valid action"
208
+
209
+
210
+class TestDetectFromDiagrams:
211
+ def test_dict_diagrams(self):
212
+ pm = MagicMock()
213
+ pm.chat.return_value = json.dumps(
214
+ [
215
+ {
216
+ "action": "Migrate database",
217
+ "assignee": None,
218
+ "deadline": None,
219
+ "priority": None,
220
+ "context": None,
221
+ },
222
+ ]
223
+ )
224
+ detector = ActionDetector(provider_manager=pm)
225
+ diagrams = [
226
+ {"text_content": "Step 1: Migrate database", "elements": ["DB", "Migration"]},
227
+ ]
228
+ items = detector.detect_from_diagrams(diagrams)
229
+ assert len(items) == 1
230
+ assert items[0].source == "diagram"
231
+
232
+ def test_object_diagrams(self):
233
+ pm = MagicMock()
234
+ pm.chat.return_value = json.dumps(
235
+ [
236
+ {
237
+ "action": "Update API",
238
+ "assignee": None,
239
+ "deadline": None,
240
+ "priority": None,
241
+ "context": None,
242
+ },
243
+ ]
244
+ )
245
+ detector = ActionDetector(provider_manager=pm)
246
+
247
+ class FakeDiagram:
248
+ text_content = "Update API endpoints"
249
+ elements = ["API", "Gateway"]
250
+
251
+ items = detector.detect_from_diagrams([FakeDiagram()])
252
+ assert len(ite
--- a/tests/test_action_detector.py
+++ b/tests/test_action_detector.py
@@ -0,0 +1,252 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
--- a/tests/test_action_detector.py
+++ b/tests/test_action_detector.py
@@ -0,0 +1,252 @@
1 """Tests for enhanced action item detection."""
2
3 import json
4 from unittest.mock import MagicMock
5
6 import pytest
7
8 from video_processor.analyzers.action_detector import ActionDetector
9 from video_processor.models import ActionItem, TranscriptSegment
10
11
12 class TestPatternExtract:
13 def test_detects_need_to(self):
14 detector = ActionDetector()
15 items = detector.detect_from_transcript(""Tests for enhanced action m detection."""
16
17 ""Tests for enhanced action itehanced action item detection."""
18
19 import json
20 from unittest.mock import MagicMock
21
22 from video_processor.analyzers.action_detector import ActionDetector
23 from video_processor.models import ActionItem, TranscriptSegment
24
25
26 class TestPatternExtract:
27 def test_detects_need_to(self):
28 detector = ActionDetector()
29 items = detector.detect_from_transcript(
30 "We need"Action item: set up monitterns(self):
31 roadmap.")
32 assert len(items) >= 1
33
34 def test_detects_follow_up(self):
35 detector = ActionDetector()
36 items = detector.detect_from_transcript("Follow up with the client about requirements.")
37 assert len(items) >= 1
38
39 def test_detects_lets(self):
40 detector = ActionDetector()
41 items = detector.detect_from_transcript("Let's schedule a meeting to discuss the roadmap.")
42 assert len(items) >= 1
43
44 def test_ignores_short_sentences(self):
45 detector = ActionDetector()
46 items = detector.detect_from_transcript("Do it.")
47 assert len(items) == 0
48
49 def test_no_action_patterns(self):
50 detector = ActionDetec
51 "con"The weather was nice to
52 )
53 assert len(items) == 0
54
55 def test_multiple_sentences(self):
56 detector = Actio""Tests for enhanced actioced action item detection."""
57
58 We need to deploy the fix. A"
59 tences("The sky is blue."
60 detector = ActionDetectextterns(self):
61 len(items) == 2
62
63 def test_source_is_transcript(self):
64 detector = ActionDetector()
65 items = detector.detect_from_transcript("We need to fix the authentication module.")
66 for item in items:
67 assert item.source == "transcript"
68
69
70 class TestLLMExtract:
71 def test_llm_extraction(self):
72 pm = MagicMock()
73 pm.chat.return_value = json.dumps([ext": Non { assert len(items) >= 1
74 "assignee": "Bob","priority": "high", "Action item: set up monito}
75 ])_no_match_no_context(self):
76 em detection."""
77
78 import json
79 from unittest.mock import MagicMock
80
81 fro"""Tests for enhanced action item detection."""
82
83 import json
84 from unittest.mock import MagicMock
85
86 from video_processor.analyzers.action_detector import ActionDetector
87 from video_processor.models import ActionItem, TranscriptSegment
88
89
90 class TestPatternExtract:
91 def test_detects_need_to(self):
92 detector = ActionDetector()
93 items = detector.detect_from_transcript(
94 "We need to update the database schema before release."
95 )
96 assert len(items) >= 1
97 assert any("database" in i.action.lower() for i in items)
98
99 def test_detects_should(self):
100 detector = ActionDetector()
101 items = detector.detect_from_transcript("Alice should review the pull request by Friday.")
102 assert len(items) >= 1
103
104 def test_detects_action_item_keyword(self):
105 detector = ActionDetector()
106 items = detector.detect_from_transcript(
107 "Action item: set up monitoring for the new service."
108 )
109 assert len(items) >= 1
110
111 def test_detects_follow_up(self):
112 detector = ActionDetector()
113 items = detector.detect_from_transcript("Follow up with the client about requirements.")
114 [ext": Non"""Tests for enhanced action item dsts for enhanced action ite{"action": "", "assignee": "Bob"])_no_match_no_context(self):
115 em detection."""
116
117 import json
118 from unittest.mock import MagicMock
119
120 fro"""Tests .")
121 assert len(items) >= 1
122
123 def test_detects_lets(self):
124 detector = ActionDetector()
125 items = detector.detect_from_transcript("Let's schedule a meeting to discuss the roadmap.")
126 assert len(items) >= 1
127
128 def test_ignores_short_sentences(self):
129 detector = ActionDetector()
130 items = detector.detect_from_transcript("Do it.")
131 assert len(items) == 0
132
133 def test_no_action_patterns(self):
134 detector = ActionDetector()
135 items = detector.detect_from_transcript("The weather was nice today. We had lunch at noon.")
136 assert len(items) == 0
137
138 def test_multiple_sentences(self):
139 detector = ActionDetector()
140 text = "We need to deploy the fix. Alice should test it first. The sky is blue."
141 items = detector.detect_from_transcript(text)
142 assert len(items) == 2
143
144 def test_source_is_transcript(self):
145 detector = ActionDetector()
146 items = detector.detect_from_transcript("We need to fix the authentication module.")
147 for item in items:
148 assert item.source == "transcript"
149
150
151 class TestLLMExtract:
152 def test_llm_extraction(self):
153 pm = MagicMock()
154 pm.chat.return_value = json.dumps(
155 [
156 {
157 "action": "Deploy new version",
158 "assignee": "Bob",
159 "deadline": "Friday",
160 "priority": "high",
161 "context": "Production release",
162 }
163 ]
164 )
165 detector = ActionDetector(provider_manager=pm)
166 items = detector.detect_from_transcript("Deploy new version by Friday.")
167 assert len(items) == 1
168 assert items[0].action == "Deploy new version"
169 assert items[0].assignee == "Bob"
170 assert items[0].deadline == "Friday"
171 assert items[0].priority == "high"
172 assert items[0].source == "transcript"
173
174 def test_llm_returns_empty(self):
175 pm = MagicMock()
176 pm.chat.return_value = "[]"
177 detector = ActionDetector(provider_manager=pm)
178 items = detector.detect_from_transcript("No action items here.")
179 assert items == []
180
181 def test_llm_error_returns_empty(self):
182 pm = MagicMock()
183 pm.chat.side_effect = Exception("API error")
184 detector = ActionDetector(provider_manager=pm)
185 items = detector.detect_from_transcript("We need to fix this.")
186 assert items == []
187
188 def test_llm_bad_json(self):
189 pm = MagicMock()
190 pm.chat.return_value = "not valid json"
191 detector = ActionDetector(provider_manager=pm)
192 items = detector.detect_from_transcript("Update the docs.")
193 assert items == []
194
195 def test_llm_skips_items_without_action(self):
196 pm = MagicMock()
197 pm.chat.return_value = json.dumps(
198 [
199 {"action": "Valid action", "assignee": None},
200 {"assignee": "Alice"}, # No action field
201 {"action": "", "assignee": "Bob"}, # Empty action
202 ]
203 )
204 detector = ActionDetector(provider_manager=pm)
205 items = detector.detect_from_transcript("Some text.")
206 assert len(items) == 1
207 assert items[0].action == "Valid action"
208
209
210 class TestDetectFromDiagrams:
211 def test_dict_diagrams(self):
212 pm = MagicMock()
213 pm.chat.return_value = json.dumps(
214 [
215 {
216 "action": "Migrate database",
217 "assignee": None,
218 "deadline": None,
219 "priority": None,
220 "context": None,
221 },
222 ]
223 )
224 detector = ActionDetector(provider_manager=pm)
225 diagrams = [
226 {"text_content": "Step 1: Migrate database", "elements": ["DB", "Migration"]},
227 ]
228 items = detector.detect_from_diagrams(diagrams)
229 assert len(items) == 1
230 assert items[0].source == "diagram"
231
232 def test_object_diagrams(self):
233 pm = MagicMock()
234 pm.chat.return_value = json.dumps(
235 [
236 {
237 "action": "Update API",
238 "assignee": None,
239 "deadline": None,
240 "priority": None,
241 "context": None,
242 },
243 ]
244 )
245 detector = ActionDetector(provider_manager=pm)
246
247 class FakeDiagram:
248 text_content = "Update API endpoints"
249 elements = ["API", "Gateway"]
250
251 items = detector.detect_from_diagrams([FakeDiagram()])
252 assert len(ite
--- a/tests/test_api_cache.py
+++ b/tests/test_api_cache.py
@@ -0,0 +1,36 @@
1
+"""Tests for API json
2
+import time
3
+
4
+import pytestonse cache."""
5
+
6
+import time
7
+
8
+from video_processor.utils.api_cache import ApiCache
9
+
10
+
11
+class TestApiCache:
12
+ def test_set_and_get(self, tmp_path):
13
+ cache = ApiCache(tmp_path, namespace="test")
14
+ cache.set("key1", {"data": "value"})
15
+ result = cache.get("key1")
16
+ assert result == {"data": "value"}
17
+
18
+ def test_get_missing_key(self, tmp_path):
19
+ cache = ApiCache(tmp_path, namespace="test")
20
+ assert cache.get("nonexistent") is None
21
+
22
+ def test_ttl_expiry(self, tmp_path):
23
+ cache = ApiCache(tmp_path, namespace="test", ttl=0)
24
+ cache.set("key1", "value")
25
+ # With TTL=0, any subsequent access should be expired
26
+ time.sleep(0.01)
27
+ assert cache.get("key1") ist(self, tmp_path):
28
+ h):
29
+ cache = ApiCache(tmp_path, namespace="test")
30
+ cache.set("key1", "value")
31
+ assert cache.get("key1") == "value"
32
+ result = cache.invalidate("key1")
33
+ assert result is True
34
+ assert cache.get("key1") is None
35
+
36
+ def test_invalidate_missing(se
--- a/tests/test_api_cache.py
+++ b/tests/test_api_cache.py
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
--- a/tests/test_api_cache.py
+++ b/tests/test_api_cache.py
@@ -0,0 +1,36 @@
1 """Tests for API json
2 import time
3
4 import pytestonse cache."""
5
6 import time
7
8 from video_processor.utils.api_cache import ApiCache
9
10
11 class TestApiCache:
12 def test_set_and_get(self, tmp_path):
13 cache = ApiCache(tmp_path, namespace="test")
14 cache.set("key1", {"data": "value"})
15 result = cache.get("key1")
16 assert result == {"data": "value"}
17
18 def test_get_missing_key(self, tmp_path):
19 cache = ApiCache(tmp_path, namespace="test")
20 assert cache.get("nonexistent") is None
21
22 def test_ttl_expiry(self, tmp_path):
23 cache = ApiCache(tmp_path, namespace="test", ttl=0)
24 cache.set("key1", "value")
25 # With TTL=0, any subsequent access should be expired
26 time.sleep(0.01)
27 assert cache.get("key1") ist(self, tmp_path):
28 h):
29 cache = ApiCache(tmp_path, namespace="test")
30 cache.set("key1", "value")
31 assert cache.get("key1") == "value"
32 result = cache.invalidate("key1")
33 assert result is True
34 assert cache.get("key1") is None
35
36 def test_invalidate_missing(se
--- a/tests/test_content_analyzer.py
+++ b/tests/test_content_analyzer.py
@@ -0,0 +1 @@
1
+"
--- a/tests/test_content_analyzer.py
+++ b/tests/test_content_analyzer.py
@@ -0,0 +1 @@
 
--- a/tests/test_content_analyzer.py
+++ b/tests/test_content_analyzer.py
@@ -0,0 +1 @@
1 "
--- a/tests/test_pipeline.py
+++ b/tests/test_pipeline.py
@@ -0,0 +1 @@
1
+"""Tests for the cor
--- a/tests/test_pipeline.py
+++ b/tests/test_pipeline.py
@@ -0,0 +1 @@
 
--- a/tests/test_pipeline.py
+++ b/tests/test_pipeline.py
@@ -0,0 +1 @@
1 """Tests for the cor
--- a/tests/test_prompt_templates.py
+++ b/tests/test_prompt_templates.py
@@ -0,0 +1,63 @@
1
+"""Tests for prompimport pytestmpt template management."""
2
+
3
+from video_processor.utils.prompt_templates import (
4
+ DEFAULT_TEMPLATES,
5
+ PromptTemplate,
6
+ default_prompt_manager,
7
+)
8
+
9
+
10
+class TestPromptTemplate:
11
+ def test_default_templates_loaded(self):
12
+ pm = PromptTemplate(default_templates=DEFAULT_TEMPLATES)
13
+ assert len(pm.templates) == 10
14
+
15
+ def test_all_expected_templates_exist(self):
16
+ expected = [
17
+ "content_analysis",
18
+ "diagram_extraction",
19
+ "action_item_detection",
20
+ "content_summary",
21
+ "summary_generation",
22
+ "key_points_extraction",
23
+ "entity_extraction",
24
+ "relationship_extraction",
25
+ "diagram_analysis",
26
+ "mermaid_generation",
27
+ ]
28
+ for name in expected:
29
+ assert name in DEFAULT_TEMPLATES, f"Missing template: {name}"
30
+
31
+ def test_get_template(self):
32
+ pm = PromptTemplate(default_templates={"test": "Hello $name"})
33
+ template = pm.get_template("test")
34
+ assert template is not None
35
+
36
+ def test_get_missing_template(self):
37
+ pm = PromptTemplate(default_templates={})
38
+ assert pm.get_template("nonexistent") is None
39
+
40
+ def test_format_prompt(self):
41
+ pm = PromptTemplate(default_templates={"greet": "Hello $name, welcome to $place"})
42
+ result = pm.format_prompt("greet", name="Alice", place="Wonderland")
43
+ assert "Alice" in result
44
+ assert "Wonderland" in result
45
+
46
+ def test_format_missing_template(self):
47
+ pm = PromptTemplate(default_templates={})
48
+ result = pm.format_prompt("nonexistent", key="value")
49
+ assert result is None
50
+
51
+ def test_safe_substitute_missing_vars(self):
52
+ pm = PromptTemplate(default_templates={"test": "Hello $name and $other"})
53
+ result = pm.format_prompt("test", name="Alice")
54
+ assert "Alice" in result
55
+ assert "$other" in result # safe_substitute keeps unresolved vars
56
+
57
+ def test_add_template(self):
58
+ pm = PromptTemplate(default_templates={})
59
+ pm.add_template("new", "New template: $var")
60
+ result = pm.format_prompt("new", var="value")
61
+ assert "value" in result
62
+
63
+ def test_save_template_no_
--- a/tests/test_prompt_templates.py
+++ b/tests/test_prompt_templates.py
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
--- a/tests/test_prompt_templates.py
+++ b/tests/test_prompt_templates.py
@@ -0,0 +1,63 @@
1 """Tests for prompimport pytestmpt template management."""
2
3 from video_processor.utils.prompt_templates import (
4 DEFAULT_TEMPLATES,
5 PromptTemplate,
6 default_prompt_manager,
7 )
8
9
10 class TestPromptTemplate:
11 def test_default_templates_loaded(self):
12 pm = PromptTemplate(default_templates=DEFAULT_TEMPLATES)
13 assert len(pm.templates) == 10
14
15 def test_all_expected_templates_exist(self):
16 expected = [
17 "content_analysis",
18 "diagram_extraction",
19 "action_item_detection",
20 "content_summary",
21 "summary_generation",
22 "key_points_extraction",
23 "entity_extraction",
24 "relationship_extraction",
25 "diagram_analysis",
26 "mermaid_generation",
27 ]
28 for name in expected:
29 assert name in DEFAULT_TEMPLATES, f"Missing template: {name}"
30
31 def test_get_template(self):
32 pm = PromptTemplate(default_templates={"test": "Hello $name"})
33 template = pm.get_template("test")
34 assert template is not None
35
36 def test_get_missing_template(self):
37 pm = PromptTemplate(default_templates={})
38 assert pm.get_template("nonexistent") is None
39
40 def test_format_prompt(self):
41 pm = PromptTemplate(default_templates={"greet": "Hello $name, welcome to $place"})
42 result = pm.format_prompt("greet", name="Alice", place="Wonderland")
43 assert "Alice" in result
44 assert "Wonderland" in result
45
46 def test_format_missing_template(self):
47 pm = PromptTemplate(default_templates={})
48 result = pm.format_prompt("nonexistent", key="value")
49 assert result is None
50
51 def test_safe_substitute_missing_vars(self):
52 pm = PromptTemplate(default_templates={"test": "Hello $name and $other"})
53 result = pm.format_prompt("test", name="Alice")
54 assert "Alice" in result
55 assert "$other" in result # safe_substitute keeps unresolved vars
56
57 def test_add_template(self):
58 pm = PromptTemplate(default_templates={})
59 pm.add_template("new", "New template: $var")
60 result = pm.format_prompt("new", var="value")
61 assert "value" in result
62
63 def test_save_template_no_
--- a/video_processor/analyzers/action_detector.py
+++ b/video_processor/analyzers/action_detector.py
@@ -0,0 +1,146 @@
1
+"""Enhanced action item detection from transcripts and diagrams."""
2
+
3
+import logging
4
+import re
5
+from typing import List, Optional
6
+
7
+from video_processor.models import ActionItem, TranscriptSegment
8
+from video_processor.providers.manager import ProviderManager
9
+from video_processor.utils.json_parsing import parse_json_from_response
10
+
11
+logger = logging.getLogger(__name__)
12
+
13
+# Patterns that indicate action items in natural language
14
+_ACTION_PATTERNS = [
15
+ re.compile(r"\b(?:need|needs)\s+to\b", re.IGNORECASE),
16
+ re.compile(r"\b(?:should|must|shall)\s+\w+", re.IGNORECASE),
17
+ re.compile(r"\b(?:will|going\s+to)\s+\w+", re.IGNORECASE),
18
+ re.compile(r"\b(?:action\s+item|todo|to-do|follow[\s-]?up)\b", re.IGNORECASE),
19
+ re.compile(r"\b(?:assigned?\s+to|responsible\s+for)\b", re.IGNORECASE),
20
+ re.compile(r"\b(?:deadline|due\s+(?:date|by))\b", re.IGNORECASE),
21
+ re.compile(r"\b(?:let'?s|let\s+us)\s+\w+", re.IGNORECASE),
22
+ re.compile(r"\b(?:make\s+sure|ensure)\b", re.IGNORECASE),
23
+ re.compile(r"\b(?:can\s+you|could\s+you|please)\s+\w+", re.IGNORECASE),
24
+]
25
+
26
+
27
+class ActionDetector:
28
+ """Detects action items from transcripts using heuristics and LLM."""
29
+
30
+ def __init__(self, provider_manager: Optional[ProviderManager] = None):
31
+ self.pm = provider_manager
32
+
33
+ def detect_from_transcript(
34
+ self,
35
+ text: str,
36
+ segments: Optional[List[TranscriptSegment]] = None,
37
+ ) -> List[ActionItem]:
38
+ """
39
+ Detect action items from transcript text.
40
+
41
+ Uses LLM extraction when available, falls back to pattern matching.
42
+ Segments are used to attach timestamps.
43
+ """
44
+ if self.pm:
45
+ items = self._llm_extract(text)
46
+ else:
47
+ items = self._pattern_extract(text)
48
+
49
+ # Attach timestamps from segments if available
50
+ if segments and items:
51
+ self._attach_timestamps(items, segments)
52
+
53
+ return items
54
+
55
+ def detect_from_diagrams(
56
+ self,
57
+ diagrams: list,
58
+ ) -> List[ActionItem]:
59
+ """
60
+ Extract action items mentioned in diagram text content.
61
+
62
+ Looks for action-oriented language in diagram text/elements.
63
+ """
64
+ items: List[ActionItem] = []
65
+
66
+ for diagram in diagrams:
67
+ text = ""
68
+ if isinstance(diagram, dict):
69
+ text = diagram.get("text_content", "") or ""
70
+ elements = diagram.get("elements", [])
71
+ else:
72
+ text = getattr(diagram, "text_content", "") or ""
73
+ elements = getattr(diagram, "elements", [])
74
+
75
+ combined = text + " " + " ".join(str(e) for e in elements)
76
+ if not combined.strip():
77
+ continue
78
+
79
+ if self.pm:
80
+ diagram_items = self._llm_extract(combined)
81
+ else:
82
+ diagram_items = self._pattern_extract(combined)
83
+
84
+ for item in diagram_items:
85
+ item.source = "diagram"
86
+ items.extend(diagram_items)
87
+
88
+ return items
89
+
90
+ def merge_action_items(
91
+ self,
92
+ transcript_items: List[ActionItem],
93
+ diagram_items: List[ActionItem],
94
+ ) -> List[ActionItem]:
95
+ """
96
+ Merge action items from transcript and diagram sources.
97
+
98
+ Deduplicates by checking for similar action text.
99
+ """
100
+ merged: List[ActionItem] = list(transcript_items)
101
+ existing_actions = {a.action.lower().strip() for a in merged}
102
+
103
+ for item in diagram_items:
104
+ normalized = item.action.lower().strip()
105
+ if normalized not in existing_actions:
106
+ merged.append(item)
107
+ existing_actions.add(normalized)
108
+
109
+ return merged
110
+
111
+ def _llm_extract(self, text: str) -> List[ActionItem]:
112
+ """Extract action items using LLM."""
113
+ if not self.pm:
114
+ return []
115
+
116
+ prompt = (
117
+ "Extract all action items, tasks, and commitments "
118
+ "from the following text.\n\n"
119
+ f"TEXT:\n{text[:8000]}\n\n"
120
+ "Return a JSON array:\n"
121
+ '[{"action": "...", "assignee": "...", "deadline": "...", '
122
+ '"priority": "...", "context": "..."}]\n\n'
123
+ "Only include clear, actionable items. "
124
+ "Set fields to null if not mentioned.\n"
125
+ "Return ONLY the JSON array."
126
+ )
127
+
128
+ try:
129
+ raw = self.pm.chat(
130
+ [{"role": "user", "content": prompt}],
131
+ temperature=0.3,
132
+ )
133
+ parsed = parse_json_from_response(raw)
134
+ if isinstance(parsed, list):
135
+ return [
136
+ ActionItem(
137
+ action=item.get("action", ""),
138
+ assignee=item.get("assignee"),
139
+ deadline=item.get("deadline"),
140
+ priority=item.get("priority"),
141
+ context=item.get("context"),
142
+ source="transcript",
143
+ )
144
+ for item in parsed
145
+ if isinstance(item, dict) and item.get("action")
146
+
--- a/video_processor/analyzers/action_detector.py
+++ b/video_processor/analyzers/action_detector.py
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
--- a/video_processor/analyzers/action_detector.py
+++ b/video_processor/analyzers/action_detector.py
@@ -0,0 +1,146 @@
1 """Enhanced action item detection from transcripts and diagrams."""
2
3 import logging
4 import re
5 from typing import List, Optional
6
7 from video_processor.models import ActionItem, TranscriptSegment
8 from video_processor.providers.manager import ProviderManager
9 from video_processor.utils.json_parsing import parse_json_from_response
10
11 logger = logging.getLogger(__name__)
12
# Patterns that indicate action items in natural language.
# Every pattern is compiled once at import time and is case-insensitive.
_ACTION_PATTERNS = [
    # Obligation: "need to" / "needs to"
    re.compile(r"\b(?:need|needs)\s+to\b", re.IGNORECASE),
    # Modal verb followed by another word: "should review", "must ship"
    re.compile(r"\b(?:should|must|shall)\s+\w+", re.IGNORECASE),
    # Stated intent: "will send", "going to fix"
    re.compile(r"\b(?:will|going\s+to)\s+\w+", re.IGNORECASE),
    # Explicit markers: "action item", "todo", "to-do", "follow up"/"follow-up"/"followup"
    re.compile(r"\b(?:action\s+item|todo|to-do|follow[\s-]?up)\b", re.IGNORECASE),
    # Ownership: "assign to", "assigned to", "responsible for".
    # The group makes the whole "ed" suffix optional; the previous form
    # `assigned?` made only the final "d" optional, so plain "assign to"
    # never matched.
    re.compile(r"\b(?:assign(?:ed)?\s+to|responsible\s+for)\b", re.IGNORECASE),
    # Time limits: "deadline", "due date", "due by"
    re.compile(r"\b(?:deadline|due\s+(?:date|by))\b", re.IGNORECASE),
    # Group proposals: "let's try", "lets try", "let us try"
    re.compile(r"\b(?:let'?s|let\s+us)\s+\w+", re.IGNORECASE),
    # Verification requests: "make sure", "ensure"
    re.compile(r"\b(?:make\s+sure|ensure)\b", re.IGNORECASE),
    # Polite requests: "can you ...", "could you ...", "please ..."
    re.compile(r"\b(?:can\s+you|could\s+you|please)\s+\w+", re.IGNORECASE),
]
25
26
27 class ActionDetector:
28 """Detects action items from transcripts using heuristics and LLM."""
29
30 def __init__(self, provider_manager: Optional[ProviderManager] = None):
31 self.pm = provider_manager
32
33 def detect_from_transcript(
34 self,
35 text: str,
36 segments: Optional[List[TranscriptSegment]] = None,
37 ) -> List[ActionItem]:
38 """
39 Detect action items from transcript text.
40
41 Uses LLM extraction when available, falls back to pattern matching.
42 Segments are used to attach timestamps.
43 """
44 if self.pm:
45 items = self._llm_extract(text)
46 else:
47 items = self._pattern_extract(text)
48
49 # Attach timestamps from segments if available
50 if segments and items:
51 self._attach_timestamps(items, segments)
52
53 return items
54
55 def detect_from_diagrams(
56 self,
57 diagrams: list,
58 ) -> List[ActionItem]:
59 """
60 Extract action items mentioned in diagram text content.
61
62 Looks for action-oriented language in diagram text/elements.
63 """
64 items: List[ActionItem] = []
65
66 for diagram in diagrams:
67 text = ""
68 if isinstance(diagram, dict):
69 text = diagram.get("text_content", "") or ""
70 elements = diagram.get("elements", [])
71 else:
72 text = getattr(diagram, "text_content", "") or ""
73 elements = getattr(diagram, "elements", [])
74
75 combined = text + " " + " ".join(str(e) for e in elements)
76 if not combined.strip():
77 continue
78
79 if self.pm:
80 diagram_items = self._llm_extract(combined)
81 else:
82 diagram_items = self._pattern_extract(combined)
83
84 for item in diagram_items:
85 item.source = "diagram"
86 items.extend(diagram_items)
87
88 return items
89
90 def merge_action_items(
91 self,
92 transcript_items: List[ActionItem],
93 diagram_items: List[ActionItem],
94 ) -> List[ActionItem]:
95 """
96 Merge action items from transcript and diagram sources.
97
98 Deduplicates by checking for similar action text.
99 """
100 merged: List[ActionItem] = list(transcript_items)
101 existing_actions = {a.action.lower().strip() for a in merged}
102
103 for item in diagram_items:
104 normalized = item.action.lower().strip()
105 if normalized not in existing_actions:
106 merged.append(item)
107 existing_actions.add(normalized)
108
109 return merged
110
111 def _llm_extract(self, text: str) -> List[ActionItem]:
112 """Extract action items using LLM."""
113 if not self.pm:
114 return []
115
116 prompt = (
117 "Extract all action items, tasks, and commitments "
118 "from the following text.\n\n"
119 f"TEXT:\n{text[:8000]}\n\n"
120 "Return a JSON array:\n"
121 '[{"action": "...", "assignee": "...", "deadline": "...", '
122 '"priority": "...", "context": "..."}]\n\n'
123 "Only include clear, actionable items. "
124 "Set fields to null if not mentioned.\n"
125 "Return ONLY the JSON array."
126 )
127
128 try:
129 raw = self.pm.chat(
130 [{"role": "user", "content": prompt}],
131 temperature=0.3,
132 )
133 parsed = parse_json_from_response(raw)
134 if isinstance(parsed, list):
135 return [
136 ActionItem(
137 action=item.get("action", ""),
138 assignee=item.get("assignee"),
139 deadline=item.get("deadline"),
140 priority=item.get("priority"),
141 context=item.get("context"),
142 source="transcript",
143 )
144 for item in parsed
145 if isinstance(item, dict) and item.get("action")
146
--- a/video_processor/analyzers/content_analyzer.py
+++ b/video_processor/analyzers/content_analyzer.py
@@ -0,0 +1,92 @@
1
+"""Content cross-referencing between transcript and diagram entities."""
2
+
3
+import logging
4
+from typing import List, Optional
5
+
6
+from video_processor.models import Entity, KeyPoint
7
+from video_processor.providers.manager import ProviderManager
8
+from video_processor.utils.json_parsing import parse_json_from_response
9
+
10
+logger = logging.getLogger(__name__)
11
+
12
+
13
+class ContentAnalyzer:
14
+ """Cross-references transcript and diagram entities for richer knowledge."""
15
+
16
+ def __init__(self, provider_manager: Optional[ProviderManager] = None):
17
+ self.pm = provider_manager
18
+
19
+ def cross_reference(
20
+ self,
21
+ transcript_entities: List[Entity],
22
+ diagram_entities: List[Entity],
23
+ ) -> List[Entity]:
24
+ """
25
+ Merge entities from transcripts and diagrams.
26
+
27
+ Merges by exact name overlap first, then uses LLM for fuzzy matching
28
+ of remaining entities. Adds source attribution.
29
+ """
30
+ merged: dict[str, Entity] = {}
31
+
32
+ # Index transcript entities
33
+ for e in transcript_entities:
34
+ key = e.name.lower()
35
+ merged[key] = Entity(
36
+ name=e.name,
37
+ type=e.type,
38
+ descriptions=list(e.descriptions),
39
+ source="transcript",
40
+ occurrences=list(e.occurrences),
41
+ )
42
+
43
+ # Merge diagram entities
44
+ for e in diagram_entities:
45
+ key = e.name.lower()
46
+ if key in merged:
47
+ existing = merged[key]
48
+ existing.source = "both"
49
+ existing.descriptions = list(set(existing.descriptions + e.descriptions))
50
+ existing.occurrences.extend(e.occurrences)
51
+ else:
52
+ merged[key] = Entity(
53
+ name=e.name,
54
+ type=e.type,
55
+ descriptions=list(e.descriptions),
56
+ source="diagram",
57
+ occurrences=list(e.occurrences),
58
+ )
59
+
60
+ # LLM fuzzy matching for unmatched entities
61
+ if self.pm:
62
+ unmatched_t = [
63
+ e e
64
+ f if e.name.lower() not in {):
65
+ re if e.name.lo):
66
+ }
67
+ ]
68
+ unmatcme,
69
+ ty if e.name.lower() not in {):
70
+ re"""Content crossencing between tr}
71
+ text = (
72
+ d.get("text_content", "") if isinstance(d, dict) else getattr(d, "text
73
+ )
74
+ entities = set(str(e).lower() for e in elements)
75
+ if text:
76
+ entities.update(word.lower() for word in text.split() if len(word) > 3)
77
+ diagram_entities[i] = entities
78
+
79
+ # Match key points to diagrams
80
+ for kp in key_points:
81
+ kp_words = set(kp.point.lower().split())
82
+ if kp.details:
83
+ kp_words.update(kp.details.lower().split())
84
+
85
+ related = []
86
+ for idx, d_entities in diagram_entities.items():
87
+ overlap = kp_words & d_entities
88
+ if len(overlap) >= 2:
89
+ related.append(idx)
90
+
91
+ if related:
92
+ kp.related_diagrams = related
--- a/video_processor/analyzers/content_analyzer.py
+++ b/video_processor/analyzers/content_analyzer.py
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
--- a/video_processor/analyzers/content_analyzer.py
+++ b/video_processor/analyzers/content_analyzer.py
@@ -0,0 +1,92 @@
1 """Content cross-referencing between transcript and diagram entities."""
2
3 import logging
4 from typing import List, Optional
5
6 from video_processor.models import Entity, KeyPoint
7 from video_processor.providers.manager import ProviderManager
8 from video_processor.utils.json_parsing import parse_json_from_response
9
10 logger = logging.getLogger(__name__)
11
12
13 class ContentAnalyzer:
14 """Cross-references transcript and diagram entities for richer knowledge."""
15
16 def __init__(self, provider_manager: Optional[ProviderManager] = None):
17 self.pm = provider_manager
18
19 def cross_reference(
20 self,
21 transcript_entities: List[Entity],
22 diagram_entities: List[Entity],
23 ) -> List[Entity]:
24 """
25 Merge entities from transcripts and diagrams.
26
27 Merges by exact name overlap first, then uses LLM for fuzzy matching
28 of remaining entities. Adds source attribution.
29 """
30 merged: dict[str, Entity] = {}
31
32 # Index transcript entities
33 for e in transcript_entities:
34 key = e.name.lower()
35 merged[key] = Entity(
36 name=e.name,
37 type=e.type,
38 descriptions=list(e.descriptions),
39 source="transcript",
40 occurrences=list(e.occurrences),
41 )
42
43 # Merge diagram entities
44 for e in diagram_entities:
45 key = e.name.lower()
46 if key in merged:
47 existing = merged[key]
48 existing.source = "both"
49 existing.descriptions = list(set(existing.descriptions + e.descriptions))
50 existing.occurrences.extend(e.occurrences)
51 else:
52 merged[key] = Entity(
53 name=e.name,
54 type=e.type,
55 descriptions=list(e.descriptions),
56 source="diagram",
57 occurrences=list(e.occurrences),
58 )
59
60 # LLM fuzzy matching for unmatched entities
61 if self.pm:
62 unmatched_t = [
63 e e
64 f if e.name.lower() not in {):
65 re if e.name.lo):
66 }
67 ]
68 unmatcme,
69 ty if e.name.lower() not in {):
70 re"""Content crossencing between tr}
71 text = (
72 d.get("text_content", "") if isinstance(d, dict) else getattr(d, "text
73 )
74 entities = set(str(e).lower() for e in elements)
75 if text:
76 entities.update(word.lower() for word in text.split() if len(word) > 3)
77 diagram_entities[i] = entities
78
79 # Match key points to diagrams
80 for kp in key_points:
81 kp_words = set(kp.point.lower().split())
82 if kp.details:
83 kp_words.update(kp.details.lower().split())
84
85 related = []
86 for idx, d_entities in diagram_entities.items():
87 overlap = kp_words & d_entities
88 if len(overlap) >= 2:
89 related.append(idx)
90
91 if related:
92 kp.related_diagrams = related
--- a/video_processor/api/__init__.py
+++ b/video_processor/api/__init__.py
@@ -0,0 +1 @@
1
+# Legacy API modules removed — use video_processor.providers instead.
--- a/video_processor/api/__init__.py
+++ b/video_processor/api/__init__.py
@@ -0,0 +1 @@
 
--- a/video_processor/api/__init__.py
+++ b/video_processor/api/__init__.py
@@ -0,0 +1 @@
1 # Legacy API modules removed — use video_processor.providers instead.

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button