PlanOpticon

planopticon / video_processor / analyzers / action_detector.py
Blame History Raw 199 lines
1
"""Enhanced action item detection from transcripts and diagrams."""
2
3
import logging
4
import re
5
from typing import List, Optional
6
7
from video_processor.models import ActionItem, TranscriptSegment
8
from video_processor.providers.manager import ProviderManager
9
from video_processor.utils.json_parsing import parse_json_from_response
10
11
logger = logging.getLogger(__name__)

# Case-insensitive regexes for phrasing that commonly marks an action item:
# obligations, commitments, assignments, deadlines, suggestions and requests.
_ACTION_PATTERNS = [
    re.compile(source, re.IGNORECASE)
    for source in (
        r"\b(?:need|needs)\s+to\b",
        r"\b(?:should|must|shall)\s+\w+",
        r"\b(?:will|going\s+to)\s+\w+",
        r"\b(?:action\s+item|todo|to-do|follow[\s-]?up)\b",
        r"\b(?:assigned?\s+to|responsible\s+for)\b",
        r"\b(?:deadline|due\s+(?:date|by))\b",
        r"\b(?:let'?s|let\s+us)\s+\w+",
        r"\b(?:make\s+sure|ensure)\b",
        r"\b(?:can\s+you|could\s+you|please)\s+\w+",
    )
]
25
26
27
class ActionDetector:
    """Detect action items in transcript and diagram text.

    When a provider manager is supplied, extraction is delegated to an LLM
    via ``provider_manager.chat``; otherwise a regex heuristic built on
    ``_ACTION_PATTERNS`` is used.
    """

    # Only this many leading characters of the text are embedded in the
    # LLM prompt; anything beyond is not seen by the model.
    _MAX_PROMPT_CHARS = 8000

    def __init__(self, provider_manager: Optional[ProviderManager] = None):
        """Store the optional provider manager used for LLM extraction."""
        self.pm = provider_manager

    def detect_from_transcript(
        self,
        text: str,
        segments: Optional[List[TranscriptSegment]] = None,
    ) -> List[ActionItem]:
        """
        Detect action items from transcript text.

        Uses LLM extraction when a provider manager is configured and
        regex pattern matching otherwise. Note that a failed LLM call
        yields an empty list; it does not fall back to pattern matching.

        Args:
            text: Full transcript text.
            segments: Optional transcript segments; when given, they are
                used to annotate the detected items with a timestamp.

        Returns:
            The detected action items (possibly empty).
        """
        items = self._extract(text)

        # Attach timestamps from segments if available
        if segments and items:
            self._attach_timestamps(items, segments)

        return items

    def detect_from_diagrams(
        self,
        diagrams: list,
    ) -> List[ActionItem]:
        """
        Extract action items mentioned in diagram text content.

        Each diagram may be a dict or an object; its ``text_content`` and
        ``elements`` are combined into one string that is run through the
        same extraction as transcripts. Diagrams with no text are skipped.

        Args:
            diagrams: Diagrams to scan.

        Returns:
            Action items from all diagrams, each with ``source`` set to
            ``"diagram"``.
        """
        items: List[ActionItem] = []

        for diagram in diagrams:
            if isinstance(diagram, dict):
                text = diagram.get("text_content")
                elements = diagram.get("elements")
            else:
                text = getattr(diagram, "text_content", None)
                elements = getattr(diagram, "elements", None)

            # Either field may be present but explicitly None; treat that
            # the same as missing so the join below cannot raise TypeError.
            text = text or ""
            elements = elements or []

            combined = text + " " + " ".join(str(e) for e in elements)
            if not combined.strip():
                continue

            diagram_items = self._extract(combined)

            # The extractors tag everything as "transcript"; correct it here.
            for item in diagram_items:
                item.source = "diagram"
            items.extend(diagram_items)

        return items

    def merge_action_items(
        self,
        transcript_items: List[ActionItem],
        diagram_items: List[ActionItem],
    ) -> List[ActionItem]:
        """
        Merge action items from transcript and diagram sources.

        All transcript items are kept as-is. A diagram item is appended
        only if its action text, lower-cased and stripped of surrounding
        whitespace, does not exactly match an item already in the result.

        Args:
            transcript_items: Items detected in the transcript.
            diagram_items: Items detected in diagrams.

        Returns:
            A new list; the input lists are not modified.
        """
        merged: List[ActionItem] = list(transcript_items)
        existing_actions = {a.action.lower().strip() for a in merged}

        for item in diagram_items:
            normalized = item.action.lower().strip()
            if normalized not in existing_actions:
                merged.append(item)
                existing_actions.add(normalized)

        return merged

    def _extract(self, text: str) -> List[ActionItem]:
        """Run LLM extraction if a provider is configured, else regex matching."""
        if self.pm:
            return self._llm_extract(text)
        return self._pattern_extract(text)

    def _llm_extract(self, text: str) -> List[ActionItem]:
        """
        Extract action items using the LLM.

        Returns an empty list when no provider is configured, when the
        text is empty or blank, when the response is not a JSON list, or
        when the call or parsing raises (the error is logged as a warning).
        Entries that are not dicts or have no truthy ``action`` are dropped.
        """
        if not self.pm:
            return []
        # Nothing to extract; avoid a pointless chat request.
        if not text or not text.strip():
            return []

        prompt = (
            "Extract all action items, tasks, and commitments "
            "from the following text.\n\n"
            f"TEXT:\n{text[:self._MAX_PROMPT_CHARS]}\n\n"
            "Return a JSON array:\n"
            '[{"action": "...", "assignee": "...", "deadline": "...", '
            '"priority": "...", "context": "..."}]\n\n'
            "Only include clear, actionable items. "
            "Set fields to null if not mentioned.\n"
            "Return ONLY the JSON array."
        )

        try:
            raw = self.pm.chat(
                [{"role": "user", "content": prompt}],
                temperature=0.3,
            )
            parsed = parse_json_from_response(raw)
            if isinstance(parsed, list):
                return [
                    ActionItem(
                        action=item.get("action", ""),
                        assignee=item.get("assignee"),
                        deadline=item.get("deadline"),
                        priority=item.get("priority"),
                        context=item.get("context"),
                        source="transcript",
                    )
                    for item in parsed
                    if isinstance(item, dict) and item.get("action")
                ]
        except Exception as e:
            # Deliberately broad: extraction is best-effort and must not
            # abort the caller's pipeline.
            logger.warning("LLM action extraction failed: %s", e)

        return []

    def _pattern_extract(self, text: str) -> List[ActionItem]:
        """
        Extract action items using regex pattern matching.

        The text is split into sentences on ``.``, ``!`` or ``?`` followed
        by whitespace. Sentences shorter than 10 characters are ignored;
        every remaining sentence matching at least one of
        ``_ACTION_PATTERNS`` becomes one item with ``source="transcript"``.
        Because the split requires trailing whitespace, the final sentence
        keeps any terminating punctuation.
        """
        items: List[ActionItem] = []
        if not text:
            return items

        for sentence in re.split(r"[.!?]\s+", text):
            sentence = sentence.strip()
            if len(sentence) < 10:
                continue

            # One match per sentence is enough
            if any(pattern.search(sentence) for pattern in _ACTION_PATTERNS):
                items.append(
                    ActionItem(
                        action=sentence,
                        source="transcript",
                    )
                )

        return items

    def _attach_timestamps(
        self,
        items: List[ActionItem],
        segments: List[TranscriptSegment],
    ) -> None:
        """
        Annotate action items with the start time of the best-matching segment.

        For each item, the segment sharing the most lower-cased,
        whitespace-separated words with the action text is chosen (the
        earliest such segment on ties). If at least 3 words are shared
        and the item has no context yet, its ``context`` is set to
        ``"at <start>s"``. Items are modified in place; existing context
        is never overwritten.
        """
        if not items:
            return

        # Tokenize every segment once instead of once per action item.
        segment_words = [(seg, set(seg.text.lower().split())) for seg in segments]

        for item in items:
            # The match is only used to fill an empty context.
            if item.context:
                continue

            action_words = set(item.action.lower().split())
            best_overlap = 0
            best_segment = None

            for seg, words in segment_words:
                overlap = len(action_words & words)
                if overlap > best_overlap:
                    best_overlap = overlap
                    best_segment = seg

            if best_segment is not None and best_overlap >= 3:
                item.context = f"at {best_segment.start:.0f}s"
199

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button