|
1
|
"""Enhanced action item detection from transcripts and diagrams.""" |
|
2
|
|
|
3
|
import logging |
|
4
|
import re |
|
5
|
from typing import List, Optional |
|
6
|
|
|
7
|
from video_processor.models import ActionItem, TranscriptSegment |
|
8
|
from video_processor.providers.manager import ProviderManager |
|
9
|
from video_processor.utils.json_parsing import parse_json_from_response |
|
10
|
|
|
11
|
logger = logging.getLogger(__name__) |
|
12
|
|
|
13
|
# Patterns that indicate action items in natural language |
|
14
|
_ACTION_PATTERNS = [ |
|
15
|
re.compile(r"\b(?:need|needs)\s+to\b", re.IGNORECASE), |
|
16
|
re.compile(r"\b(?:should|must|shall)\s+\w+", re.IGNORECASE), |
|
17
|
re.compile(r"\b(?:will|going\s+to)\s+\w+", re.IGNORECASE), |
|
18
|
re.compile(r"\b(?:action\s+item|todo|to-do|follow[\s-]?up)\b", re.IGNORECASE), |
|
19
|
re.compile(r"\b(?:assigned?\s+to|responsible\s+for)\b", re.IGNORECASE), |
|
20
|
re.compile(r"\b(?:deadline|due\s+(?:date|by))\b", re.IGNORECASE), |
|
21
|
re.compile(r"\b(?:let'?s|let\s+us)\s+\w+", re.IGNORECASE), |
|
22
|
re.compile(r"\b(?:make\s+sure|ensure)\b", re.IGNORECASE), |
|
23
|
re.compile(r"\b(?:can\s+you|could\s+you|please)\s+\w+", re.IGNORECASE), |
|
24
|
] |
|
25
|
|
|
26
|
|
|
27
|
class ActionDetector: |
|
28
|
"""Detects action items from transcripts using heuristics and LLM.""" |
|
29
|
|
|
30
|
def __init__(self, provider_manager: Optional[ProviderManager] = None): |
|
31
|
self.pm = provider_manager |
|
32
|
|
|
33
|
def detect_from_transcript( |
|
34
|
self, |
|
35
|
text: str, |
|
36
|
segments: Optional[List[TranscriptSegment]] = None, |
|
37
|
) -> List[ActionItem]: |
|
38
|
""" |
|
39
|
Detect action items from transcript text. |
|
40
|
|
|
41
|
Uses LLM extraction when available, falls back to pattern matching. |
|
42
|
Segments are used to attach timestamps. |
|
43
|
""" |
|
44
|
if self.pm: |
|
45
|
items = self._llm_extract(text) |
|
46
|
else: |
|
47
|
items = self._pattern_extract(text) |
|
48
|
|
|
49
|
# Attach timestamps from segments if available |
|
50
|
if segments and items: |
|
51
|
self._attach_timestamps(items, segments) |
|
52
|
|
|
53
|
return items |
|
54
|
|
|
55
|
def detect_from_diagrams( |
|
56
|
self, |
|
57
|
diagrams: list, |
|
58
|
) -> List[ActionItem]: |
|
59
|
""" |
|
60
|
Extract action items mentioned in diagram text content. |
|
61
|
|
|
62
|
Looks for action-oriented language in diagram text/elements. |
|
63
|
""" |
|
64
|
items: List[ActionItem] = [] |
|
65
|
|
|
66
|
for diagram in diagrams: |
|
67
|
text = "" |
|
68
|
if isinstance(diagram, dict): |
|
69
|
text = diagram.get("text_content", "") or "" |
|
70
|
elements = diagram.get("elements", []) |
|
71
|
else: |
|
72
|
text = getattr(diagram, "text_content", "") or "" |
|
73
|
elements = getattr(diagram, "elements", []) |
|
74
|
|
|
75
|
combined = text + " " + " ".join(str(e) for e in elements) |
|
76
|
if not combined.strip(): |
|
77
|
continue |
|
78
|
|
|
79
|
if self.pm: |
|
80
|
diagram_items = self._llm_extract(combined) |
|
81
|
else: |
|
82
|
diagram_items = self._pattern_extract(combined) |
|
83
|
|
|
84
|
for item in diagram_items: |
|
85
|
item.source = "diagram" |
|
86
|
items.extend(diagram_items) |
|
87
|
|
|
88
|
return items |
|
89
|
|
|
90
|
def merge_action_items( |
|
91
|
self, |
|
92
|
transcript_items: List[ActionItem], |
|
93
|
diagram_items: List[ActionItem], |
|
94
|
) -> List[ActionItem]: |
|
95
|
""" |
|
96
|
Merge action items from transcript and diagram sources. |
|
97
|
|
|
98
|
Deduplicates by checking for similar action text. |
|
99
|
""" |
|
100
|
merged: List[ActionItem] = list(transcript_items) |
|
101
|
existing_actions = {a.action.lower().strip() for a in merged} |
|
102
|
|
|
103
|
for item in diagram_items: |
|
104
|
normalized = item.action.lower().strip() |
|
105
|
if normalized not in existing_actions: |
|
106
|
merged.append(item) |
|
107
|
existing_actions.add(normalized) |
|
108
|
|
|
109
|
return merged |
|
110
|
|
|
111
|
def _llm_extract(self, text: str) -> List[ActionItem]: |
|
112
|
"""Extract action items using LLM.""" |
|
113
|
if not self.pm: |
|
114
|
return [] |
|
115
|
|
|
116
|
prompt = ( |
|
117
|
"Extract all action items, tasks, and commitments " |
|
118
|
"from the following text.\n\n" |
|
119
|
f"TEXT:\n{text[:8000]}\n\n" |
|
120
|
"Return a JSON array:\n" |
|
121
|
'[{"action": "...", "assignee": "...", "deadline": "...", ' |
|
122
|
'"priority": "...", "context": "..."}]\n\n' |
|
123
|
"Only include clear, actionable items. " |
|
124
|
"Set fields to null if not mentioned.\n" |
|
125
|
"Return ONLY the JSON array." |
|
126
|
) |
|
127
|
|
|
128
|
try: |
|
129
|
raw = self.pm.chat( |
|
130
|
[{"role": "user", "content": prompt}], |
|
131
|
temperature=0.3, |
|
132
|
) |
|
133
|
parsed = parse_json_from_response(raw) |
|
134
|
if isinstance(parsed, list): |
|
135
|
return [ |
|
136
|
ActionItem( |
|
137
|
action=item.get("action", ""), |
|
138
|
assignee=item.get("assignee"), |
|
139
|
deadline=item.get("deadline"), |
|
140
|
priority=item.get("priority"), |
|
141
|
context=item.get("context"), |
|
142
|
source="transcript", |
|
143
|
) |
|
144
|
for item in parsed |
|
145
|
if isinstance(item, dict) and item.get("action") |
|
146
|
] |
|
147
|
except Exception as e: |
|
148
|
logger.warning(f"LLM action extraction failed: {e}") |
|
149
|
|
|
150
|
return [] |
|
151
|
|
|
152
|
def _pattern_extract(self, text: str) -> List[ActionItem]: |
|
153
|
"""Extract action items using regex pattern matching.""" |
|
154
|
items: List[ActionItem] = [] |
|
155
|
sentences = re.split(r"[.!?]\s+", text) |
|
156
|
|
|
157
|
for sentence in sentences: |
|
158
|
sentence = sentence.strip() |
|
159
|
if not sentence or len(sentence) < 10: |
|
160
|
continue |
|
161
|
|
|
162
|
for pattern in _ACTION_PATTERNS: |
|
163
|
if pattern.search(sentence): |
|
164
|
items.append( |
|
165
|
ActionItem( |
|
166
|
action=sentence, |
|
167
|
source="transcript", |
|
168
|
) |
|
169
|
) |
|
170
|
break # One match per sentence is enough |
|
171
|
|
|
172
|
return items |
|
173
|
|
|
174
|
def _attach_timestamps( |
|
175
|
self, |
|
176
|
items: List[ActionItem], |
|
177
|
segments: List[TranscriptSegment], |
|
178
|
) -> None: |
|
179
|
"""Attach timestamps to action items by finding matching segments.""" |
|
180
|
for item in items: |
|
181
|
action_lower = item.action.lower() |
|
182
|
best_overlap = 0 |
|
183
|
best_segment = None |
|
184
|
|
|
185
|
for seg in segments: |
|
186
|
seg_lower = seg.text.lower() |
|
187
|
# Check word overlap |
|
188
|
action_words = set(action_lower.split()) |
|
189
|
seg_words = set(seg_lower.split()) |
|
190
|
overlap = len(action_words & seg_words) |
|
191
|
|
|
192
|
if overlap > best_overlap: |
|
193
|
best_overlap = overlap |
|
194
|
best_segment = seg |
|
195
|
|
|
196
|
if best_segment and best_overlap >= 3: |
|
197
|
if not item.context: |
|
198
|
item.context = f"at {best_segment.start:.0f}s" |
|
199
|
|