|
ccf32cc…
|
leo
|
1 |
"""Enhanced action item detection from transcripts and diagrams.""" |
|
ccf32cc…
|
leo
|
2 |
|
|
ccf32cc…
|
leo
|
3 |
import logging |
|
ccf32cc…
|
leo
|
4 |
import re |
|
ccf32cc…
|
leo
|
5 |
from typing import List, Optional |
|
ccf32cc…
|
leo
|
6 |
|
|
ccf32cc…
|
leo
|
7 |
from video_processor.models import ActionItem, TranscriptSegment |
|
ccf32cc…
|
leo
|
8 |
from video_processor.providers.manager import ProviderManager |
|
ccf32cc…
|
leo
|
9 |
from video_processor.utils.json_parsing import parse_json_from_response |
|
ccf32cc…
|
leo
|
10 |
|
|
ccf32cc…
|
leo
|
11 |
logger = logging.getLogger(__name__)

# Heuristic patterns that indicate action items in natural language.
# Each pattern is searched independently and case-insensitively; a sentence
# counts as an action item as soon as any one of them matches.
_ACTION_PATTERNS = [
    # Obligation: "need to", "needs to"
    re.compile(r"\b(?:need|needs)\s+to\b", re.IGNORECASE),
    # Modal obligation followed by a word: "should review", "must ship"
    re.compile(r"\b(?:should|must|shall)\s+\w+", re.IGNORECASE),
    # Stated intent: "will send", "going to deploy"
    re.compile(r"\b(?:will|going\s+to)\s+\w+", re.IGNORECASE),
    # Explicit task vocabulary: "action item", "todo", "to-do", "follow up"
    re.compile(r"\b(?:action\s+item|todo|to-do|follow[\s-]?up)\b", re.IGNORECASE),
    # Ownership: "assign to", "assigned to", "responsible for".
    # The "ed" suffix is optional as a group; the previous form
    # ("assigned?") made only the trailing "d" optional and therefore
    # never matched the plain verb "assign to".
    re.compile(r"\b(?:assign(?:ed)?\s+to|responsible\s+for)\b", re.IGNORECASE),
    # Time constraints: "deadline", "due date", "due by"
    re.compile(r"\b(?:deadline|due\s+(?:date|by))\b", re.IGNORECASE),
    # Group proposals: "let's", "lets", "let us"
    re.compile(r"\b(?:let'?s|let\s+us)\s+\w+", re.IGNORECASE),
    # Verification duties: "make sure", "ensure"
    re.compile(r"\b(?:make\s+sure|ensure)\b", re.IGNORECASE),
    # Requests: "can you ...", "could you ...", "please ..."
    re.compile(r"\b(?:can\s+you|could\s+you|please)\s+\w+", re.IGNORECASE),
]
|
ccf32cc…
|
leo
|
25 |
|
|
ccf32cc…
|
leo
|
26 |
|
|
ccf32cc…
|
leo
|
27 |
class ActionDetector:
    """Detects action items from transcripts using heuristics and LLM.

    When a provider manager is supplied, extraction is delegated to the LLM
    through ``provider_manager.chat``; otherwise the module-level regex
    patterns are applied sentence by sentence.
    """

    def __init__(self, provider_manager: Optional[ProviderManager] = None):
        """Store the optional provider manager used for LLM extraction."""
        self.pm = provider_manager

    def detect_from_transcript(
        self,
        text: str,
        segments: Optional[List[TranscriptSegment]] = None,
    ) -> List[ActionItem]:
        """
        Detect action items from transcript text.

        Uses LLM extraction when a provider manager is configured and regex
        pattern matching otherwise. When ``segments`` is given and at least
        one item was found, the items are annotated in place with the start
        time of the best-matching segment.

        Args:
            text: Full transcript text to analyse.
            segments: Optional transcript segments used to attach timestamps.

        Returns:
            The detected action items (possibly empty).
        """
        if self.pm:
            items = self._llm_extract(text)
        else:
            items = self._pattern_extract(text)

        # Attach timestamps from segments if available
        if segments and items:
            self._attach_timestamps(items, segments)

        return items

    def detect_from_diagrams(
        self,
        diagrams: list,
    ) -> List[ActionItem]:
        """
        Extract action items mentioned in diagram text content.

        Each diagram may be a dict or an object; in both cases the
        ``text_content`` and ``elements`` fields are read, joined into one
        string and run through the same extraction path as transcripts.
        Diagrams without any text are skipped.

        Args:
            diagrams: Diagram dicts or objects exposing ``text_content`` and
                ``elements``.

        Returns:
            All detected items, each with ``source`` set to ``"diagram"``.
        """
        items: List[ActionItem] = []

        for diagram in diagrams:
            if isinstance(diagram, dict):
                text = diagram.get("text_content", "") or ""
                # A present-but-None "elements" value must not break the join.
                elements = diagram.get("elements", []) or []
            else:
                text = getattr(diagram, "text_content", "") or ""
                elements = getattr(diagram, "elements", []) or []

            combined = text + " " + " ".join(str(e) for e in elements)
            if not combined.strip():
                continue

            if self.pm:
                diagram_items = self._llm_extract(combined)
            else:
                diagram_items = self._pattern_extract(combined)

            # Both extractors label items as "transcript"; relabel them here.
            for item in diagram_items:
                item.source = "diagram"
            items.extend(diagram_items)

        return items

    def merge_action_items(
        self,
        transcript_items: List[ActionItem],
        diagram_items: List[ActionItem],
    ) -> List[ActionItem]:
        """
        Merge action items from transcript and diagram sources.

        All transcript items are kept as-is. A diagram item is appended only
        if its action text (lower-cased, surrounding whitespace stripped) does
        not equal that of an item already in the result.

        Args:
            transcript_items: Items detected in the transcript.
            diagram_items: Items detected in diagrams.

        Returns:
            A new list; neither input list is modified.
        """
        merged: List[ActionItem] = list(transcript_items)
        existing_actions = {a.action.lower().strip() for a in merged}

        for item in diagram_items:
            normalized = item.action.lower().strip()
            if normalized not in existing_actions:
                merged.append(item)
                existing_actions.add(normalized)

        return merged

    def _llm_extract(self, text: str) -> List[ActionItem]:
        """Extract action items using LLM.

        Only the first 8000 characters of ``text`` are sent. The response is
        parsed with ``parse_json_from_response``; entries that are not dicts
        or have no truthy ``"action"`` value are dropped.

        Returns:
            The parsed items, or an empty list when no provider manager is
            set, the response is not a JSON list, or any error occurs.
        """
        if not self.pm:
            return []

        prompt = (
            "Extract all action items, tasks, and commitments "
            "from the following text.\n\n"
            f"TEXT:\n{text[:8000]}\n\n"
            "Return a JSON array:\n"
            '[{"action": "...", "assignee": "...", "deadline": "...", '
            '"priority": "...", "context": "..."}]\n\n'
            "Only include clear, actionable items. "
            "Set fields to null if not mentioned.\n"
            "Return ONLY the JSON array."
        )

        # Best-effort boundary: a failed LLM call or a malformed response is
        # logged and yields no items instead of propagating to the caller.
        try:
            raw = self.pm.chat(
                [{"role": "user", "content": prompt}],
                temperature=0.3,
            )
            parsed = parse_json_from_response(raw)
            if isinstance(parsed, list):
                return [
                    ActionItem(
                        action=item.get("action", ""),
                        assignee=item.get("assignee"),
                        deadline=item.get("deadline"),
                        priority=item.get("priority"),
                        context=item.get("context"),
                        source="transcript",
                    )
                    for item in parsed
                    if isinstance(item, dict) and item.get("action")
                ]
        except Exception as e:
            logger.warning("LLM action extraction failed: %s", e)

        return []

    def _pattern_extract(self, text: str) -> List[ActionItem]:
        """Extract action items using regex pattern matching.

        The text is split on sentence-ending punctuation followed by
        whitespace. Every sentence of at least 10 characters that matches any
        entry of ``_ACTION_PATTERNS`` yields exactly one item whose action is
        the stripped sentence.
        """
        items: List[ActionItem] = []

        for sentence in re.split(r"[.!?]\s+", text):
            sentence = sentence.strip()
            if len(sentence) < 10:
                continue

            # One match per sentence is enough
            if any(pattern.search(sentence) for pattern in _ACTION_PATTERNS):
                items.append(
                    ActionItem(
                        action=sentence,
                        source="transcript",
                    )
                )

        return items

    def _attach_timestamps(
        self,
        items: List[ActionItem],
        segments: List[TranscriptSegment],
    ) -> None:
        """Attach timestamps to action items by finding matching segments.

        For each item without a context, the segment sharing the most
        whitespace-separated, lower-cased words with the action text is
        selected (the earliest segment wins ties). If at least three words
        overlap, the item's context is set to ``"at <start>s"`` with the
        segment start rounded to whole seconds. Items are modified in place.
        """
        for item in items:
            # Only an empty context is ever filled in, so skip the search
            # entirely for items that already have one.
            if item.context:
                continue

            # Invariant across segments: compute the action word set once.
            action_words = set(item.action.lower().split())
            best_overlap = 0
            best_segment = None

            for seg in segments:
                overlap = len(action_words & set(seg.text.lower().split()))
                if overlap > best_overlap:
                    best_overlap = overlap
                    best_segment = seg

            if best_segment is not None and best_overlap >= 3:
                item.context = f"at {best_segment.start:.0f}s"