PlanOpticon

planopticon / video_processor / analyzers / action_detector.py

Source Blame History 198 lines

ccf32cc…	leo	1	"""Enhanced action item detection from transcripts and diagrams."""
ccf32cc…	leo	2
ccf32cc…	leo	3	import logging
ccf32cc…	leo	4	import re
ccf32cc…	leo	5	from typing import List, Optional
ccf32cc…	leo	6
ccf32cc…	leo	7	from video_processor.models import ActionItem, TranscriptSegment
ccf32cc…	leo	8	from video_processor.providers.manager import ProviderManager
ccf32cc…	leo	9	from video_processor.utils.json_parsing import parse_json_from_response
ccf32cc…	leo	10
logger = logging.getLogger(__name__)

# Patterns that indicate action items in natural language.
#
# Each pattern is compiled once at import time and matched case-insensitively
# against a single sentence. Alternatives inside the groups are separated by a
# bare ``|``; an escaped ``\|`` would match a literal pipe character and the
# pattern would never fire on ordinary prose.
_ACTION_PATTERNS = [
    # Obligation: "need to", "needs to"
    re.compile(r"\b(?:need|needs)\s+to\b", re.IGNORECASE),
    # Modal obligation followed by a verb: "should review", "must ship"
    re.compile(r"\b(?:should|must|shall)\s+\w+", re.IGNORECASE),
    # Commitment: "will send", "going to fix"
    re.compile(r"\b(?:will|going\s+to)\s+\w+", re.IGNORECASE),
    # Explicit markers: "action item", "todo", "to-do", "follow up"/"follow-up"/"followup"
    re.compile(r"\b(?:action\s+item|todo|to-do|follow[\s-]?up)\b", re.IGNORECASE),
    # Ownership: "assign to", "assigned to", "responsible for"
    re.compile(r"\b(?:assigned?\s+to|responsible\s+for)\b", re.IGNORECASE),
    # Deadlines: "deadline", "due date", "due by"
    re.compile(r"\b(?:deadline|due\s+(?:date|by))\b", re.IGNORECASE),
    # Proposals: "let's schedule", "lets schedule", "let us schedule"
    re.compile(r"\b(?:let'?s|let\s+us)\s+\w+", re.IGNORECASE),
    # Verification: "make sure", "ensure"
    re.compile(r"\b(?:make\s+sure|ensure)\b", re.IGNORECASE),
    # Requests: "can you send", "could you check", "please update"
    re.compile(r"\b(?:can\s+you|could\s+you|please)\s+\w+", re.IGNORECASE),
]
ccf32cc…	leo	25
ccf32cc…	leo	26
class ActionDetector:
    """Detects action items from transcripts using heuristics and LLM.

    When a provider manager is supplied, extraction is delegated to the LLM
    through ``provider_manager.chat``. Without one, a regex heuristic is
    applied sentence by sentence using the module-level ``_ACTION_PATTERNS``.
    """

    # Maximum number of characters of input text embedded in the LLM prompt;
    # anything beyond this is not sent to the model.
    _MAX_PROMPT_CHARS = 8000

    # Minimum number of distinct words an action must share with a transcript
    # segment before that segment's start time is attached to the action.
    _MIN_WORD_OVERLAP = 3

    def __init__(self, provider_manager: Optional[ProviderManager] = None):
        """Store the optional provider manager used for LLM extraction.

        Args:
            provider_manager: Object exposing ``chat(messages, temperature=...)``.
                When ``None`` (or otherwise falsy), only pattern matching is used.
        """
        self.pm = provider_manager

    def detect_from_transcript(
        self,
        text: str,
        segments: Optional[List[TranscriptSegment]] = None,
    ) -> List[ActionItem]:
        """
        Detect action items from transcript text.

        Uses LLM extraction when a provider manager is configured, otherwise
        pattern matching. When ``segments`` are given, each item without a
        context gets an ``"at <N>s"`` context taken from the best-matching
        segment's ``start``.

        Args:
            text: Full transcript text.
            segments: Optional transcript segments (each read for ``text``
                and ``start``) used to attach timestamps.

        Returns:
            The detected action items (possibly empty).
        """
        items = self._extract(text)

        # Attach timestamps from segments if available
        if segments and items:
            self._attach_timestamps(items, segments)

        return items

    def detect_from_diagrams(
        self,
        diagrams: list,
    ) -> List[ActionItem]:
        """
        Extract action items mentioned in diagram text content.

        Each diagram may be a dict or an object; its ``text_content`` and
        ``elements`` are joined into one string and run through the same
        extraction as transcripts. Every resulting item has its ``source``
        set to ``"diagram"``.

        Args:
            diagrams: Diagrams to scan. ``None`` or an empty list yields ``[]``.

        Returns:
            Action items found across all diagrams, in diagram order.
        """
        items: List[ActionItem] = []

        for diagram in diagrams or []:
            if isinstance(diagram, dict):
                text = diagram.get("text_content")
                elements = diagram.get("elements")
            else:
                text = getattr(diagram, "text_content", None)
                elements = getattr(diagram, "elements", None)

            # Either field may be absent or explicitly None; normalise both so
            # the join below never iterates over None.
            text = text or ""
            elements = elements or []

            combined = text + " " + " ".join(str(e) for e in elements)
            if not combined.strip():
                continue

            diagram_items = self._extract(combined)
            for item in diagram_items:
                item.source = "diagram"
            items.extend(diagram_items)

        return items

    def merge_action_items(
        self,
        transcript_items: List[ActionItem],
        diagram_items: List[ActionItem],
    ) -> List[ActionItem]:
        """
        Merge action items from transcript and diagram sources.

        All transcript items are kept as-is. A diagram item is appended only
        if its action text (lower-cased, surrounding whitespace stripped) has
        not been seen yet, so duplicates among diagram items are dropped too.

        Args:
            transcript_items: Items detected in the transcript.
            diagram_items: Items detected in diagrams.

        Returns:
            A new list; neither input list is modified.
        """
        merged: List[ActionItem] = list(transcript_items)
        seen = {existing.action.lower().strip() for existing in merged}

        for item in diagram_items:
            normalized = item.action.lower().strip()
            if normalized not in seen:
                merged.append(item)
                seen.add(normalized)

        return merged

    def _extract(self, text: str) -> List[ActionItem]:
        """Run LLM extraction if a provider is configured, else pattern matching."""
        if self.pm:
            return self._llm_extract(text)
        return self._pattern_extract(text)

    def _llm_extract(self, text: str) -> List[ActionItem]:
        """Extract action items using LLM.

        Sends at most ``_MAX_PROMPT_CHARS`` characters of ``text`` to the
        provider and parses the reply as a JSON array of objects. This is
        best-effort: any failure is logged and results in fewer (or no)
        items rather than an exception.

        Args:
            text: Text to analyse.

        Returns:
            Items built from well-formed entries; ``[]`` if no provider is
            configured, the call fails, or the reply is not a JSON array.
        """
        if not self.pm:
            return []

        prompt = (
            "Extract all action items, tasks, and commitments "
            "from the following text.\n\n"
            f"TEXT:\n{text[: self._MAX_PROMPT_CHARS]}\n\n"
            "Return a JSON array:\n"
            '[{"action": "...", "assignee": "...", "deadline": "...", '
            '"priority": "...", "context": "..."}]\n\n'
            "Only include clear, actionable items. "
            "Set fields to null if not mentioned.\n"
            "Return ONLY the JSON array."
        )

        try:
            raw = self.pm.chat(
                [{"role": "user", "content": prompt}],
                temperature=0.3,
            )
            parsed = parse_json_from_response(raw)
        except Exception as e:
            # Provider and parsing errors are of unknown type here; extraction
            # is optional, so log and carry on without items.
            logger.warning("LLM action extraction failed: %s", e)
            return []

        if not isinstance(parsed, list):
            logger.warning("LLM action extraction returned a non-list response")
            return []

        items: List[ActionItem] = []
        for entry in parsed:
            if not isinstance(entry, dict):
                continue
            action = entry.get("action")
            # Downstream code calls str methods on the action, so anything
            # that is not a non-blank string is unusable.
            if not isinstance(action, str) or not action.strip():
                continue
            try:
                items.append(
                    ActionItem(
                        action=action,
                        assignee=entry.get("assignee"),
                        deadline=entry.get("deadline"),
                        priority=entry.get("priority"),
                        context=entry.get("context"),
                        source="transcript",
                    )
                )
            except Exception as e:
                # One malformed entry must not discard the valid ones.
                logger.warning("Skipping malformed LLM action item: %s", e)

        return items

    def _pattern_extract(self, text: str) -> List[ActionItem]:
        """Extract action items using regex pattern matching.

        The text is split after ``.``, ``!`` or ``?`` followed by whitespace.
        Fragments shorter than 10 characters are ignored; every remaining
        fragment matching at least one action pattern becomes one item whose
        action is the fragment itself.

        Args:
            text: Text to scan.

        Returns:
            One item per matching sentence, in order of appearance.
        """
        items: List[ActionItem] = []

        for fragment in re.split(r"[.!?]\s+", text):
            sentence = fragment.strip()
            if len(sentence) < 10:
                continue

            # One match per sentence is enough
            if any(pattern.search(sentence) for pattern in _ACTION_PATTERNS):
                items.append(
                    ActionItem(
                        action=sentence,
                        source="transcript",
                    )
                )

        return items

    def _attach_timestamps(
        self,
        items: List[ActionItem],
        segments: List[TranscriptSegment],
    ) -> None:
        """Attach timestamps to action items by finding matching segments.

        For each item that has no context yet, the segment sharing the most
        distinct lower-cased words with the action is selected (the earliest
        one wins ties). If at least ``_MIN_WORD_OVERLAP`` words are shared,
        the item's context is set to ``"at <start>s"`` with ``start`` rounded
        to whole seconds.

        Args:
            items: Items to annotate; modified in place.
            segments: Transcript segments, each read for ``text`` and ``start``.
        """
        min_overlap = self._MIN_WORD_OVERLAP

        # Tokenise every segment once instead of once per action item.
        tokenised = [(seg, set(seg.text.lower().split())) for seg in segments]

        for item in items:
            if item.context:
                # An existing context is never overwritten.
                continue

            action_words = set(item.action.lower().split())
            if len(action_words) < min_overlap:
                # Too few distinct words to ever reach the threshold.
                continue

            best_overlap = 0
            best_segment = None
            for seg, seg_words in tokenised:
                overlap = len(action_words & seg_words)
                if overlap > best_overlap:
                    best_overlap = overlap
                    best_segment = seg

            if best_segment is not None and best_overlap >= min_overlap:
                item.context = f"at {best_segment.start:.0f}s"

PlanOpticon

Keyboard Shortcuts