PlanOpticon

planopticon / video_processor / analyzers / action_detector.py
Source Blame History 198 lines
ccf32cc… leo 1 """Enhanced action item detection from transcripts and diagrams."""
ccf32cc… leo 2
ccf32cc… leo 3 import logging
ccf32cc… leo 4 import re
ccf32cc… leo 5 from typing import List, Optional
ccf32cc… leo 6
ccf32cc… leo 7 from video_processor.models import ActionItem, TranscriptSegment
ccf32cc… leo 8 from video_processor.providers.manager import ProviderManager
ccf32cc… leo 9 from video_processor.utils.json_parsing import parse_json_from_response
ccf32cc… leo 10
ccf32cc… leo 11 logger = logging.getLogger(__name__)
ccf32cc… leo 12
# Patterns that indicate action items in natural language.
# Every pattern is compiled case-insensitively; a sentence counts as an action
# item candidate as soon as any one of them matches.
_ACTION_PATTERNS = [
    # Obligation: "need to", "needs to"
    re.compile(r"\b(?:need|needs)\s+to\b", re.IGNORECASE),
    # Modal verb followed by another word: "should review", "must fix"
    re.compile(r"\b(?:should|must|shall)\s+\w+", re.IGNORECASE),
    # Stated intent: "will send", "going to deploy"
    re.compile(r"\b(?:will|going\s+to)\s+\w+", re.IGNORECASE),
    # Explicit markers: "action item", "todo", "to-do", "follow up", "follow-up", "followup"
    re.compile(r"\b(?:action\s+item|todo|to-do|follow[\s-]?up)\b", re.IGNORECASE),
    # Ownership: "assign to", "assigned to", "responsible for".
    # The optional group covers the whole "ed" suffix; a bare `assigned?` would
    # only make the final "d" optional and therefore miss "assign to".
    re.compile(r"\b(?:assign(?:ed)?\s+to|responsible\s+for)\b", re.IGNORECASE),
    # Time constraints: "deadline", "due date", "due by"
    re.compile(r"\b(?:deadline|due\s+(?:date|by))\b", re.IGNORECASE),
    # Group proposals: "let's ...", "lets ...", "let us ..."
    re.compile(r"\b(?:let'?s|let\s+us)\s+\w+", re.IGNORECASE),
    # Verification requests: "make sure", "ensure"
    re.compile(r"\b(?:make\s+sure|ensure)\b", re.IGNORECASE),
    # Polite requests: "can you ...", "could you ...", "please ..."
    re.compile(r"\b(?:can\s+you|could\s+you|please)\s+\w+", re.IGNORECASE),
]
ccf32cc… leo 25
ccf32cc… leo 26
class ActionDetector:
    """Detects action items from transcripts using heuristics and LLM.

    When a provider manager is supplied, extraction is delegated to an LLM via
    ``provider_manager.chat``. Without one, sentences are matched against the
    module-level ``_ACTION_PATTERNS`` regular expressions.
    """

    # Only this many leading characters of the input text are embedded in the
    # LLM prompt; text beyond this limit is not sent to the model.
    _MAX_PROMPT_CHARS = 8000

    def __init__(self, provider_manager: Optional[ProviderManager] = None):
        """Store the optional provider manager used for LLM extraction."""
        self.pm = provider_manager

    def detect_from_transcript(
        self,
        text: str,
        segments: Optional[List[TranscriptSegment]] = None,
    ) -> List[ActionItem]:
        """
        Detect action items from transcript text.

        Uses LLM extraction when a provider manager is configured; otherwise
        uses pattern matching. Note that a failed LLM call yields an empty
        list rather than triggering the pattern-matching path.

        Args:
            text: Full transcript text to analyse.
            segments: Optional transcript segments; when given (and at least
                one item was found) they are used to attach timestamps.

        Returns:
            The detected action items (possibly empty).
        """
        if self.pm:
            items = self._llm_extract(text)
        else:
            items = self._pattern_extract(text)

        # Attach timestamps from segments if available
        if segments and items:
            self._attach_timestamps(items, segments)

        return items

    def detect_from_diagrams(
        self,
        diagrams: list,
    ) -> List[ActionItem]:
        """
        Extract action items mentioned in diagram text content.

        Each diagram may be a dict or an object; in both cases the
        ``text_content`` and ``elements`` entries/attributes are read, the
        elements are stringified, and the combined text is run through the
        same extraction path as transcripts. Missing or ``None`` values for
        either field are treated as empty.

        Args:
            diagrams: Diagram dicts or objects to inspect.

        Returns:
            Action items found across all diagrams, each with ``source`` set
            to ``"diagram"``.
        """
        items: List[ActionItem] = []

        for diagram in diagrams:
            if isinstance(diagram, dict):
                text = diagram.get("text_content", "") or ""
                # `or []` mirrors the text guard: an explicit None must not
                # reach the join below, where iterating it would raise.
                elements = diagram.get("elements", []) or []
            else:
                text = getattr(diagram, "text_content", "") or ""
                elements = getattr(diagram, "elements", []) or []

            combined = text + " " + " ".join(str(e) for e in elements)
            if not combined.strip():
                continue

            if self.pm:
                diagram_items = self._llm_extract(combined)
            else:
                diagram_items = self._pattern_extract(combined)

            # Both extractors label items "transcript"; relabel them here.
            for item in diagram_items:
                item.source = "diagram"
            items.extend(diagram_items)

        return items

    def merge_action_items(
        self,
        transcript_items: List[ActionItem],
        diagram_items: List[ActionItem],
    ) -> List[ActionItem]:
        """
        Merge action items from transcript and diagram sources.

        All transcript items are kept as-is. A diagram item is appended only
        if its action text, compared case-insensitively and ignoring
        surrounding whitespace, has not been seen yet.

        Args:
            transcript_items: Items extracted from the transcript.
            diagram_items: Items extracted from diagrams.

        Returns:
            A new list: transcript items followed by the non-duplicate
            diagram items, in their original order.
        """
        merged: List[ActionItem] = list(transcript_items)
        existing_actions = {a.action.lower().strip() for a in merged}

        for item in diagram_items:
            normalized = item.action.lower().strip()
            if normalized not in existing_actions:
                merged.append(item)
                existing_actions.add(normalized)

        return merged

    def _llm_extract(self, text: str) -> List[ActionItem]:
        """Extract action items using LLM.

        Sends a single user message asking for a JSON array of action items
        and converts each dict entry with a non-empty ``"action"`` value into
        an ``ActionItem``. Any exception raised by the call, the parsing or
        the item construction is logged as a warning and results in an empty
        list; so does a response that does not parse to a list.
        """
        if not self.pm:
            return []

        prompt = (
            "Extract all action items, tasks, and commitments "
            "from the following text.\n\n"
            f"TEXT:\n{text[: self._MAX_PROMPT_CHARS]}\n\n"
            "Return a JSON array:\n"
            '[{"action": "...", "assignee": "...", "deadline": "...", '
            '"priority": "...", "context": "..."}]\n\n'
            "Only include clear, actionable items. "
            "Set fields to null if not mentioned.\n"
            "Return ONLY the JSON array."
        )

        try:
            raw = self.pm.chat(
                [{"role": "user", "content": prompt}],
                temperature=0.3,
            )
            parsed = parse_json_from_response(raw)
            if isinstance(parsed, list):
                return [
                    ActionItem(
                        action=item.get("action", ""),
                        assignee=item.get("assignee"),
                        deadline=item.get("deadline"),
                        priority=item.get("priority"),
                        context=item.get("context"),
                        source="transcript",
                    )
                    for item in parsed
                    if isinstance(item, dict) and item.get("action")
                ]
        except Exception as e:
            # Best-effort boundary: extraction problems must not abort the
            # caller, so they are logged and an empty result is returned.
            logger.warning("LLM action extraction failed: %s", e)

        return []

    def _pattern_extract(self, text: str) -> List[ActionItem]:
        """Extract action items using regex pattern matching.

        The text is split after sentence-ending punctuation followed by
        whitespace; every fragment of at least 10 characters that matches any
        entry of ``_ACTION_PATTERNS`` becomes one ``ActionItem`` whose action
        is the fragment itself.
        """
        items: List[ActionItem] = []
        sentences = re.split(r"[.!?]\s+", text)

        for sentence in sentences:
            sentence = sentence.strip()
            # Very short fragments (including empty ones) are ignored.
            if len(sentence) < 10:
                continue

            # One match per sentence is enough
            if any(pattern.search(sentence) for pattern in _ACTION_PATTERNS):
                items.append(
                    ActionItem(
                        action=sentence,
                        source="transcript",
                    )
                )

        return items

    def _attach_timestamps(
        self,
        items: List[ActionItem],
        segments: List[TranscriptSegment],
    ) -> None:
        """Attach timestamps to action items by finding matching segments.

        For each item without a context, the segment sharing the most
        whitespace-separated, lower-cased words with the action text is
        selected (the earliest segment wins ties). If at least three words
        overlap, the item's ``context`` is set to ``"at <start>s"`` with the
        segment start rounded to whole seconds. Items are modified in place.
        """
        for item in items:
            # The timestamp is only ever written into an empty context, so an
            # item that already has one needs no segment search.
            if item.context:
                continue

            # Invariant across segments, so build the word set once per item.
            action_words = set(item.action.lower().split())
            best_overlap = 0
            best_segment = None

            for seg in segments:
                overlap = len(action_words & set(seg.text.lower().split()))
                if overlap > best_overlap:
                    best_overlap = overlap
                    best_segment = seg

            # Require a minimum of three shared words to avoid chance matches.
            if best_segment is not None and best_overlap >= 3:
                item.context = f"at {best_segment.start:.0f}s"

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button