|
9b34c98…
|
leo
|
1 |
"""Agent orchestrator — intelligent, adaptive video processing.""" |
|
9b34c98…
|
leo
|
2 |
|
|
9b34c98…
|
leo
|
3 |
import json |
|
9b34c98…
|
leo
|
4 |
import logging |
|
9b34c98…
|
leo
|
5 |
import time |
|
9b34c98…
|
leo
|
6 |
from pathlib import Path |
|
9b34c98…
|
leo
|
7 |
from typing import Any, Dict, List, Optional |
|
9b34c98…
|
leo
|
8 |
|
|
9b34c98…
|
leo
|
9 |
from video_processor.models import ( |
|
829e24a…
|
leo
|
10 |
ProcessingStats, |
|
9b34c98…
|
leo
|
11 |
VideoManifest, |
|
9b34c98…
|
leo
|
12 |
VideoMetadata, |
|
9b34c98…
|
leo
|
13 |
) |
|
9b34c98…
|
leo
|
14 |
from video_processor.providers.manager import ProviderManager |
|
9b34c98…
|
leo
|
15 |
|
|
9b34c98…
|
leo
|
16 |
logger = logging.getLogger(__name__) |
|
9b34c98…
|
leo
|
17 |
|
|
9b34c98…
|
leo
|
18 |
|
|
9b34c98…
|
leo
|
19 |
class AgentOrchestrator: |
|
9b34c98…
|
leo
|
20 |
""" |
|
9b34c98…
|
leo
|
21 |
Agentic orchestrator that adaptively processes videos. |
|
9b34c98…
|
leo
|
22 |
|
|
9b34c98…
|
leo
|
23 |
Instead of running a fixed pipeline, the agent: |
|
9b34c98…
|
leo
|
24 |
- Decides processing depth per-video based on content analysis |
|
9b34c98…
|
leo
|
25 |
- Retries failed extractions with alternative strategies |
|
9b34c98…
|
leo
|
26 |
- Chains deeper analysis into sections that matter |
|
9b34c98…
|
leo
|
27 |
- Surfaces insights proactively |
|
9b34c98…
|
leo
|
28 |
""" |
|
9b34c98…
|
leo
|
29 |
|
|
9b34c98…
|
leo
|
30 |
def __init__(
    self,
    provider_manager: Optional[ProviderManager] = None,
    max_retries: int = 2,
):
    """
    Set up the orchestrator and its per-run bookkeeping.

    Args:
        provider_manager: Provider manager used for model calls. When a
            falsy value (such as the default ``None``) is given, a new
            ``ProviderManager()`` is constructed.
        max_retries: Number of attempts made for each plan step.
    """
    # Working state that is filled in while a video is processed.
    self._insights: List[str] = []
    self._results: Dict[str, Any] = {}
    self._plan: List[Dict[str, Any]] = []

    self.max_retries = max_retries

    if provider_manager:
        self.pm = provider_manager
    else:
        self.pm = ProviderManager()
|
9b34c98…
|
leo
|
40 |
|
|
9b34c98…
|
leo
|
41 |
def process(
    self,
    input_path: Path,
    output_dir: Path,
    initial_depth: str = "standard",
    title: Optional[str] = None,
) -> VideoManifest:
    """
    Agentic processing of a single video.

    The agent plans, executes, and adapts based on results.

    Args:
        input_path: Video file to process (coerced to ``Path``).
        output_dir: Directory handed to every step for its artifacts
            (coerced to ``Path``).
        initial_depth: Depth passed to the planner; ``"standard"`` and
            ``"comprehensive"`` enable additional steps there.
        title: Optional manifest title; when omitted a title derived from
            the input file name is used.

    Returns:
        The manifest assembled from the collected step results.
    """
    start_time = time.time()
    input_path = Path(input_path)
    output_dir = Path(output_dir)

    # Start every run from a clean slate. Without this, reusing one
    # orchestrator for several videos would carry step results and insights
    # from the previous video into this run's manifest and report.
    self._results = {}
    self._insights = []

    logger.info(f"Agent processing: {input_path}")

    # Phase 1: Plan
    plan = self._create_plan(input_path, initial_depth)
    logger.info(f"Agent plan: {len(plan)} steps")

    # Phase 2: Execute with adaptation
    # NOTE: ``plan`` is the same list object as ``self._plan``; steps added
    # to it by ``_adapt_plan`` while this loop runs are picked up here.
    for step in plan:
        self._execute_step(step, input_path, output_dir)

    # Phase 3: Reflect and enrich
    self._reflect_and_enrich(output_dir)

    # Phase 4: Build manifest
    elapsed = time.time() - start_time
    manifest = self._build_manifest(input_path, output_dir, title, elapsed)

    logger.info(
        f"Agent complete in {elapsed:.1f}s — "
        f"{len(manifest.diagrams)} diagrams, "
        f"{len(manifest.key_points)} key points, "
        f"{len(manifest.action_items)} action items, "
        f"{len(self._insights)} insights"
    )

    return manifest
|
9b34c98…
|
leo
|
83 |
|
|
9b34c98…
|
leo
|
84 |
def _create_plan(self, input_path: Path, depth: str) -> List[Dict[str, Any]]: |
|
9b34c98…
|
leo
|
85 |
"""Create an adaptive processing plan.""" |
|
9b34c98…
|
leo
|
86 |
plan = [ |
|
9b34c98…
|
leo
|
87 |
{"step": "extract_frames", "priority": "required"}, |
|
9b34c98…
|
leo
|
88 |
{"step": "extract_audio", "priority": "required"}, |
|
9b34c98…
|
leo
|
89 |
{"step": "transcribe", "priority": "required"}, |
|
9b34c98…
|
leo
|
90 |
] |
|
9b34c98…
|
leo
|
91 |
|
|
9b34c98…
|
leo
|
92 |
if depth in ("standard", "comprehensive"): |
|
9b34c98…
|
leo
|
93 |
plan.append({"step": "detect_diagrams", "priority": "standard"}) |
|
9b34c98…
|
leo
|
94 |
plan.append({"step": "build_knowledge_graph", "priority": "standard"}) |
|
9b34c98…
|
leo
|
95 |
|
|
9b34c98…
|
leo
|
96 |
plan.append({"step": "extract_key_points", "priority": "required"}) |
|
9b34c98…
|
leo
|
97 |
plan.append({"step": "extract_action_items", "priority": "required"}) |
|
9b34c98…
|
leo
|
98 |
|
|
9b34c98…
|
leo
|
99 |
if depth == "comprehensive": |
|
9b34c98…
|
leo
|
100 |
plan.append({"step": "deep_analysis", "priority": "comprehensive"}) |
|
9b34c98…
|
leo
|
101 |
plan.append({"step": "cross_reference", "priority": "comprehensive"}) |
|
9b34c98…
|
leo
|
102 |
|
|
9b34c98…
|
leo
|
103 |
plan.append({"step": "generate_reports", "priority": "required"}) |
|
9b34c98…
|
leo
|
104 |
|
|
9b34c98…
|
leo
|
105 |
self._plan = plan |
|
9b34c98…
|
leo
|
106 |
return plan |
|
9b34c98…
|
leo
|
107 |
|
|
829e24a…
|
leo
|
108 |
def _execute_step(self, step: Dict[str, Any], input_path: Path, output_dir: Path) -> None: |
|
9b34c98…
|
leo
|
109 |
"""Execute a single step with retry logic.""" |
|
9b34c98…
|
leo
|
110 |
step_name = step["step"] |
|
9b34c98…
|
leo
|
111 |
logger.info(f"Agent step: {step_name}") |
|
9b34c98…
|
leo
|
112 |
|
|
9b34c98…
|
leo
|
113 |
for attempt in range(1, self.max_retries + 1): |
|
9b34c98…
|
leo
|
114 |
try: |
|
9b34c98…
|
leo
|
115 |
result = self._run_step(step_name, input_path, output_dir) |
|
9b34c98…
|
leo
|
116 |
self._results[step_name] = result |
|
9b34c98…
|
leo
|
117 |
|
|
9b34c98…
|
leo
|
118 |
# Adaptive: check if we should add more steps |
|
9b34c98…
|
leo
|
119 |
self._adapt_plan(step_name, result) |
|
9b34c98…
|
leo
|
120 |
return |
|
9b34c98…
|
leo
|
121 |
|
|
9b34c98…
|
leo
|
122 |
except Exception as e: |
|
9b34c98…
|
leo
|
123 |
logger.warning( |
|
9b34c98…
|
leo
|
124 |
f"Step {step_name} failed (attempt {attempt}/{self.max_retries}): {e}" |
|
9b34c98…
|
leo
|
125 |
) |
|
9b34c98…
|
leo
|
126 |
if attempt == self.max_retries: |
|
9b34c98…
|
leo
|
127 |
logger.error(f"Step {step_name} failed after {self.max_retries} attempts") |
|
9b34c98…
|
leo
|
128 |
self._results[step_name] = {"error": str(e)} |
|
9b34c98…
|
leo
|
129 |
|
|
9b34c98…
|
leo
|
130 |
# Try fallback strategy |
|
9b34c98…
|
leo
|
131 |
fallback = self._get_fallback(step_name) |
|
9b34c98…
|
leo
|
132 |
if fallback: |
|
9b34c98…
|
leo
|
133 |
logger.info(f"Trying fallback for {step_name}: {fallback}") |
|
9b34c98…
|
leo
|
134 |
try: |
|
9b34c98…
|
leo
|
135 |
result = self._run_step(fallback, input_path, output_dir) |
|
9b34c98…
|
leo
|
136 |
self._results[step_name] = result |
|
9b34c98…
|
leo
|
137 |
except Exception as fe: |
|
9b34c98…
|
leo
|
138 |
logger.error(f"Fallback {fallback} also failed: {fe}") |
|
9b34c98…
|
leo
|
139 |
|
|
829e24a…
|
leo
|
140 |
def _run_step(self, step_name: str, input_path: Path, output_dir: Path) -> Any:
    """
    Run a specific processing step and return its result.

    Steps read the results of earlier steps from ``self._results``; they do
    not store their own result (the caller does that).

    Args:
        step_name: Name of the step to run.
        input_path: Video being processed.
        output_dir: Base directory passed to ``create_video_output_dirs``.

    Returns:
        A step-specific value: a dict for most steps, or the transcription
        object returned by the provider manager for ``"transcribe"``.

    Raises:
        RuntimeError: ``"transcribe"`` was requested but no audio path is
            available from the ``"extract_audio"`` result.
        ValueError: ``step_name`` is not a known step.
    """
    from video_processor.output_structure import create_video_output_dirs

    dirs = create_video_output_dirs(output_dir, input_path.stem)

    if step_name == "extract_frames":
        from video_processor.extractors.frame_extractor import extract_frames, save_frames

        frames = extract_frames(input_path)
        paths = save_frames(frames, dirs["frames"], "frame")
        return {"frames": frames, "paths": paths}

    elif step_name == "extract_audio":
        from video_processor.extractors.audio_extractor import AudioExtractor

        extractor = AudioExtractor()
        audio_path = extractor.extract_audio(
            input_path, output_path=dirs["root"] / "audio" / f"{input_path.stem}.wav"
        )
        props = extractor.get_audio_properties(audio_path)
        return {"audio_path": audio_path, "properties": props}

    elif step_name == "transcribe":
        audio_result = self._results.get("extract_audio", {})
        audio_path = audio_result.get("audio_path")
        if not audio_path:
            raise RuntimeError("No audio available for transcription")

        transcription = self.pm.transcribe_audio(audio_path)
        text = transcription.get("text", "")

        # Save transcript
        # Write UTF-8 explicitly: the locale default encoding can fail on (or
        # vary for) transcripts containing non-ASCII characters.
        dirs["transcript"].mkdir(parents=True, exist_ok=True)
        (dirs["transcript"] / "transcript.json").write_text(
            json.dumps(transcription, indent=2), encoding="utf-8"
        )
        (dirs["transcript"] / "transcript.txt").write_text(text, encoding="utf-8")
        return transcription

    elif step_name == "detect_diagrams":
        from video_processor.analyzers.diagram_analyzer import DiagramAnalyzer

        frame_result = self._results.get("extract_frames", {})
        paths = frame_result.get("paths", [])
        if not paths:
            return {"diagrams": [], "captures": []}

        analyzer = DiagramAnalyzer(provider_manager=self.pm)
        # Only the first 15 saved frames are analyzed.
        diagrams, captures = analyzer.process_frames(
            paths[:15], diagrams_dir=dirs["diagrams"], captures_dir=dirs["captures"]
        )
        return {"diagrams": diagrams, "captures": captures}

    elif step_name == "build_knowledge_graph":
        from video_processor.integrators.knowledge_graph import KnowledgeGraph

        transcript = self._results.get("transcribe", {})
        kg_db_path = dirs["results"] / "knowledge_graph.db"
        kg = KnowledgeGraph(provider_manager=self.pm, db_path=kg_db_path)
        kg.process_transcript(transcript)

        diagram_result = self._results.get("detect_diagrams", {})
        diagrams = diagram_result.get("diagrams", [])
        if diagrams:
            kg.process_diagrams([d.model_dump() for d in diagrams])

        # Export JSON copy alongside the SQLite db
        kg.save(dirs["results"] / "knowledge_graph.json")
        return {"knowledge_graph": kg}

    elif step_name == "extract_key_points":
        transcript = self._results.get("transcribe", {})
        text = transcript.get("text", "")
        if not text:
            return {"key_points": []}

        from video_processor.pipeline import _extract_key_points

        kps = _extract_key_points(self.pm, text)
        return {"key_points": kps}

    elif step_name == "extract_action_items":
        transcript = self._results.get("transcribe", {})
        text = transcript.get("text", "")
        if not text:
            return {"action_items": []}

        from video_processor.pipeline import _extract_action_items

        items = _extract_action_items(self.pm, text)
        return {"action_items": items}

    elif step_name == "deep_analysis":
        return self._deep_analysis(output_dir)

    elif step_name == "cross_reference":
        return self._cross_reference()

    elif step_name == "generate_reports":
        return self._generate_reports(input_path, output_dir)

    elif step_name == "screengrab_fallback":
        # Already handled in detect_diagrams
        return {}

    else:
        raise ValueError(f"Unknown step: {step_name}")
|
9b34c98…
|
leo
|
246 |
|
|
9b34c98…
|
leo
|
247 |
def _adapt_plan(self, completed_step: str, result: Any) -> None: |
|
9b34c98…
|
leo
|
248 |
"""Adapt the plan based on step results.""" |
|
9b34c98…
|
leo
|
249 |
|
|
9b34c98…
|
leo
|
250 |
if completed_step == "transcribe": |
|
9b34c98…
|
leo
|
251 |
text = result.get("text", "") if isinstance(result, dict) else "" |
|
9b34c98…
|
leo
|
252 |
# If transcript is very long, add deep analysis |
|
829e24a…
|
leo
|
253 |
if len(text) > 10000 and not any(s["step"] == "deep_analysis" for s in self._plan): |
|
9b34c98…
|
leo
|
254 |
self._plan.append({"step": "deep_analysis", "priority": "adaptive"}) |
|
9b34c98…
|
leo
|
255 |
logger.info("Agent adapted: adding deep analysis for long transcript") |
|
9b34c98…
|
leo
|
256 |
|
|
9b34c98…
|
leo
|
257 |
elif completed_step == "detect_diagrams": |
|
9b34c98…
|
leo
|
258 |
diagrams = result.get("diagrams", []) if isinstance(result, dict) else [] |
|
9b34c98…
|
leo
|
259 |
captures = result.get("captures", []) if isinstance(result, dict) else [] |
|
9b34c98…
|
leo
|
260 |
# If many diagrams found, ensure cross-referencing |
|
829e24a…
|
leo
|
261 |
if len(diagrams) >= 3 and not any(s["step"] == "cross_reference" for s in self._plan): |
|
9b34c98…
|
leo
|
262 |
self._plan.append({"step": "cross_reference", "priority": "adaptive"}) |
|
9b34c98…
|
leo
|
263 |
logger.info("Agent adapted: adding cross-reference for diagram-heavy video") |
|
9b34c98…
|
leo
|
264 |
|
|
9b34c98…
|
leo
|
265 |
if len(captures) > len(diagrams): |
|
9b34c98…
|
leo
|
266 |
self._insights.append( |
|
9b34c98…
|
leo
|
267 |
f"Many uncertain frames ({len(captures)} captures vs {len(diagrams)} diagrams) " |
|
9b34c98…
|
leo
|
268 |
"— consider re-processing with comprehensive depth" |
|
9b34c98…
|
leo
|
269 |
) |
|
9b34c98…
|
leo
|
270 |
|
|
9b34c98…
|
leo
|
271 |
def _get_fallback(self, step_name: str) -> Optional[str]: |
|
9b34c98…
|
leo
|
272 |
"""Get a fallback strategy for a failed step.""" |
|
9b34c98…
|
leo
|
273 |
fallbacks = { |
|
9b34c98…
|
leo
|
274 |
"detect_diagrams": "screengrab_fallback", |
|
9b34c98…
|
leo
|
275 |
} |
|
9b34c98…
|
leo
|
276 |
return fallbacks.get(step_name) |
|
9b34c98…
|
leo
|
277 |
|
|
9b34c98…
|
leo
|
278 |
def _deep_analysis(self, output_dir: Path) -> Dict: |
|
9b34c98…
|
leo
|
279 |
"""Perform deeper analysis on the transcript.""" |
|
9b34c98…
|
leo
|
280 |
transcript = self._results.get("transcribe", {}) |
|
9b34c98…
|
leo
|
281 |
text = transcript.get("text", "") |
|
9b34c98…
|
leo
|
282 |
if not text or not self.pm: |
|
9b34c98…
|
leo
|
283 |
return {} |
|
9b34c98…
|
leo
|
284 |
|
|
9b34c98…
|
leo
|
285 |
prompt = ( |
|
9b34c98…
|
leo
|
286 |
"Analyze this transcript in depth. Identify:\n" |
|
9b34c98…
|
leo
|
287 |
"1. Hidden assumptions or risks\n" |
|
9b34c98…
|
leo
|
288 |
"2. Decisions that were made (explicitly or implicitly)\n" |
|
9b34c98…
|
leo
|
289 |
"3. Topics that need follow-up\n" |
|
9b34c98…
|
leo
|
290 |
"4. Potential disagreements or tensions\n\n" |
|
9b34c98…
|
leo
|
291 |
f"TRANSCRIPT:\n{text[:10000]}\n\n" |
|
9b34c98…
|
leo
|
292 |
"Return a JSON object:\n" |
|
9b34c98…
|
leo
|
293 |
'{"decisions": [...], "risks": [...], "follow_ups": [...], "tensions": [...]}\n' |
|
9b34c98…
|
leo
|
294 |
"Return ONLY the JSON." |
|
9b34c98…
|
leo
|
295 |
) |
|
9b34c98…
|
leo
|
296 |
|
|
9b34c98…
|
leo
|
297 |
try: |
|
9b34c98…
|
leo
|
298 |
from video_processor.utils.json_parsing import parse_json_from_response |
|
9b34c98…
|
leo
|
299 |
|
|
9b34c98…
|
leo
|
300 |
raw = self.pm.chat([{"role": "user", "content": prompt}], temperature=0.4) |
|
9b34c98…
|
leo
|
301 |
parsed = parse_json_from_response(raw) |
|
9b34c98…
|
leo
|
302 |
if isinstance(parsed, dict): |
|
9b34c98…
|
leo
|
303 |
for category, items in parsed.items(): |
|
9b34c98…
|
leo
|
304 |
if isinstance(items, list): |
|
9b34c98…
|
leo
|
305 |
for item in items: |
|
9b34c98…
|
leo
|
306 |
self._insights.append(f"[{category}] {item}") |
|
9b34c98…
|
leo
|
307 |
return parsed |
|
9b34c98…
|
leo
|
308 |
except Exception as e: |
|
9b34c98…
|
leo
|
309 |
logger.warning(f"Deep analysis failed: {e}") |
|
9b34c98…
|
leo
|
310 |
|
|
9b34c98…
|
leo
|
311 |
return {} |
|
9b34c98…
|
leo
|
312 |
|
|
9b34c98…
|
leo
|
313 |
def _cross_reference(self) -> Dict:
    """
    Enrich the extracted key points using the detected diagrams.

    Nothing is done (``{}`` is returned) when no knowledge graph was built.
    Otherwise, if both key points and diagrams exist, the key points stored
    under the ``"extract_key_points"`` result are replaced with the output of
    ``ContentAnalyzer.enrich_key_points``. ``{"enriched": True}`` is returned
    whenever a knowledge graph is present, whether or not any enrichment
    actually took place.
    """
    from video_processor.analyzers.content_analyzer import ContentAnalyzer

    results = self._results

    graph = results.get("build_knowledge_graph", {}).get("knowledge_graph")
    if not graph:
        return {}

    points = results.get("extract_key_points", {}).get("key_points", [])
    found_diagrams = results.get("detect_diagrams", {}).get("diagrams", [])

    analyzer = ContentAnalyzer(provider_manager=self.pm)
    transcript = results.get("transcribe", {})

    if not (points and found_diagrams):
        return {"enriched": True}

    serialized = [diagram.model_dump() for diagram in found_diagrams]
    results["extract_key_points"]["key_points"] = analyzer.enrich_key_points(
        points, serialized, transcript.get("text", "")
    )
    return {"enriched": True}
|
9b34c98…
|
leo
|
339 |
|
|
9b34c98…
|
leo
|
340 |
def _generate_reports(self, input_path: Path, output_dir: Path) -> Dict:
    """
    Generate the markdown analysis report.

    Collects the transcript, key points, diagrams and knowledge graph from
    ``self._results``, has ``PlanGenerator`` write ``analysis.md`` into the
    results directory, and appends an "Agent Insights" section when any
    insights have been recorded so far.

    Args:
        input_path: Video being processed; its stem is used as the title and
            to locate the output directories.
        output_dir: Base directory passed to ``create_video_output_dirs``.

    Returns:
        ``{"report_path": <path to analysis.md as str>}``.
    """
    from video_processor.integrators.plan_generator import PlanGenerator
    from video_processor.output_structure import create_video_output_dirs

    dirs = create_video_output_dirs(output_dir, input_path.stem)

    transcript = self._results.get("transcribe", {})
    key_points = self._results.get("extract_key_points", {}).get("key_points", [])
    diagrams = self._results.get("detect_diagrams", {}).get("diagrams", [])
    kg = self._results.get("build_knowledge_graph", {}).get("knowledge_graph")
    # Action items are not passed to the markdown generator here, so the
    # former lookup whose result was discarded has been dropped.

    gen = PlanGenerator(provider_manager=self.pm, knowledge_graph=kg)
    md_path = dirs["results"] / "analysis.md"
    gen.generate_markdown(
        transcript=transcript,
        key_points=[kp.model_dump() for kp in key_points],
        diagrams=[d.model_dump() for d in diagrams],
        knowledge_graph=kg.to_dict() if kg else {},
        video_title=input_path.stem,
        output_path=md_path,
    )

    # Add agent insights to report
    if self._insights:
        bullets = "".join(f"- {insight}\n" for insight in self._insights)
        # NOTE(review): appended with the default encoding, presumably to
        # match whatever generate_markdown used — confirm and pin it there.
        with open(md_path, "a") as f:
            f.write("\n## Agent Insights\n\n" + bullets)

    return {"report_path": str(md_path)}
|
9b34c98…
|
leo
|
377 |
|
|
9b34c98…
|
leo
|
378 |
def _build_manifest(
    self,
    input_path: Path,
    output_dir: Path,
    title: Optional[str],
    elapsed: float,
) -> VideoManifest:
    """
    Assemble the final manifest from the collected step results.

    Missing steps and non-dict results contribute empty defaults. The audio
    properties' ``"duration"`` is used for both the video duration and the
    transcript duration; ``elapsed`` becomes the processing duration.

    Args:
        input_path: Video that was processed.
        output_dir: Not used by this method.
        title: Manifest title; falls back to ``"Analysis of <stem>"``.
        elapsed: Wall-clock processing time in seconds.
    """

    def pick(step_name: str, field: str, default: Any) -> Any:
        # Read one field of a step result, tolerating absent/non-dict results.
        outcome = self._results.get(step_name, {})
        if not isinstance(outcome, dict):
            return default
        return outcome.get(field, default)

    diagrams = pick("detect_diagrams", "diagrams", [])
    captures = pick("detect_diagrams", "captures", [])
    key_points = pick("extract_key_points", "key_points", [])
    action_items = pick("extract_action_items", "action_items", [])
    frames = pick("extract_frames", "frames", [])
    frame_files = pick("extract_frames", "paths", [])
    audio_props = pick("extract_audio", "properties", {})

    video = VideoMetadata(
        title=title or f"Analysis of {input_path.stem}",
        source_path=str(input_path),
        duration_seconds=audio_props.get("duration"),
    )
    stats = ProcessingStats(
        duration_seconds=elapsed,
        frames_extracted=len(frames),
        diagrams_detected=len(diagrams),
        screen_captures=len(captures),
        transcript_duration_seconds=audio_props.get("duration"),
        models_used=self.pm.get_models_used(),
    )
    relative_frames = [f"frames/{Path(p).name}" for p in frame_files]

    return VideoManifest(
        video=video,
        stats=stats,
        key_points=key_points,
        action_items=action_items,
        diagrams=diagrams,
        screen_captures=captures,
        frame_paths=relative_frames,
    )
|
9b34c98…
|
leo
|
420 |
|
|
9b34c98…
|
leo
|
421 |
@property
def insights(self) -> List[str]:
    """Insights recorded so far, as a new list independent of internal state."""
    return [*self._insights]