|
09a0b7a…
|
leo
|
1 |
"""Plan generation for creating structured markdown output.""" |
|
09a0b7a…
|
leo
|
2 |
|
|
09a0b7a…
|
leo
|
3 |
import logging |
|
09a0b7a…
|
leo
|
4 |
from pathlib import Path |
|
09a0b7a…
|
leo
|
5 |
from typing import Dict, List, Optional, Union |
|
09a0b7a…
|
leo
|
6 |
|
|
09a0b7a…
|
leo
|
7 |
from video_processor.integrators.knowledge_graph import KnowledgeGraph |
|
829e24a…
|
leo
|
8 |
from video_processor.models import VideoManifest |
|
09a0b7a…
|
leo
|
9 |
from video_processor.providers.manager import ProviderManager |
|
09a0b7a…
|
leo
|
10 |
|
|
09a0b7a…
|
leo
|
11 |
logger = logging.getLogger(__name__) |
|
09a0b7a…
|
leo
|
12 |
|
|
09a0b7a…
|
leo
|
13 |
|
|
09a0b7a…
|
leo
|
14 |
class PlanGenerator:
    """Generates structured markdown content from extracted data.

    Combines an optional LLM chat provider (``ProviderManager``) with a
    ``KnowledgeGraph`` to turn transcripts, key points and diagrams into
    markdown reports — one per video, plus an aggregate batch summary.
    """

    def __init__(
        self,
        provider_manager: Optional[ProviderManager] = None,
        knowledge_graph: Optional[KnowledgeGraph] = None,
    ):
        """Initialize the generator.

        Args:
            provider_manager: LLM provider used for summarization. When
                ``None``, ``_chat`` short-circuits and summaries are empty.
            knowledge_graph: Pre-built graph to reuse; a fresh one is
                created when not supplied.
        """
        self.pm = provider_manager
        self.knowledge_graph = knowledge_graph or KnowledgeGraph(provider_manager=provider_manager)

    def _chat(self, prompt: str, max_tokens: int = 2048, temperature: float = 0.5) -> str:
        """Send a single-turn user prompt to the provider.

        Returns the provider's text response, or ``""`` when no provider
        manager is configured.
        """
        if not self.pm:
            return ""
        return self.pm.chat(
            [{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
            temperature=temperature,
        )

    def generate_summary(self, transcript: Dict) -> str:
        """Generate a concise summary of a transcript via the LLM.

        Prefers per-segment text (with an optional ``"speaker"`` label per
        segment); falls back to the flat ``"text"`` field when no segment
        text is present. The prompt is truncated to 6000 characters to keep
        it within a safe context size.
        """
        # Build with list + join rather than repeated `+=` concatenation,
        # which is quadratic on long transcripts.
        parts: List[str] = []
        for segment in transcript.get("segments", []):
            if "text" in segment:
                speaker = (
                    f"{segment.get('speaker', 'Speaker')}: " if "speaker" in segment else ""
                )
                parts.append(f"{speaker}{segment['text']}\n\n")
        full_text = "".join(parts)

        if not full_text.strip():
            full_text = transcript.get("text", "")

        return self._chat(
            f"Provide a concise 3-5 paragraph summary of this transcript:\n\n{full_text[:6000]}",
            max_tokens=800,
        )

    def _write_output(self, content: str, output_path: Union[str, Path], what: str) -> None:
        """Write *content* to *output_path*, creating parent directories.

        A ``.md`` suffix is added when the path has no suffix, so both
        report writers share one output convention.
        """
        output_path = Path(output_path)
        if not output_path.suffix:
            output_path = output_path.with_suffix(".md")
        output_path.parent.mkdir(parents=True, exist_ok=True)
        # Explicit encoding: the platform-default encoding is not
        # guaranteed to be UTF-8.
        output_path.write_text(content, encoding="utf-8")
        logger.info("Saved %s to %s", what, output_path)

    def generate_markdown(
        self,
        transcript: Dict,
        key_points: List[Dict],
        diagrams: List[Dict],
        knowledge_graph: Dict,
        video_title: Optional[str] = None,
        output_path: Optional[Union[str, Path]] = None,
    ) -> str:
        """Generate a per-video markdown report.

        Args:
            transcript: Transcript dict (see ``generate_summary``).
            key_points: Items that are either dicts with ``"point"`` /
                optional ``"details"`` keys, or plain strings.
            diagrams: Dicts with optional ``"description"``,
                ``"image_path"``, and ``"mermaid"`` keys.
            knowledge_graph: Serialized graph dict; rendered as a mermaid
                chart when it has ``"nodes"``.
            video_title: Report title; a generic default is used when absent.
            output_path: When given, the report is also written to disk.

        Returns:
            The full markdown document as a string.
        """
        summary = self.generate_summary(transcript)
        title = video_title or "Video Analysis Report"

        md = [f"# {title}", "", "## Summary", "", summary, "", "## Key Points", ""]

        for point in key_points:
            # Key points may be structured dicts or bare strings.
            p = point.get("point", "") if isinstance(point, dict) else str(point)
            md.append(f"- **{p}**")
            details = point.get("details") if isinstance(point, dict) else None
            if details:
                if isinstance(details, list):
                    for d in details:
                        md.append(f"  - {d}")
                else:
                    md.append(f"  {details}")
        md.append("")

        if diagrams:
            md.append("## Visual Elements")
            md.append("")
            for i, diagram in enumerate(diagrams):
                md.append(f"### Diagram {i + 1}")
                md.append("")
                desc = diagram.get("description", "")
                if desc:
                    md.append(desc)
                    md.append("")
                if diagram.get("image_path"):
                    # NOTE(review): this line arrived garbled in the source
                    # (`md.append(f"")`); reconstructed as a markdown image
                    # embed of the extracted frame — confirm against history.
                    md.append(f"![Diagram {i + 1}]({diagram['image_path']})")
                    md.append("")
                if diagram.get("mermaid"):
                    md.append("```mermaid")
                    md.append(diagram["mermaid"])
                    md.append("```")
                    md.append("")

        if knowledge_graph and knowledge_graph.get("nodes"):
            md.append("## Knowledge Graph")
            md.append("")
            kg = KnowledgeGraph.from_dict(knowledge_graph)
            mermaid_code = kg.generate_mermaid(max_nodes=25)
            md.append("```mermaid")
            md.append(mermaid_code)
            md.append("```")
            md.append("")

        markdown_content = "\n".join(md)

        if output_path:
            self._write_output(markdown_content, output_path, "markdown")

        return markdown_content

    def generate_batch_summary(
        self,
        manifests: List[VideoManifest],
        kg: Optional[KnowledgeGraph] = None,
        title: str = "Batch Processing Summary",
        output_path: Optional[Union[str, Path]] = None,
    ) -> str:
        """Generate a batch summary across multiple videos.

        Aggregates counts, per-video stats, all action items, and (when
        supplied) a merged knowledge-graph mermaid chart.

        Returns:
            The full markdown document as a string.
        """
        md = [f"# {title}", ""]

        # Overview stats.
        total_diagrams = sum(len(m.diagrams) for m in manifests)
        total_kp = sum(len(m.key_points) for m in manifests)
        total_ai = sum(len(m.action_items) for m in manifests)

        md.append("## Overview")
        md.append("")
        md.append(f"- **Videos processed:** {len(manifests)}")
        md.append(f"- **Total diagrams:** {total_diagrams}")
        md.append(f"- **Total key points:** {total_kp}")
        md.append(f"- **Total action items:** {total_ai}")
        md.append("")

        # Per-video summaries.
        md.append("## Per-Video Summaries")
        md.append("")
        for m in manifests:
            md.append(f"### {m.video.title}")
            md.append("")
            md.append(f"- Diagrams: {len(m.diagrams)}")
            md.append(f"- Key points: {len(m.key_points)}")
            md.append(f"- Action items: {len(m.action_items)}")
            if m.video.duration_seconds:
                md.append(f"- Duration: {m.video.duration_seconds:.0f}s")
            md.append("")

        # Aggregated action items.
        if total_ai > 0:
            md.append("## All Action Items")
            md.append("")
            for m in manifests:
                for ai in m.action_items:
                    line = f"- **{ai.action}**"
                    if ai.assignee:
                        line += f" ({ai.assignee})"
                    if ai.deadline:
                        line += f" — {ai.deadline}"
                    line += f" _{m.video.title}_"
                    md.append(line)
            md.append("")

        # Merged knowledge graph.
        if kg and kg.nodes:
            md.append("## Merged Knowledge Graph")
            md.append("")
            md.append("```mermaid")
            md.append(kg.generate_mermaid(max_nodes=30))
            md.append("```")
            md.append("")

        markdown_content = "\n".join(md)

        if output_path:
            # Now shares _write_output with generate_markdown: UTF-8
            # encoding and the same `.md` suffix normalization.
            self._write_output(markdown_content, output_path, "batch summary")

        return markdown_content