|
1
|
"""Plan generation for creating structured markdown output.""" |
|
2
|
|
|
3
|
import logging |
|
4
|
from pathlib import Path |
|
5
|
from typing import Dict, List, Optional, Union |
|
6
|
|
|
7
|
from video_processor.integrators.knowledge_graph import KnowledgeGraph |
|
8
|
from video_processor.models import VideoManifest |
|
9
|
from video_processor.providers.manager import ProviderManager |
|
10
|
|
|
11
|
logger = logging.getLogger(__name__) |
|
12
|
|
|
13
|
|
|
14
|
class PlanGenerator:
    """Generates structured markdown content from extracted data.

    Reports are assembled from transcripts, key points, diagrams and knowledge
    graphs.  Free-text summaries are delegated to the configured provider
    manager; when none is configured the summary is an empty string.
    """

    def __init__(
        self,
        provider_manager: Optional["ProviderManager"] = None,
        knowledge_graph: Optional["KnowledgeGraph"] = None,
    ):
        """Store the provider manager and the knowledge graph to work with.

        Args:
            provider_manager: Backend used by ``_chat``; may be ``None``.
            knowledge_graph: Graph kept on the instance.  When ``None`` a new
                ``KnowledgeGraph`` bound to ``provider_manager`` is created.
        """
        self.pm = provider_manager
        # Test identity against None instead of truthiness so that a graph
        # supplied by the caller is never replaced merely because it
        # evaluates as falsy.
        if knowledge_graph is None:
            knowledge_graph = KnowledgeGraph(provider_manager=provider_manager)
        self.knowledge_graph = knowledge_graph

    def _chat(self, prompt: str, max_tokens: int = 2048, temperature: float = 0.5) -> str:
        """Send ``prompt`` as a single user message and return the reply.

        Returns:
            Whatever ``self.pm.chat`` returns, or ``""`` when no provider
            manager is configured.
        """
        if not self.pm:
            return ""
        return self.pm.chat(
            [{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
            temperature=temperature,
        )

    @staticmethod
    def _join_transcript(transcript: Dict) -> str:
        """Flatten a transcript dict into one text blob for prompting."""
        parts = []
        # ``or []`` tolerates both a missing key and an explicit None value.
        for segment in transcript.get("segments") or []:
            if "text" not in segment:
                continue
            speaker = segment.get("speaker")
            # Only prefix when a real speaker label exists; otherwise the
            # prompt would contain a literal "None: " or a dangling ": ".
            has_speaker = speaker is not None and speaker != ""
            prefix = f"{speaker}: " if has_speaker else ""
            parts.append(f"{prefix}{segment['text']}\n\n")

        full_text = "".join(parts)
        if not full_text.strip():
            # Fall back to the top-level text; ``or ""`` guards against None
            # so the caller can slice the result safely.
            full_text = transcript.get("text") or ""
        return full_text

    def generate_summary(self, transcript: Dict) -> str:
        """Generate summary from transcript.

        Segment texts (prefixed with the speaker label when one is present)
        are concatenated; if that yields nothing, the transcript's top-level
        ``"text"`` value is used instead.  Only the first 6000 characters are
        included in the prompt.

        Args:
            transcript: Mapping that may contain ``"segments"`` (a list of
                dicts with ``"text"`` and optionally ``"speaker"``) and/or
                ``"text"``.

        Returns:
            The provider's reply, or ``""`` without a provider manager.
        """
        full_text = self._join_transcript(transcript)
        return self._chat(
            f"Provide a concise 3-5 paragraph summary of this transcript:\n\n{full_text[:6000]}",
            max_tokens=800,
        )

    @staticmethod
    def _render_key_points(key_points: List[Dict]) -> List[str]:
        """Return markdown lines for the key-point bullet list."""
        lines = []
        for point in key_points:
            if isinstance(point, dict):
                label = point.get("point", "")
                details = point.get("details")
            else:
                # Non-dict entries are rendered via their string form.
                label, details = str(point), None

            lines.append(f"- **{label}**")
            if details:
                # Two spaces of indentation align with the content of the
                # parent "- " item, which is what markdown requires for the
                # details to be nested under it.
                if isinstance(details, list):
                    lines.extend(f"  - {d}" for d in details)
                else:
                    lines.append(f"  {details}")
            lines.append("")
        return lines

    @staticmethod
    def _render_diagrams(diagrams: List[Dict]) -> List[str]:
        """Return markdown lines for the "Visual Elements" section."""
        lines = ["## Visual Elements", ""]
        for number, diagram in enumerate(diagrams, start=1):
            lines += [f"### Diagram {number}", ""]

            desc = diagram.get("description", "")
            if desc:
                lines += [desc, ""]

            image_path = diagram.get("image_path")
            if image_path:
                # Emit a markdown image so the referenced file actually
                # appears in the report.
                lines += [f"![Diagram {number}]({image_path})", ""]

            mermaid = diagram.get("mermaid")
            if mermaid:
                lines += ["```mermaid", mermaid, "```", ""]
        return lines

    @staticmethod
    def _write_markdown(
        output_path: Union[str, Path], content: str, default_suffix: str = ""
    ) -> Path:
        """Write ``content`` to ``output_path`` and return the final path.

        Parent directories are created as needed.  ``default_suffix`` is
        applied only when it is non-empty and the path has no suffix.
        """
        path = Path(output_path)
        if default_suffix and not path.suffix:
            path = path.with_suffix(default_suffix)
        path.parent.mkdir(parents=True, exist_ok=True)
        # Explicit UTF-8: reports may contain non-ASCII text and must not
        # depend on the platform's locale encoding.
        path.write_text(content, encoding="utf-8")
        return path

    def generate_markdown(
        self,
        transcript: Dict,
        key_points: List[Dict],
        diagrams: List[Dict],
        knowledge_graph: Dict,
        video_title: Optional[str] = None,
        output_path: Optional[Union[str, Path]] = None,
    ) -> str:
        """Generate markdown report content.

        Args:
            transcript: Transcript mapping passed to ``generate_summary``.
            key_points: Items rendered as bullets; dict items contribute
                ``"point"`` and optional ``"details"``.
            diagrams: Dicts with optional ``"description"``, ``"image_path"``
                and ``"mermaid"`` entries.
            knowledge_graph: Graph data; rendered only when it has ``"nodes"``.
            video_title: Report heading; defaults to "Video Analysis Report".
            output_path: When given, the report is also written there
                (``.md`` is appended if the path has no suffix).

        Returns:
            The complete markdown document as a string.
        """
        summary = self.generate_summary(transcript)
        title = video_title or "Video Analysis Report"

        md = [f"# {title}", "", "## Summary", "", summary, "", "## Key Points", ""]
        md.extend(self._render_key_points(key_points))

        if diagrams:
            md.extend(self._render_diagrams(diagrams))

        if knowledge_graph and knowledge_graph.get("nodes"):
            kg = KnowledgeGraph.from_dict(knowledge_graph)
            md += [
                "## Knowledge Graph",
                "",
                "```mermaid",
                kg.generate_mermaid(max_nodes=25),
                "```",
                "",
            ]

        markdown_content = "\n".join(md)

        if output_path:
            saved_to = self._write_markdown(output_path, markdown_content, default_suffix=".md")
            logger.info("Saved markdown to %s", saved_to)

        return markdown_content

    def generate_batch_summary(
        self,
        manifests: List["VideoManifest"],
        kg: Optional["KnowledgeGraph"] = None,
        title: str = "Batch Processing Summary",
        output_path: Optional[Union[str, Path]] = None,
    ) -> str:
        """Generate a batch summary across multiple videos.

        Args:
            manifests: Per-video manifests; their ``video``, ``diagrams``,
                ``key_points`` and ``action_items`` attributes are read.
            kg: Optional merged graph; rendered when it has nodes.
            title: Heading of the summary document.
            output_path: When given, the summary is also written to exactly
                this path (no suffix is added).

        Returns:
            The complete markdown document as a string.
        """
        # The collection is traversed several times below, so materialise it
        # once in case the caller handed over a one-shot iterable.
        manifests = list(manifests)

        # Overview stats
        total_diagrams = sum(len(m.diagrams) for m in manifests)
        total_kp = sum(len(m.key_points) for m in manifests)
        total_ai = sum(len(m.action_items) for m in manifests)

        md = [
            f"# {title}",
            "",
            "## Overview",
            "",
            f"- **Videos processed:** {len(manifests)}",
            f"- **Total diagrams:** {total_diagrams}",
            f"- **Total key points:** {total_kp}",
            f"- **Total action items:** {total_ai}",
            "",
            "## Per-Video Summaries",
            "",
        ]

        # Per-video summaries
        for m in manifests:
            md += [
                f"### {m.video.title}",
                "",
                f"- Diagrams: {len(m.diagrams)}",
                f"- Key points: {len(m.key_points)}",
                f"- Action items: {len(m.action_items)}",
            ]
            if m.video.duration_seconds:
                md.append(f"- Duration: {m.video.duration_seconds:.0f}s")
            md.append("")

        # Aggregated action items
        if total_ai > 0:
            md += ["## All Action Items", ""]
            for m in manifests:
                for ai in m.action_items:
                    line = f"- **{ai.action}**"
                    if ai.assignee:
                        line += f" ({ai.assignee})"
                    if ai.deadline:
                        line += f" — {ai.deadline}"
                    line += f" _{m.video.title}_"
                    md.append(line)
            md.append("")

        # Knowledge graph
        if kg and kg.nodes:
            md += [
                "## Merged Knowledge Graph",
                "",
                "```mermaid",
                kg.generate_mermaid(max_nodes=30),
                "```",
                "",
            ]

        markdown_content = "\n".join(md)

        if output_path:
            saved_to = self._write_markdown(output_path, markdown_content)
            logger.info("Saved batch summary to %s", saved_to)

        return markdown_content
|
190
|
|