|
1
|
"""Skill: Generate a GitHub wiki from knowledge graph and artifacts.""" |
|
2
|
|
|
3
|
import json
import logging
import shutil
import subprocess
from pathlib import Path
from typing import Dict, List, Optional
|
8
|
|
|
9
|
from video_processor.agent.skills.base import ( |
|
10
|
AgentContext, |
|
11
|
Artifact, |
|
12
|
Skill, |
|
13
|
register_skill, |
|
14
|
) |
|
15
|
|
|
16
|
logger = logging.getLogger(__name__) |
|
17
|
|
|
18
|
|
|
19
|
def _sanitize_filename(name: str) -> str: |
|
20
|
"""Convert entity name to a wiki-safe filename.""" |
|
21
|
return name.replace("/", "-").replace("\\", "-").replace(" ", "-").replace(".", "-") |
|
22
|
|
|
23
|
|
|
24
|
def _wiki_link(name: str) -> str: |
|
25
|
"""Create a GitHub wiki-style markdown link.""" |
|
26
|
safe = _sanitize_filename(name) |
|
27
|
return f"[{name}]({safe})" |
|
28
|
|
|
29
|
|
|
30
|
def generate_wiki(
    kg_data: dict,
    artifacts: Optional[List["Artifact"]] = None,
    title: str = "Knowledge Base",
) -> Dict[str, str]:
    """Generate a dict of {filename: markdown_content} for a GitHub wiki.

    Args:
        kg_data: Knowledge-graph export with "nodes" and "relationships" lists.
        artifacts: Optional planning artifacts to publish as extra pages.
        title: Heading used on the Home page and sidebar.

    Returns pages for: Home, _Sidebar, entity type indexes, individual
    entity pages, and any planning artifacts.
    """
    artifacts = artifacts or []
    nodes = kg_data.get("nodes", [])
    relationships = kg_data.get("relationships", [])

    # Group entities by type; missing type defaults to "concept".
    by_type: Dict[str, list] = {}
    for node in nodes:
        by_type.setdefault(node.get("type", "concept"), []).append(node)

    outgoing, incoming = _index_relationships(relationships)

    pages: Dict[str, str] = {}
    pages["Home"] = _render_home(title, nodes, relationships, by_type, artifacts)
    pages["_Sidebar"] = _render_sidebar(title, by_type, artifacts)

    for etype, elist in sorted(by_type.items()):
        pages[_sanitize_filename(etype.title())] = _render_type_index(etype, elist)

    # NOTE(review): all pages share one filename namespace — an entity whose
    # sanitized name equals a type title (or an artifact name) overwrites that
    # page. Preserved from the original behavior; confirm this is acceptable.
    for node in nodes:
        name = node.get("name", "")
        if not name:
            # Unnamed nodes cannot get a page filename; skip them.
            continue
        pages[_sanitize_filename(name)] = _render_entity_page(node, outgoing, incoming)

    for art in artifacts:
        pages[_sanitize_filename(art.name)] = _render_artifact_page(art)

    return pages


def _index_relationships(relationships: list) -> tuple:
    """Build (outgoing, incoming) adjacency maps keyed by entity name.

    Each map value is a list of (other_entity, relationship_type) tuples;
    the relationship type defaults to "related_to" when absent.
    """
    outgoing: Dict[str, list] = {}
    incoming: Dict[str, list] = {}
    for rel in relationships:
        src = rel.get("source", "")
        tgt = rel.get("target", "")
        rtype = rel.get("type", "related_to")
        outgoing.setdefault(src, []).append((tgt, rtype))
        incoming.setdefault(tgt, []).append((src, rtype))
    return outgoing, incoming


def _render_home(
    title: str,
    nodes: list,
    relationships: list,
    by_type: Dict[str, list],
    artifacts: list,
) -> str:
    """Render the Home page: headline stats, type index, artifact list."""
    parts = [
        f"# {title}",
        "",
        f"**{len(nodes)}** entities | **{len(relationships)}** relationships",
        "",
        "## Entity Types",
        "",
    ]
    for etype, elist in sorted(by_type.items()):
        parts.append(f"- {_wiki_link(etype.title())} ({len(elist)})")

    if artifacts:
        parts.extend(["", "## Planning Artifacts", ""])
        for art in artifacts:
            parts.append(f"- [{art.name}]({_sanitize_filename(art.name)})")

    return "\n".join(parts)


def _render_sidebar(title: str, by_type: Dict[str, list], artifacts: list) -> str:
    """Render the _Sidebar navigation page."""
    parts = [f"**{title}**", "", "**Navigation**", "", "- [Home](Home)", ""]
    parts.extend(["**Entity Types**", ""])
    for etype in sorted(by_type):
        parts.append(f"- {_wiki_link(etype.title())}")

    if artifacts:
        parts.extend(["", "**Artifacts**", ""])
        for art in artifacts:
            parts.append(f"- [{art.name}]({_sanitize_filename(art.name)})")

    return "\n".join(parts)


def _render_type_index(etype: str, elist: list) -> str:
    """Render the index page listing all entities of one type as a table."""
    parts = [
        f"# {etype.title()}",
        "",
        f"{len(elist)} entities of type **{etype}**.",
        "",
        "| Entity | Descriptions |",
        "|--------|-------------|",
    ]
    for node in sorted(elist, key=lambda n: n.get("name", "")):
        descs = node.get("descriptions", [])
        # Show at most the first two descriptions; em-dash when none.
        desc_text = "; ".join(descs[:2]) if descs else "—"
        parts.append(f"| {_wiki_link(node.get('name', ''))} | {desc_text} |")

    return "\n".join(parts)


def _render_entity_page(
    node: dict,
    outgoing: Dict[str, list],
    incoming: Dict[str, list],
) -> str:
    """Render one entity's page: type, descriptions, relations, sources."""
    name = node.get("name", "")
    parts = [
        f"# {name}",
        "",
        f"**Type:** {node.get('type', 'concept')}",
        "",
    ]

    descs = node.get("descriptions", [])
    if descs:
        parts.extend(["## Descriptions", ""])
        parts.extend(f"- {d}" for d in descs)
        parts.append("")

    # Outgoing then incoming relationship tables, each only when non-empty.
    for heading, column, rels in (
        ("## Relationships", "Target", outgoing.get(name, [])),
        ("## Referenced By", "Source", incoming.get(name, [])),
    ):
        if rels:
            parts.extend([heading, "", f"| {column} | Type |", "|--------|------|"])
            parts.extend(f"| {_wiki_link(other)} | {rtype} |" for other, rtype in rels)
            parts.append("")

    occs = node.get("occurrences", [])
    if occs:
        parts.extend(["## Sources", ""])
        for occ in occs:
            src = occ.get("source", "unknown")
            ts = occ.get("timestamp", "")
            text = occ.get("text", "")
            line = f"- **{src}**"
            if ts:
                line += f" @ {ts}"
            if text:
                line += f": _{text}_"
            parts.append(line)
        parts.append("")

    return "\n".join(parts)


def _render_artifact_page(art: "Artifact") -> str:
    """Render an artifact page; pretty-print JSON artifacts when parseable."""
    content = art.content
    if art.format == "json":
        try:
            data = json.loads(art.content)
            content = f"```json\n{json.dumps(data, indent=2)}\n```"
        except json.JSONDecodeError:
            # Not valid JSON despite the declared format — emit raw content.
            content = art.content
    return f"# {art.name}\n\n{content}"
|
202
|
|
|
203
|
|
|
204
|
def write_wiki(pages: Dict[str, str], output_dir: Path) -> List[Path]:
    """Write wiki pages to a directory as .md files.

    Creates *output_dir* (and parents) if needed; returns the written
    paths in the pages' iteration order.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    def _dump(page_name: str, body: str) -> Path:
        target = output_dir / f"{page_name}.md"
        target.write_text(body, encoding="utf-8")
        return target

    return [_dump(page_name, body) for page_name, body in pages.items()]
|
213
|
|
|
214
|
|
|
215
|
def push_wiki(wiki_dir: Path, repo: str, message: str = "Update wiki") -> bool:
    """Push wiki pages to a GitHub wiki repo.

    Clones the wiki repo (or initializes a fresh one when the wiki does not
    exist yet), copies every top-level ``*.md`` page from *wiki_dir* into the
    clone, commits, and pushes.

    Args:
        wiki_dir: Directory containing the generated ``.md`` pages.
        repo: Target repository in ``owner/repo`` format.
        message: Commit message for the wiki update.

    Returns:
        True on success (including "nothing to commit"); False when the push
        is rejected on both ``master`` and ``main``.
    """
    wiki_url = f"https://github.com/{repo}.wiki.git"

    # Start from a fresh clone each time. shutil.rmtree is portable,
    # unlike shelling out to `rm -rf` (which fails on Windows).
    clone_dir = wiki_dir / ".wiki_clone"
    if clone_dir.exists():
        shutil.rmtree(clone_dir)

    result = subprocess.run(
        ["git", "clone", wiki_url, str(clone_dir)],
        capture_output=True,
        text=True,
    )

    if result.returncode != 0:
        # Wiki might not exist yet — init a new repo pointing at the remote.
        clone_dir.mkdir(parents=True, exist_ok=True)
        subprocess.run(["git", "init"], cwd=clone_dir, capture_output=True)
        subprocess.run(
            ["git", "remote", "add", "origin", wiki_url],
            cwd=clone_dir,
            capture_output=True,
        )

    # Copy top-level wiki pages into the clone. glob("*.md") is already
    # non-recursive, so no extra parent check is needed.
    for md_file in wiki_dir.glob("*.md"):
        dest = clone_dir / md_file.name
        dest.write_text(md_file.read_text(encoding="utf-8"), encoding="utf-8")

    # Commit and push.
    subprocess.run(["git", "add", "-A"], cwd=clone_dir, capture_output=True)
    commit_result = subprocess.run(
        ["git", "commit", "-m", message],
        cwd=clone_dir,
        capture_output=True,
        text=True,
    )
    if commit_result.returncode != 0:
        # `git commit` fails when the tree is unchanged; treat as success.
        logger.info("No wiki changes to commit")
        return True

    # GitHub wikis may use either default branch name: try master, then main.
    push_result = subprocess.run(
        ["git", "push", "origin", "master"],
        cwd=clone_dir,
        capture_output=True,
        text=True,
    )
    if push_result.returncode != 0:
        push_result = subprocess.run(
            ["git", "push", "origin", "main"],
            cwd=clone_dir,
            capture_output=True,
            text=True,
        )

    if push_result.returncode == 0:
        logger.info("Wiki pushed to %s", wiki_url)
        return True
    logger.error("Wiki push failed: %s", push_result.stderr)
    return False
|
283
|
|
|
284
|
|
|
285
|
class WikiGeneratorSkill(Skill):
    """Skill that renders the agent's knowledge graph as GitHub wiki pages."""

    name = "wiki_generator"
    description = "Generate a GitHub wiki from knowledge graph and artifacts"

    def execute(self, context: AgentContext, **kwargs) -> Artifact:
        """Build wiki pages and return a summary artifact.

        The rendered pages travel in the artifact's metadata under
        "pages"; callers persist them to disk via write_wiki().
        """
        pages = generate_wiki(
            context.knowledge_graph.to_dict(),
            artifacts=context.artifacts,
            title=kwargs.get("title", "Knowledge Base"),
        )

        # Summary body: a header line followed by the sorted page listing.
        summary_lines = [f"Generated {len(pages)} wiki pages:", ""]
        summary_lines.extend(f"- {page_name}.md" for page_name in sorted(pages))

        return Artifact(
            name="Wiki",
            content="\n".join(summary_lines),
            artifact_type="wiki",
            format="markdown",
            metadata={"pages": pages},
        )
|
313
|
|
|
314
|
|
|
315
|
# Register at import time so the skill is discoverable via the global registry.
register_skill(WikiGeneratorSkill())
|
316
|
|