| | @@ -0,0 +1,500 @@ |
| 1 | +"""Generate structured markdown documents from knowledge graphs.
|
| 2 | +
|
| 3 | +No LLM required — pure template-based generation from KG data.
|
| 4 | +Produces federated, curated notes suitable for Obsidian, Notion,
|
| 5 | +GitHub, or any markdown-based workflow.
|
| 6 | +"""
|
| 7 | +
|
| 8 | +import csv
|
| 9 | +import io
|
| 10 | +import logging
|
| 11 | +from datetime import datetime
|
| 12 | +from pathlib import Path
|
| 13 | +from typing import Dict, List, Optional
|
| 14 | +
|
| 15 | +logger = logging.getLogger(__name__)
|
| 16 | +
|
| 17 | +
|
| 18 | +def _heading(text: str, level: int = 1) -> str:
|
| 19 | + return f"{'#' * level} {text}"
|
| 20 | +
|
| 21 | +
|
| 22 | +def _table(headers: List[str], rows: List[List[str]]) -> str:
|
| 23 | + lines = ["| " + " | ".join(headers) + " |"]
|
| 24 | + lines.append("| " + " | ".join("---" for _ in headers) + " |")
|
| 25 | + for row in rows:
|
| 26 | + lines.append("| " + " | ".join(str(c) for c in row) + " |")
|
| 27 | + return "\n".join(lines)
|
| 28 | +
|
| 29 | +
|
| 30 | +def _badge(label: str, value: str) -> str:
|
| 31 | + return f"**{label}:** {value}"
|
| 32 | +
|
| 33 | +
|
| 34 | +# ---------------------------------------------------------------------------
|
| 35 | +# Individual document generators
|
| 36 | +# ---------------------------------------------------------------------------
|
| 37 | +
|
| 38 | +
|
def generate_entity_brief(entity: dict, relationships: list) -> str:
    """Generate a one-pager markdown brief for a single entity.

    Args:
        entity: Node dict; ``name``, ``type``, ``descriptions`` and
            ``occurrences`` keys are all optional.
        relationships: Relationship dicts with ``source``/``target``/``type``
            keys; entries with missing keys are tolerated.

    Returns:
        Markdown text with a summary, relationship tables and source list.
    """
    name = entity.get("name", "Untitled")
    etype = entity.get("type", "concept")
    descs = entity.get("descriptions", [])
    occs = entity.get("occurrences", [])

    # Bug fix: use .get() for every relationship key. The original mixed
    # r.get("source") with direct r["target"] / r["type"] indexing, so a
    # single malformed relationship dict aborted the whole brief with a
    # KeyError. Default type "related_to" matches the rest of the module.
    outgoing = [
        (r.get("target", ""), r.get("type", "related_to"))
        for r in relationships
        if r.get("source") == name
    ]
    incoming = [
        (r.get("source", ""), r.get("type", "related_to"))
        for r in relationships
        if r.get("target") == name
    ]

    parts = [
        _heading(name),
        "",
        _badge("Type", etype),
        "",
    ]

    if descs:
        parts.append(_heading("Summary", 2))
        parts.append("")
        for d in descs:
            parts.append(f"- {d}")
        parts.append("")

    if outgoing:
        parts.append(_heading("Relates To", 2))
        parts.append("")
        parts.append(_table(["Entity", "Relationship"], [[t, r] for t, r in outgoing]))
        parts.append("")

    if incoming:
        parts.append(_heading("Referenced By", 2))
        parts.append("")
        parts.append(_table(["Entity", "Relationship"], [[s, r] for s, r in incoming]))
        parts.append("")

    if occs:
        parts.append(_heading("Sources", 2))
        parts.append("")
        for occ in occs:
            src = occ.get("source", "unknown")
            ts = occ.get("timestamp", "")
            text = occ.get("text", "")
            line = f"- **{src}**"
            if ts:
                line += f" ({ts})"
            if text:
                line += f" — {text}"
            parts.append(line)
        parts.append("")

    return "\n".join(parts)
|
| 91 | +
|
| 92 | +
|
def generate_executive_summary(kg_data: dict) -> str:
    """Build a high-level executive summary document from the KG.

    Covers overall counts, an entity-type breakdown, the most-connected
    entities, and a relationship-type tally.
    """
    nodes = kg_data.get("nodes", [])
    rels = kg_data.get("relationships", [])

    # Bucket entities by their declared type (default: "concept").
    grouped: Dict[str, list] = {}
    for node in nodes:
        grouped.setdefault(node.get("type", "concept"), []).append(node)

    # One table row per type, largest group first; show up to 3 examples.
    type_rows = []
    for etype, members in sorted(grouped.items(), key=lambda kv: -len(kv[1])):
        examples = ", ".join(m.get("name", "") for m in members[:3])
        type_rows.append([etype, str(len(members)), examples])

    parts = [
        _heading("Executive Summary"),
        "",
        f"Knowledge base contains **{len(nodes)} entities** "
        f"and **{len(rels)} relationships** across "
        f"**{len(grouped)} categories**.",
        "",
        _heading("Entity Breakdown", 2),
        "",
        _table(["Type", "Count", "Examples"], type_rows),
        "",
    ]

    # Degree = number of relationship ends touching the entity.
    degree: Dict[str, int] = {}
    for rel in rels:
        for endpoint in (rel.get("source", ""), rel.get("target", "")):
            degree[endpoint] = degree.get(endpoint, 0) + 1

    most_connected = sorted(degree.items(), key=lambda kv: -kv[1])[:10]
    if most_connected:
        parts.append(_heading("Key Entities (by connections)", 2))
        parts.append("")
        parts.append(
            _table(
                ["Entity", "Connections"],
                [[entity, str(deg)] for entity, deg in most_connected],
            )
        )
        parts.append("")

    # Tally relationship types (default: "related_to").
    rel_counts: Dict[str, int] = {}
    for rel in rels:
        kind = rel.get("type", "related_to")
        rel_counts[kind] = rel_counts.get(kind, 0) + 1

    if rel_counts:
        parts.append(_heading("Relationship Types", 2))
        parts.append("")
        parts.append(
            _table(
                ["Type", "Count"],
                [
                    [kind, str(count)]
                    for kind, count in sorted(rel_counts.items(), key=lambda kv: -kv[1])
                ],
            )
        )
        parts.append("")

    return "\n".join(parts)
|
| 162 | +
|
| 163 | +
|
def generate_meeting_notes(kg_data: dict, title: Optional[str] = None) -> str:
    """Generate meeting-notes-style markdown from KG data.

    Args:
        kg_data: KG dict with ``nodes`` and ``relationships`` lists.
        title: Optional document title (defaults to "Meeting Notes").

    Returns:
        Markdown with discussion topics, participants, decisions, action
        items, and a list of weakly-connected "loose end" entities.
    """
    nodes = kg_data.get("nodes", [])
    rels = kg_data.get("relationships", [])
    title = title or "Meeting Notes"

    # Categorize by planning-relevant types
    decisions = [n for n in nodes if n.get("type") in ("decision", "constraint")]
    actions = [n for n in nodes if n.get("type") in ("goal", "feature", "milestone")]
    people = [n for n in nodes if n.get("type") == "person"]
    topics = [n for n in nodes if n.get("type") in ("concept", "technology", "topic")]

    parts = [
        _heading(title),
        "",
        f"*Generated {datetime.now().strftime('%Y-%m-%d %H:%M')}*",
        "",
    ]

    if topics:
        parts.append(_heading("Discussion Topics", 2))
        parts.append("")
        for t in topics:
            descs = t.get("descriptions", [])
            desc = descs[0] if descs else ""
            parts.append(f"- **{t.get('name', '')}**: {desc}")
        parts.append("")

    if people:
        parts.append(_heading("Participants", 2))
        parts.append("")
        for p in people:
            parts.append(f"- {p.get('name', '')}")
        parts.append("")

    if decisions:
        parts.append(_heading("Decisions & Constraints", 2))
        parts.append("")
        for d in decisions:
            descs = d.get("descriptions", [])
            desc = descs[0] if descs else ""
            parts.append(f"- **{d.get('name', '')}**: {desc}")
        parts.append("")

    if actions:
        parts.append(_heading("Action Items", 2))
        parts.append("")
        for a in actions:
            descs = a.get("descriptions", [])
            desc = descs[0] if descs else ""
            a_name = a.get("name", "")
            # Find assignees via outgoing assignment relationships.
            # Bug fix: .get() everywhere — the original indexed r["target"]
            # directly and could KeyError on a malformed relationship.
            owners = [
                r.get("target", "")
                for r in rels
                if r.get("source") == a_name and r.get("type") in ("assigned_to", "owned_by")
            ]
            owner_str = f" (@{', '.join(owners)})" if owners else ""
            parts.append(f"- [ ] **{a_name}**{owner_str}: {desc}")
        parts.append("")

    # Open questions (entities without many relationships)
    degree_map: Dict[str, int] = {}
    for r in rels:
        degree_map[r.get("source", "")] = degree_map.get(r.get("source", ""), 0) + 1
        degree_map[r.get("target", "")] = degree_map.get(r.get("target", ""), 0) + 1

    # Perf fix: the original used `n not in people`, an O(len(people))
    # dict-equality scan per node (O(n^2) overall). A name set gives O(1)
    # membership; comparison is now by name rather than full-dict equality.
    person_names = {p.get("name") for p in people}
    orphans = [
        n
        for n in nodes
        if degree_map.get(n.get("name", ""), 0) <= 1 and n.get("name") not in person_names
    ]
    if orphans:
        parts.append(_heading("Open Questions / Loose Ends", 2))
        parts.append("")
        for o in orphans[:10]:
            parts.append(f"- {o.get('name', '')}")
        parts.append("")

    return "\n".join(parts)
|
| 239 | +
|
| 240 | +
|
def generate_glossary(kg_data: dict) -> str:
    """Generate a glossary/dictionary of all entities, A–Z by name."""
    entries = sorted(
        kg_data.get("nodes", []),
        key=lambda entry: entry.get("name", "").lower(),
    )

    out = [_heading("Glossary"), ""]

    for entry in entries:
        name = entry.get("name", "")
        etype = entry.get("type", "concept")
        descriptions = entry.get("descriptions", [])
        first_desc = descriptions[0] if descriptions else "No description available."
        # Definition-list style: bold term, then indented description.
        out.append(f"**{name}** *({etype})*")
        out.append(f": {first_desc}")
        out.append("")

    return "\n".join(out)
|
| 260 | +
|
| 261 | +
|
def generate_relationship_map(kg_data: dict) -> str:
    """Generate a relationship map document with a Mermaid diagram.

    Relationships are listed in tables grouped by type, followed by a
    Mermaid ``graph LR`` diagram of the 20 most-connected entities.
    """
    rels = kg_data.get("relationships", [])
    nodes = kg_data.get("nodes", [])

    parts = [
        _heading("Relationship Map"),
        "",
        f"*{len(nodes)} entities, {len(rels)} relationships*",
        "",
    ]

    # Group by relationship type
    by_type: Dict[str, list] = {}
    for r in rels:
        rt = r.get("type", "related_to")
        by_type.setdefault(rt, []).append(r)

    for rt, rlist in sorted(by_type.items()):
        parts.append(_heading(rt.replace("_", " ").title(), 2))
        parts.append("")
        parts.append(
            _table(
                ["Source", "Target"],
                [[r.get("source", ""), r.get("target", "")] for r in rlist],
            )
        )
        parts.append("")

    # Mermaid diagram (top 20 nodes by degree)
    degree: Dict[str, int] = {}
    for r in rels:
        degree[r.get("source", "")] = degree.get(r.get("source", ""), 0) + 1
        degree[r.get("target", "")] = degree.get(r.get("target", ""), 0) + 1

    top_nodes = {name for name, _ in sorted(degree.items(), key=lambda x: -x[1])[:20]}

    if top_nodes:
        parts.append(_heading("Visual Map", 2))
        parts.append("")
        parts.append("```mermaid")
        parts.append("graph LR")

        def safe(s):
            # Mermaid node ids: alphanumerics/underscores only.
            return "".join(c if c.isalnum() or c == "_" else "_" for c in s)

        def label(s):
            # Bug fix: a double quote inside the ["..."] label terminates
            # it early and yields an invalid diagram; use single quotes.
            return s.replace('"', "'")

        seen = set()
        for r in rels:
            src, tgt = r.get("source", ""), r.get("target", "")
            if src in top_nodes and tgt in top_nodes:
                key = (src, tgt)
                if key not in seen:
                    # Bug fix: "|" inside the |edge label| would end the
                    # label prematurely; replace with "/".
                    edge = r.get("type", "").replace("|", "/")
                    parts.append(
                        f' {safe(src)}["{label(src)}"] -->|{edge}| {safe(tgt)}["{label(tgt)}"]'
                    )
                    seen.add(key)
        parts.append("```")
        parts.append("")

    return "\n".join(parts)
|
| 322 | +
|
| 323 | +
|
def generate_status_report(kg_data: dict, title: Optional[str] = None) -> str:
    """Generate a project status report from KG data.

    Sections: overview counts, milestones, a feature table (descriptions
    truncated to 60 chars), and risks/constraints.
    """
    nodes = kg_data.get("nodes", [])
    rels = kg_data.get("relationships", [])
    doc_title = title or "Status Report"

    def of_type(*types):
        # Select nodes whose "type" is one of the given values.
        return [n for n in nodes if n.get("type") in types]

    milestones = of_type("milestone")
    features = of_type("feature")
    risks = of_type("risk", "constraint")
    requirements = of_type("requirement")

    parts = [
        _heading(doc_title),
        "",
        f"*Generated {datetime.now().strftime('%Y-%m-%d %H:%M')}*",
        "",
        _heading("Overview", 2),
        "",
        f"- **Entities:** {len(nodes)}",
        f"- **Relationships:** {len(rels)}",
        f"- **Features:** {len(features)}",
        f"- **Milestones:** {len(milestones)}",
        f"- **Requirements:** {len(requirements)}",
        f"- **Risks/Constraints:** {len(risks)}",
        "",
    ]

    if milestones:
        parts.extend([_heading("Milestones", 2), ""])
        for m in milestones:
            descs = m.get("descriptions", [])
            parts.append(f"- **{m['name']}**: {descs[0] if descs else 'TBD'}")
        parts.append("")

    if features:
        feature_rows = [
            [f["name"], (f.get("descriptions") or [""])[0][:60]] for f in features
        ]
        parts.extend(
            [_heading("Features", 2), "", _table(["Feature", "Description"], feature_rows), ""]
        )

    if risks:
        parts.extend([_heading("Risks & Constraints", 2), ""])
        for r in risks:
            descs = r.get("descriptions", [])
            parts.append(f"- **{r['name']}**: {descs[0] if descs else ''}")
        parts.append("")

    return "\n".join(parts)
|
| 380 | +
|
| 381 | +
|
def generate_entity_index(kg_data: dict) -> str:
    """Generate a master index of all entities grouped by type."""
    nodes = kg_data.get("nodes", [])

    # Group nodes by declared type (default: "concept").
    grouped: Dict[str, list] = {}
    for node in nodes:
        grouped.setdefault(node.get("type", "concept"), []).append(node)

    parts = [
        _heading("Entity Index"),
        "",
        f"*{len(nodes)} entities across {len(grouped)} types*",
        "",
    ]

    # One section per type (alphabetical), members sorted by name.
    for etype, members in sorted(grouped.items()):
        parts.append(_heading(f"{etype.title()} ({len(members)})", 2))
        parts.append("")
        for member in sorted(members, key=lambda m: m.get("name", "")):
            descriptions = member.get("descriptions", [])
            suffix = f" — {descriptions[0]}" if descriptions else ""
            parts.append(f"- **{member['name']}**{suffix}")
        parts.append("")

    return "\n".join(parts)
|
| 408 | +
|
| 409 | +
|
def generate_csv_export(kg_data: dict) -> str:
    """Generate CSV of entities for spreadsheet import.

    Columns: Name, Type, Description (first one), Related To (outgoing
    targets, ";"-joined), Source (first occurrence's source).
    """
    nodes = kg_data.get("nodes", [])
    rels = kg_data.get("relationships", [])

    # Map each source entity to the targets it points at (outgoing only).
    outgoing: Dict[str, list] = {}
    for rel in rels:
        outgoing.setdefault(rel.get("source", ""), []).append(rel.get("target", ""))

    buffer = io.StringIO()
    writer = csv.writer(buffer)
    writer.writerow(["Name", "Type", "Description", "Related To", "Source"])

    for node in sorted(nodes, key=lambda item: item.get("name", "")):
        node_name = node.get("name", "")
        descriptions = node.get("descriptions", [])
        occurrences = node.get("occurrences", [])
        writer.writerow(
            [
                node_name,
                node.get("type", ""),
                descriptions[0] if descriptions else "",
                "; ".join(outgoing.get(node_name, [])),
                occurrences[0].get("source", "") if occurrences else "",
            ]
        )

    return buffer.getvalue()
|
| 437 | +
|
| 438 | +
|
| 439 | +# ---------------------------------------------------------------------------
|
| 440 | +# Document types registry
|
| 441 | +# ---------------------------------------------------------------------------
|
| 442 | +
|
# Registry mapping document-type key -> (human-readable label, generator).
# Each generator takes the KG dict and returns the document as a string;
# all outputs are markdown except "csv", which returns CSV text.
# generate_all() uses the keys as output filenames.
DOCUMENT_TYPES = {
    "summary": ("Executive Summary", generate_executive_summary),
    "meeting-notes": ("Meeting Notes", generate_meeting_notes),
    "glossary": ("Glossary", generate_glossary),
    "relationship-map": ("Relationship Map", generate_relationship_map),
    "status-report": ("Status Report", generate_status_report),
    "entity-index": ("Entity Index", generate_entity_index),
    "csv": ("CSV Export", generate_csv_export),
}
|
| 452 | +
|
| 453 | +
|
def generate_all(
    kg_data: dict,
    output_dir: Path,
    doc_types: Optional[List[str]] = None,
    title: Optional[str] = None,
) -> List[Path]:
    """Generate multiple document types and write to output directory.

    Args:
        kg_data: KG dict with ``nodes`` and ``relationships``.
        output_dir: Directory to create/write into.
        doc_types: Subset of DOCUMENT_TYPES keys; None means all types.
        title: Optional title forwarded to the generators that accept one
            (meeting notes and status report).

    Returns:
        List of created file paths (documents plus per-entity briefs).
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    types_to_generate = doc_types or list(DOCUMENT_TYPES.keys())
    created = []

    # Document types whose generator accepts a `title` keyword.
    titled_types = {"meeting-notes", "status-report"}

    for dtype in types_to_generate:
        if dtype not in DOCUMENT_TYPES:
            logger.warning(f"Unknown document type: {dtype}")
            continue

        label, generator = DOCUMENT_TYPES[dtype]
        try:
            # Bug fix: `title` was accepted by this function but never
            # forwarded, so the caller's title was silently ignored.
            if dtype in titled_types:
                content = generator(kg_data, title=title)
            else:
                content = generator(kg_data)
            ext = ".csv" if dtype == "csv" else ".md"
            filename = f"{dtype}{ext}"
            path = output_dir / filename
            path.write_text(content, encoding="utf-8")
            created.append(path)
            logger.info(f"Generated {label} → {path}")
        except Exception as e:
            logger.error(f"Failed to generate {label}: {e}")

    # Also generate individual entity briefs
    briefs_dir = output_dir / "entities"
    briefs_dir.mkdir(exist_ok=True)
    rels = kg_data.get("relationships", [])
    for node in kg_data.get("nodes", []):
        name = node.get("name", "")
        if not name:
            continue
        safe = name.replace("/", "-").replace("\\", "-").replace(" ", "-")
        brief = generate_entity_brief(node, rels)
        path = briefs_dir / f"{safe}.md"
        try:
            path.write_text(brief, encoding="utf-8")
        except OSError as e:
            # Robustness: one unwritable name (e.g. characters invalid on
            # this filesystem) should not abort the remaining briefs.
            logger.error(f"Failed to write brief for {name!r}: {e}")
            continue
        created.append(path)

    return created
|