PlanOpticon

Merge pull request #119 from ConflictHQ/feat/pdf-pptx-export feat(export): PDF and PPTX export formats

noreply 2026-03-08 01:02 trunk merge
Commit 54d5d79ce0bf4344c6e781eb8c38f43e6661f3a60be6c223f1ad3ff544916b72
--- pyproject.toml
+++ pyproject.toml
@@ -51,10 +51,11 @@
5151
"tenacity>=8.2.0",
5252
]
5353
5454
[project.optional-dependencies]
5555
pdf = ["pymupdf>=1.24.0"]
56
+exports = ["reportlab>=4.0", "python-pptx>=1.0"]
5657
gpu = ["torch>=2.0.0", "torchvision>=0.15.0"]
5758
gdrive = ["google-auth>=2.0.0", "google-auth-oauthlib>=1.0.0", "google-api-python-client>=2.0.0"]
5859
dropbox = ["dropbox>=12.0.0"]
5960
azure = ["openai>=1.0.0"]
6061
together = ["openai>=1.0.0"]
6162
6263
ADDED tests/test_exporters.py
--- pyproject.toml
+++ pyproject.toml
@@ -51,10 +51,11 @@
51 "tenacity>=8.2.0",
52 ]
53
54 [project.optional-dependencies]
55 pdf = ["pymupdf>=1.24.0"]
 
56 gpu = ["torch>=2.0.0", "torchvision>=0.15.0"]
57 gdrive = ["google-auth>=2.0.0", "google-auth-oauthlib>=1.0.0", "google-api-python-client>=2.0.0"]
58 dropbox = ["dropbox>=12.0.0"]
59 azure = ["openai>=1.0.0"]
60 together = ["openai>=1.0.0"]
61
62 ADDED tests/test_exporters.py
--- pyproject.toml
+++ pyproject.toml
@@ -51,10 +51,11 @@
51 "tenacity>=8.2.0",
52 ]
53
54 [project.optional-dependencies]
55 pdf = ["pymupdf>=1.24.0"]
56 exports = ["reportlab>=4.0", "python-pptx>=1.0"]
57 gpu = ["torch>=2.0.0", "torchvision>=0.15.0"]
58 gdrive = ["google-auth>=2.0.0", "google-auth-oauthlib>=1.0.0", "google-api-python-client>=2.0.0"]
59 dropbox = ["dropbox>=12.0.0"]
60 azure = ["openai>=1.0.0"]
61 together = ["openai>=1.0.0"]
62
63 ADDED tests/test_exporters.py
--- a/tests/test_exporters.py
+++ b/tests/test_exporters.py
@@ -0,0 +1,157 @@
1
+"""Tests for PDF and PPTX exporters."""
2
+
3
+import pytest
4
+
5
+from video_processor.exporters.pdf_export import generate_pdf
6
+from video_processor.exporters.pptx_export import generate_pptx
7
+
8
+
9
+def _sample_kg():
10
+ """Return a sample knowledge graph dict for testing."""
11
+ return {
12
+ "nodes": [
13
+ {"name": "Python", "type": "technology", "descriptions": ["A programming language"]},
14
+ {"name": "Django", "type": "technology", "descriptions": ["A web framework"]},
15
+ {"name": "Alice", "type": "person", "descriptions": ["Software engineer"]},
16
+ {"name": "Bob", "type": "person", "descriptions": ["Product manager"]},
17
+ {"name": "Acme Corp", "type": "organization", "descriptions": ["A tech company"]},
18
+ ],
19
+ "relationships": [
20
+ {"source": "Alice", "target": "Python", "type": "uses"},
21
+ {"source": "Alice", "target": "Bob", "type": "works_with"},
22
+ {"source": "Django", "target": "Python", "type": "built_on"},
23
+ {"source": "Alice", "target": "Acme Corp", "type": "employed_by"},
24
+ ],
25
+ }
26
+
27
+
28
+def _empty_kg():
29
+ return {"nodes": [], "relationships": []}
30
+
31
+
32
+class TestPDFExport:
33
+ @pytest.fixture(autouse=True)
34
+ def _check_reportlab(self):
35
+ pytest.importorskip("reportlab")
36
+
37
+ def test_generate_pdf(self, tmp_path):
38
+ out = tmp_path / "report.pdf"
39
+ result = generate_pdf(_sample_kg(), out, title="Test Report")
40
+ assert result == out
41
+ assert out.exists()
42
+ assert out.stat().st_size > 0
43
+
44
+ def test_generate_pdf_empty_kg(self, tmp_path):
45
+ out = tmp_path / "empty.pdf"
46
+ result = generate_pdf(_empty_kg(), out)
47
+ assert result == out
48
+ assert out.exists()
49
+
50
+ def test_generate_pdf_creates_parent_dirs(self, tmp_path):
51
+ out = tmp_path / "sub" / "dir" / "report.pdf"
52
+ result = generate_pdf(_sample_kg(), out)
53
+ assert result == out
54
+ assert out.exists()
55
+
56
+ def test_generate_pdf_default_title(self, tmp_path):
57
+ out = tmp_path / "default.pdf"
58
+ generate_pdf(_sample_kg(), out)
59
+ assert out.exists()
60
+
61
+ def test_generate_pdf_with_diagrams_dir(self, tmp_path):
62
+ diag_dir = tmp_path / "diagrams"
63
+ diag_dir.mkdir()
64
+ out = tmp_path / "report.pdf"
65
+ # No PNGs in dir — should still work
66
+ result = generate_pdf(_sample_kg(), out, diagrams_dir=diag_dir)
67
+ assert result == out
68
+
69
+ def test_generate_pdf_no_reportlab(self, tmp_path, monkeypatch):
70
+ """Verify ImportError propagates when reportlab is missing."""
71
+ import builtins
72
+
73
+ real_import = builtins.__import__
74
+
75
+ def mock_import(name, *args, **kwargs):
76
+ if name.startswith("reportlab"):
77
+ raise ImportError("No module named 'reportlab'")
78
+ return real_import(name, *args, **kwargs)
79
+
80
+ monkeypatch.setattr(builtins, "__import__", mock_import)
81
+ with pytest.raises(ImportError):
82
+ generate_pdf(_sample_kg(), tmp_path / "fail.pdf")
83
+
84
+
85
+class TestPPTXExport:
86
+ @pytest.fixture(autouse=True)
87
+ def _check_pptx(self):
88
+ pytest.importorskip("pptx")
89
+
90
+ def test_generate_pptx(self, tmp_path):
91
+ out = tmp_path / "slides.pptx"
92
+ result = generate_pptx(_sample_kg(), out, title="Test Deck")
93
+ assert result == out
94
+ assert out.exists()
95
+ assert out.stat().st_size > 0
96
+
97
+ def test_generate_pptx_empty_kg(self, tmp_path):
98
+ out = tmp_path / "empty.pptx"
99
+ result = generate_pptx(_empty_kg(), out)
100
+ assert result == out
101
+ assert out.exists()
102
+
103
+ def test_generate_pptx_creates_parent_dirs(self, tmp_path):
104
+ out = tmp_path / "sub" / "dir" / "slides.pptx"
105
+ result = generate_pptx(_sample_kg(), out)
106
+ assert result == out
107
+ assert out.exists()
108
+
109
+ def test_generate_pptx_with_diagrams_dir(self, tmp_path):
110
+ diag_dir = tmp_path / "diagrams"
111
+ diag_dir.mkdir()
112
+ out = tmp_path / "slides.pptx"
113
+ result = generate_pptx(_sample_kg(), out, diagrams_dir=diag_dir)
114
+ assert result == out
115
+
116
+ def test_pptx_slide_count(self, tmp_path):
117
+ """Verify expected number of slides are created."""
118
+ from pptx import Presentation
119
+
120
+ out = tmp_path / "count.pptx"
121
+ generate_pptx(_sample_kg(), out)
122
+ prs = Presentation(str(out))
123
+ # Title + Overview + Key Entities + Rel Types + 1 entity batch = 5
124
+ assert len(prs.slides) == 5
125
+
126
+ def test_pptx_many_entities_batched(self, tmp_path):
127
+ """Entities are batched into multiple slides when >12."""
128
+ from pptx import Presentation
129
+
130
+ kg = {
131
+ "nodes": [
132
+ {"name": f"Entity{i}", "type": "concept", "descriptions": [f"desc {i}"]}
133
+ for i in range(25)
134
+ ],
135
+ "relationships": [],
136
+ }
137
+ out = tmp_path / "many.pptx"
138
+ generate_pptx(kg, out)
139
+ prs = Presentation(str(out))
140
+ # Title + Overview + 3 entity batches (12 + 12 + 1) = 5
141
+ # No Key Entities or Rel Types slides (no relationships)
142
+ assert len(prs.slides) == 5
143
+
144
+ def test_generate_pptx_no_pptx(self, tmp_path, monkeypatch):
145
+ """Verify ImportError propagates when python-pptx is missing."""
146
+ import builtins
147
+
148
+ real_import = builtins.__import__
149
+
150
+ def mock_import(name, *args, **kwargs):
151
+ if name.startswith("pptx"):
152
+ raise ImportError("No module named 'pptx'")
153
+ return real_import(name, *args, **kwargs)
154
+
155
+ monkeypatch.setattr(builtins, "__import__", mock_import)
156
+ with pytest.raises(ImportError):
157
+ generate_pptx(_sample_kg(), tmp_path / "fail.pptx")
--- a/tests/test_exporters.py
+++ b/tests/test_exporters.py
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
--- a/tests/test_exporters.py
+++ b/tests/test_exporters.py
@@ -0,0 +1,157 @@
1 """Tests for PDF and PPTX exporters."""
2
3 import pytest
4
5 from video_processor.exporters.pdf_export import generate_pdf
6 from video_processor.exporters.pptx_export import generate_pptx
7
8
9 def _sample_kg():
10 """Return a sample knowledge graph dict for testing."""
11 return {
12 "nodes": [
13 {"name": "Python", "type": "technology", "descriptions": ["A programming language"]},
14 {"name": "Django", "type": "technology", "descriptions": ["A web framework"]},
15 {"name": "Alice", "type": "person", "descriptions": ["Software engineer"]},
16 {"name": "Bob", "type": "person", "descriptions": ["Product manager"]},
17 {"name": "Acme Corp", "type": "organization", "descriptions": ["A tech company"]},
18 ],
19 "relationships": [
20 {"source": "Alice", "target": "Python", "type": "uses"},
21 {"source": "Alice", "target": "Bob", "type": "works_with"},
22 {"source": "Django", "target": "Python", "type": "built_on"},
23 {"source": "Alice", "target": "Acme Corp", "type": "employed_by"},
24 ],
25 }
26
27
28 def _empty_kg():
29 return {"nodes": [], "relationships": []}
30
31
32 class TestPDFExport:
33 @pytest.fixture(autouse=True)
34 def _check_reportlab(self):
35 pytest.importorskip("reportlab")
36
37 def test_generate_pdf(self, tmp_path):
38 out = tmp_path / "report.pdf"
39 result = generate_pdf(_sample_kg(), out, title="Test Report")
40 assert result == out
41 assert out.exists()
42 assert out.stat().st_size > 0
43
44 def test_generate_pdf_empty_kg(self, tmp_path):
45 out = tmp_path / "empty.pdf"
46 result = generate_pdf(_empty_kg(), out)
47 assert result == out
48 assert out.exists()
49
50 def test_generate_pdf_creates_parent_dirs(self, tmp_path):
51 out = tmp_path / "sub" / "dir" / "report.pdf"
52 result = generate_pdf(_sample_kg(), out)
53 assert result == out
54 assert out.exists()
55
56 def test_generate_pdf_default_title(self, tmp_path):
57 out = tmp_path / "default.pdf"
58 generate_pdf(_sample_kg(), out)
59 assert out.exists()
60
61 def test_generate_pdf_with_diagrams_dir(self, tmp_path):
62 diag_dir = tmp_path / "diagrams"
63 diag_dir.mkdir()
64 out = tmp_path / "report.pdf"
65 # No PNGs in dir — should still work
66 result = generate_pdf(_sample_kg(), out, diagrams_dir=diag_dir)
67 assert result == out
68
69 def test_generate_pdf_no_reportlab(self, tmp_path, monkeypatch):
70 """Verify ImportError propagates when reportlab is missing."""
71 import builtins
72
73 real_import = builtins.__import__
74
75 def mock_import(name, *args, **kwargs):
76 if name.startswith("reportlab"):
77 raise ImportError("No module named 'reportlab'")
78 return real_import(name, *args, **kwargs)
79
80 monkeypatch.setattr(builtins, "__import__", mock_import)
81 with pytest.raises(ImportError):
82 generate_pdf(_sample_kg(), tmp_path / "fail.pdf")
83
84
85 class TestPPTXExport:
86 @pytest.fixture(autouse=True)
87 def _check_pptx(self):
88 pytest.importorskip("pptx")
89
90 def test_generate_pptx(self, tmp_path):
91 out = tmp_path / "slides.pptx"
92 result = generate_pptx(_sample_kg(), out, title="Test Deck")
93 assert result == out
94 assert out.exists()
95 assert out.stat().st_size > 0
96
97 def test_generate_pptx_empty_kg(self, tmp_path):
98 out = tmp_path / "empty.pptx"
99 result = generate_pptx(_empty_kg(), out)
100 assert result == out
101 assert out.exists()
102
103 def test_generate_pptx_creates_parent_dirs(self, tmp_path):
104 out = tmp_path / "sub" / "dir" / "slides.pptx"
105 result = generate_pptx(_sample_kg(), out)
106 assert result == out
107 assert out.exists()
108
109 def test_generate_pptx_with_diagrams_dir(self, tmp_path):
110 diag_dir = tmp_path / "diagrams"
111 diag_dir.mkdir()
112 out = tmp_path / "slides.pptx"
113 result = generate_pptx(_sample_kg(), out, diagrams_dir=diag_dir)
114 assert result == out
115
116 def test_pptx_slide_count(self, tmp_path):
117 """Verify expected number of slides are created."""
118 from pptx import Presentation
119
120 out = tmp_path / "count.pptx"
121 generate_pptx(_sample_kg(), out)
122 prs = Presentation(str(out))
123 # Title + Overview + Key Entities + Rel Types + 1 entity batch = 5
124 assert len(prs.slides) == 5
125
126 def test_pptx_many_entities_batched(self, tmp_path):
127 """Entities are batched into multiple slides when >12."""
128 from pptx import Presentation
129
130 kg = {
131 "nodes": [
132 {"name": f"Entity{i}", "type": "concept", "descriptions": [f"desc {i}"]}
133 for i in range(25)
134 ],
135 "relationships": [],
136 }
137 out = tmp_path / "many.pptx"
138 generate_pptx(kg, out)
139 prs = Presentation(str(out))
140 # Title + Overview + 3 entity batches (12 + 12 + 1) = 5
141 # No Key Entities or Rel Types slides (no relationships)
142 assert len(prs.slides) == 5
143
144 def test_generate_pptx_no_pptx(self, tmp_path, monkeypatch):
145 """Verify ImportError propagates when python-pptx is missing."""
146 import builtins
147
148 real_import = builtins.__import__
149
150 def mock_import(name, *args, **kwargs):
151 if name.startswith("pptx"):
152 raise ImportError("No module named 'pptx'")
153 return real_import(name, *args, **kwargs)
154
155 monkeypatch.setattr(builtins, "__import__", mock_import)
156 with pytest.raises(ImportError):
157 generate_pptx(_sample_kg(), tmp_path / "fail.pptx")
--- video_processor/cli/commands.py
+++ video_processor/cli/commands.py
@@ -1576,10 +1576,84 @@
15761576
kg_data = kg.to_dict()
15771577
created = export_to_notion_md(kg_data, out_dir)
15781578
15791579
click.echo(f"Exported Notion markdown: {len(created)} files in {out_dir}/")
15801580
1581
+
1582
+@export.command("pdf")
1583
+@click.argument("db_path", type=click.Path(exists=True))
1584
+@click.option("-o", "--output", type=click.Path(), default=None, help="Output PDF file path")
1585
+@click.option("--title", type=str, default=None, help="Report title")
1586
+@click.option(
1587
+ "--diagrams",
1588
+ type=click.Path(exists=True),
1589
+ default=None,
1590
+ help="Directory with diagram PNGs to embed",
1591
+)
1592
+def export_pdf(db_path, output, title, diagrams):
1593
+ """Generate a PDF report from a knowledge graph.
1594
+
1595
+ Requires: pip install reportlab
1596
+
1597
+ Examples:
1598
+
1599
+ planopticon export pdf knowledge_graph.db
1600
+
1601
+ planopticon export pdf kg.db -o report.pdf --title "Q1 Review"
1602
+
1603
+ planopticon export pdf kg.db --diagrams ./diagrams/
1604
+ """
1605
+ from video_processor.exporters.pdf_export import generate_pdf
1606
+ from video_processor.integrators.knowledge_graph import KnowledgeGraph
1607
+
1608
+ db_path = Path(db_path)
1609
+ out_path = Path(output) if output else Path.cwd() / "export" / "report.pdf"
1610
+ diagrams_path = Path(diagrams) if diagrams else None
1611
+
1612
+ kg = KnowledgeGraph(db_path=db_path)
1613
+ kg_data = kg.to_dict()
1614
+
1615
+ result = generate_pdf(kg_data, out_path, title=title, diagrams_dir=diagrams_path)
1616
+ click.echo(f"Generated PDF: {result}")
1617
+
1618
+
1619
+@export.command("pptx")
1620
+@click.argument("db_path", type=click.Path(exists=True))
1621
+@click.option("-o", "--output", type=click.Path(), default=None, help="Output PPTX file path")
1622
+@click.option("--title", type=str, default=None, help="Presentation title")
1623
+@click.option(
1624
+ "--diagrams",
1625
+ type=click.Path(exists=True),
1626
+ default=None,
1627
+ help="Directory with diagram PNGs to embed",
1628
+)
1629
+def export_pptx(db_path, output, title, diagrams):
1630
+ """Generate a PPTX slide deck from a knowledge graph.
1631
+
1632
+ Requires: pip install python-pptx
1633
+
1634
+ Examples:
1635
+
1636
+ planopticon export pptx knowledge_graph.db
1637
+
1638
+ planopticon export pptx kg.db -o slides.pptx --title "Architecture Overview"
1639
+
1640
+ planopticon export pptx kg.db --diagrams ./diagrams/
1641
+ """
1642
+ from video_processor.exporters.pptx_export import generate_pptx
1643
+ from video_processor.integrators.knowledge_graph import KnowledgeGraph
1644
+
1645
+ db_path = Path(db_path)
1646
+ out_path = Path(output) if output else Path.cwd() / "export" / "presentation.pptx"
1647
+ diagrams_path = Path(diagrams) if diagrams else None
1648
+
1649
+ kg = KnowledgeGraph(db_path=db_path)
1650
+ kg_data = kg.to_dict()
1651
+
1652
+ result = generate_pptx(kg_data, out_path, title=title, diagrams_dir=diagrams_path)
1653
+ click.echo(f"Generated PPTX: {result}")
1654
+
15811655
15821656
@export.command("exchange")
15831657
@click.argument("db_path", type=click.Path(exists=True))
15841658
@click.option(
15851659
"-o",
15861660
--- video_processor/cli/commands.py
+++ video_processor/cli/commands.py
@@ -1576,10 +1576,84 @@
1576 kg_data = kg.to_dict()
1577 created = export_to_notion_md(kg_data, out_dir)
1578
1579 click.echo(f"Exported Notion markdown: {len(created)} files in {out_dir}/")
1580
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1581
1582 @export.command("exchange")
1583 @click.argument("db_path", type=click.Path(exists=True))
1584 @click.option(
1585 "-o",
1586
--- video_processor/cli/commands.py
+++ video_processor/cli/commands.py
@@ -1576,10 +1576,84 @@
1576 kg_data = kg.to_dict()
1577 created = export_to_notion_md(kg_data, out_dir)
1578
1579 click.echo(f"Exported Notion markdown: {len(created)} files in {out_dir}/")
1580
1581
1582 @export.command("pdf")
1583 @click.argument("db_path", type=click.Path(exists=True))
1584 @click.option("-o", "--output", type=click.Path(), default=None, help="Output PDF file path")
1585 @click.option("--title", type=str, default=None, help="Report title")
1586 @click.option(
1587 "--diagrams",
1588 type=click.Path(exists=True),
1589 default=None,
1590 help="Directory with diagram PNGs to embed",
1591 )
1592 def export_pdf(db_path, output, title, diagrams):
1593 """Generate a PDF report from a knowledge graph.
1594
1595 Requires: pip install reportlab
1596
1597 Examples:
1598
1599 planopticon export pdf knowledge_graph.db
1600
1601 planopticon export pdf kg.db -o report.pdf --title "Q1 Review"
1602
1603 planopticon export pdf kg.db --diagrams ./diagrams/
1604 """
1605 from video_processor.exporters.pdf_export import generate_pdf
1606 from video_processor.integrators.knowledge_graph import KnowledgeGraph
1607
1608 db_path = Path(db_path)
1609 out_path = Path(output) if output else Path.cwd() / "export" / "report.pdf"
1610 diagrams_path = Path(diagrams) if diagrams else None
1611
1612 kg = KnowledgeGraph(db_path=db_path)
1613 kg_data = kg.to_dict()
1614
1615 result = generate_pdf(kg_data, out_path, title=title, diagrams_dir=diagrams_path)
1616 click.echo(f"Generated PDF: {result}")
1617
1618
1619 @export.command("pptx")
1620 @click.argument("db_path", type=click.Path(exists=True))
1621 @click.option("-o", "--output", type=click.Path(), default=None, help="Output PPTX file path")
1622 @click.option("--title", type=str, default=None, help="Presentation title")
1623 @click.option(
1624 "--diagrams",
1625 type=click.Path(exists=True),
1626 default=None,
1627 help="Directory with diagram PNGs to embed",
1628 )
1629 def export_pptx(db_path, output, title, diagrams):
1630 """Generate a PPTX slide deck from a knowledge graph.
1631
1632 Requires: pip install python-pptx
1633
1634 Examples:
1635
1636 planopticon export pptx knowledge_graph.db
1637
1638 planopticon export pptx kg.db -o slides.pptx --title "Architecture Overview"
1639
1640 planopticon export pptx kg.db --diagrams ./diagrams/
1641 """
1642 from video_processor.exporters.pptx_export import generate_pptx
1643 from video_processor.integrators.knowledge_graph import KnowledgeGraph
1644
1645 db_path = Path(db_path)
1646 out_path = Path(output) if output else Path.cwd() / "export" / "presentation.pptx"
1647 diagrams_path = Path(diagrams) if diagrams else None
1648
1649 kg = KnowledgeGraph(db_path=db_path)
1650 kg_data = kg.to_dict()
1651
1652 result = generate_pptx(kg_data, out_path, title=title, diagrams_dir=diagrams_path)
1653 click.echo(f"Generated PPTX: {result}")
1654
1655
1656 @export.command("exchange")
1657 @click.argument("db_path", type=click.Path(exists=True))
1658 @click.option(
1659 "-o",
1660
--- video_processor/exporters/__init__.py
+++ video_processor/exporters/__init__.py
@@ -1,1 +1,1 @@
1
-"""Document exporters for generating markdown, CSV, and structured notes."""
1
+"""Document exporters for generating markdown, CSV, PDF, PPTX, and structured notes."""
22
33
ADDED video_processor/exporters/pdf_export.py
44
ADDED video_processor/exporters/pptx_export.py
--- video_processor/exporters/__init__.py
+++ video_processor/exporters/__init__.py
@@ -1,1 +1,1 @@
1 """Document exporters for generating markdown, CSV, and structured notes."""
2
3 ADDED video_processor/exporters/pdf_export.py
4 ADDED video_processor/exporters/pptx_export.py
--- video_processor/exporters/__init__.py
+++ video_processor/exporters/__init__.py
@@ -1,1 +1,1 @@
1 """Document exporters for generating markdown, CSV, PDF, PPTX, and structured notes."""
2
3 ADDED video_processor/exporters/pdf_export.py
4 ADDED video_processor/exporters/pptx_export.py
--- a/video_processor/exporters/pdf_export.py
+++ b/video_processor/exporters/pdf_export.py
@@ -0,0 +1,277 @@
1
+"""Generate PDF reports from knowledge graph data.
2
+
3
+Uses reportlab for PDF generation. Falls back gracefully if not installed.
4
+No LLM required — pure template-based generation from KG data.
5
+"""
6
+
7
+import logging
8
+from datetime import datetime
9
+from pathlib import Path
10
+from typing import Any, Dict, List, Optional
11
+
12
+logger = logging.getLogger(__name__)
13
+
14
+
15
+def _get_styles():
16
+ """Import and configure reportlab styles."""
17
+ from reportlab.lib import colors
18
+ from reportlab.lib.pagesizes import letter
19
+ from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
20
+ from reportlab.lib.units import inch
21
+
22
+ styles = getSampleStyleSheet()
23
+
24
+ styles.add(
25
+ ParagraphStyle(
26
+ "KGTitle",
27
+ parent=styles["Title"],
28
+ fontSize=24,
29
+ spaceAfter=20,
30
+ textColor=colors.HexColor("#1a1a2e"),
31
+ )
32
+ )
33
+ styles.add(
34
+ ParagraphStyle(
35
+ "KGHeading2",
36
+ parent=styles["Heading2"],
37
+ fontSize=16,
38
+ spaceBefore=16,
39
+ spaceAfter=8,
40
+ textColor=colors.HexColor("#16213e"),
41
+ )
42
+ )
43
+ styles.add(
44
+ ParagraphStyle(
45
+ "KGBody",
46
+ parent=styles["Normal"],
47
+ fontSize=10,
48
+ leading=14,
49
+ spaceBefore=4,
50
+ spaceAfter=4,
51
+ )
52
+ )
53
+ styles.add(
54
+ ParagraphStyle(
55
+ "KGBullet",
56
+ parent=styles["Normal"],
57
+ fontSize=10,
58
+ leading=14,
59
+ leftIndent=20,
60
+ bulletIndent=10,
61
+ spaceBefore=2,
62
+ spaceAfter=2,
63
+ )
64
+ )
65
+
66
+ return styles, letter, inch, colors
67
+
68
+
69
+def _build_entity_table(nodes: List[dict], colors) -> Any:
70
+ """Build a table of entities grouped by type."""
71
+ from reportlab.lib.units import inch
72
+ from reportlab.platypus import Table, TableStyle
73
+
74
+ by_type: Dict[str, list] = {}
75
+ for n in nodes:
76
+ t = n.get("type", "concept")
77
+ by_type.setdefault(t, []).append(n)
78
+
79
+ data = [["Type", "Count", "Examples"]]
80
+ for etype, elist in sorted(by_type.items(), key=lambda x: -len(x[1])):
81
+ examples = ", ".join(e.get("name", "") for e in elist[:3])
82
+ if len(elist) > 3:
83
+ examples += f" (+{len(elist) - 3} more)"
84
+ data.append([etype.title(), str(len(elist)), examples])
85
+
86
+ table = Table(data, colWidths=[1.2 * inch, 0.8 * inch, 4.0 * inch])
87
+ table.setStyle(
88
+ TableStyle(
89
+ [
90
+ ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#e8eaf6")),
91
+ ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
92
+ ("FONTSIZE", (0, 0), (-1, -1), 9),
93
+ ("GRID", (0, 0), (-1, -1), 0.5, colors.grey),
94
+ ("VALIGN", (0, 0), (-1, -1), "TOP"),
95
+ ("TOPPADDING", (0, 0), (-1, -1), 4),
96
+ ("BOTTOMPADDING", (0, 0), (-1, -1), 4),
97
+ ]
98
+ )
99
+ )
100
+ return table
101
+
102
+
103
+def _build_relationship_table(rels: List[dict], colors, max_rows: int = 30) -> Any:
104
+ """Build a table of relationships."""
105
+ from reportlab.lib.units import inch
106
+ from reportlab.platypus import Table, TableStyle
107
+
108
+ data = [["Source", "Relationship", "Target"]]
109
+ for r in rels[:max_rows]:
110
+ data.append([r.get("source", ""), r.get("type", ""), r.get("target", "")])
111
+ if len(rels) > max_rows:
112
+ data.append(["...", f"({len(rels) - max_rows} more)", "..."])
113
+
114
+ table = Table(data, colWidths=[2.0 * inch, 2.0 * inch, 2.0 * inch])
115
+ table.setStyle(
116
+ TableStyle(
117
+ [
118
+ ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#e8eaf6")),
119
+ ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
120
+ ("FONTSIZE", (0, 0), (-1, -1), 9),
121
+ ("GRID", (0, 0), (-1, -1), 0.5, colors.grey),
122
+ ("VALIGN", (0, 0), (-1, -1), "TOP"),
123
+ ("TOPPADDING", (0, 0), (-1, -1), 4),
124
+ ("BOTTOMPADDING", (0, 0), (-1, -1), 4),
125
+ ]
126
+ )
127
+ )
128
+ return table
129
+
130
+
131
+def _build_key_entities_table(rels: List[dict], colors) -> Any:
132
+ """Build a table of top entities by connection count."""
133
+ from reportlab.lib.units import inch
134
+ from reportlab.platypus import Table, TableStyle
135
+
136
+ degree: Dict[str, int] = {}
137
+ for r in rels:
138
+ degree[r.get("source", "")] = degree.get(r.get("source", ""), 0) + 1
139
+ degree[r.get("target", "")] = degree.get(r.get("target", ""), 0) + 1
140
+
141
+ top = sorted(degree.items(), key=lambda x: -x[1])[:10]
142
+ if not top:
143
+ return None
144
+
145
+ data = [["Entity", "Connections"]]
146
+ for name, deg in top:
147
+ data.append([name, str(deg)])
148
+
149
+ table = Table(data, colWidths=[4.0 * inch, 1.5 * inch])
150
+ table.setStyle(
151
+ TableStyle(
152
+ [
153
+ ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#e8eaf6")),
154
+ ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
155
+ ("FONTSIZE", (0, 0), (-1, -1), 9),
156
+ ("GRID", (0, 0), (-1, -1), 0.5, colors.grey),
157
+ ("TOPPADDING", (0, 0), (-1, -1), 4),
158
+ ("BOTTOMPADDING", (0, 0), (-1, -1), 4),
159
+ ]
160
+ )
161
+ )
162
+ return table
163
+
164
+
165
+def generate_pdf(
166
+ kg_data: dict,
167
+ output_path: Path,
168
+ title: Optional[str] = None,
169
+ diagrams_dir: Optional[Path] = None,
170
+) -> Path:
171
+ """Generate a PDF report from knowledge graph data.
172
+
173
+ Args:
174
+ kg_data: Knowledge graph dict with 'nodes' and 'relationships'.
175
+ output_path: Path to write the PDF file.
176
+ title: Optional report title.
177
+ diagrams_dir: Optional directory containing diagram images to embed.
178
+
179
+ Returns:
180
+ Path to the generated PDF.
181
+
182
+ Raises:
183
+ ImportError: If reportlab is not installed.
184
+ """
185
+ from reportlab.platypus import (
186
+ Paragraph,
187
+ SimpleDocTemplate,
188
+ Spacer,
189
+ )
190
+
191
+ styles, letter, inch, colors = _get_styles()
192
+
193
+ output_path = Path(output_path)
194
+ output_path.parent.mkdir(parents=True, exist_ok=True)
195
+
196
+ doc = SimpleDocTemplate(
197
+ str(output_path),
198
+ pagesize=letter,
199
+ topMargin=0.75 * inch,
200
+ bottomMargin=0.75 * inch,
201
+ leftMargin=0.75 * inch,
202
+ rightMargin=0.75 * inch,
203
+ )
204
+
205
+ story = []
206
+ nodes = kg_data.get("nodes", [])
207
+ rels = kg_data.get("relationships", [])
208
+
209
+ # Title
210
+ report_title = title or "Knowledge Graph Report"
211
+ story.append(Paragraph(report_title, styles["KGTitle"]))
212
+ story.append(
213
+ Paragraph(
214
+ f"Generated {datetime.now().strftime('%Y-%m-%d %H:%M')} • "
215
+ f"{len(nodes)} entities • {len(rels)} relationships",
216
+ styles["KGBody"],
217
+ )
218
+ )
219
+ story.append(Spacer(1, 20))
220
+
221
+ # Entity breakdown
222
+ if nodes:
223
+ story.append(Paragraph("Entity Breakdown", styles["KGHeading2"]))
224
+ story.append(_build_entity_table(nodes, colors))
225
+ story.append(Spacer(1, 12))
226
+
227
+ # Key entities
228
+ if rels:
229
+ key_table = _build_key_entities_table(rels, colors)
230
+ if key_table:
231
+ story.append(Paragraph("Key Entities (by connections)", styles["KGHeading2"]))
232
+ story.append(key_table)
233
+ story.append(Spacer(1, 12))
234
+
235
+ # Embed diagram images
236
+ if diagrams_dir and diagrams_dir.exists():
237
+ _embed_diagrams(story, styles, diagrams_dir, inch)
238
+
239
+ # Relationship table
240
+ if rels:
241
+ story.append(Paragraph("Relationships", styles["KGHeading2"]))
242
+ story.append(_build_relationship_table(rels, colors))
243
+ story.append(Spacer(1, 12))
244
+
245
+ # Entity details
246
+ if nodes:
247
+ story.append(Paragraph("Entity Details", styles["KGHeading2"]))
248
+ for node in sorted(nodes, key=lambda n: n.get("name", "")):
249
+ name = node.get("name", "")
250
+ etype = node.get("type", "concept")
251
+ descs = node.get("descriptions", [])
252
+ desc = descs[0] if descs else "No description."
253
+ story.append(Paragraph(f"<b>{name}</b> <i>({etype})</i>: {desc}", styles["KGBullet"]))
254
+
255
+ doc.build(story)
256
+ logger.info(f"Generated PDF report: {output_path}")
257
+ return output_path
258
+
259
+
260
+def _embed_diagrams(story, styles, diagrams_dir: Path, inch):
261
+ """Embed diagram PNG images from a directory."""
262
+ from reportlab.platypus import Image, Paragraph, Spacer
263
+
264
+ pngs = sorted(diagrams_dir.glob("*.png"))
265
+ if not pngs:
266
+ return
267
+
268
+ story.append(Paragraph("Diagrams", styles["KGHeading2"]))
269
+
270
+ for png in pngs:
271
+ try:
272
+ img = Image(str(png), width=5 * inch, height=3.5 * inch)
273
+ img.hAlign = "CENTER"
274
+ story.append(img)
275
+ story.append(Spacer(1, 8))
276
+ except Exception as e:
277
+ logger.warning(f"Could not embed diagram {png.name}: {e}")
--- a/video_processor/exporters/pdf_export.py
+++ b/video_processor/exporters/pdf_export.py
@@ -0,0 +1,277 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
--- a/video_processor/exporters/pdf_export.py
+++ b/video_processor/exporters/pdf_export.py
@@ -0,0 +1,277 @@
1 """Generate PDF reports from knowledge graph data.
2
3 Uses reportlab for PDF generation. reportlab is imported lazily inside the functions, so a missing install surfaces as ImportError only when a report is generated.
4 No LLM required — pure template-based generation from KG data.
5 """
6
7 import logging
8 from datetime import datetime
9 from pathlib import Path
10 from typing import Any, Dict, List, Optional
11
12 logger = logging.getLogger(__name__)
13
14
def _get_styles():
    """Build the paragraph stylesheet used by the PDF report.

    reportlab is imported inside the function rather than at module level,
    so importing this module does not itself require reportlab.

    Returns:
        A 4-tuple ``(styles, letter, inch, colors)``: reportlab's sample
        stylesheet extended with the ``KGTitle``, ``KGHeading2``, ``KGBody``
        and ``KGBullet`` styles, followed by the ``letter`` page size, the
        ``inch`` unit and the ``colors`` module.
    """
    from reportlab.lib import colors
    from reportlab.lib.pagesizes import letter
    from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
    from reportlab.lib.units import inch

    sheet = getSampleStyleSheet()

    # (new style name, parent style name, attribute overrides)
    custom_styles = [
        (
            "KGTitle",
            "Title",
            {
                "fontSize": 24,
                "spaceAfter": 20,
                "textColor": colors.HexColor("#1a1a2e"),
            },
        ),
        (
            "KGHeading2",
            "Heading2",
            {
                "fontSize": 16,
                "spaceBefore": 16,
                "spaceAfter": 8,
                "textColor": colors.HexColor("#16213e"),
            },
        ),
        (
            "KGBody",
            "Normal",
            {
                "fontSize": 10,
                "leading": 14,
                "spaceBefore": 4,
                "spaceAfter": 4,
            },
        ),
        (
            "KGBullet",
            "Normal",
            {
                "fontSize": 10,
                "leading": 14,
                "leftIndent": 20,
                "bulletIndent": 10,
                "spaceBefore": 2,
                "spaceAfter": 2,
            },
        ),
    ]
    for style_name, parent_name, overrides in custom_styles:
        sheet.add(ParagraphStyle(style_name, parent=sheet[parent_name], **overrides))

    return sheet, letter, inch, colors
67
68
def _build_entity_table(nodes: List[dict], colors) -> Any:
    """Summarise entities per type as a reportlab table.

    Args:
        nodes: Entity dicts; ``type`` (default ``"concept"``) and ``name``
            (default ``""``) are read from each.
        colors: The ``reportlab.lib.colors`` module.

    Returns:
        A ``Table`` with a header row plus one row per entity type, ordered
        by descending entity count. Each row shows the title-cased type, the
        count, and up to three example names.
    """
    from reportlab.lib.units import inch
    from reportlab.platypus import Table, TableStyle

    grouped: Dict[str, list] = {}
    for node in nodes:
        grouped.setdefault(node.get("type", "concept"), []).append(node)

    def _summary_row(kind, members):
        shown = ", ".join(member.get("name", "") for member in members[:3])
        hidden = len(members) - 3
        if hidden > 0:
            shown += f" (+{hidden} more)"
        return [kind.title(), str(len(members)), shown]

    # reverse=True keeps the sort stable, so equally sized groups stay in
    # first-seen order.
    ranked = sorted(grouped.items(), key=lambda item: len(item[1]), reverse=True)
    rows = [["Type", "Count", "Examples"]]
    rows.extend(_summary_row(kind, members) for kind, members in ranked)

    everything = ((0, 0), (-1, -1))
    header = ((0, 0), (-1, 0))
    summary = Table(rows, colWidths=[1.2 * inch, 0.8 * inch, 4.0 * inch])
    summary.setStyle(
        TableStyle(
            [
                ("BACKGROUND", *header, colors.HexColor("#e8eaf6")),
                ("FONTNAME", *header, "Helvetica-Bold"),
                ("FONTSIZE", *everything, 9),
                ("GRID", *everything, 0.5, colors.grey),
                ("VALIGN", *everything, "TOP"),
                ("TOPPADDING", *everything, 4),
                ("BOTTOMPADDING", *everything, 4),
            ]
        )
    )
    return summary
101
102
def _build_relationship_table(rels: List[dict], colors, max_rows: int = 30) -> Any:
    """Render relationships as a three-column reportlab table.

    Args:
        rels: Relationship dicts; ``source``, ``type`` and ``target`` are
            read from each, defaulting to ``""``.
        colors: The ``reportlab.lib.colors`` module.
        max_rows: Maximum number of relationships listed. When there are
            more, a final row reports how many were left out.

    Returns:
        A ``Table`` with a header row followed by the relationship rows.
    """
    from reportlab.lib.units import inch
    from reportlab.platypus import Table, TableStyle

    rows = [["Source", "Relationship", "Target"]]
    rows.extend(
        [rel.get("source", ""), rel.get("type", ""), rel.get("target", "")]
        for rel in rels[:max_rows]
    )
    omitted = len(rels) - max_rows
    if omitted > 0:
        rows.append(["...", f"({omitted} more)", "..."])

    everything = ((0, 0), (-1, -1))
    header = ((0, 0), (-1, 0))
    listing = Table(rows, colWidths=[2.0 * inch] * 3)
    listing.setStyle(
        TableStyle(
            [
                ("BACKGROUND", *header, colors.HexColor("#e8eaf6")),
                ("FONTNAME", *header, "Helvetica-Bold"),
                ("FONTSIZE", *everything, 9),
                ("GRID", *everything, 0.5, colors.grey),
                ("VALIGN", *everything, "TOP"),
                ("TOPPADDING", *everything, 4),
                ("BOTTOMPADDING", *everything, 4),
            ]
        )
    )
    return listing
129
130
def _build_key_entities_table(rels: List[dict], colors) -> Any:
    """Build a table of the ten most connected entities.

    An entity's connection count is the number of times it appears as the
    ``source`` or ``target`` of a relationship. Endpoints that are missing
    or empty are ignored, so malformed relationships no longer produce a
    blank "entity" row.

    Args:
        rels: Relationship dicts with ``source`` and ``target`` entries.
        colors: The ``reportlab.lib.colors`` module.

    Returns:
        A ``Table`` with a header row and up to ten entity rows, or ``None``
        when no relationship names a usable endpoint.
    """
    from collections import Counter

    from reportlab.lib.units import inch
    from reportlab.platypus import Table, TableStyle

    degree: Counter = Counter()
    for rel in rels:
        for endpoint in (rel.get("source"), rel.get("target")):
            if endpoint:
                degree[endpoint] += 1

    # most_common() keeps first-seen order among equal counts, matching the
    # stable sort used previously.
    top = degree.most_common(10)
    if not top:
        return None

    data = [["Entity", "Connections"]]
    data.extend([str(name), str(count)] for name, count in top)

    table = Table(data, colWidths=[4.0 * inch, 1.5 * inch])
    table.setStyle(
        TableStyle(
            [
                ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#e8eaf6")),
                ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
                ("FONTSIZE", (0, 0), (-1, -1), 9),
                ("GRID", (0, 0), (-1, -1), 0.5, colors.grey),
                ("TOPPADDING", (0, 0), (-1, -1), 4),
                ("BOTTOMPADDING", (0, 0), (-1, -1), 4),
            ]
        )
    )
    return table
163
164
def generate_pdf(
    kg_data: dict,
    output_path: Path,
    title: Optional[str] = None,
    diagrams_dir: Optional[Path] = None,
) -> Path:
    """Generate a PDF report from knowledge graph data.

    The report contains, in order: a title block, an entity breakdown by
    type, the most connected entities, any diagram PNGs, a relationship
    table and a per-entity detail list.

    Args:
        kg_data: Knowledge graph dict with 'nodes' and 'relationships'.
            Missing or ``None`` values are treated as empty lists.
        output_path: Path to write the PDF file. Parent directories are
            created if needed.
        title: Optional report title.
        diagrams_dir: Optional directory containing diagram images to embed.
            May be given as a ``Path`` or a string.

    Returns:
        Path to the generated PDF.

    Raises:
        ImportError: If reportlab is not installed.
    """
    from xml.sax.saxutils import escape

    from reportlab.platypus import (
        Paragraph,
        SimpleDocTemplate,
        Spacer,
    )

    styles, letter, inch, colors = _get_styles()

    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    doc = SimpleDocTemplate(
        str(output_path),
        pagesize=letter,
        topMargin=0.75 * inch,
        bottomMargin=0.75 * inch,
        leftMargin=0.75 * inch,
        rightMargin=0.75 * inch,
    )

    story = []
    nodes = kg_data.get("nodes") or []
    rels = kg_data.get("relationships") or []

    # Title. Paragraph text is parsed as XML-like markup, so every piece of
    # caller- or data-supplied text is escaped before interpolation.
    report_title = title or "Knowledge Graph Report"
    story.append(Paragraph(escape(report_title), styles["KGTitle"]))
    story.append(
        Paragraph(
            f"Generated {datetime.now().strftime('%Y-%m-%d %H:%M')} &bull; "
            f"{len(nodes)} entities &bull; {len(rels)} relationships",
            styles["KGBody"],
        )
    )
    story.append(Spacer(1, 20))

    # Entity breakdown
    if nodes:
        story.append(Paragraph("Entity Breakdown", styles["KGHeading2"]))
        story.append(_build_entity_table(nodes, colors))
        story.append(Spacer(1, 12))

    # Key entities
    if rels:
        key_table = _build_key_entities_table(rels, colors)
        if key_table:
            story.append(Paragraph("Key Entities (by connections)", styles["KGHeading2"]))
            story.append(key_table)
            story.append(Spacer(1, 12))

    # Embed diagram images (accept str as well as Path)
    if diagrams_dir:
        diagrams_dir = Path(diagrams_dir)
        if diagrams_dir.is_dir():
            _embed_diagrams(story, styles, diagrams_dir, inch)

    # Relationship table
    if rels:
        story.append(Paragraph("Relationships", styles["KGHeading2"]))
        story.append(_build_relationship_table(rels, colors))
        story.append(Spacer(1, 12))

    # Entity details
    if nodes:
        story.append(Paragraph("Entity Details", styles["KGHeading2"]))
        for node in sorted(nodes, key=lambda n: str(n.get("name") or "")):
            name = escape(str(node.get("name", "")))
            etype = escape(str(node.get("type", "concept")))
            descs = node.get("descriptions") or []
            desc = escape(str(descs[0])) if descs else "No description."
            story.append(Paragraph(f"<b>{name}</b> <i>({etype})</i>: {desc}", styles["KGBullet"]))

    doc.build(story)
    logger.info(f"Generated PDF report: {output_path}")
    return output_path
258
259
def _embed_diagrams(story, styles, diagrams_dir: Path, inch):
    """Embed diagram PNG images from a directory.

    Each ``*.png`` file in ``diagrams_dir`` (in sorted order) is scaled to fit
    inside a 5in x 3.5in box while keeping its aspect ratio. Files that cannot
    be read as images are skipped with a warning. The "Diagrams" heading is
    added only when at least one image was embedded.

    Args:
        story: List of flowables to append to (modified in place).
        styles: Stylesheet providing the ``KGHeading2`` style.
        diagrams_dir: Directory to scan for PNG files.
        inch: reportlab's ``inch`` unit.
    """
    from reportlab.lib.utils import ImageReader
    from reportlab.platypus import Image, Paragraph, Spacer

    max_width = 5 * inch
    max_height = 3.5 * inch

    embedded = []
    for png in sorted(diagrams_dir.glob("*.png")):
        try:
            # Image() loads its file lazily, so read the image up front: this
            # surfaces unreadable files here instead of during doc.build()
            # and yields the pixel size needed for proportional scaling.
            pixel_width, pixel_height = ImageReader(str(png)).getSize()
            scale = min(max_width / pixel_width, max_height / pixel_height)
            img = Image(str(png), width=pixel_width * scale, height=pixel_height * scale)
            img.hAlign = "CENTER"
        except Exception as e:
            logger.warning(f"Could not embed diagram {png.name}: {e}")
            continue
        embedded.append(img)
        embedded.append(Spacer(1, 8))

    if not embedded:
        return

    story.append(Paragraph("Diagrams", styles["KGHeading2"]))
    story.extend(embedded)
--- a/video_processor/exporters/pptx_export.py
+++ b/video_processor/exporters/pptx_export.py
@@ -0,0 +1,202 @@
1
+"""Generate PPTX slide decks from knowledge graph data.
2
+
3
+Uses python-pptx for slide generation. Falls back gracefully if not installed.
4
+No LLM required — pure template-based generation from KG data.
5
+"""
6
+
7
+import logging
8
+from datetime import datetime
9
+from pathlib import Path
10
+from typing import Dict, List, Optional
11
+
12
+logger = logging.getLogger(__name__)
13
+
14
+
15
+def _add_title_slide(prs, title: str, subtitle: str):
16
+ """Add a title slide."""
17
+ from pptx.util import Pt
18
+
19
+ slide = prs.slides.add_slide(prs.slide_layouts[0])
20
+ slide.shapes.title.text = title
21
+ body = slide.placeholders[1]
22
+ body.text = subtitle
23
+ for paragraph in body.text_frame.paragraphs:
24
+ for run in paragraph.runs:
25
+ run.font.size = Pt(14)
26
+
27
+
28
+def _add_content_slide(prs, title: str, bullets: List[str]):
29
+ """Add a slide with bullet points."""
30
+ from pptx.util import Pt
31
+
32
+ slide = prs.slides.add_slide(prs.slide_layouts[1])
33
+ slide.shapes.title.text = title
34
+ body = slide.placeholders[1]
35
+ tf = body.text_frame
36
+ tf.clear()
37
+
38
+ for i, bullet in enumerate(bullets):
39
+ if i == 0:
40
+ tf.paragraphs[0].text = bullet
41
+ for run in tf.paragraphs[0].runs:
42
+ run.font.size = Pt(14)
43
+ else:
44
+ p = tf.add_paragraph()
45
+ p.text = bullet
46
+ for run in p.runs:
47
+ run.font.size = Pt(14)
48
+
49
+
50
+def _add_table_slide(prs, title: str, headers: List[str], rows: List[List[str]]):
51
+ """Add a slide with a table."""
52
+ from pptx.util import Emu, Inches, Pt
53
+
54
+ slide = prs.slides.add_slide(prs.slide_layouts[5]) # Blank layout
55
+ slide.shapes.title.text = title
56
+
57
+ num_rows = len(rows) + 1
58
+ num_cols = len(headers)
59
+
60
+ left = Inches(0.5)
61
+ top = Inches(1.5)
62
+ width = Inches(9.0)
63
+ row_height = Emu(int(Inches(0.35)))
64
+ height = row_height * num_rows
65
+
66
+ table_shape = slide.shapes.add_table(num_rows, num_cols, left, top, width, height)
67
+ table = table_shape.table
68
+
69
+ for i, header in enumerate(headers):
70
+ cell = table.cell(0, i)
71
+ cell.text = header
72
+ for paragraph in cell.text_frame.paragraphs:
73
+ paragraph.font.size = Pt(11)
74
+ paragraph.font.bold = True
75
+
76
+ for r_idx, row in enumerate(rows):
77
+ for c_idx, val in enumerate(row):
78
+ cell = table.cell(r_idx + 1, c_idx)
79
+ cell.text = str(val)
80
+ for paragraph in cell.text_frame.paragraphs:
81
+ paragraph.font.size = Pt(10)
82
+
83
+
84
+def _add_image_slide(prs, title: str, image_path: Path):
85
+ """Add a slide with an embedded image."""
86
+ from pptx.util import Inches
87
+
88
+ slide = prs.slides.add_slide(prs.slide_layouts[5]) # Blank
89
+ slide.shapes.title.text = title
90
+
91
+ left = Inches(1.0)
92
+ top = Inches(1.5)
93
+ width = Inches(8.0)
94
+ slide.shapes.add_picture(str(image_path), left, top, width=width)
95
+
96
+
97
+def generate_pptx(
98
+ kg_data: dict,
99
+ output_path: Path,
100
+ title: Optional[str] = None,
101
+ diagrams_dir: Optional[Path] = None,
102
+) -> Path:
103
+ """Generate a PPTX slide deck from knowledge graph data.
104
+
105
+ Args:
106
+ kg_data: Knowledge graph dict with 'nodes' and 'relationships'.
107
+ output_path: Path to write the PPTX file.
108
+ title: Optional presentation title.
109
+ diagrams_dir: Optional directory containing diagram images to embed.
110
+
111
+ Returns:
112
+ Path to the generated PPTX.
113
+
114
+ Raises:
115
+ ImportError: If python-pptx is not installed.
116
+ """
117
+ from pptx import Presentation
118
+
119
+ output_path = Path(output_path)
120
+ output_path.parent.mkdir(parents=True, exist_ok=True)
121
+
122
+ prs = Presentation()
123
+ nodes = kg_data.get("nodes", [])
124
+ rels = kg_data.get("relationships", [])
125
+
126
+ report_title = title or "Knowledge Graph"
127
+ now = datetime.now().strftime("%Y-%m-%d %H:%M")
128
+
129
+ # Title slide
130
+ _add_title_slide(
131
+ prs,
132
+ report_title,
133
+ f"Generated {now}\n{len(nodes)} entities \u2022 {len(rels)} relationships",
134
+ )
135
+
136
+ # Overview slide
137
+ by_type: Dict[str, list] = {}
138
+ for n in nodes:
139
+ t = n.get("type", "concept")
140
+ by_type.setdefault(t, []).append(n)
141
+
142
+ overview_bullets = [f"{len(nodes)} entities across {len(by_type)} types"]
143
+ for etype, elist in sorted(by_type.items(), key=lambda x: -len(x[1])):
144
+ examples = ", ".join(e.get("name", "") for e in elist[:3])
145
+ overview_bullets.append(f"{etype.title()} ({len(elist)}): {examples}")
146
+ _add_content_slide(prs, "Overview", overview_bullets)
147
+
148
+ # Key entities slide
149
+ degree: Dict[str, int] = {}
150
+ for r in rels:
151
+ degree[r.get("source", "")] = degree.get(r.get("source", ""), 0) + 1
152
+ degree[r.get("target", "")] = degree.get(r.get("target", ""), 0) + 1
153
+
154
+ top = sorted(degree.items(), key=lambda x: -x[1])[:10]
155
+ if top:
156
+ _add_table_slide(
157
+ prs,
158
+ "Key Entities",
159
+ ["Entity", "Connections"],
160
+ [[name, str(deg)] for name, deg in top],
161
+ )
162
+
163
+ # Diagram slides
164
+ if diagrams_dir and diagrams_dir.exists():
165
+ pngs = sorted(diagrams_dir.glob("*.png"))
166
+ for i, png in enumerate(pngs):
167
+ _add_image_slide(prs, f"Diagram {i + 1}", png)
168
+
169
+ # Relationship types slide
170
+ rel_types: Dict[str, int] = {}
171
+ for r in rels:
172
+ rt = r.get("type", "related_to")
173
+ rel_types[rt] = rel_types.get(rt, 0) + 1
174
+
175
+ if rel_types:
176
+ _add_table_slide(
177
+ prs,
178
+ "Relationship Types",
179
+ ["Type", "Count"],
180
+ [[rt, str(c)] for rt, c in sorted(rel_types.items(), key=lambda x: -x[1])],
181
+ )
182
+
183
+ # Entity detail slides (batched, max 12 per slide)
184
+ batch_size = 12
185
+ for batch_start in range(0, len(nodes), batch_size):
186
+ batch = nodes[batch_start : batch_start + batch_size]
187
+ bullets = []
188
+ for node in batch:
189
+ name = node.get("name", "")
190
+ etype = node.get("type", "concept")
191
+ descs = node.get("descriptions", [])
192
+ desc = descs[0][:80] if descs else ""
193
+ bullets.append(f"{name} ({etype}): {desc}")
194
+
195
+ slide_num = batch_start // batch_size + 1
196
+ total_pages = (len(nodes) + batch_size - 1) // batch_size
197
+ page_label = f" ({slide_num}/{total_pages})" if total_pages > 1 else ""
198
+ _add_content_slide(prs, f"Entities{page_label}", bullets)
199
+
200
+ prs.save(str(output_path))
201
+ logger.info(f"Generated PPTX: {output_path}")
202
+ return output_path
--- a/video_processor/exporters/pptx_export.py
+++ b/video_processor/exporters/pptx_export.py
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
--- a/video_processor/exporters/pptx_export.py
+++ b/video_processor/exporters/pptx_export.py
@@ -0,0 +1,202 @@
1 """Generate PPTX slide decks from knowledge graph data.
2
3 Uses python-pptx for slide generation. python-pptx is imported lazily inside the functions, so a missing install surfaces as ImportError only when a deck is generated.
4 No LLM required — pure template-based generation from KG data.
5 """
6
7 import logging
8 from datetime import datetime
9 from pathlib import Path
10 from typing import Dict, List, Optional
11
12 logger = logging.getLogger(__name__)
13
14
def _add_title_slide(prs, title: str, subtitle: str):
    """Append a title slide to the presentation.

    Args:
        prs: Presentation to add the slide to; its layout 0 is used.
        title: Text for the slide's title placeholder.
        subtitle: Text for placeholder 1; every run is set to 14pt.
    """
    from pptx.util import Pt

    slide = prs.slides.add_slide(prs.slide_layouts[0])
    slide.shapes.title.text = title

    subtitle_box = slide.placeholders[1]
    subtitle_box.text = subtitle

    subtitle_runs = (
        run for para in subtitle_box.text_frame.paragraphs for run in para.runs
    )
    for run in subtitle_runs:
        run.font.size = Pt(14)
26
27
def _add_content_slide(prs, title: str, bullets: List[str]):
    """Append a slide whose body lists one paragraph per bullet.

    Args:
        prs: Presentation to add the slide to; its layout 1 is used.
        title: Text for the slide's title placeholder.
        bullets: Paragraph texts, in order. Every run is set to 14pt.
    """
    from pptx.util import Pt

    slide = prs.slides.add_slide(prs.slide_layouts[1])
    slide.shapes.title.text = title

    frame = slide.placeholders[1].text_frame
    frame.clear()

    for position, text in enumerate(bullets):
        # The first bullet goes into the frame's existing first paragraph;
        # later bullets each get a new paragraph.
        para = frame.paragraphs[0] if position == 0 else frame.add_paragraph()
        para.text = text
        for run in para.runs:
            run.font.size = Pt(14)
48
49
def _add_table_slide(prs, title: str, headers: List[str], rows: List[List[str]]):
    """Append a slide holding a titled table.

    Args:
        prs: Presentation to add the slide to; its layout 5 is used.
        title: Text for the slide's title placeholder.
        headers: Column headings; their count fixes the number of columns.
            Rendered bold at 11pt.
        rows: Table body, one list of cell values per row. Values are
            converted with ``str()`` and rendered at 10pt.
    """
    from pptx.util import Inches, Pt

    # NOTE(review): the original comment called layout 5 "Blank", but the
    # code sets a title on it, so the layout must provide a title placeholder.
    slide = prs.slides.add_slide(prs.slide_layouts[5])
    slide.shapes.title.text = title

    row_count = len(rows) + 1  # +1 for the header row
    col_count = len(headers)

    # 9in wide, 0.35in per row, placed 0.5in from the left, 1.5in from the top.
    table = slide.shapes.add_table(
        row_count,
        col_count,
        Inches(0.5),
        Inches(1.5),
        Inches(9.0),
        Inches(0.35) * row_count,
    ).table

    for col, heading in enumerate(headers):
        header_cell = table.cell(0, col)
        header_cell.text = heading
        for para in header_cell.text_frame.paragraphs:
            para.font.size = Pt(11)
            para.font.bold = True

    for row_number, values in enumerate(rows, start=1):
        for col, value in enumerate(values):
            body_cell = table.cell(row_number, col)
            body_cell.text = str(value)
            for para in body_cell.text_frame.paragraphs:
                para.font.size = Pt(10)
82
83
def _add_image_slide(prs, title: str, image_path: Path):
    """Add a slide with an embedded image.

    The picture is inserted 8in wide, 1in from the left and 1.5in from the
    top, with its height following the image's aspect ratio. If that would
    push it past the bottom of the slide, it is shrunk proportionally to fit
    (leaving a 0.25in margin) and re-centred horizontally.

    Args:
        prs: Presentation to add the slide to; its layout 5 is used and must
            provide a title placeholder.
        title: Text for the slide's title placeholder.
        image_path: Path of the image file to embed.
    """
    from pptx.util import Inches

    slide = prs.slides.add_slide(prs.slide_layouts[5])
    slide.shapes.title.text = title

    left = Inches(1.0)
    top = Inches(1.5)
    width = Inches(8.0)
    picture = slide.shapes.add_picture(str(image_path), left, top, width=width)

    slide_width = prs.slide_width
    slide_height = prs.slide_height
    if slide_width is None or slide_height is None:
        return  # slide size unknown; keep the default placement

    max_height = slide_height - top - Inches(0.25)
    if max_height > 0 and picture.height > max_height:
        shrink = max_height / picture.height
        # Derive the new width before the height is overwritten.
        picture.width = int(picture.width * shrink)
        picture.height = int(max_height)
        picture.left = int((slide_width - picture.width) / 2)
95
96
def generate_pptx(
    kg_data: dict,
    output_path: Path,
    title: Optional[str] = None,
    diagrams_dir: Optional[Path] = None,
) -> Path:
    """Generate a PPTX slide deck from knowledge graph data.

    Slides are produced in this order: title, overview by entity type, key
    entities (top ten by connection count), one slide per diagram PNG,
    relationship type counts, then entity details in batches of twelve.

    Args:
        kg_data: Knowledge graph dict with 'nodes' and 'relationships'.
            Missing or ``None`` values are treated as empty lists.
        output_path: Path to write the PPTX file. Parent directories are
            created if needed.
        title: Optional presentation title.
        diagrams_dir: Optional directory containing diagram images to embed.
            May be given as a ``Path`` or a string.

    Returns:
        Path to the generated PPTX.

    Raises:
        ImportError: If python-pptx is not installed.
    """
    from collections import Counter

    from pptx import Presentation

    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    prs = Presentation()
    nodes = kg_data.get("nodes") or []
    rels = kg_data.get("relationships") or []

    report_title = title or "Knowledge Graph"
    now = datetime.now().strftime("%Y-%m-%d %H:%M")

    # Title slide
    _add_title_slide(
        prs,
        report_title,
        f"Generated {now}\n{len(nodes)} entities \u2022 {len(rels)} relationships",
    )

    # Overview slide
    by_type: Dict[str, list] = {}
    for n in nodes:
        by_type.setdefault(n.get("type", "concept"), []).append(n)

    overview_bullets = [f"{len(nodes)} entities across {len(by_type)} types"]
    for etype, elist in sorted(by_type.items(), key=lambda x: -len(x[1])):
        examples = ", ".join(e.get("name", "") for e in elist[:3])
        overview_bullets.append(f"{etype.title()} ({len(elist)}): {examples}")
    _add_content_slide(prs, "Overview", overview_bullets)

    # Key entities slide. Missing or empty endpoints are skipped so that
    # malformed relationships do not show up as a blank entity.
    degree: Counter = Counter()
    for r in rels:
        for endpoint in (r.get("source"), r.get("target")):
            if endpoint:
                degree[endpoint] += 1

    # most_common() keeps first-seen order among equal counts, like the
    # stable sort used previously.
    top = degree.most_common(10)
    if top:
        _add_table_slide(
            prs,
            "Key Entities",
            ["Entity", "Connections"],
            [[str(name), str(deg)] for name, deg in top],
        )

    # Diagram slides. Best effort, like the PDF exporter: an unreadable image
    # is logged and skipped instead of aborting the whole deck. A slide whose
    # picture failed to load may remain in the deck with only its title.
    if diagrams_dir:
        diagrams_dir = Path(diagrams_dir)
        if diagrams_dir.is_dir():
            for i, png in enumerate(sorted(diagrams_dir.glob("*.png"))):
                try:
                    _add_image_slide(prs, f"Diagram {i + 1}", png)
                except Exception as e:
                    logger.warning(f"Could not embed diagram {png.name}: {e}")

    # Relationship types slide
    rel_types = Counter(r.get("type", "related_to") for r in rels)
    if rel_types:
        _add_table_slide(
            prs,
            "Relationship Types",
            ["Type", "Count"],
            [[rt, str(c)] for rt, c in rel_types.most_common()],
        )

    # Entity detail slides (batched, max 12 per slide)
    batch_size = 12
    total_pages = (len(nodes) + batch_size - 1) // batch_size
    for batch_start in range(0, len(nodes), batch_size):
        bullets = []
        for node in nodes[batch_start : batch_start + batch_size]:
            name = node.get("name", "")
            etype = node.get("type", "concept")
            descs = node.get("descriptions") or []
            desc = str(descs[0])[:80] if descs else ""
            bullets.append(f"{name} ({etype}): {desc}")

        slide_num = batch_start // batch_size + 1
        page_label = f" ({slide_num}/{total_pages})" if total_pages > 1 else ""
        _add_content_slide(prs, f"Entities{page_label}", bullets)

    prs.save(str(output_path))
    logger.info(f"Generated PPTX: {output_path}")
    return output_path
--- video_processor/models.py
+++ video_processor/models.py
@@ -35,10 +35,11 @@
3535
3636
markdown = "markdown"
3737
json = "json"
3838
html = "html"
3939
pdf = "pdf"
40
+ pptx = "pptx"
4041
svg = "svg"
4142
png = "png"
4243
4344
4445
class TranscriptSegment(BaseModel):
4546
--- video_processor/models.py
+++ video_processor/models.py
@@ -35,10 +35,11 @@
35
36 markdown = "markdown"
37 json = "json"
38 html = "html"
39 pdf = "pdf"
 
40 svg = "svg"
41 png = "png"
42
43
44 class TranscriptSegment(BaseModel):
45
--- video_processor/models.py
+++ video_processor/models.py
@@ -35,10 +35,11 @@
35
36 markdown = "markdown"
37 json = "json"
38 html = "html"
39 pdf = "pdf"
40 pptx = "pptx"
41 svg = "svg"
42 png = "png"
43
44
45 class TranscriptSegment(BaseModel):
46

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button