PlanOpticon

Phase 4: Add structured knowledge output with complete templates Stories 4.1-4.4: Add 6 missing prompt templates (summary_generation, key_points_extraction, entity_extraction, relationship_extraction, diagram_analysis, mermaid_generation). Robust JSON parsing utility. KnowledgeGraph rewritten to use ProviderManager, pydantic Entity and Relationship models, with merge/from_dict support.

leo 2026-02-14 22:18 trunk
Commit 321f2f56aa2fde0d9deb82f6977857e541d668b88d70c234dc70711417d1f517
--- a/tests/test_json_parsing.py
+++ b/tests/test_json_parsing.py
@@ -0,0 +1,20 @@
1
+"""Tests for robust JSON parsiimport pytestsing from LLM responses."""
2
+
3
+from video_processor.utils.json_parsing import parse_json_from_response
4
+
5
+
6
+class TestParseJsonFromResponse:
7
+ def test_direct_dict(self):
8
+ assert parse_json_from_response('{"key": "value"}') == {"key": "value"}
9
+
10
+ def test_direct'[1, 2, 3]'n_from_response("[1, 2, 3]") == [1, 2, 3]
11
+
12
+ def test_markdown_fenced_json(self):
13
+ text = '```json\n{"key": "value"}\n```'
14
+ assert parse_json_from_response(text) == {"key": "value"}
15
+
16
+ d'```\n[1, 2]\n```'
17
+ assert parse_j[1, 2]
18
+
19
+ def test_json_embedded_in_text(self):
20
+ text = 'Here is the resul
--- a/tests/test_json_parsing.py
+++ b/tests/test_json_parsing.py
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
--- a/tests/test_json_parsing.py
+++ b/tests/test_json_parsing.py
@@ -0,0 +1,20 @@
1 """Tests for robust JSON parsiimport pytestsing from LLM responses."""
2
3 from video_processor.utils.json_parsing import parse_json_from_response
4
5
6 class TestParseJsonFromResponse:
7 def test_direct_dict(self):
8 assert parse_json_from_response('{"key": "value"}') == {"key": "value"}
9
10 def test_direct'[1, 2, 3]'n_from_response("[1, 2, 3]") == [1, 2, 3]
11
12 def test_markdown_fenced_json(self):
13 text = '```json\n{"key": "value"}\n```'
14 assert parse_json_from_response(text) == {"key": "value"}
15
16 d'```\n[1, 2]\n```'
17 assert parse_j[1, 2]
18
19 def test_json_embedded_in_text(self):
20 text = 'Here is the resul
--- a/video_processor/integrators/knowledge_graph.py
+++ b/video_processor/integrators/knowledge_graph.py
@@ -0,0 +1,36 @@
1
+"""Knowledge graph integration for organizing extracted content."""
2
+
3
+import json
4
+import logging
5
+from pathlib import Path
6
+from typing import Dict, Lvideo_processor.models import Entity, KnowledgeGraphData, Relationship
7
+from video_processor.ecord
8
+from video_processor.providers.manager import ProviderManager
9
+from video_processor.utils.json_parsing import parse_json_from_response
10
+
11
+logger = logging.getLogger(__name__)
12
+
13
+
14
+class KnowledgeGraph:
15
+ """Integrates extracted content into a structured knowledge graph."""
16
+
17
+ def __init__(
18
+ self,
19
+ pral[GraphStore] = None,
20
+ nodes: Dict[str, dict] = {}
21
+ self.rel List[dict] = []kF@QO,_@1Af,H:eid = entity.name9@1TD,O: if eid in self.nodesN@1jW,P:nodes[eid]["occurrences"]T@10G,1:{L@2SG,K: "source": sourceQ@w0,D:"timestamp": R@1J7,8@ew,7:"text":l@19l,M@w0,1:}H@2SG,1:)H@2SG,2:ifK@1C0,N@38G,w:elf.nodes[eid]["descriptions"].update(entity.descriptions)
22
+C@2Ry,4:elseN@1jW,9:nodes[eidM@Ik,B: "id": eO@1oW,J:"name": entity.nameM@w0,J:"type": entity.typeM@w0,c:"descriptions": set(entity.descri(self, text: str) -> List[Entity integration for organizing ext"""Knowledge graph intety.descritionshipU@10F,1:{L@2SG,O: (people, concepts, technologies, "
23
+N@2jW,G:"occ"organizations, time references)targetQ@w0,G:"type": rel.typeQ@w0,O:"content_source": sourceQ@w0,D:"timestamp": R@ '
24
+ B@1Ne,2:'[17@Nq,1:'D@1rU,1:'Z@Ox,7:]\n\n'
25
+C@d0,R:"Return ONLY the JSON array1x@SG,M@UV,J@2eF,5P@i8,8@1BQ,W:return entities
26
+
27
+ def extractU@Hr,j:, entities: List[Entity]) -> List[RelationshipM@I~,i:relationships between entities using LLM."""
28
+8@2FR,n:entity_names = ", ".join(e.name for e in entities)9@HE,B:prompt = (
29
+C@dl,6:"GivenM@LS,a: and entities, identify relationshipss@Ln,W:f"ENTITIES: {entity_names}\n\n"
30
+9@1mc,S: 'Return a JSON array: '
31
+ B@1Ab,z:'[{"source": "entity A", "target": "entity B", '
32
+ 'Z@Q~,7:]\n\n'
33
+C@d0,R:"Return ONLY the JSON array1b@SG,A:rels = []
34
+8@35i,M@UV,J@2eF,13@i8,4E@cJ,6:return3C@nv,b: = self.extract_entities(text)
35
+ U@r7,S:relationships(text, entitiesLJ@s4,16@1Ci,24@1EC,S@1HT,M@1GR,2:):G@1Ir,H:"text" in segmentJ@17G,X:ource = f"transcript_segment_{i}"H@2iG,U:timestamp = segment.get("startL@1Ij,g@1IC,K@2Y0,s@1J6,l@1Ju,c@1Ka,e@1L8,f@1Li,K@110,k@1Mc,J@2kl,1:[J@1nl,5: }L@2iG,A:if speakerI@17G,J@1eG,N:{speaker}_segment_{i}"
36
+H@2iG,H@1f0,F:segment["text"]2p@1Zw,7:diagramG@1I0,1dO@1cx,EhqQs;
--- a/video_processor/integrators/knowledge_graph.py
+++ b/video_processor/integrators/knowledge_graph.py
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
--- a/video_processor/integrators/knowledge_graph.py
+++ b/video_processor/integrators/knowledge_graph.py
@@ -0,0 +1,36 @@
1 """Knowledge graph integration for organizing extracted content."""
2
3 import json
4 import logging
5 from pathlib import Path
6 from typing import Dict, Lvideo_processor.models import Entity, KnowledgeGraphData, Relationship
7 from video_processor.ecord
8 from video_processor.providers.manager import ProviderManager
9 from video_processor.utils.json_parsing import parse_json_from_response
10
11 logger = logging.getLogger(__name__)
12
13
14 class KnowledgeGraph:
15 """Integrates extracted content into a structured knowledge graph."""
16
17 def __init__(
18 self,
19 pral[GraphStore] = None,
20 nodes: Dict[str, dict] = {}
21 self.rel List[dict] = []kF@QO,_@1Af,H:eid = entity.name9@1TD,O: if eid in self.nodesN@1jW,P:nodes[eid]["occurrences"]T@10G,1:{L@2SG,K: "source": sourceQ@w0,D:"timestamp": R@1J7,8@ew,7:"text":l@19l,M@w0,1:}H@2SG,1:)H@2SG,2:ifK@1C0,N@38G,w:elf.nodes[eid]["descriptions"].update(entity.descriptions)
22 C@2Ry,4:elseN@1jW,9:nodes[eidM@Ik,B: "id": eO@1oW,J:"name": entity.nameM@w0,J:"type": entity.typeM@w0,c:"descriptions": set(entity.descri(self, text: str) -> List[Entity integration for organizing ext"""Knowledge graph intety.descritionshipU@10F,1:{L@2SG,O: (people, concepts, technologies, "
23 N@2jW,G:"occ"organizations, time references)targetQ@w0,G:"type": rel.typeQ@w0,O:"content_source": sourceQ@w0,D:"timestamp": R@ '
24 B@1Ne,2:'[17@Nq,1:'D@1rU,1:'Z@Ox,7:]\n\n'
25 C@d0,R:"Return ONLY the JSON array1x@SG,M@UV,J@2eF,5P@i8,8@1BQ,W:return entities
26
27 def extractU@Hr,j:, entities: List[Entity]) -> List[RelationshipM@I~,i:relationships between entities using LLM."""
28 8@2FR,n:entity_names = ", ".join(e.name for e in entities)9@HE,B:prompt = (
29 C@dl,6:"GivenM@LS,a: and entities, identify relationshipss@Ln,W:f"ENTITIES: {entity_names}\n\n"
30 9@1mc,S: 'Return a JSON array: '
31 B@1Ab,z:'[{"source": "entity A", "target": "entity B", '
32 'Z@Q~,7:]\n\n'
33 C@d0,R:"Return ONLY the JSON array1b@SG,A:rels = []
34 8@35i,M@UV,J@2eF,13@i8,4E@cJ,6:return3C@nv,b: = self.extract_entities(text)
35 U@r7,S:relationships(text, entitiesLJ@s4,16@1Ci,24@1EC,S@1HT,M@1GR,2:):G@1Ir,H:"text" in segmentJ@17G,X:ource = f"transcript_segment_{i}"H@2iG,U:timestamp = segment.get("startL@1Ij,g@1IC,K@2Y0,s@1J6,l@1Ju,c@1Ka,e@1L8,f@1Li,K@110,k@1Mc,J@2kl,1:[J@1nl,5: }L@2iG,A:if speakerI@17G,J@1eG,N:{speaker}_segment_{i}"
36 H@2iG,H@1f0,F:segment["text"]2p@1Zw,7:diagramG@1I0,1dO@1cx,EhqQs;
--- a/video_processor/utils/json_parsing.py
+++ b/video_processor/utils/json_parsing.py
@@ -0,0 +1,58 @@
1
+"""Robust JSON extraction from LLM responses."""
2
+
3
+import json
4
+import re
5
+from typing import Optional, Union
6
+
7
+
8
+def parse_json_from_response(text: str) -> Optional[Union[list, dict]]:
9
+ """
10
+ Extract JSON from an LLM response, handling markdown fences,
11
+ explanatory text, and minor formatting issues.
12
+
13
+ Strategies tried in order:
14
+ 1. Direct parse
15
+ 2. Strip markdown fences and parse
16
+ 3. Find [...] or {...} substring and parse
17
+ 4. Return None
18
+ """
19
+ if not text or not text.strip():
20
+ return None
21
+
22
+ cleaned = text.strip()
23
+
24
+ # Strategy 1: direct parse
25
+ try:
26
+ return json.loads(cleaned)
27
+ except json.JSONDecodeError:
28
+ pass
29
+
30
+ # Strategy 2: strip markdown fences
31
+ fence_pattern = re.compile(r"```(?:json)?\s*\n?(.*?)\n?\s*```", re.DOTALL)
32
+ match = fence_pattern.search(cleaned)
33
+ if match:
34
+ try:
35
+ return json.loads(match.group(1).strip())
36
+ except json.JSONDecodeError:
37
+ pass
38
+
39
+ # Strategy 3: find JSON array or object
40
+ # Try array first (often the outermost structure for lists)
41
+ for opener, closer in [("[", "]"), ("{", "}")]:
42
+ start = cleaned.find(opener)
43
+ if start < 0:
44
+ continue
45
+ # Find matching closer (handle nesting)
46
+ depth = 0
47
+ for i in range(start, len(cleaned)):
48
+ if cleaned[i] == opener:
49
+ depth += 1
50
+ elif cleaned[i] == closer:
51
+ depth -= 1
52
+ if depth == 0:
53
+ try:
54
+ return json.loads(cleaned[start : i + 1])
55
+ except json.JSONDecodeError:
56
+ break
57
+
58
+ return None
--- a/video_processor/utils/json_parsing.py
+++ b/video_processor/utils/json_parsing.py
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
--- a/video_processor/utils/json_parsing.py
+++ b/video_processor/utils/json_parsing.py
@@ -0,0 +1,58 @@
1 """Robust JSON extraction from LLM responses."""
2
3 import json
4 import re
5 from typing import Optional, Union
6
7
def parse_json_from_response(text: str) -> Optional[Union[list, dict]]:
    """
    Extract JSON from an LLM response, handling markdown fences,
    explanatory text, and minor formatting issues.

    Strategies tried in order:
    1. Direct parse
    2. Strip markdown fences and parse
    3. Decode from the first ``[`` or ``{`` in the text
    4. Return None
    """
    if not text or not text.strip():
        return None

    cleaned = text.strip()

    # Strategy 1: direct parse
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        pass

    # Strategy 2: strip markdown fences (``` or ```json)
    fence_pattern = re.compile(r"```(?:json)?\s*\n?(.*?)\n?\s*```", re.DOTALL)
    match = fence_pattern.search(cleaned)
    if match:
        try:
            return json.loads(match.group(1).strip())
        except json.JSONDecodeError:
            pass

    # Strategy 3: decode starting at the first opener.
    # raw_decode is string-aware, so brackets inside JSON string values
    # (e.g. '["a]b", 1]') do not break extraction the way a manual
    # depth-counting scan would; it also ignores trailing prose.
    # Arrays are tried before objects, as they are the common outermost
    # structure for list-style LLM outputs.
    decoder = json.JSONDecoder()
    for opener in ("[", "{"):
        start = cleaned.find(opener)
        if start < 0:
            continue
        try:
            value, _ = decoder.raw_decode(cleaned, start)
            return value
        except json.JSONDecodeError:
            continue

    return None
--- a/video_processor/utils/prompt_templates.py
+++ b/video_processor/utils/prompt_templates.py
@@ -0,0 +1,330 @@
1
+"""Prompt templates for LLM-
2
+from pathlib import Path
3
+from string import Template
4
+from typing import Dict, Optional, Union
5
+
6
+logger = logging.getLogger(__name__)
7
+
8
+
9
+class PromptTemplate:
10
+ """Template manager for LLM prompts."""
11
+
12
+ def __init__(
13
+ self,
14
+ templates_dir: Optional[Union[str, Path]] = None,
15
+ default_templates: Optional[Dict[str, str]] = None,
16
+ ):
17
+ """
18
+ Initialize prompt template manager.
19
+
20
+ Parameters
21
+ ----------
22
+ templates_dir : str or Path, optional
23
+ Directory containing template files
24
+ default_templates : dict, optional
25
+ Default templates to use
26
+ """
27
+ self.templates_dir = Path(templates_dir) if templates_dir else None
28
+ self.templates = {}
29
+
30
+ # Load default templates
31
+ if default_templates:
32
+ self.templates.update(default_templates)
33
+
34
+ # Load templates from directory if provided
35
+ if self.templates_dir and self.templates_dir.exists():
36
+ self._load_templates_from_dir()
37
+
38
+ def _load_templates_from_dir(self) -> None:
39
+ """Load templates from template directory."""
40
+ if not self.templates_dir:
41
+ return
42
+
43
+ for template_file in self.templates_dir.glob("*.txt"):
44
+ template_name = template_file.stem
45
+ try:
46
+ with open(template_file, "r", encoding="utf-8") as f:
47
+ template_content = f.read()
48
+ self.templates[template_name] = template_content
49
+ logger.debug(f"Loaded template: {template_name}")
50
+ except Exception as e:
51
+ logger.warning(f"Error loading template {template_name}: {str(e)}")
52
+
53
+ def get_template(self, template_name: str) -> Optional[Template]:
54
+ """
55
+ Get template by name.
56
+
57
+ Parameters
58
+ ----------
59
+ template_name : str
60
+ Template name
61
+
62
+ Returns
63
+ -------
64
+ Template or None
65
+ Template object if found, None otherwise
66
+ """
67
+ if template_name not in self.templates:
68
+ logger.warning(f"Template not found: {template_name}")
69
+ return None
70
+
71
+ return Template(self.templates[template_name])
72
+
73
+ def format_prompt(self, template_name: str, **kwargs) -> Optional[str]:
74
+ """
75
+ Format prompt with provided parameters.
76
+
77
+ Parameters
78
+ ----------
79
+ template_name : str
80
+ Template name
81
+ **kwargs : dict
82
+ Template parameters
83
+
84
+ Returns
85
+ -------
86
+ str or None
87
+ Formatted prompt if template exists, None otherwise
88
+ """
89
+ template = self.get_template(template_name)
90
+ if not template:
91
+ return None
92
+
93
+ try:
94
+ return template.safe_substitute(**kwargs)
95
+ except Exception as e:
96
+ logger.error(f"Error formatting template {template_name}: {str(e)}")
97
+ return None
98
+
99
+ def add_template(self, template_name: str, template_content: str) -> None:
100
+ """
101
+ Add or update template.
102
+
103
+ Parameters
104
+ ----------
105
+ template_name : str
106
+ Template name
107
+ template_content : str
108
+ Template content
109
+ """
110
+ self.templates[template_name] = template_content
111
+
112
+ def save_template(self, template_name: str) -> bool:
113
+ """
114
+ Save template to file.
115
+
116
+ Parameters
117
+ ----------
118
+ template_name : str
119
+ Template name
120
+
121
+ Returns
122
+ -------
123
+ bool
124
+ True if successful, False otherwise
125
+ """
126
+ if not self.templates_dir:
127
+ logger.error("Templates directory not set")
128
+ return False
129
+
130
+ if template_name not in self.templates:
131
+ logger.warning(f"Template not found: {template_name}")
132
+ return False
133
+
134
+ try:
135
+ self.templates_dir.mkdir(parents=True, exist_ok=True)
136
+ template_path = self.templates_dir / f"{template_name}.txt"
137
+
138
+ with open(template_path, "w", encoding="utf-8") as f:
139
+ f.write(self.templates[template_name])
140
+
141
+ logger.debug(f"Saved template: {template_name}")
142
+ return True
143
+ except Exception as e:
144
+ logger.error(f"Error saving template {template_name}: {str(e)}")
145
+ return False
146
+
147
+
148
+# Default prompt templates
149
+DEFAULT_TEMPLATES = {
150
+ "content_analysis": """
151
+ Analyze the provided video content and extract key information:
152
+
153
+ TRANSCRIPT:
154
+ $transcript
155
+
156
+ VISUAL ELEMENTS (if available):
157
+ $visual_elements
158
+
159
+ Please extract and organize the following:
160
+ - Main topics and themes
161
+ - Key points for each topic
162
+ - Important details or facts
163
+ - Action items or follow-ups
164
+ - Relationships between concepts
165
+
166
+ Format the output as structured markdown.
167
+ """,
168
+ "diagram_extraction": """
169
+ Analyze the following image that contains a diagram, whiteboard content,
170
+ or other visual information.
171
+
172
+ Extract and convert this visual information into a structured representation.
173
+
174
+ If it's a flowchart, process diagram, or similar structured visual:
175
+ - Identify the components and their relationships
176
+ - Preserve the logical flow and structure
177
+ - Convert it to mermaid diagram syntax
178
+
179
+ If it's a whiteboard with text, bullet points, or unstructured content:
180
+ - Extract all text elements
181
+ - Preserve hierarchical organization if present
182
+ - Maintain any emphasized or highlighted elements
183
+
184
+ Image context: $image_context
185
+
186
+ Return the results as markdown with appropriate structure.
187
+ """,
188
+ "action_item_detection": """
189
+ Review the following transcript and identify all action items, commitments, or follow-up tasks.
190
+
191
+ TRANSCRIPT:
192
+ $transcript
193
+
194
+ For each action item, extract:
195
+ - The specific action to be taken
196
+ - Who is responsible (if mentioned)
197
+ - Any deadlines or timeframes
198
+ - Priority level (if indicated)
199
+ - Context or additional details
200
+
201
+ Format the results as a structured list of action items.
202
+ """,
203
+ "content_summary": """
204
+ Provide a concise summary of the following content:
205
+
206
+ $content
207
+
208
+ The summary should:
209
+ - Capture the main points and key takeaways
210
+ - Be approximately 3-5 paragraphs
211
+ - Focus on the most important information
212
+ - Maintain a neutral, objective tone
213
+
214
+ Format the summary as clear, readable text.
215
+ """,
216
+ "summary_generation": """
217
+ Generate a comprehensive summary of the following transcript content.
218
+
219
+ CONTENT:
220
+ $content
221
+
222
+ Provide a well-structured summary that:
223
+ - Captures the main topics discussed
224
+ - Highlights key decisions or conclusions
225
+ - Notes any important context or background
226
+ - Is 3-5 paragraphs long
227
+
228
+ Write in clear, professional prose.
229
+ """,
230
+ "key_points_extraction": """
231
+ Extract the key points from the following content.
232
+
233
+ CONTENT:
234
+ $content
235
+
236
+ Return a JSON array of key point objects. Each object should have:
237
+ - "point": the key point (1-2 sentences)
238
+ - "topic": category or topic area (optional)
239
+ - "details": supporting details (optional)
240
+
241
+ Example format:
242
+ [
243
+ {"point": "The system uses microservices architecture",
244
+ "topic": "Architecture", "details": "Each service handles a specific domain"},
245
+ ]
246
+
247
+ Return ONLY the JSON array, no additional text.
248
+ """,
249
+ "entity_extraction": """
250
+ Extract all notable entities (people, concepts, technologies, organizations,
251
+ time references) from the following content.
252
+ CONTENT:
253
+ $content
254
+
255
+ Return a JSON array of entity objects:
256
+ [
257
+ {"name": "entity name",
258
+ "type": "person|concept|technology|organization|time",
259
+ "description": "brief description"}
260
+
261
+ Return ONLY the JSON array, no additional text.
262
+ """,
263
+ "relationship_extraction": """
264
+ Given the following content and entities, identify relationships between them.
265
+
266
+ CONTENT:
267
+ $content
268
+
269
+ KNOWN ENTITIES:
270
+ $entities
271
+
272
+ Return a JSON array of relationship objects:
273
+ [
274
+ {"source": "entity A", "target": "entity B",
275
+ "type": "relationship type (e.g., uses, manages, depends_on, created_by, part_of)"}
276
+
277
+ Return ONLY the JSON array, no additional text.
278
+ """,
279
+ "diagram_analysis": """
280
+ Analyze the following text extracted from a diagram or visual element.
281
+
282
+ DIAGRAM TEXT:
283
+ $diagram_text
284
+
285
+ Identify:
286
+ 1. The type of diagram (flowchart, architecture, sequence, etc.)
287
+ 2. The main components and their roles
288
+ 3. The relationships between components
289
+ 4. Any data flows or process steps
290
+
291
+ Return a JSON object:
292
+ {
293
+ "diagram_type": "type",
294
+ "components": ["list of components"],
295
+ "relationships": ["component A -> component B: description"],
296
+ "summary": "brief description of what the diagram shows"
297
+ }
298
+
299
+ Return ONLY the JSON object, no additional text.
300
+ """,
301
+ "mermaid_generation": """
302
+ Convert the following diagram information into valid Mermaid diagram syntax.
303
+
304
+ Diagram Type: $diagram_type
305
+ Text Content: $text_content
306
+ Analysis: $semantic_analysis
307
+
308
+ Generate a Mermaid diagram that accurately represents the visual structure.
309
+ Use the appropriate Mermaid diagram type (graph, sequenceDiagram, classDiagram, etc.).
310
+
311
+ Return ONLY the Mermaid code, no markdown fences or explanations.
312
+ """,
313
+}
314
+
315
+# Create default prompt template manager
316
+default_prompt_manager = PromptTemplate(default_templates=DEFAULT_TEMPLATES)
317
+Any, Dict, List, Optional, Union
318
+
319
+logger = loclass PromptTemplate:
320
+ """Template manager for LLM prompts."""
321
+
322
+ def __init__(
323
+ self, def __init__(
324
+ self,
325
+ templates_dir: Optional[Union[str, Path]] = None,
326
+
327
+ ):me] = template_contenameters
328
+er.warning(f"Error loading ts_dir : str or Path, optional
329
+ Directory containing template files
330
+ defaul
--- a/video_processor/utils/prompt_templates.py
+++ b/video_processor/utils/prompt_templates.py
@@ -0,0 +1,330 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
--- a/video_processor/utils/prompt_templates.py
+++ b/video_processor/utils/prompt_templates.py
@@ -0,0 +1,330 @@
"""Prompt templates for LLM-based content analysis."""

import logging
from pathlib import Path
from string import Template
from typing import Dict, Optional, Union
5
6 logger = logging.getLogger(__name__)
7
8
class PromptTemplate:
    """Template manager for LLM prompts.

    Templates use :class:`string.Template` placeholder syntax (``$name``).
    Defaults passed to the constructor are loaded first; ``*.txt`` files in
    ``templates_dir`` (file stem = template name) override same-named defaults.
    """

    def __init__(
        self,
        templates_dir: Optional[Union[str, Path]] = None,
        default_templates: Optional[Dict[str, str]] = None,
    ):
        """
        Initialize prompt template manager.

        Parameters
        ----------
        templates_dir : str or Path, optional
            Directory containing ``*.txt`` template files
        default_templates : dict, optional
            Default templates to use
        """
        self.templates_dir: Optional[Path] = (
            Path(templates_dir) if templates_dir else None
        )
        self.templates: Dict[str, str] = {}

        # Load default templates first ...
        if default_templates:
            self.templates.update(default_templates)

        # ... so directory templates override same-named defaults.
        if self.templates_dir and self.templates_dir.exists():
            self._load_templates_from_dir()

    def _load_templates_from_dir(self) -> None:
        """Load every ``*.txt`` file from the template directory (best effort)."""
        if not self.templates_dir:
            return

        for template_file in self.templates_dir.glob("*.txt"):
            template_name = template_file.stem
            try:
                # pathlib idiom: one call replaces open/read boilerplate.
                self.templates[template_name] = template_file.read_text(
                    encoding="utf-8"
                )
                logger.debug(f"Loaded template: {template_name}")
            except Exception as e:
                # Best-effort: a single unreadable file skips only that template.
                logger.warning(f"Error loading template {template_name}: {str(e)}")

    def get_template(self, template_name: str) -> Optional[Template]:
        """
        Get template by name.

        Parameters
        ----------
        template_name : str
            Template name

        Returns
        -------
        Template or None
            Template object if found, None otherwise
        """
        if template_name not in self.templates:
            logger.warning(f"Template not found: {template_name}")
            return None

        return Template(self.templates[template_name])

    def format_prompt(self, template_name: str, **kwargs) -> Optional[str]:
        """
        Format prompt with provided parameters.

        Missing placeholders are left intact (``safe_substitute``) rather
        than raising, so partially-filled prompts are possible.

        Parameters
        ----------
        template_name : str
            Template name
        **kwargs : dict
            Template parameters

        Returns
        -------
        str or None
            Formatted prompt if template exists, None otherwise
        """
        template = self.get_template(template_name)
        if not template:
            return None

        try:
            return template.safe_substitute(**kwargs)
        except Exception as e:
            logger.error(f"Error formatting template {template_name}: {str(e)}")
            return None

    def add_template(self, template_name: str, template_content: str) -> None:
        """
        Add or update template (in memory only; see ``save_template``).

        Parameters
        ----------
        template_name : str
            Template name
        template_content : str
            Template content
        """
        self.templates[template_name] = template_content

    def save_template(self, template_name: str) -> bool:
        """
        Save template to ``<templates_dir>/<template_name>.txt``.

        Parameters
        ----------
        template_name : str
            Template name

        Returns
        -------
        bool
            True if successful, False otherwise
        """
        if not self.templates_dir:
            logger.error("Templates directory not set")
            return False

        if template_name not in self.templates:
            logger.warning(f"Template not found: {template_name}")
            return False

        try:
            self.templates_dir.mkdir(parents=True, exist_ok=True)
            template_path = self.templates_dir / f"{template_name}.txt"
            template_path.write_text(self.templates[template_name], encoding="utf-8")

            logger.debug(f"Saved template: {template_name}")
            return True
        except Exception as e:
            logger.error(f"Error saving template {template_name}: {str(e)}")
            return False
146
147
# Default prompt templates, keyed by template name. All use string.Template
# ($var) placeholder syntax. Templates that request JSON output show a
# syntactically valid example (balanced brackets, no trailing commas) so the
# model is not taught to emit malformed JSON.
DEFAULT_TEMPLATES = {
    "content_analysis": """
    Analyze the provided video content and extract key information:

    TRANSCRIPT:
    $transcript

    VISUAL ELEMENTS (if available):
    $visual_elements

    Please extract and organize the following:
    - Main topics and themes
    - Key points for each topic
    - Important details or facts
    - Action items or follow-ups
    - Relationships between concepts

    Format the output as structured markdown.
    """,
    "diagram_extraction": """
    Analyze the following image that contains a diagram, whiteboard content,
    or other visual information.

    Extract and convert this visual information into a structured representation.

    If it's a flowchart, process diagram, or similar structured visual:
    - Identify the components and their relationships
    - Preserve the logical flow and structure
    - Convert it to mermaid diagram syntax

    If it's a whiteboard with text, bullet points, or unstructured content:
    - Extract all text elements
    - Preserve hierarchical organization if present
    - Maintain any emphasized or highlighted elements

    Image context: $image_context

    Return the results as markdown with appropriate structure.
    """,
    "action_item_detection": """
    Review the following transcript and identify all action items, commitments, or follow-up tasks.

    TRANSCRIPT:
    $transcript

    For each action item, extract:
    - The specific action to be taken
    - Who is responsible (if mentioned)
    - Any deadlines or timeframes
    - Priority level (if indicated)
    - Context or additional details

    Format the results as a structured list of action items.
    """,
    "content_summary": """
    Provide a concise summary of the following content:

    $content

    The summary should:
    - Capture the main points and key takeaways
    - Be approximately 3-5 paragraphs
    - Focus on the most important information
    - Maintain a neutral, objective tone

    Format the summary as clear, readable text.
    """,
    "summary_generation": """
    Generate a comprehensive summary of the following transcript content.

    CONTENT:
    $content

    Provide a well-structured summary that:
    - Captures the main topics discussed
    - Highlights key decisions or conclusions
    - Notes any important context or background
    - Is 3-5 paragraphs long

    Write in clear, professional prose.
    """,
    "key_points_extraction": """
    Extract the key points from the following content.

    CONTENT:
    $content

    Return a JSON array of key point objects. Each object should have:
    - "point": the key point (1-2 sentences)
    - "topic": category or topic area (optional)
    - "details": supporting details (optional)

    Example format:
    [
      {"point": "The system uses microservices architecture",
       "topic": "Architecture", "details": "Each service handles a specific domain"}
    ]

    Return ONLY the JSON array, no additional text.
    """,
    "entity_extraction": """
    Extract all notable entities (people, concepts, technologies, organizations,
    time references) from the following content.
    CONTENT:
    $content

    Return a JSON array of entity objects:
    [
      {"name": "entity name",
       "type": "person|concept|technology|organization|time",
       "description": "brief description"}
    ]

    Return ONLY the JSON array, no additional text.
    """,
    "relationship_extraction": """
    Given the following content and entities, identify relationships between them.

    CONTENT:
    $content

    KNOWN ENTITIES:
    $entities

    Return a JSON array of relationship objects:
    [
      {"source": "entity A", "target": "entity B",
       "type": "relationship type (e.g., uses, manages, depends_on, created_by, part_of)"}
    ]

    Return ONLY the JSON array, no additional text.
    """,
    "diagram_analysis": """
    Analyze the following text extracted from a diagram or visual element.

    DIAGRAM TEXT:
    $diagram_text

    Identify:
    1. The type of diagram (flowchart, architecture, sequence, etc.)
    2. The main components and their roles
    3. The relationships between components
    4. Any data flows or process steps

    Return a JSON object:
    {
      "diagram_type": "type",
      "components": ["list of components"],
      "relationships": ["component A -> component B: description"],
      "summary": "brief description of what the diagram shows"
    }

    Return ONLY the JSON object, no additional text.
    """,
    "mermaid_generation": """
    Convert the following diagram information into valid Mermaid diagram syntax.

    Diagram Type: $diagram_type
    Text Content: $text_content
    Analysis: $semantic_analysis

    Generate a Mermaid diagram that accurately represents the visual structure.
    Use the appropriate Mermaid diagram type (graph, sequenceDiagram, classDiagram, etc.).

    Return ONLY the Mermaid code, no markdown fences or explanations.
    """,
}
314
# Module-level singleton preloaded with the built-in templates; importers can
# use this directly instead of constructing their own PromptTemplate instance.
default_prompt_manager = PromptTemplate(default_templates=DEFAULT_TEMPLATES)
317 Any, Dict, List, Optional, Union
318
319 logger = loclass PromptTemplate:
320 """Template manager for LLM prompts."""
321
322 def __init__(
323 self, def __init__(
324 self,
325 templates_dir: Optional[Union[str, Path]] = None,
326
327 ):me] = template_contenameters
328 er.warning(f"Error loading ts_dir : str or Path, optional
329 Directory containing template files
330 defaul

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button