PlanOpticon

Phase 4: Add structured knowledge output with complete templates Stories 4.1-4.4: Add 6 missing prompt templates (summary_generation, key_points_extraction, entity_extraction, relationship_extraction, diagram_analysis, mermaid_generation). Robust JSON parsing utility. KnowledgeGraph rewritten to use ProviderManager, pydantic Entity and Relationship models, with merge/from_dict support.

leo 2026-02-14 22:18 trunk
Commit 321f2f56aa2fde0d9deb82f6977857e541d668b88d70c234dc70711417d1f517
--- a/tests/test_json_parsing.py
+++ b/tests/test_json_parsing.py
@@ -0,0 +1,20 @@
1
+"""Tests for robust JSON parsiimport pytestsing from LLM responses."""
2
+
3
+from video_processor.utils.json_parsing import parse_json_from_response
4
+
5
+
6
+class TestParseJsonFromResponse:
7
+ def test_direct_dict(self):
8
+ assert parse_json_from_response('{"key": "value"}') == {"key": "value"}
9
+
10
+ def test_direct'[1, 2, 3]'n_from_response("[1, 2, 3]") == [1, 2, 3]
11
+
12
+ def test_markdown_fenced_json(self):
13
+ text = '```json\n{"key": "value"}\n```'
14
+ assert parse_json_from_response(text) == {"key": "value"}
15
+
16
+ d'```\n[1, 2]\n```'
17
+ assert parse_j[1, 2]
18
+
19
+ def test_json_embedded_in_text(self):
20
+ text = 'Here is the resul
--- a/tests/test_json_parsing.py
+++ b/tests/test_json_parsing.py
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
--- a/tests/test_json_parsing.py
+++ b/tests/test_json_parsing.py
@@ -0,0 +1,20 @@
1 """Tests for robust JSON parsiimport pytestsing from LLM responses."""
2
3 from video_processor.utils.json_parsing import parse_json_from_response
4
5
6 class TestParseJsonFromResponse:
7 def test_direct_dict(self):
8 assert parse_json_from_response('{"key": "value"}') == {"key": "value"}
9
10 def test_direct'[1, 2, 3]'n_from_response("[1, 2, 3]") == [1, 2, 3]
11
12 def test_markdown_fenced_json(self):
13 text = '```json\n{"key": "value"}\n```'
14 assert parse_json_from_response(text) == {"key": "value"}
15
16 d'```\n[1, 2]\n```'
17 assert parse_j[1, 2]
18
19 def test_json_embedded_in_text(self):
20 text = 'Here is the resul
--- a/video_processor/integrators/knowledge_graph.py
+++ b/video_processor/integrators/knowledge_graph.py
@@ -0,0 +1,36 @@
1
+"""Knowledge graph integration for organizing extracted content."""
2
+
3
+import json
4
+import logging
5
+from pathlib import Path
6
+from typing import Dict, Lvideo_processor.models import Entity, KnowledgeGraphData, Relationship
7
+from video_processor.ecord
8
+from video_processor.providers.manager import ProviderManager
9
+from video_processor.utils.json_parsing import parse_json_from_response
10
+
11
+logger = logging.getLogger(__name__)
12
+
13
+
14
+class KnowledgeGraph:
15
+ """Integrates extracted content into a structured knowledge graph."""
16
+
17
+ def __init__(
18
+ self,
19
+ pral[GraphStore] = None,
20
+ nodes: Dict[str, dict] = {}
21
+ self.rel List[dict] = []kF@QO,_@1Af,H:eid = entity.name9@1TD,O: if eid in self.nodesN@1jW,P:nodes[eid]["occurrences"]T@10G,1:{L@2SG,K: "source": sourceQ@w0,D:"timestamp": R@1J7,8@ew,7:"text":l@19l,M@w0,1:}H@2SG,1:)H@2SG,2:ifK@1C0,N@38G,w:elf.nodes[eid]["descriptions"].update(entity.descriptions)
22
+C@2Ry,4:elseN@1jW,9:nodes[eidM@Ik,B: "id": eO@1oW,J:"name": entity.nameM@w0,J:"type": entity.typeM@w0,c:"descriptions": set(entity.descri(self, text: str) -> List[Entity integration for organizing ext"""Knowledge graph intety.descritionshipU@10F,1:{L@2SG,O: (people, concepts, technologies, "
23
+N@2jW,G:"occ"organizations, time references)targetQ@w0,G:"type": rel.typeQ@w0,O:"content_source": sourceQ@w0,D:"timestamp": R@ '
24
+ B@1Ne,2:'[17@Nq,1:'D@1rU,1:'Z@Ox,7:]\n\n'
25
+C@d0,R:"Return ONLY the JSON array1x@SG,M@UV,J@2eF,5P@i8,8@1BQ,W:return entities
26
+
27
+ def extractU@Hr,j:, entities: List[Entity]) -> List[RelationshipM@I~,i:relationships between entities using LLM."""
28
+8@2FR,n:entity_names = ", ".join(e.name for e in entities)9@HE,B:prompt = (
29
+C@dl,6:"GivenM@LS,a: and entities, identify relationshipss@Ln,W:f"ENTITIES: {entity_names}\n\n"
30
+9@1mc,S: 'Return a JSON array: '
31
+ B@1Ab,z:'[{"source": "entity A", "target": "entity B", '
32
+ 'Z@Q~,7:]\n\n'
33
+C@d0,R:"Return ONLY the JSON array1b@SG,A:rels = []
34
+8@35i,M@UV,J@2eF,13@i8,4E@cJ,6:return3C@nv,b: = self.extract_entities(text)
35
+ U@r7,S:relationships(text, entitiesLJ@s4,16@1Ci,24@1EC,S@1HT,M@1GR,2:):G@1Ir,H:"text" in segmentJ@17G,X:ource = f"transcript_segment_{i}"H@2iG,U:timestamp = segment.get("startL@1Ij,g@1IC,K@2Y0,s@1J6,l@1Ju,c@1Ka,e@1L8,f@1Li,K@110,k@1Mc,J@2kl,1:[J@1nl,5: }L@2iG,A:if speakerI@17G,J@1eG,N:{speaker}_segment_{i}"
36
+H@2iG,H@1f0,F:segment["text"]2p@1Zw,7:diagramG@1I0,1dO@1cx,EhqQs;
--- a/video_processor/integrators/knowledge_graph.py
+++ b/video_processor/integrators/knowledge_graph.py
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
--- a/video_processor/integrators/knowledge_graph.py
+++ b/video_processor/integrators/knowledge_graph.py
@@ -0,0 +1,36 @@
1 """Knowledge graph integration for organizing extracted content."""
2
3 import json
4 import logging
5 from pathlib import Path
6 from typing import Dict, Lvideo_processor.models import Entity, KnowledgeGraphData, Relationship
7 from video_processor.ecord
8 from video_processor.providers.manager import ProviderManager
9 from video_processor.utils.json_parsing import parse_json_from_response
10
11 logger = logging.getLogger(__name__)
12
13
14 class KnowledgeGraph:
15 """Integrates extracted content into a structured knowledge graph."""
16
17 def __init__(
18 self,
19 pral[GraphStore] = None,
20 nodes: Dict[str, dict] = {}
21 self.rel List[dict] = []kF@QO,_@1Af,H:eid = entity.name9@1TD,O: if eid in self.nodesN@1jW,P:nodes[eid]["occurrences"]T@10G,1:{L@2SG,K: "source": sourceQ@w0,D:"timestamp": R@1J7,8@ew,7:"text":l@19l,M@w0,1:}H@2SG,1:)H@2SG,2:ifK@1C0,N@38G,w:elf.nodes[eid]["descriptions"].update(entity.descriptions)
22 C@2Ry,4:elseN@1jW,9:nodes[eidM@Ik,B: "id": eO@1oW,J:"name": entity.nameM@w0,J:"type": entity.typeM@w0,c:"descriptions": set(entity.descri(self, text: str) -> List[Entity integration for organizing ext"""Knowledge graph intety.descritionshipU@10F,1:{L@2SG,O: (people, concepts, technologies, "
23 N@2jW,G:"occ"organizations, time references)targetQ@w0,G:"type": rel.typeQ@w0,O:"content_source": sourceQ@w0,D:"timestamp": R@ '
24 B@1Ne,2:'[17@Nq,1:'D@1rU,1:'Z@Ox,7:]\n\n'
25 C@d0,R:"Return ONLY the JSON array1x@SG,M@UV,J@2eF,5P@i8,8@1BQ,W:return entities
26
27 def extractU@Hr,j:, entities: List[Entity]) -> List[RelationshipM@I~,i:relationships between entities using LLM."""
28 8@2FR,n:entity_names = ", ".join(e.name for e in entities)9@HE,B:prompt = (
29 C@dl,6:"GivenM@LS,a: and entities, identify relationshipss@Ln,W:f"ENTITIES: {entity_names}\n\n"
30 9@1mc,S: 'Return a JSON array: '
31 B@1Ab,z:'[{"source": "entity A", "target": "entity B", '
32 'Z@Q~,7:]\n\n'
33 C@d0,R:"Return ONLY the JSON array1b@SG,A:rels = []
34 8@35i,M@UV,J@2eF,13@i8,4E@cJ,6:return3C@nv,b: = self.extract_entities(text)
35 U@r7,S:relationships(text, entitiesLJ@s4,16@1Ci,24@1EC,S@1HT,M@1GR,2:):G@1Ir,H:"text" in segmentJ@17G,X:ource = f"transcript_segment_{i}"H@2iG,U:timestamp = segment.get("startL@1Ij,g@1IC,K@2Y0,s@1J6,l@1Ju,c@1Ka,e@1L8,f@1Li,K@110,k@1Mc,J@2kl,1:[J@1nl,5: }L@2iG,A:if speakerI@17G,J@1eG,N:{speaker}_segment_{i}"
36 H@2iG,H@1f0,F:segment["text"]2p@1Zw,7:diagramG@1I0,1dO@1cx,EhqQs;
--- a/video_processor/utils/json_parsing.py
+++ b/video_processor/utils/json_parsing.py
@@ -0,0 +1,58 @@
1
+"""Robust JSON extraction from LLM responses."""
2
+
3
+import json
4
+import re
5
+from typing import Optional, Union
6
+
7
+
8
+def parse_json_from_response(text: str) -> Optional[Union[list, dict]]:
9
+ """
10
+ Extract JSON from an LLM response, handling markdown fences,
11
+ explanatory text, and minor formatting issues.
12
+
13
+ Strategies tried in order:
14
+ 1. Direct parse
15
+ 2. Strip markdown fences and parse
16
+ 3. Find [...] or {...} substring and parse
17
+ 4. Return None
18
+ """
19
+ if not text or not text.strip():
20
+ return None
21
+
22
+ cleaned = text.strip()
23
+
24
+ # Strategy 1: direct parse
25
+ try:
26
+ return json.loads(cleaned)
27
+ except json.JSONDecodeError:
28
+ pass
29
+
30
+ # Strategy 2: strip markdown fences
31
+ fence_pattern = re.compile(r"```(?:json)?\s*\n?(.*?)\n?\s*```", re.DOTALL)
32
+ match = fence_pattern.search(cleaned)
33
+ if match:
34
+ try:
35
+ return json.loads(match.group(1).strip())
36
+ except json.JSONDecodeError:
37
+ pass
38
+
39
+ # Strategy 3: find JSON array or object
40
+ # Try array first (often the outermost structure for lists)
41
+ for opener, closer in [("[", "]"), ("{", "}")]:
42
+ start = cleaned.find(opener)
43
+ if start < 0:
44
+ continue
45
+ # Find matching closer (handle nesting)
46
+ depth = 0
47
+ for i in range(start, len(cleaned)):
48
+ if cleaned[i] == opener:
49
+ depth += 1
50
+ elif cleaned[i] == closer:
51
+ depth -= 1
52
+ if depth == 0:
53
+ try:
54
+ return json.loads(cleaned[start : i + 1])
55
+ except json.JSONDecodeError:
56
+ break
57
+
58
+ return None
--- a/video_processor/utils/json_parsing.py
+++ b/video_processor/utils/json_parsing.py
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
--- a/video_processor/utils/json_parsing.py
+++ b/video_processor/utils/json_parsing.py
@@ -0,0 +1,58 @@
1 """Robust JSON extraction from LLM responses."""
2
3 import json
4 import re
5 from typing import Optional, Union
6
7
def parse_json_from_response(text: str) -> Optional[Union[list, dict]]:
    """
    Extract JSON from an LLM response, handling markdown fences,
    explanatory text, and minor formatting issues.

    Strategies tried in order:
    1. Direct parse
    2. Strip markdown fences and parse
    3. Decode from the first ``[`` or ``{`` in the text
    4. Return None
    """
    if not text or not text.strip():
        return None

    cleaned = text.strip()

    # Strategy 1: direct parse
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        pass

    # Strategy 2: strip markdown fences (``` or ```json)
    fence_pattern = re.compile(r"```(?:json)?\s*\n?(.*?)\n?\s*```", re.DOTALL)
    match = fence_pattern.search(cleaned)
    if match:
        try:
            return json.loads(match.group(1).strip())
        except json.JSONDecodeError:
            pass

    # Strategy 3: decode starting at the first opener.
    # raw_decode is string-aware, so brackets inside JSON string values
    # (e.g. '["a]b", 1]') do not break extraction the way a manual
    # depth-counting scan would; it also ignores trailing prose.
    # Arrays are tried before objects, as they are the common outermost
    # structure for list-style LLM outputs.
    decoder = json.JSONDecoder()
    for opener in ("[", "{"):
        start = cleaned.find(opener)
        if start < 0:
            continue
        try:
            value, _ = decoder.raw_decode(cleaned, start)
            return value
        except json.JSONDecodeError:
            continue

    return None
--- a/video_processor/utils/prompt_templates.py
+++ b/video_processor/utils/prompt_templates.py
@@ -0,0 +1,330 @@
1
+"""Prompt templates for LLM-
2
+from pathlib import Path
3
+from string import Template
4
+from typing import Dict, Optional, Union
5
+
6
+logger = logging.getLogger(__name__)
7
+
8
+
9
+class PromptTemplate:
10
+ """Template manager for LLM prompts."""
11
+
12
+ def __init__(
13
+ self,
14
+ templates_dir: Optional[Union[str, Path]] = None,
15
+ default_templates: Optional[Dict[str, str]] = None,
16
+ ):
17
+ """
18
+ Initialize prompt template manager.
19
+
20
+ Parameters
21
+ ----------
22
+ templates_dir : str or Path, optional
23
+ Directory containing template files
24
+ default_templates : dict, optional
25
+ Default templates to use
26
+ """
27
+ self.templates_dir = Path(templates_dir) if templates_dir else None
28
+ self.templates = {}
29
+
30
+ # Load default templates
31
+ if default_templates:
32
+ self.templates.update(default_templates)
33
+
34
+ # Load templates from directory if provided
35
+ if self.templates_dir and self.templates_dir.exists():
36
+ self._load_templates_from_dir()
37
+
38
+ def _load_templates_from_dir(self) -> None:
39
+ """Load templates from template directory."""
40
+ if not self.templates_dir:
41
+ return
42
+
43
+ for template_file in self.templates_dir.glob("*.txt"):
44
+ template_name = template_file.stem
45
+ try:
46
+ with open(template_file, "r", encoding="utf-8") as f:
47
+ template_content = f.read()
48
+ self.templates[template_name] = template_content
49
+ logger.debug(f"Loaded template: {template_name}")
50
+ except Exception as e:
51
+ logger.warning(f"Error loading template {template_name}: {str(e)}")
52
+
53
+ def get_template(self, template_name: str) -> Optional[Template]:
54
+ """
55
+ Get template by name.
56
+
57
+ Parameters
58
+ ----------
59
+ template_name : str
60
+ Template name
61
+
62
+ Returns
63
+ -------
64
+ Template or None
65
+ Template object if found, None otherwise
66
+ """
67
+ if template_name not in self.templates:
68
+ logger.warning(f"Template not found: {template_name}")
69
+ return None
70
+
71
+ return Template(self.templates[template_name])
72
+
73
+ def format_prompt(self, template_name: str, **kwargs) -> Optional[str]:
74
+ """
75
+ Format prompt with provided parameters.
76
+
77
+ Parameters
78
+ ----------
79
+ template_name : str
80
+ Template name
81
+ **kwargs : dict
82
+ Template parameters
83
+
84
+ Returns
85
+ -------
86
+ str or None
87
+ Formatted prompt if template exists, None otherwise
88
+ """
89
+ template = self.get_template(template_name)
90
+ if not template:
91
+ return None
92
+
93
+ try:
94
+ return template.safe_substitute(**kwargs)
95
+ except Exception as e:
96
+ logger.error(f"Error formatting template {template_name}: {str(e)}")
97
+ return None
98
+
99
+ def add_template(self, template_name: str, template_content: str) -> None:
100
+ """
101
+ Add or update template.
102
+
103
+ Parameters
104
+ ----------
105
+ template_name : str
106
+ Template name
107
+ template_content : str
108
+ Template content
109
+ """
110
+ self.templates[template_name] = template_content
111
+
112
+ def save_template(self, template_name: str) -> bool:
113
+ """
114
+ Save template to file.
115
+
116
+ Parameters
117
+ ----------
118
+ template_name : str
119
+ Template name
120
+
121
+ Returns
122
+ -------
123
+ bool
124
+ True if successful, False otherwise
125
+ """
126
+ if not self.templates_dir:
127
+ logger.error("Templates directory not set")
128
+ return False
129
+
130
+ if template_name not in self.templates:
131
+ logger.warning(f"Template not found: {template_name}")
132
+ return False
133
+
134
+ try:
135
+ self.templates_dir.mkdir(parents=True, exist_ok=True)
136
+ template_path = self.templates_dir / f"{template_name}.txt"
137
+
138
+ with open(template_path, "w", encoding="utf-8") as f:
139
+ f.write(self.templates[template_name])
140
+
141
+ logger.debug(f"Saved template: {template_name}")
142
+ return True
143
+ except Exception as e:
144
+ logger.error(f"Error saving template {template_name}: {str(e)}")
145
+ return False
146
+
147
+
148
+# Default prompt templates
149
+DEFAULT_TEMPLATES = {
150
+ "content_analysis": """
151
+ Analyze the provided video content and extract key information:
152
+
153
+ TRANSCRIPT:
154
+ $transcript
155
+
156
+ VISUAL ELEMENTS (if available):
157
+ $visual_elements
158
+
159
+ Please extract and organize the following:
160
+ - Main topics and themes
161
+ - Key points for each topic
162
+ - Important details or facts
163
+ - Action items or follow-ups
164
+ - Relationships between concepts
165
+
166
+ Format the output as structured markdown.
167
+ """,
168
+ "diagram_extraction": """
169
+ Analyze the following image that contains a diagram, whiteboard content,
170
+ or other visual information.
171
+
172
+ Extract and convert this visual information into a structured representation.
173
+
174
+ If it's a flowchart, process diagram, or similar structured visual:
175
+ - Identify the components and their relationships
176
+ - Preserve the logical flow and structure
177
+ - Convert it to mermaid diagram syntax
178
+
179
+ If it's a whiteboard with text, bullet points, or unstructured content:
180
+ - Extract all text elements
181
+ - Preserve hierarchical organization if present
182
+ - Maintain any emphasized or highlighted elements
183
+
184
+ Image context: $image_context
185
+
186
+ Return the results as markdown with appropriate structure.
187
+ """,
188
+ "action_item_detection": """
189
+ Review the following transcript and identify all action items, commitments, or follow-up tasks.
190
+
191
+ TRANSCRIPT:
192
+ $transcript
193
+
194
+ For each action item, extract:
195
+ - The specific action to be taken
196
+ - Who is responsible (if mentioned)
197
+ - Any deadlines or timeframes
198
+ - Priority level (if indicated)
199
+ - Context or additional details
200
+
201
+ Format the results as a structured list of action items.
202
+ """,
203
+ "content_summary": """
204
+ Provide a concise summary of the following content:
205
+
206
+ $content
207
+
208
+ The summary should:
209
+ - Capture the main points and key takeaways
210
+ - Be approximately 3-5 paragraphs
211
+ - Focus on the most important information
212
+ - Maintain a neutral, objective tone
213
+
214
+ Format the summary as clear, readable text.
215
+ """,
216
+ "summary_generation": """
217
+ Generate a comprehensive summary of the following transcript content.
218
+
219
+ CONTENT:
220
+ $content
221
+
222
+ Provide a well-structured summary that:
223
+ - Captures the main topics discussed
224
+ - Highlights key decisions or conclusions
225
+ - Notes any important context or background
226
+ - Is 3-5 paragraphs long
227
+
228
+ Write in clear, professional prose.
229
+ """,
230
+ "key_points_extraction": """
231
+ Extract the key points from the following content.
232
+
233
+ CONTENT:
234
+ $content
235
+
236
+ Return a JSON array of key point objects. Each object should have:
237
+ - "point": the key point (1-2 sentences)
238
+ - "topic": category or topic area (optional)
239
+ - "details": supporting details (optional)
240
+
241
+ Example format:
242
+ [
243
+ {"point": "The system uses microservices architecture",
244
+ "topic": "Architecture", "details": "Each service handles a specific domain"},
245
+ ]
246
+
247
+ Return ONLY the JSON array, no additional text.
248
+ """,
249
+ "entity_extraction": """
250
+ Extract all notable entities (people, concepts, technologies, organizations,
251
+ time references) from the following content.
252
+ CONTENT:
253
+ $content
254
+
255
+ Return a JSON array of entity objects:
256
+ [
257
+ {"name": "entity name",
258
+ "type": "person|concept|technology|organization|time",
259
+ "description": "brief description"}
260
+
261
+ Return ONLY the JSON array, no additional text.
262
+ """,
263
+ "relationship_extraction": """
264
+ Given the following content and entities, identify relationships between them.
265
+
266
+ CONTENT:
267
+ $content
268
+
269
+ KNOWN ENTITIES:
270
+ $entities
271
+
272
+ Return a JSON array of relationship objects:
273
+ [
274
+ {"source": "entity A", "target": "entity B",
275
+ "type": "relationship type (e.g., uses, manages, depends_on, created_by, part_of)"}
276
+
277
+ Return ONLY the JSON array, no additional text.
278
+ """,
279
+ "diagram_analysis": """
280
+ Analyze the following text extracted from a diagram or visual element.
281
+
282
+ DIAGRAM TEXT:
283
+ $diagram_text
284
+
285
+ Identify:
286
+ 1. The type of diagram (flowchart, architecture, sequence, etc.)
287
+ 2. The main components and their roles
288
+ 3. The relationships between components
289
+ 4. Any data flows or process steps
290
+
291
+ Return a JSON object:
292
+ {
293
+ "diagram_type": "type",
294
+ "components": ["list of components"],
295
+ "relationships": ["component A -> component B: description"],
296
+ "summary": "brief description of what the diagram shows"
297
+ }
298
+
299
+ Return ONLY the JSON object, no additional text.
300
+ """,
301
+ "mermaid_generation": """
302
+ Convert the following diagram information into valid Mermaid diagram syntax.
303
+
304
+ Diagram Type: $diagram_type
305
+ Text Content: $text_content
306
+ Analysis: $semantic_analysis
307
+
308
+ Generate a Mermaid diagram that accurately represents the visual structure.
309
+ Use the appropriate Mermaid diagram type (graph, sequenceDiagram, classDiagram, etc.).
310
+
311
+ Return ONLY the Mermaid code, no markdown fences or explanations.
312
+ """,
313
+}
314
+
315
+# Create default prompt template manager
316
+default_prompt_manager = PromptTemplate(default_templates=DEFAULT_TEMPLATES)
317
+Any, Dict, List, Optional, Union
318
+
319
+logger = loclass PromptTemplate:
320
+ """Template manager for LLM prompts."""
321
+
322
+ def __init__(
323
+ self, def __init__(
324
+ self,
325
+ templates_dir: Optional[Union[str, Path]] = None,
326
+
327
+ ):me] = template_contenameters
328
+er.warning(f"Error loading ts_dir : str or Path, optional
329
+ Directory containing template files
330
+ defaul
--- a/video_processor/utils/prompt_templates.py
+++ b/video_processor/utils/prompt_templates.py
@@ -0,0 +1,330 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
--- a/video_processor/utils/prompt_templates.py
+++ b/video_processor/utils/prompt_templates.py
@@ -0,0 +1,330 @@
"""Prompt templates for LLM-based content analysis."""

import logging
from pathlib import Path
from string import Template
from typing import Dict, Optional, Union
5
6 logger = logging.getLogger(__name__)
7
8
class PromptTemplate:
    """Template manager for LLM prompts.

    Templates use :class:`string.Template` placeholder syntax (``$name``).
    Defaults passed to the constructor are loaded first; ``*.txt`` files in
    ``templates_dir`` (file stem = template name) override same-named defaults.
    """

    def __init__(
        self,
        templates_dir: Optional[Union[str, Path]] = None,
        default_templates: Optional[Dict[str, str]] = None,
    ):
        """
        Initialize prompt template manager.

        Parameters
        ----------
        templates_dir : str or Path, optional
            Directory containing ``*.txt`` template files
        default_templates : dict, optional
            Default templates to use
        """
        self.templates_dir: Optional[Path] = (
            Path(templates_dir) if templates_dir else None
        )
        self.templates: Dict[str, str] = {}

        # Load default templates first ...
        if default_templates:
            self.templates.update(default_templates)

        # ... so directory templates override same-named defaults.
        if self.templates_dir and self.templates_dir.exists():
            self._load_templates_from_dir()

    def _load_templates_from_dir(self) -> None:
        """Load every ``*.txt`` file from the template directory (best effort)."""
        if not self.templates_dir:
            return

        for template_file in self.templates_dir.glob("*.txt"):
            template_name = template_file.stem
            try:
                # pathlib idiom: one call replaces open/read boilerplate.
                self.templates[template_name] = template_file.read_text(
                    encoding="utf-8"
                )
                logger.debug(f"Loaded template: {template_name}")
            except Exception as e:
                # Best-effort: a single unreadable file skips only that template.
                logger.warning(f"Error loading template {template_name}: {str(e)}")

    def get_template(self, template_name: str) -> Optional[Template]:
        """
        Get template by name.

        Parameters
        ----------
        template_name : str
            Template name

        Returns
        -------
        Template or None
            Template object if found, None otherwise
        """
        if template_name not in self.templates:
            logger.warning(f"Template not found: {template_name}")
            return None

        return Template(self.templates[template_name])

    def format_prompt(self, template_name: str, **kwargs) -> Optional[str]:
        """
        Format prompt with provided parameters.

        Missing placeholders are left intact (``safe_substitute``) rather
        than raising, so partially-filled prompts are possible.

        Parameters
        ----------
        template_name : str
            Template name
        **kwargs : dict
            Template parameters

        Returns
        -------
        str or None
            Formatted prompt if template exists, None otherwise
        """
        template = self.get_template(template_name)
        if not template:
            return None

        try:
            return template.safe_substitute(**kwargs)
        except Exception as e:
            logger.error(f"Error formatting template {template_name}: {str(e)}")
            return None

    def add_template(self, template_name: str, template_content: str) -> None:
        """
        Add or update template (in memory only; see ``save_template``).

        Parameters
        ----------
        template_name : str
            Template name
        template_content : str
            Template content
        """
        self.templates[template_name] = template_content

    def save_template(self, template_name: str) -> bool:
        """
        Save template to ``<templates_dir>/<template_name>.txt``.

        Parameters
        ----------
        template_name : str
            Template name

        Returns
        -------
        bool
            True if successful, False otherwise
        """
        if not self.templates_dir:
            logger.error("Templates directory not set")
            return False

        if template_name not in self.templates:
            logger.warning(f"Template not found: {template_name}")
            return False

        try:
            self.templates_dir.mkdir(parents=True, exist_ok=True)
            template_path = self.templates_dir / f"{template_name}.txt"
            template_path.write_text(self.templates[template_name], encoding="utf-8")

            logger.debug(f"Saved template: {template_name}")
            return True
        except Exception as e:
            logger.error(f"Error saving template {template_name}: {str(e)}")
            return False
146
147
# Default prompt templates, keyed by template name. All use string.Template
# ($var) placeholder syntax. Templates that request JSON output show a
# syntactically valid example (balanced brackets, no trailing commas) so the
# model is not taught to emit malformed JSON.
DEFAULT_TEMPLATES = {
    "content_analysis": """
    Analyze the provided video content and extract key information:

    TRANSCRIPT:
    $transcript

    VISUAL ELEMENTS (if available):
    $visual_elements

    Please extract and organize the following:
    - Main topics and themes
    - Key points for each topic
    - Important details or facts
    - Action items or follow-ups
    - Relationships between concepts

    Format the output as structured markdown.
    """,
    "diagram_extraction": """
    Analyze the following image that contains a diagram, whiteboard content,
    or other visual information.

    Extract and convert this visual information into a structured representation.

    If it's a flowchart, process diagram, or similar structured visual:
    - Identify the components and their relationships
    - Preserve the logical flow and structure
    - Convert it to mermaid diagram syntax

    If it's a whiteboard with text, bullet points, or unstructured content:
    - Extract all text elements
    - Preserve hierarchical organization if present
    - Maintain any emphasized or highlighted elements

    Image context: $image_context

    Return the results as markdown with appropriate structure.
    """,
    "action_item_detection": """
    Review the following transcript and identify all action items, commitments, or follow-up tasks.

    TRANSCRIPT:
    $transcript

    For each action item, extract:
    - The specific action to be taken
    - Who is responsible (if mentioned)
    - Any deadlines or timeframes
    - Priority level (if indicated)
    - Context or additional details

    Format the results as a structured list of action items.
    """,
    "content_summary": """
    Provide a concise summary of the following content:

    $content

    The summary should:
    - Capture the main points and key takeaways
    - Be approximately 3-5 paragraphs
    - Focus on the most important information
    - Maintain a neutral, objective tone

    Format the summary as clear, readable text.
    """,
    "summary_generation": """
    Generate a comprehensive summary of the following transcript content.

    CONTENT:
    $content

    Provide a well-structured summary that:
    - Captures the main topics discussed
    - Highlights key decisions or conclusions
    - Notes any important context or background
    - Is 3-5 paragraphs long

    Write in clear, professional prose.
    """,
    "key_points_extraction": """
    Extract the key points from the following content.

    CONTENT:
    $content

    Return a JSON array of key point objects. Each object should have:
    - "point": the key point (1-2 sentences)
    - "topic": category or topic area (optional)
    - "details": supporting details (optional)

    Example format:
    [
      {"point": "The system uses microservices architecture",
       "topic": "Architecture", "details": "Each service handles a specific domain"}
    ]

    Return ONLY the JSON array, no additional text.
    """,
    "entity_extraction": """
    Extract all notable entities (people, concepts, technologies, organizations,
    time references) from the following content.
    CONTENT:
    $content

    Return a JSON array of entity objects:
    [
      {"name": "entity name",
       "type": "person|concept|technology|organization|time",
       "description": "brief description"}
    ]

    Return ONLY the JSON array, no additional text.
    """,
    "relationship_extraction": """
    Given the following content and entities, identify relationships between them.

    CONTENT:
    $content

    KNOWN ENTITIES:
    $entities

    Return a JSON array of relationship objects:
    [
      {"source": "entity A", "target": "entity B",
       "type": "relationship type (e.g., uses, manages, depends_on, created_by, part_of)"}
    ]

    Return ONLY the JSON array, no additional text.
    """,
    "diagram_analysis": """
    Analyze the following text extracted from a diagram or visual element.

    DIAGRAM TEXT:
    $diagram_text

    Identify:
    1. The type of diagram (flowchart, architecture, sequence, etc.)
    2. The main components and their roles
    3. The relationships between components
    4. Any data flows or process steps

    Return a JSON object:
    {
      "diagram_type": "type",
      "components": ["list of components"],
      "relationships": ["component A -> component B: description"],
      "summary": "brief description of what the diagram shows"
    }

    Return ONLY the JSON object, no additional text.
    """,
    "mermaid_generation": """
    Convert the following diagram information into valid Mermaid diagram syntax.

    Diagram Type: $diagram_type
    Text Content: $text_content
    Analysis: $semantic_analysis

    Generate a Mermaid diagram that accurately represents the visual structure.
    Use the appropriate Mermaid diagram type (graph, sequenceDiagram, classDiagram, etc.).

    Return ONLY the Mermaid code, no markdown fences or explanations.
    """,
}
314
# Module-level singleton preloaded with the built-in templates; importers can
# use this directly instead of constructing their own PromptTemplate instance.
default_prompt_manager = PromptTemplate(default_templates=DEFAULT_TEMPLATES)
317 Any, Dict, List, Optional, Union
318
319 logger = loclass PromptTemplate:
320 """Template manager for LLM prompts."""
321
322 def __init__(
323 self, def __init__(
324 self,
325 templates_dir: Optional[Union[str, Path]] = None,
326
327 ):me] = template_contenameters
328 er.warning(f"Error loading ts_dir : str or Path, optional
329 Directory containing template files
330 defaul

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button