|
321f2f5…
|
leo
|
1 |
"""Knowledge graph integration for organizing extracted content.""" |
|
321f2f5…
|
leo
|
2 |
|
|
321f2f5…
|
leo
|
3 |
import logging |
|
321f2f5…
|
leo
|
4 |
from pathlib import Path |
|
321f2f5…
|
leo
|
5 |
from typing import Dict, List, Optional, Union |
|
321f2f5…
|
leo
|
6 |
|
|
287a3bb…
|
leo
|
7 |
from tqdm import tqdm |
|
287a3bb…
|
leo
|
8 |
|
|
0ad36b7…
|
noreply
|
9 |
from video_processor.integrators.graph_store import GraphStore, create_store |
|
0981a08…
|
noreply
|
10 |
from video_processor.models import Entity, KnowledgeGraphData, Relationship, SourceRecord |
|
321f2f5…
|
leo
|
11 |
from video_processor.providers.manager import ProviderManager |
|
321f2f5…
|
leo
|
12 |
from video_processor.utils.json_parsing import parse_json_from_response |
|
321f2f5…
|
leo
|
13 |
|
|
321f2f5…
|
leo
|
14 |
logger = logging.getLogger(__name__) |
|
321f2f5…
|
leo
|
15 |
|
|
321f2f5…
|
leo
|
16 |
|
|
321f2f5…
|
leo
|
17 |
class KnowledgeGraph: |
|
321f2f5…
|
leo
|
18 |
"""Integrates extracted content into a structured knowledge graph.""" |
|
321f2f5…
|
leo
|
19 |
|
|
321f2f5…
|
leo
|
20 |
    def __init__(
        self,
        provider_manager: Optional[ProviderManager] = None,
        db_path: Optional[Path] = None,
        store: Optional[GraphStore] = None,
    ):
        """Create a knowledge graph backed by a pluggable graph store.

        Args:
            provider_manager: LLM provider used for entity/relationship
                extraction. When None, extraction calls return empty results.
            db_path: Optional path handed to ``create_store`` when no explicit
                store is supplied.
            store: Pre-built GraphStore; takes precedence over ``db_path``.
        """
        self.pm = provider_manager
        # `store or ...` — any falsy store value falls back to a fresh store.
        self._store = store or create_store(db_path)
|
0981a08…
|
noreply
|
28 |
|
|
0981a08…
|
noreply
|
29 |
    def register_source(self, source: Dict) -> None:
        """Register a content source for provenance tracking.

        Pure delegation to the backing store; the expected dict shape is
        whatever ``GraphStore.register_source`` accepts (not validated here).
        """
        self._store.register_source(source)
|
0ad36b7…
|
noreply
|
32 |
|
|
0ad36b7…
|
noreply
|
33 |
@property |
|
0ad36b7…
|
noreply
|
34 |
def nodes(self) -> Dict[str, dict]: |
|
0ad36b7…
|
noreply
|
35 |
"""Backward-compatible read access to nodes as a dict keyed by entity name.""" |
|
0ad36b7…
|
noreply
|
36 |
result = {} |
|
0ad36b7…
|
noreply
|
37 |
for entity in self._store.get_all_entities(): |
|
0ad36b7…
|
noreply
|
38 |
name = entity["name"] |
|
0ad36b7…
|
noreply
|
39 |
descs = entity.get("descriptions", []) |
|
0ad36b7…
|
noreply
|
40 |
result[name] = { |
|
0ad36b7…
|
noreply
|
41 |
"id": entity.get("id", name), |
|
0ad36b7…
|
noreply
|
42 |
"name": name, |
|
0ad36b7…
|
noreply
|
43 |
"type": entity.get("type", "concept"), |
|
0ad36b7…
|
noreply
|
44 |
"descriptions": set(descs) if isinstance(descs, list) else descs, |
|
0ad36b7…
|
noreply
|
45 |
"occurrences": entity.get("occurrences", []), |
|
0ad36b7…
|
noreply
|
46 |
} |
|
0ad36b7…
|
noreply
|
47 |
return result |
|
0ad36b7…
|
noreply
|
48 |
|
|
0ad36b7…
|
noreply
|
49 |
    @property
    def relationships(self) -> List[dict]:
        """Backward-compatible read access to relationships.

        Returns whatever list the backing store produces; callers should not
        rely on mutating it to affect the store.
        """
        return self._store.get_all_relationships()
|
321f2f5…
|
leo
|
53 |
|
|
321f2f5…
|
leo
|
54 |
def _chat(self, prompt: str, temperature: float = 0.3) -> str: |
|
321f2f5…
|
leo
|
55 |
"""Send a chat message through ProviderManager (or return empty if none).""" |
|
321f2f5…
|
leo
|
56 |
if not self.pm: |
|
321f2f5…
|
leo
|
57 |
return "" |
|
321f2f5…
|
leo
|
58 |
return self.pm.chat( |
|
321f2f5…
|
leo
|
59 |
[{"role": "user", "content": prompt}], |
|
321f2f5…
|
leo
|
60 |
max_tokens=4096, |
|
321f2f5…
|
leo
|
61 |
temperature=temperature, |
|
321f2f5…
|
leo
|
62 |
) |
|
321f2f5…
|
leo
|
63 |
|
|
829e24a…
|
leo
|
64 |
def extract_entities_and_relationships( |
|
829e24a…
|
leo
|
65 |
self, text: str |
|
829e24a…
|
leo
|
66 |
) -> tuple[List[Entity], List[Relationship]]: |
|
287a3bb…
|
leo
|
67 |
"""Extract entities and relationships in a single LLM call.""" |
|
321f2f5…
|
leo
|
68 |
prompt = ( |
|
287a3bb…
|
leo
|
69 |
"Extract all notable entities and relationships from the following content.\n\n" |
|
321f2f5…
|
leo
|
70 |
f"CONTENT:\n{text}\n\n" |
|
287a3bb…
|
leo
|
71 |
"Return a JSON object with two keys:\n" |
|
829e24a…
|
leo
|
72 |
'- "entities": array of {"name": "...", ' |
|
829e24a…
|
leo
|
73 |
'"type": "person|concept|technology|organization|time", ' |
|
829e24a…
|
leo
|
74 |
'"description": "brief description"}\n' |
|
829e24a…
|
leo
|
75 |
'- "relationships": array of {"source": "entity name", ' |
|
829e24a…
|
leo
|
76 |
'"target": "entity name", ' |
|
829e24a…
|
leo
|
77 |
'"type": "relationship description"}\n\n' |
|
287a3bb…
|
leo
|
78 |
"Return ONLY the JSON object." |
|
321f2f5…
|
leo
|
79 |
) |
|
321f2f5…
|
leo
|
80 |
raw = self._chat(prompt) |
|
321f2f5…
|
leo
|
81 |
parsed = parse_json_from_response(raw) |
|
321f2f5…
|
leo
|
82 |
|
|
321f2f5…
|
leo
|
83 |
entities = [] |
|
287a3bb…
|
leo
|
84 |
rels = [] |
|
287a3bb…
|
leo
|
85 |
|
|
287a3bb…
|
leo
|
86 |
if isinstance(parsed, dict): |
|
287a3bb…
|
leo
|
87 |
for item in parsed.get("entities", []): |
|
287a3bb…
|
leo
|
88 |
if isinstance(item, dict) and "name" in item: |
|
829e24a…
|
leo
|
89 |
entities.append( |
|
829e24a…
|
leo
|
90 |
Entity( |
|
829e24a…
|
leo
|
91 |
name=item["name"], |
|
829e24a…
|
leo
|
92 |
type=item.get("type", "concept"), |
|
829e24a…
|
leo
|
93 |
descriptions=[item["description"]] if item.get("description") else [], |
|
829e24a…
|
leo
|
94 |
) |
|
829e24a…
|
leo
|
95 |
) |
|
829e24a…
|
leo
|
96 |
{e.name for e in entities} |
|
287a3bb…
|
leo
|
97 |
for item in parsed.get("relationships", []): |
|
287a3bb…
|
leo
|
98 |
if isinstance(item, dict) and "source" in item and "target" in item: |
|
829e24a…
|
leo
|
99 |
rels.append( |
|
829e24a…
|
leo
|
100 |
Relationship( |
|
829e24a…
|
leo
|
101 |
source=item["source"], |
|
829e24a…
|
leo
|
102 |
target=item["target"], |
|
829e24a…
|
leo
|
103 |
type=item.get("type", "related_to"), |
|
829e24a…
|
leo
|
104 |
) |
|
829e24a…
|
leo
|
105 |
) |
|
287a3bb…
|
leo
|
106 |
elif isinstance(parsed, list): |
|
287a3bb…
|
leo
|
107 |
# Fallback: if model returns a flat entity list |
|
321f2f5…
|
leo
|
108 |
for item in parsed: |
|
321f2f5…
|
leo
|
109 |
if isinstance(item, dict) and "name" in item: |
|
829e24a…
|
leo
|
110 |
entities.append( |
|
829e24a…
|
leo
|
111 |
Entity( |
|
829e24a…
|
leo
|
112 |
name=item["name"], |
|
829e24a…
|
leo
|
113 |
type=item.get("type", "concept"), |
|
829e24a…
|
leo
|
114 |
descriptions=[item["description"]] if item.get("description") else [], |
|
829e24a…
|
leo
|
115 |
) |
|
829e24a…
|
leo
|
116 |
) |
|
287a3bb…
|
leo
|
117 |
|
|
287a3bb…
|
leo
|
118 |
return entities, rels |
|
287a3bb…
|
leo
|
119 |
|
|
0981a08…
|
noreply
|
120 |
    def add_content(
        self,
        text: str,
        source: str,
        timestamp: Optional[float] = None,
        source_id: Optional[str] = None,
    ) -> None:
        """Add content to knowledge graph by extracting entities and relationships.

        Args:
            text: Raw content to mine via the LLM.
            source: Human-readable provenance label stored on entities/edges.
            timestamp: Optional media timestamp attached to occurrences/edges.
            source_id: Optional registered-source id; when given, a precise
                source-location record is stored per entity as well.
        """
        entities, relationships = self.extract_entities_and_relationships(text)

        # Short preview of the content, stored alongside each occurrence.
        snippet = text[:100] + "..." if len(text) > 100 else text

        for entity in entities:
            self._store.merge_entity(entity.name, entity.type, entity.descriptions, source=source)
            self._store.add_occurrence(entity.name, source, timestamp, snippet)
            if source_id:
                self._store.add_source_location(
                    source_id,
                    entity_name_lower=entity.name.lower(),
                    timestamp=timestamp,
                    text_snippet=snippet,
                )

        # Only keep edges whose endpoints both exist as entities; this drops
        # relationships the model invented between unextracted names.
        for rel in relationships:
            if self._store.has_entity(rel.source) and self._store.has_entity(rel.target):
                self._store.add_relationship(
                    rel.source,
                    rel.target,
                    rel.type,
                    content_source=source,
                    timestamp=timestamp,
                )
|
287a3bb…
|
leo
|
152 |
|
|
287a3bb…
|
leo
|
153 |
def process_transcript(self, transcript: Dict, batch_size: int = 10) -> None: |
|
287a3bb…
|
leo
|
154 |
"""Process transcript segments into knowledge graph, batching for efficiency.""" |
|
321f2f5…
|
leo
|
155 |
if "segments" not in transcript: |
|
321f2f5…
|
leo
|
156 |
logger.warning("Transcript missing segments") |
|
321f2f5…
|
leo
|
157 |
return |
|
321f2f5…
|
leo
|
158 |
|
|
287a3bb…
|
leo
|
159 |
segments = transcript["segments"] |
|
287a3bb…
|
leo
|
160 |
|
|
287a3bb…
|
leo
|
161 |
# Register speakers first |
|
287a3bb…
|
leo
|
162 |
for i, segment in enumerate(segments): |
|
287a3bb…
|
leo
|
163 |
speaker = segment.get("speaker", None) |
|
0ad36b7…
|
noreply
|
164 |
if speaker and not self._store.has_entity(speaker): |
|
0ad36b7…
|
noreply
|
165 |
self._store.merge_entity(speaker, "person", ["Speaker in transcript"]) |
|
287a3bb…
|
leo
|
166 |
|
|
287a3bb…
|
leo
|
167 |
# Batch segments together for fewer API calls |
|
287a3bb…
|
leo
|
168 |
batches = [] |
|
287a3bb…
|
leo
|
169 |
for start in range(0, len(segments), batch_size): |
|
829e24a…
|
leo
|
170 |
batches.append(segments[start : start + batch_size]) |
|
287a3bb…
|
leo
|
171 |
|
|
287a3bb…
|
leo
|
172 |
for batch in tqdm(batches, desc="Building knowledge graph", unit="batch"): |
|
287a3bb…
|
leo
|
173 |
# Combine batch text |
|
829e24a…
|
leo
|
174 |
combined_text = " ".join(seg["text"] for seg in batch if "text" in seg) |
|
287a3bb…
|
leo
|
175 |
if not combined_text.strip(): |
|
287a3bb…
|
leo
|
176 |
continue |
|
287a3bb…
|
leo
|
177 |
|
|
287a3bb…
|
leo
|
178 |
# Use first segment's timestamp as batch timestamp |
|
287a3bb…
|
leo
|
179 |
batch_start_idx = segments.index(batch[0]) |
|
287a3bb…
|
leo
|
180 |
timestamp = batch[0].get("start", None) |
|
287a3bb…
|
leo
|
181 |
source = f"transcript_batch_{batch_start_idx}" |
|
287a3bb…
|
leo
|
182 |
|
|
287a3bb…
|
leo
|
183 |
self.add_content(combined_text, source, timestamp) |
|
321f2f5…
|
leo
|
184 |
|
|
321f2f5…
|
leo
|
185 |
def process_diagrams(self, diagrams: List[Dict]) -> None: |
|
321f2f5…
|
leo
|
186 |
"""Process diagram results into knowledge graph.""" |
|
287a3bb…
|
leo
|
187 |
for i, diagram in enumerate(tqdm(diagrams, desc="Processing diagrams for KG", unit="diag")): |
|
321f2f5…
|
leo
|
188 |
text_content = diagram.get("text_content", "") |
|
0ad36b7…
|
noreply
|
189 |
source = f"diagram_{i}" |
|
321f2f5…
|
leo
|
190 |
if text_content: |
|
321f2f5…
|
leo
|
191 |
self.add_content(text_content, source) |
|
321f2f5…
|
leo
|
192 |
|
|
321f2f5…
|
leo
|
193 |
diagram_id = f"diagram_{i}" |
|
0ad36b7…
|
noreply
|
194 |
if not self._store.has_entity(diagram_id): |
|
0ad36b7…
|
noreply
|
195 |
self._store.merge_entity(diagram_id, "diagram", ["Visual diagram from video"]) |
|
0ad36b7…
|
noreply
|
196 |
self._store.add_occurrence( |
|
0ad36b7…
|
noreply
|
197 |
diagram_id, |
|
0ad36b7…
|
noreply
|
198 |
source if text_content else diagram_id, |
|
0ad36b7…
|
noreply
|
199 |
text=f"frame_index={diagram.get('frame_index')}", |
|
2a1b11a…
|
noreply
|
200 |
) |
|
2a1b11a…
|
noreply
|
201 |
|
|
2a1b11a…
|
noreply
|
202 |
def process_screenshots(self, screenshots: List[Dict]) -> None: |
|
2a1b11a…
|
noreply
|
203 |
"""Process screenshot captures into knowledge graph. |
|
2a1b11a…
|
noreply
|
204 |
|
|
2a1b11a…
|
noreply
|
205 |
Extracts entities from text_content and adds screenshot-specific |
|
2a1b11a…
|
noreply
|
206 |
entities from the entities list. |
|
2a1b11a…
|
noreply
|
207 |
""" |
|
2a1b11a…
|
noreply
|
208 |
for i, capture in enumerate(screenshots): |
|
2a1b11a…
|
noreply
|
209 |
text_content = capture.get("text_content", "") |
|
2a1b11a…
|
noreply
|
210 |
source = f"screenshot_{i}" |
|
2a1b11a…
|
noreply
|
211 |
content_type = capture.get("content_type", "screenshot") |
|
2a1b11a…
|
noreply
|
212 |
|
|
2a1b11a…
|
noreply
|
213 |
# Extract entities from visible text via LLM |
|
2a1b11a…
|
noreply
|
214 |
if text_content: |
|
2a1b11a…
|
noreply
|
215 |
self.add_content(text_content, source) |
|
2a1b11a…
|
noreply
|
216 |
|
|
2a1b11a…
|
noreply
|
217 |
# Add explicitly identified entities from vision extraction |
|
2a1b11a…
|
noreply
|
218 |
for entity_name in capture.get("entities", []): |
|
2a1b11a…
|
noreply
|
219 |
if not entity_name or len(entity_name) < 2: |
|
2a1b11a…
|
noreply
|
220 |
continue |
|
2a1b11a…
|
noreply
|
221 |
if not self._store.has_entity(entity_name): |
|
2a1b11a…
|
noreply
|
222 |
self._store.merge_entity( |
|
2a1b11a…
|
noreply
|
223 |
entity_name, |
|
2a1b11a…
|
noreply
|
224 |
"concept", |
|
2a1b11a…
|
noreply
|
225 |
[f"Identified in {content_type} screenshot"], |
|
2a1b11a…
|
noreply
|
226 |
source=source, |
|
2a1b11a…
|
noreply
|
227 |
) |
|
2a1b11a…
|
noreply
|
228 |
self._store.add_occurrence( |
|
2a1b11a…
|
noreply
|
229 |
entity_name, |
|
2a1b11a…
|
noreply
|
230 |
source, |
|
2a1b11a…
|
noreply
|
231 |
text=f"Visible in {content_type} (frame {capture.get('frame_index', '?')})", |
|
0981a08…
|
noreply
|
232 |
) |
|
0981a08…
|
noreply
|
233 |
|
|
321f2f5…
|
leo
|
234 |
def to_data(self) -> KnowledgeGraphData: |
|
321f2f5…
|
leo
|
235 |
"""Convert to pydantic KnowledgeGraphData model.""" |
|
321f2f5…
|
leo
|
236 |
nodes = [] |
|
0ad36b7…
|
noreply
|
237 |
for entity in self._store.get_all_entities(): |
|
0ad36b7…
|
noreply
|
238 |
descs = entity.get("descriptions", []) |
|
321f2f5…
|
leo
|
239 |
if isinstance(descs, set): |
|
321f2f5…
|
leo
|
240 |
descs = list(descs) |
|
829e24a…
|
leo
|
241 |
nodes.append( |
|
829e24a…
|
leo
|
242 |
Entity( |
|
0ad36b7…
|
noreply
|
243 |
name=entity["name"], |
|
0ad36b7…
|
noreply
|
244 |
type=entity.get("type", "concept"), |
|
829e24a…
|
leo
|
245 |
descriptions=descs, |
|
0ad36b7…
|
noreply
|
246 |
occurrences=entity.get("occurrences", []), |
|
829e24a…
|
leo
|
247 |
) |
|
829e24a…
|
leo
|
248 |
) |
|
321f2f5…
|
leo
|
249 |
|
|
321f2f5…
|
leo
|
250 |
rels = [ |
|
321f2f5…
|
leo
|
251 |
Relationship( |
|
321f2f5…
|
leo
|
252 |
source=r["source"], |
|
321f2f5…
|
leo
|
253 |
target=r["target"], |
|
321f2f5…
|
leo
|
254 |
type=r.get("type", "related_to"), |
|
321f2f5…
|
leo
|
255 |
content_source=r.get("content_source"), |
|
321f2f5…
|
leo
|
256 |
timestamp=r.get("timestamp"), |
|
321f2f5…
|
leo
|
257 |
) |
|
0ad36b7…
|
noreply
|
258 |
for r in self._store.get_all_relationships() |
|
321f2f5…
|
leo
|
259 |
] |
|
0981a08…
|
noreply
|
260 |
|
|
0981a08…
|
noreply
|
261 |
sources = [SourceRecord(**s) for s in self._store.get_sources()] |
|
0981a08…
|
noreply
|
262 |
|
|
0981a08…
|
noreply
|
263 |
return KnowledgeGraphData(nodes=nodes, relationships=rels, sources=sources) |
|
321f2f5…
|
leo
|
264 |
|
|
321f2f5…
|
leo
|
265 |
    def to_dict(self) -> Dict:
        """Convert knowledge graph to dictionary (backward-compatible).

        Pure delegation; the dict layout is defined by the store's ``to_dict``.
        """
        return self._store.to_dict()
|
321f2f5…
|
leo
|
268 |
|
|
321f2f5…
|
leo
|
269 |
def save(self, output_path: Union[str, Path]) -> Path: |
|
0981a08…
|
noreply
|
270 |
"""Save knowledge graph. Defaults to .db (SQLite), also supports .json.""" |
|
321f2f5…
|
leo
|
271 |
output_path = Path(output_path) |
|
321f2f5…
|
leo
|
272 |
if not output_path.suffix: |
|
0981a08…
|
noreply
|
273 |
output_path = output_path.with_suffix(".db") |
|
321f2f5…
|
leo
|
274 |
output_path.parent.mkdir(parents=True, exist_ok=True) |
|
321f2f5…
|
leo
|
275 |
|
|
0981a08…
|
noreply
|
276 |
if output_path.suffix == ".json": |
|
0981a08…
|
noreply
|
277 |
data = self.to_data() |
|
0981a08…
|
noreply
|
278 |
output_path.write_text(data.model_dump_json(indent=2)) |
|
0981a08…
|
noreply
|
279 |
elif output_path.suffix == ".db": |
|
0981a08…
|
noreply
|
280 |
# If the backing store is already SQLite at this path, it's already persisted. |
|
0981a08…
|
noreply
|
281 |
# Otherwise, create a new SQLite store and copy data into it. |
|
0981a08…
|
noreply
|
282 |
from video_processor.integrators.graph_store import SQLiteStore |
|
0981a08…
|
noreply
|
283 |
|
|
0981a08…
|
noreply
|
284 |
if not isinstance(self._store, SQLiteStore) or self._store._db_path != str(output_path): |
|
0981a08…
|
noreply
|
285 |
target = SQLiteStore(output_path) |
|
0981a08…
|
noreply
|
286 |
for source in self._store.get_sources(): |
|
0981a08…
|
noreply
|
287 |
target.register_source(source) |
|
0981a08…
|
noreply
|
288 |
for entity in self._store.get_all_entities(): |
|
0981a08…
|
noreply
|
289 |
descs = entity.get("descriptions", []) |
|
0981a08…
|
noreply
|
290 |
if isinstance(descs, set): |
|
0981a08…
|
noreply
|
291 |
descs = list(descs) |
|
0981a08…
|
noreply
|
292 |
target.merge_entity( |
|
0981a08…
|
noreply
|
293 |
entity["name"], |
|
0981a08…
|
noreply
|
294 |
entity.get("type", "concept"), |
|
0981a08…
|
noreply
|
295 |
descs, |
|
0981a08…
|
noreply
|
296 |
source=entity.get("source"), |
|
0981a08…
|
noreply
|
297 |
) |
|
0981a08…
|
noreply
|
298 |
for occ in entity.get("occurrences", []): |
|
0981a08…
|
noreply
|
299 |
target.add_occurrence( |
|
0981a08…
|
noreply
|
300 |
entity["name"], |
|
0981a08…
|
noreply
|
301 |
occ.get("source", ""), |
|
0981a08…
|
noreply
|
302 |
occ.get("timestamp"), |
|
0981a08…
|
noreply
|
303 |
occ.get("text"), |
|
0981a08…
|
noreply
|
304 |
) |
|
0981a08…
|
noreply
|
305 |
for rel in self._store.get_all_relationships(): |
|
0981a08…
|
noreply
|
306 |
target.add_relationship( |
|
0981a08…
|
noreply
|
307 |
rel.get("source", ""), |
|
0981a08…
|
noreply
|
308 |
rel.get("target", ""), |
|
0981a08…
|
noreply
|
309 |
rel.get("type", "related_to"), |
|
0981a08…
|
noreply
|
310 |
content_source=rel.get("content_source"), |
|
0981a08…
|
noreply
|
311 |
timestamp=rel.get("timestamp"), |
|
0981a08…
|
noreply
|
312 |
) |
|
0981a08…
|
noreply
|
313 |
target.close() |
|
0981a08…
|
noreply
|
314 |
else: |
|
0981a08…
|
noreply
|
315 |
# Unknown suffix — fall back to JSON |
|
0981a08…
|
noreply
|
316 |
data = self.to_data() |
|
0981a08…
|
noreply
|
317 |
output_path.write_text(data.model_dump_json(indent=2)) |
|
0981a08…
|
noreply
|
318 |
|
|
321f2f5…
|
leo
|
319 |
logger.info( |
|
0ad36b7…
|
noreply
|
320 |
f"Saved knowledge graph with {self._store.get_entity_count()} nodes " |
|
0ad36b7…
|
noreply
|
321 |
f"and {self._store.get_relationship_count()} relationships to {output_path}" |
|
321f2f5…
|
leo
|
322 |
) |
|
321f2f5…
|
leo
|
323 |
return output_path |
|
321f2f5…
|
leo
|
324 |
|
|
321f2f5…
|
leo
|
325 |
@classmethod |
|
0ad36b7…
|
noreply
|
326 |
def from_dict(cls, data: Dict, db_path: Optional[Path] = None) -> "KnowledgeGraph": |
|
321f2f5…
|
leo
|
327 |
"""Reconstruct a KnowledgeGraph from saved JSON dict.""" |
|
0ad36b7…
|
noreply
|
328 |
kg = cls(db_path=db_path) |
|
0981a08…
|
noreply
|
329 |
for source in data.get("sources", []): |
|
0981a08…
|
noreply
|
330 |
kg._store.register_source(source) |
|
321f2f5…
|
leo
|
331 |
for node in data.get("nodes", []): |
|
0ad36b7…
|
noreply
|
332 |
name = node.get("name", node.get("id", "")) |
|
321f2f5…
|
leo
|
333 |
descs = node.get("descriptions", []) |
|
0ad36b7…
|
noreply
|
334 |
if isinstance(descs, set): |
|
0ad36b7…
|
noreply
|
335 |
descs = list(descs) |
|
0ad36b7…
|
noreply
|
336 |
kg._store.merge_entity( |
|
0ad36b7…
|
noreply
|
337 |
name, node.get("type", "concept"), descs, source=node.get("source") |
|
0ad36b7…
|
noreply
|
338 |
) |
|
0ad36b7…
|
noreply
|
339 |
for occ in node.get("occurrences", []): |
|
0ad36b7…
|
noreply
|
340 |
kg._store.add_occurrence( |
|
0ad36b7…
|
noreply
|
341 |
name, |
|
0ad36b7…
|
noreply
|
342 |
occ.get("source", ""), |
|
0ad36b7…
|
noreply
|
343 |
occ.get("timestamp"), |
|
0ad36b7…
|
noreply
|
344 |
occ.get("text"), |
|
0ad36b7…
|
noreply
|
345 |
) |
|
0ad36b7…
|
noreply
|
346 |
for rel in data.get("relationships", []): |
|
0ad36b7…
|
noreply
|
347 |
kg._store.add_relationship( |
|
0ad36b7…
|
noreply
|
348 |
rel.get("source", ""), |
|
0ad36b7…
|
noreply
|
349 |
rel.get("target", ""), |
|
0ad36b7…
|
noreply
|
350 |
rel.get("type", "related_to"), |
|
0ad36b7…
|
noreply
|
351 |
content_source=rel.get("content_source"), |
|
0ad36b7…
|
noreply
|
352 |
timestamp=rel.get("timestamp"), |
|
0ad36b7…
|
noreply
|
353 |
) |
|
321f2f5…
|
leo
|
354 |
return kg |
|
321f2f5…
|
leo
|
355 |
|
|
0981a08…
|
noreply
|
356 |
    # Type specificity ranking for conflict resolution during merge.
    # Higher rank = more specific type wins when two entities match.
    # Unlisted types default to rank 1 (see _more_specific_type).
    _TYPE_SPECIFICITY = {
        "concept": 0,
        "time": 1,
        "diagram": 1,
        "organization": 2,
        "person": 3,
        "technology": 3,
    }
|
0981a08…
|
noreply
|
366 |
|
|
0981a08…
|
noreply
|
367 |
@staticmethod |
|
0981a08…
|
noreply
|
368 |
def _fuzzy_match(name_a: str, name_b: str, threshold: float = 0.85) -> bool: |
|
0981a08…
|
noreply
|
369 |
"""Return True if two names are similar enough to be considered the same entity.""" |
|
0981a08…
|
noreply
|
370 |
from difflib import SequenceMatcher |
|
0981a08…
|
noreply
|
371 |
|
|
0981a08…
|
noreply
|
372 |
return SequenceMatcher(None, name_a.lower(), name_b.lower()).ratio() >= threshold |
|
0981a08…
|
noreply
|
373 |
|
|
0981a08…
|
noreply
|
374 |
def _more_specific_type(self, type_a: str, type_b: str) -> str: |
|
0981a08…
|
noreply
|
375 |
"""Return the more specific of two entity types.""" |
|
0981a08…
|
noreply
|
376 |
rank_a = self._TYPE_SPECIFICITY.get(type_a, 1) |
|
0981a08…
|
noreply
|
377 |
rank_b = self._TYPE_SPECIFICITY.get(type_b, 1) |
|
0981a08…
|
noreply
|
378 |
return type_a if rank_a >= rank_b else type_b |
|
0981a08…
|
noreply
|
379 |
|
|
321f2f5…
|
leo
|
380 |
def merge(self, other: "KnowledgeGraph") -> None: |
|
0981a08…
|
noreply
|
381 |
"""Merge another KnowledgeGraph into this one. |
|
0981a08…
|
noreply
|
382 |
|
|
0981a08…
|
noreply
|
383 |
Improvements over naive merge: |
|
0981a08…
|
noreply
|
384 |
- Fuzzy name matching (SequenceMatcher >= 0.85) to unify near-duplicate entities |
|
0981a08…
|
noreply
|
385 |
- Type conflict resolution: prefer more specific types (e.g. technology > concept) |
|
0981a08…
|
noreply
|
386 |
- Provenance: merged entities get a ``merged_from`` description entry |
|
0981a08…
|
noreply
|
387 |
""" |
|
0981a08…
|
noreply
|
388 |
for source in other._store.get_sources(): |
|
0981a08…
|
noreply
|
389 |
self._store.register_source(source) |
|
0981a08…
|
noreply
|
390 |
|
|
0981a08…
|
noreply
|
391 |
# Build a lookup of existing entity names for fuzzy matching |
|
0981a08…
|
noreply
|
392 |
existing_entities = self._store.get_all_entities() |
|
0981a08…
|
noreply
|
393 |
existing_names = {e["name"]: e for e in existing_entities} |
|
0981a08…
|
noreply
|
394 |
# Cache lowercase -> canonical name for fast lookup |
|
0981a08…
|
noreply
|
395 |
name_index: dict[str, str] = {n.lower(): n for n in existing_names} |
|
0981a08…
|
noreply
|
396 |
|
|
0ad36b7…
|
noreply
|
397 |
for entity in other._store.get_all_entities(): |
|
0981a08…
|
noreply
|
398 |
incoming_name = entity["name"] |
|
0ad36b7…
|
noreply
|
399 |
descs = entity.get("descriptions", []) |
|
0ad36b7…
|
noreply
|
400 |
if isinstance(descs, set): |
|
0ad36b7…
|
noreply
|
401 |
descs = list(descs) |
|
0981a08…
|
noreply
|
402 |
incoming_type = entity.get("type", "concept") |
|
0981a08…
|
noreply
|
403 |
|
|
0981a08…
|
noreply
|
404 |
# Try exact match first (case-insensitive), then fuzzy |
|
0981a08…
|
noreply
|
405 |
matched_name: Optional[str] = None |
|
0981a08…
|
noreply
|
406 |
if incoming_name.lower() in name_index: |
|
0981a08…
|
noreply
|
407 |
matched_name = name_index[incoming_name.lower()] |
|
0981a08…
|
noreply
|
408 |
else: |
|
0981a08…
|
noreply
|
409 |
for existing_name in existing_names: |
|
0981a08…
|
noreply
|
410 |
if self._fuzzy_match(incoming_name, existing_name): |
|
0981a08…
|
noreply
|
411 |
matched_name = existing_name |
|
0981a08…
|
noreply
|
412 |
break |
|
0981a08…
|
noreply
|
413 |
|
|
0981a08…
|
noreply
|
414 |
if matched_name is not None: |
|
0981a08…
|
noreply
|
415 |
# Resolve type conflict |
|
0981a08…
|
noreply
|
416 |
existing_type = existing_names[matched_name].get("type", "concept") |
|
0981a08…
|
noreply
|
417 |
resolved_type = self._more_specific_type(existing_type, incoming_type) |
|
0981a08…
|
noreply
|
418 |
|
|
0981a08…
|
noreply
|
419 |
# Add merge provenance |
|
0981a08…
|
noreply
|
420 |
merge_note = f"merged_from:{incoming_name}" |
|
0981a08…
|
noreply
|
421 |
merged_descs = descs if incoming_name == matched_name else descs + [merge_note] |
|
0981a08…
|
noreply
|
422 |
|
|
0981a08…
|
noreply
|
423 |
self._store.merge_entity( |
|
0981a08…
|
noreply
|
424 |
matched_name, resolved_type, merged_descs, source=entity.get("source") |
|
0981a08…
|
noreply
|
425 |
) |
|
0981a08…
|
noreply
|
426 |
target_name = matched_name |
|
0981a08…
|
noreply
|
427 |
else: |
|
0981a08…
|
noreply
|
428 |
self._store.merge_entity( |
|
0981a08…
|
noreply
|
429 |
incoming_name, incoming_type, descs, source=entity.get("source") |
|
0981a08…
|
noreply
|
430 |
) |
|
0981a08…
|
noreply
|
431 |
# Update indexes for subsequent fuzzy matches within this merge |
|
0981a08…
|
noreply
|
432 |
existing_names[incoming_name] = entity |
|
0981a08…
|
noreply
|
433 |
name_index[incoming_name.lower()] = incoming_name |
|
0981a08…
|
noreply
|
434 |
target_name = incoming_name |
|
0981a08…
|
noreply
|
435 |
|
|
0ad36b7…
|
noreply
|
436 |
for occ in entity.get("occurrences", []): |
|
0ad36b7…
|
noreply
|
437 |
self._store.add_occurrence( |
|
0981a08…
|
noreply
|
438 |
target_name, |
|
0ad36b7…
|
noreply
|
439 |
occ.get("source", ""), |
|
0ad36b7…
|
noreply
|
440 |
occ.get("timestamp"), |
|
0ad36b7…
|
noreply
|
441 |
occ.get("text"), |
|
0ad36b7…
|
noreply
|
442 |
) |
|
0ad36b7…
|
noreply
|
443 |
|
|
0ad36b7…
|
noreply
|
444 |
for rel in other._store.get_all_relationships(): |
|
0ad36b7…
|
noreply
|
445 |
self._store.add_relationship( |
|
0ad36b7…
|
noreply
|
446 |
rel.get("source", ""), |
|
0ad36b7…
|
noreply
|
447 |
rel.get("target", ""), |
|
0ad36b7…
|
noreply
|
448 |
rel.get("type", "related_to"), |
|
0ad36b7…
|
noreply
|
449 |
content_source=rel.get("content_source"), |
|
0ad36b7…
|
noreply
|
450 |
timestamp=rel.get("timestamp"), |
|
0ad36b7…
|
noreply
|
451 |
) |
|
0981a08…
|
noreply
|
452 |
|
|
0981a08…
|
noreply
|
453 |
def classify_for_planning(self): |
|
0981a08…
|
noreply
|
454 |
"""Classify entities in this knowledge graph into planning taxonomy types.""" |
|
0981a08…
|
noreply
|
455 |
from video_processor.integrators.taxonomy import TaxonomyClassifier |
|
0981a08…
|
noreply
|
456 |
|
|
0981a08…
|
noreply
|
457 |
classifier = TaxonomyClassifier(provider_manager=self.pm) |
|
0981a08…
|
noreply
|
458 |
entities = self._store.get_all_entities() |
|
0981a08…
|
noreply
|
459 |
relationships = self._store.get_all_relationships() |
|
0981a08…
|
noreply
|
460 |
return classifier.classify_entities(entities, relationships) |
|
321f2f5…
|
leo
|
461 |
|
|
321f2f5…
|
leo
|
462 |
def generate_mermaid(self, max_nodes: int = 30) -> str: |
|
321f2f5…
|
leo
|
463 |
"""Generate Mermaid visualization code.""" |
|
0ad36b7…
|
noreply
|
464 |
nodes = self.nodes |
|
0ad36b7…
|
noreply
|
465 |
rels = self.relationships |
|
0ad36b7…
|
noreply
|
466 |
|
|
321f2f5…
|
leo
|
467 |
node_importance = {} |
|
0ad36b7…
|
noreply
|
468 |
for node_id in nodes: |
|
0ad36b7…
|
noreply
|
469 |
count = sum(1 for rel in rels if rel["source"] == node_id or rel["target"] == node_id) |
|
321f2f5…
|
leo
|
470 |
node_importance[node_id] = count |
|
321f2f5…
|
leo
|
471 |
|
|
321f2f5…
|
leo
|
472 |
important = sorted(node_importance.items(), key=lambda x: x[1], reverse=True) |
|
321f2f5…
|
leo
|
473 |
important_ids = [n[0] for n in important[:max_nodes]] |
|
321f2f5…
|
leo
|
474 |
|
|
321f2f5…
|
leo
|
475 |
mermaid = ["graph LR"] |
|
321f2f5…
|
leo
|
476 |
|
|
321f2f5…
|
leo
|
477 |
for nid in important_ids: |
|
0ad36b7…
|
noreply
|
478 |
node = nodes[nid] |
|
321f2f5…
|
leo
|
479 |
ntype = node.get("type", "concept") |
|
321f2f5…
|
leo
|
480 |
# Sanitize id for mermaid (alphanumeric + underscore only) |
|
321f2f5…
|
leo
|
481 |
safe_id = "".join(c if c.isalnum() or c == "_" else "_" for c in nid) |
|
321f2f5…
|
leo
|
482 |
safe_name = node["name"].replace('"', "'") |
|
321f2f5…
|
leo
|
483 |
mermaid.append(f' {safe_id}["{safe_name}"]:::{ntype}') |
|
321f2f5…
|
leo
|
484 |
|
|
321f2f5…
|
leo
|
485 |
added = set() |
|
0ad36b7…
|
noreply
|
486 |
for rel in rels: |
|
321f2f5…
|
leo
|
487 |
src, tgt = rel["source"], rel["target"] |
|
321f2f5…
|
leo
|
488 |
if src in important_ids and tgt in important_ids: |
|
321f2f5…
|
leo
|
489 |
rtype = rel.get("type", "related_to") |
|
321f2f5…
|
leo
|
490 |
key = f"{src}|{tgt}|{rtype}" |
|
321f2f5…
|
leo
|
491 |
if key not in added: |
|
321f2f5…
|
leo
|
492 |
safe_src = "".join(c if c.isalnum() or c == "_" else "_" for c in src) |
|
321f2f5…
|
leo
|
493 |
safe_tgt = "".join(c if c.isalnum() or c == "_" else "_" for c in tgt) |
|
321f2f5…
|
leo
|
494 |
mermaid.append(f' {safe_src} -- "{rtype}" --> {safe_tgt}') |
|
321f2f5…
|
leo
|
495 |
added.add(key) |
|
321f2f5…
|
leo
|
496 |
|
|
321f2f5…
|
leo
|
497 |
mermaid.append(" classDef person fill:#f9d5e5,stroke:#333,stroke-width:1px") |
|
321f2f5…
|
leo
|
498 |
mermaid.append(" classDef concept fill:#eeeeee,stroke:#333,stroke-width:1px") |
|
321f2f5…
|
leo
|
499 |
mermaid.append(" classDef diagram fill:#d5f9e5,stroke:#333,stroke-width:1px") |
|
321f2f5…
|
leo
|
500 |
mermaid.append(" classDef time fill:#e5d5f9,stroke:#333,stroke-width:1px") |
|
321f2f5…
|
leo
|
501 |
|
|
321f2f5…
|
leo
|
502 |
return "\n".join(mermaid) |