PlanOpticon

planopticon / video_processor / integrators / knowledge_graph.py
Source Blame History 502 lines
321f2f5… leo 1 """Knowledge graph integration for organizing extracted content."""
321f2f5… leo 2
321f2f5… leo 3 import logging
321f2f5… leo 4 from pathlib import Path
321f2f5… leo 5 from typing import Dict, List, Optional, Union
321f2f5… leo 6
287a3bb… leo 7 from tqdm import tqdm
287a3bb… leo 8
0ad36b7… noreply 9 from video_processor.integrators.graph_store import GraphStore, create_store
0981a08… noreply 10 from video_processor.models import Entity, KnowledgeGraphData, Relationship, SourceRecord
321f2f5… leo 11 from video_processor.providers.manager import ProviderManager
321f2f5… leo 12 from video_processor.utils.json_parsing import parse_json_from_response
321f2f5… leo 13
321f2f5… leo 14 logger = logging.getLogger(__name__)
321f2f5… leo 15
321f2f5… leo 16
321f2f5… leo 17 class KnowledgeGraph:
321f2f5… leo 18 """Integrates extracted content into a structured knowledge graph."""
321f2f5… leo 19
321f2f5… leo 20 def __init__(
321f2f5… leo 21 self,
321f2f5… leo 22 provider_manager: Optional[ProviderManager] = None,
0ad36b7… noreply 23 db_path: Optional[Path] = None,
0ad36b7… noreply 24 store: Optional[GraphStore] = None,
321f2f5… leo 25 ):
321f2f5… leo 26 self.pm = provider_manager
0ad36b7… noreply 27 self._store = store or create_store(db_path)
0981a08… noreply 28
0981a08… noreply 29 def register_source(self, source: Dict) -> None:
0981a08… noreply 30 """Register a content source for provenance tracking."""
0981a08… noreply 31 self._store.register_source(source)
0ad36b7… noreply 32
0ad36b7… noreply 33 @property
0ad36b7… noreply 34 def nodes(self) -> Dict[str, dict]:
0ad36b7… noreply 35 """Backward-compatible read access to nodes as a dict keyed by entity name."""
0ad36b7… noreply 36 result = {}
0ad36b7… noreply 37 for entity in self._store.get_all_entities():
0ad36b7… noreply 38 name = entity["name"]
0ad36b7… noreply 39 descs = entity.get("descriptions", [])
0ad36b7… noreply 40 result[name] = {
0ad36b7… noreply 41 "id": entity.get("id", name),
0ad36b7… noreply 42 "name": name,
0ad36b7… noreply 43 "type": entity.get("type", "concept"),
0ad36b7… noreply 44 "descriptions": set(descs) if isinstance(descs, list) else descs,
0ad36b7… noreply 45 "occurrences": entity.get("occurrences", []),
0ad36b7… noreply 46 }
0ad36b7… noreply 47 return result
0ad36b7… noreply 48
0ad36b7… noreply 49 @property
0ad36b7… noreply 50 def relationships(self) -> List[dict]:
0ad36b7… noreply 51 """Backward-compatible read access to relationships."""
0ad36b7… noreply 52 return self._store.get_all_relationships()
321f2f5… leo 53
321f2f5… leo 54 def _chat(self, prompt: str, temperature: float = 0.3) -> str:
321f2f5… leo 55 """Send a chat message through ProviderManager (or return empty if none)."""
321f2f5… leo 56 if not self.pm:
321f2f5… leo 57 return ""
321f2f5… leo 58 return self.pm.chat(
321f2f5… leo 59 [{"role": "user", "content": prompt}],
321f2f5… leo 60 max_tokens=4096,
321f2f5… leo 61 temperature=temperature,
321f2f5… leo 62 )
321f2f5… leo 63
829e24a… leo 64 def extract_entities_and_relationships(
829e24a… leo 65 self, text: str
829e24a… leo 66 ) -> tuple[List[Entity], List[Relationship]]:
287a3bb… leo 67 """Extract entities and relationships in a single LLM call."""
321f2f5… leo 68 prompt = (
287a3bb… leo 69 "Extract all notable entities and relationships from the following content.\n\n"
321f2f5… leo 70 f"CONTENT:\n{text}\n\n"
287a3bb… leo 71 "Return a JSON object with two keys:\n"
829e24a… leo 72 '- "entities": array of {"name": "...", '
829e24a… leo 73 '"type": "person|concept|technology|organization|time", '
829e24a… leo 74 '"description": "brief description"}\n'
829e24a… leo 75 '- "relationships": array of {"source": "entity name", '
829e24a… leo 76 '"target": "entity name", '
829e24a… leo 77 '"type": "relationship description"}\n\n'
287a3bb… leo 78 "Return ONLY the JSON object."
321f2f5… leo 79 )
321f2f5… leo 80 raw = self._chat(prompt)
321f2f5… leo 81 parsed = parse_json_from_response(raw)
321f2f5… leo 82
321f2f5… leo 83 entities = []
287a3bb… leo 84 rels = []
287a3bb… leo 85
287a3bb… leo 86 if isinstance(parsed, dict):
287a3bb… leo 87 for item in parsed.get("entities", []):
287a3bb… leo 88 if isinstance(item, dict) and "name" in item:
829e24a… leo 89 entities.append(
829e24a… leo 90 Entity(
829e24a… leo 91 name=item["name"],
829e24a… leo 92 type=item.get("type", "concept"),
829e24a… leo 93 descriptions=[item["description"]] if item.get("description") else [],
829e24a… leo 94 )
829e24a… leo 95 )
829e24a… leo 96 {e.name for e in entities}
287a3bb… leo 97 for item in parsed.get("relationships", []):
287a3bb… leo 98 if isinstance(item, dict) and "source" in item and "target" in item:
829e24a… leo 99 rels.append(
829e24a… leo 100 Relationship(
829e24a… leo 101 source=item["source"],
829e24a… leo 102 target=item["target"],
829e24a… leo 103 type=item.get("type", "related_to"),
829e24a… leo 104 )
829e24a… leo 105 )
287a3bb… leo 106 elif isinstance(parsed, list):
287a3bb… leo 107 # Fallback: if model returns a flat entity list
321f2f5… leo 108 for item in parsed:
321f2f5… leo 109 if isinstance(item, dict) and "name" in item:
829e24a… leo 110 entities.append(
829e24a… leo 111 Entity(
829e24a… leo 112 name=item["name"],
829e24a… leo 113 type=item.get("type", "concept"),
829e24a… leo 114 descriptions=[item["description"]] if item.get("description") else [],
829e24a… leo 115 )
829e24a… leo 116 )
287a3bb… leo 117
287a3bb… leo 118 return entities, rels
287a3bb… leo 119
0981a08… noreply 120 def add_content(
0981a08… noreply 121 self,
0981a08… noreply 122 text: str,
0981a08… noreply 123 source: str,
0981a08… noreply 124 timestamp: Optional[float] = None,
0981a08… noreply 125 source_id: Optional[str] = None,
0981a08… noreply 126 ) -> None:
321f2f5… leo 127 """Add content to knowledge graph by extracting entities and relationships."""
287a3bb… leo 128 entities, relationships = self.extract_entities_and_relationships(text)
287a3bb… leo 129
0ad36b7… noreply 130 snippet = text[:100] + "..." if len(text) > 100 else text
0ad36b7… noreply 131
321f2f5… leo 132 for entity in entities:
0ad36b7… noreply 133 self._store.merge_entity(entity.name, entity.type, entity.descriptions, source=source)
0ad36b7… noreply 134 self._store.add_occurrence(entity.name, source, timestamp, snippet)
0981a08… noreply 135 if source_id:
0981a08… noreply 136 self._store.add_source_location(
0981a08… noreply 137 source_id,
0981a08… noreply 138 entity_name_lower=entity.name.lower(),
0981a08… noreply 139 timestamp=timestamp,
0981a08… noreply 140 text_snippet=snippet,
0981a08… noreply 141 )
321f2f5… leo 142
321f2f5… leo 143 for rel in relationships:
0ad36b7… noreply 144 if self._store.has_entity(rel.source) and self._store.has_entity(rel.target):
0ad36b7… noreply 145 self._store.add_relationship(
0ad36b7… noreply 146 rel.source,
0ad36b7… noreply 147 rel.target,
0ad36b7… noreply 148 rel.type,
0ad36b7… noreply 149 content_source=source,
0ad36b7… noreply 150 timestamp=timestamp,
829e24a… leo 151 )
287a3bb… leo 152
287a3bb… leo 153 def process_transcript(self, transcript: Dict, batch_size: int = 10) -> None:
287a3bb… leo 154 """Process transcript segments into knowledge graph, batching for efficiency."""
321f2f5… leo 155 if "segments" not in transcript:
321f2f5… leo 156 logger.warning("Transcript missing segments")
321f2f5… leo 157 return
321f2f5… leo 158
287a3bb… leo 159 segments = transcript["segments"]
287a3bb… leo 160
287a3bb… leo 161 # Register speakers first
287a3bb… leo 162 for i, segment in enumerate(segments):
287a3bb… leo 163 speaker = segment.get("speaker", None)
0ad36b7… noreply 164 if speaker and not self._store.has_entity(speaker):
0ad36b7… noreply 165 self._store.merge_entity(speaker, "person", ["Speaker in transcript"])
287a3bb… leo 166
287a3bb… leo 167 # Batch segments together for fewer API calls
287a3bb… leo 168 batches = []
287a3bb… leo 169 for start in range(0, len(segments), batch_size):
829e24a… leo 170 batches.append(segments[start : start + batch_size])
287a3bb… leo 171
287a3bb… leo 172 for batch in tqdm(batches, desc="Building knowledge graph", unit="batch"):
287a3bb… leo 173 # Combine batch text
829e24a… leo 174 combined_text = " ".join(seg["text"] for seg in batch if "text" in seg)
287a3bb… leo 175 if not combined_text.strip():
287a3bb… leo 176 continue
287a3bb… leo 177
287a3bb… leo 178 # Use first segment's timestamp as batch timestamp
287a3bb… leo 179 batch_start_idx = segments.index(batch[0])
287a3bb… leo 180 timestamp = batch[0].get("start", None)
287a3bb… leo 181 source = f"transcript_batch_{batch_start_idx}"
287a3bb… leo 182
287a3bb… leo 183 self.add_content(combined_text, source, timestamp)
321f2f5… leo 184
321f2f5… leo 185 def process_diagrams(self, diagrams: List[Dict]) -> None:
321f2f5… leo 186 """Process diagram results into knowledge graph."""
287a3bb… leo 187 for i, diagram in enumerate(tqdm(diagrams, desc="Processing diagrams for KG", unit="diag")):
321f2f5… leo 188 text_content = diagram.get("text_content", "")
0ad36b7… noreply 189 source = f"diagram_{i}"
321f2f5… leo 190 if text_content:
321f2f5… leo 191 self.add_content(text_content, source)
321f2f5… leo 192
321f2f5… leo 193 diagram_id = f"diagram_{i}"
0ad36b7… noreply 194 if not self._store.has_entity(diagram_id):
0ad36b7… noreply 195 self._store.merge_entity(diagram_id, "diagram", ["Visual diagram from video"])
0ad36b7… noreply 196 self._store.add_occurrence(
0ad36b7… noreply 197 diagram_id,
0ad36b7… noreply 198 source if text_content else diagram_id,
0ad36b7… noreply 199 text=f"frame_index={diagram.get('frame_index')}",
2a1b11a… noreply 200 )
2a1b11a… noreply 201
2a1b11a… noreply 202 def process_screenshots(self, screenshots: List[Dict]) -> None:
2a1b11a… noreply 203 """Process screenshot captures into knowledge graph.
2a1b11a… noreply 204
2a1b11a… noreply 205 Extracts entities from text_content and adds screenshot-specific
2a1b11a… noreply 206 entities from the entities list.
2a1b11a… noreply 207 """
2a1b11a… noreply 208 for i, capture in enumerate(screenshots):
2a1b11a… noreply 209 text_content = capture.get("text_content", "")
2a1b11a… noreply 210 source = f"screenshot_{i}"
2a1b11a… noreply 211 content_type = capture.get("content_type", "screenshot")
2a1b11a… noreply 212
2a1b11a… noreply 213 # Extract entities from visible text via LLM
2a1b11a… noreply 214 if text_content:
2a1b11a… noreply 215 self.add_content(text_content, source)
2a1b11a… noreply 216
2a1b11a… noreply 217 # Add explicitly identified entities from vision extraction
2a1b11a… noreply 218 for entity_name in capture.get("entities", []):
2a1b11a… noreply 219 if not entity_name or len(entity_name) < 2:
2a1b11a… noreply 220 continue
2a1b11a… noreply 221 if not self._store.has_entity(entity_name):
2a1b11a… noreply 222 self._store.merge_entity(
2a1b11a… noreply 223 entity_name,
2a1b11a… noreply 224 "concept",
2a1b11a… noreply 225 [f"Identified in {content_type} screenshot"],
2a1b11a… noreply 226 source=source,
2a1b11a… noreply 227 )
2a1b11a… noreply 228 self._store.add_occurrence(
2a1b11a… noreply 229 entity_name,
2a1b11a… noreply 230 source,
2a1b11a… noreply 231 text=f"Visible in {content_type} (frame {capture.get('frame_index', '?')})",
0981a08… noreply 232 )
0981a08… noreply 233
321f2f5… leo 234 def to_data(self) -> KnowledgeGraphData:
321f2f5… leo 235 """Convert to pydantic KnowledgeGraphData model."""
321f2f5… leo 236 nodes = []
0ad36b7… noreply 237 for entity in self._store.get_all_entities():
0ad36b7… noreply 238 descs = entity.get("descriptions", [])
321f2f5… leo 239 if isinstance(descs, set):
321f2f5… leo 240 descs = list(descs)
829e24a… leo 241 nodes.append(
829e24a… leo 242 Entity(
0ad36b7… noreply 243 name=entity["name"],
0ad36b7… noreply 244 type=entity.get("type", "concept"),
829e24a… leo 245 descriptions=descs,
0ad36b7… noreply 246 occurrences=entity.get("occurrences", []),
829e24a… leo 247 )
829e24a… leo 248 )
321f2f5… leo 249
321f2f5… leo 250 rels = [
321f2f5… leo 251 Relationship(
321f2f5… leo 252 source=r["source"],
321f2f5… leo 253 target=r["target"],
321f2f5… leo 254 type=r.get("type", "related_to"),
321f2f5… leo 255 content_source=r.get("content_source"),
321f2f5… leo 256 timestamp=r.get("timestamp"),
321f2f5… leo 257 )
0ad36b7… noreply 258 for r in self._store.get_all_relationships()
321f2f5… leo 259 ]
0981a08… noreply 260
0981a08… noreply 261 sources = [SourceRecord(**s) for s in self._store.get_sources()]
0981a08… noreply 262
0981a08… noreply 263 return KnowledgeGraphData(nodes=nodes, relationships=rels, sources=sources)
321f2f5… leo 264
321f2f5… leo 265 def to_dict(self) -> Dict:
321f2f5… leo 266 """Convert knowledge graph to dictionary (backward-compatible)."""
0ad36b7… noreply 267 return self._store.to_dict()
321f2f5… leo 268
    def save(self, output_path: Union[str, Path]) -> Path:
        """Save knowledge graph. Defaults to .db (SQLite), also supports .json.

        Behavior by suffix:
        - no suffix: treated as ``.db``
        - ``.json``: serialized via ``to_data().model_dump_json``
        - ``.db``: a SQLite copy is made unless the backing store is already
          a SQLiteStore at this exact path (then it is already persisted)
        - anything else: falls back to JSON serialization

        Returns:
            The resolved output path (with suffix applied).
        """
        output_path = Path(output_path)
        if not output_path.suffix:
            output_path = output_path.with_suffix(".db")
        output_path.parent.mkdir(parents=True, exist_ok=True)

        if output_path.suffix == ".json":
            data = self.to_data()
            output_path.write_text(data.model_dump_json(indent=2))
        elif output_path.suffix == ".db":
            # If the backing store is already SQLite at this path, it's already persisted.
            # Otherwise, create a new SQLite store and copy data into it.
            from video_processor.integrators.graph_store import SQLiteStore

            # NOTE(review): this reaches into the private `_db_path` attribute and
            # compares against str(output_path) — presumably `_db_path` is stored
            # as a string; if it ever becomes a Path the comparison would always
            # copy. Confirm against SQLiteStore.
            if not isinstance(self._store, SQLiteStore) or self._store._db_path != str(output_path):
                target = SQLiteStore(output_path)
                for source in self._store.get_sources():
                    target.register_source(source)
                for entity in self._store.get_all_entities():
                    # Legacy in-memory stores keep descriptions as a set.
                    descs = entity.get("descriptions", [])
                    if isinstance(descs, set):
                        descs = list(descs)
                    target.merge_entity(
                        entity["name"],
                        entity.get("type", "concept"),
                        descs,
                        source=entity.get("source"),
                    )
                    # Copy per-entity occurrences alongside the entity itself.
                    for occ in entity.get("occurrences", []):
                        target.add_occurrence(
                            entity["name"],
                            occ.get("source", ""),
                            occ.get("timestamp"),
                            occ.get("text"),
                        )
                # Relationships are copied verbatim after all entities exist.
                for rel in self._store.get_all_relationships():
                    target.add_relationship(
                        rel.get("source", ""),
                        rel.get("target", ""),
                        rel.get("type", "related_to"),
                        content_source=rel.get("content_source"),
                        timestamp=rel.get("timestamp"),
                    )
                target.close()
        else:
            # Unknown suffix — fall back to JSON serialization.
            data = self.to_data()
            output_path.write_text(data.model_dump_json(indent=2))

        logger.info(
            f"Saved knowledge graph with {self._store.get_entity_count()} nodes "
            f"and {self._store.get_relationship_count()} relationships to {output_path}"
        )
        return output_path
321f2f5… leo 324
321f2f5… leo 325 @classmethod
0ad36b7… noreply 326 def from_dict(cls, data: Dict, db_path: Optional[Path] = None) -> "KnowledgeGraph":
321f2f5… leo 327 """Reconstruct a KnowledgeGraph from saved JSON dict."""
0ad36b7… noreply 328 kg = cls(db_path=db_path)
0981a08… noreply 329 for source in data.get("sources", []):
0981a08… noreply 330 kg._store.register_source(source)
321f2f5… leo 331 for node in data.get("nodes", []):
0ad36b7… noreply 332 name = node.get("name", node.get("id", ""))
321f2f5… leo 333 descs = node.get("descriptions", [])
0ad36b7… noreply 334 if isinstance(descs, set):
0ad36b7… noreply 335 descs = list(descs)
0ad36b7… noreply 336 kg._store.merge_entity(
0ad36b7… noreply 337 name, node.get("type", "concept"), descs, source=node.get("source")
0ad36b7… noreply 338 )
0ad36b7… noreply 339 for occ in node.get("occurrences", []):
0ad36b7… noreply 340 kg._store.add_occurrence(
0ad36b7… noreply 341 name,
0ad36b7… noreply 342 occ.get("source", ""),
0ad36b7… noreply 343 occ.get("timestamp"),
0ad36b7… noreply 344 occ.get("text"),
0ad36b7… noreply 345 )
0ad36b7… noreply 346 for rel in data.get("relationships", []):
0ad36b7… noreply 347 kg._store.add_relationship(
0ad36b7… noreply 348 rel.get("source", ""),
0ad36b7… noreply 349 rel.get("target", ""),
0ad36b7… noreply 350 rel.get("type", "related_to"),
0ad36b7… noreply 351 content_source=rel.get("content_source"),
0ad36b7… noreply 352 timestamp=rel.get("timestamp"),
0ad36b7… noreply 353 )
321f2f5… leo 354 return kg
321f2f5… leo 355
    # Type specificity ranking for conflict resolution during merge.
    # Higher rank = more specific type wins when two entities match.
    # Types not listed here default to rank 1 (see _more_specific_type).
    _TYPE_SPECIFICITY: Dict[str, int] = {
        "concept": 0,
        "time": 1,
        "diagram": 1,
        "organization": 2,
        "person": 3,
        "technology": 3,
    }
0981a08… noreply 366
0981a08… noreply 367 @staticmethod
0981a08… noreply 368 def _fuzzy_match(name_a: str, name_b: str, threshold: float = 0.85) -> bool:
0981a08… noreply 369 """Return True if two names are similar enough to be considered the same entity."""
0981a08… noreply 370 from difflib import SequenceMatcher
0981a08… noreply 371
0981a08… noreply 372 return SequenceMatcher(None, name_a.lower(), name_b.lower()).ratio() >= threshold
0981a08… noreply 373
0981a08… noreply 374 def _more_specific_type(self, type_a: str, type_b: str) -> str:
0981a08… noreply 375 """Return the more specific of two entity types."""
0981a08… noreply 376 rank_a = self._TYPE_SPECIFICITY.get(type_a, 1)
0981a08… noreply 377 rank_b = self._TYPE_SPECIFICITY.get(type_b, 1)
0981a08… noreply 378 return type_a if rank_a >= rank_b else type_b
0981a08… noreply 379
    def merge(self, other: "KnowledgeGraph") -> None:
        """Merge another KnowledgeGraph into this one.

        Improvements over naive merge:
        - Fuzzy name matching (SequenceMatcher >= 0.85) to unify near-duplicate entities
        - Type conflict resolution: prefer more specific types (e.g. technology > concept)
        - Provenance: merged entities get a ``merged_from`` description entry
        """
        for source in other._store.get_sources():
            self._store.register_source(source)

        # Build a lookup of existing entity names for fuzzy matching
        existing_entities = self._store.get_all_entities()
        existing_names = {e["name"]: e for e in existing_entities}
        # Cache lowercase -> canonical name for fast lookup
        name_index: dict[str, str] = {n.lower(): n for n in existing_names}

        for entity in other._store.get_all_entities():
            incoming_name = entity["name"]
            descs = entity.get("descriptions", [])
            if isinstance(descs, set):
                # Legacy in-memory stores keep descriptions as a set.
                descs = list(descs)
            incoming_type = entity.get("type", "concept")

            # Try exact match first (case-insensitive), then fuzzy
            matched_name: Optional[str] = None
            if incoming_name.lower() in name_index:
                matched_name = name_index[incoming_name.lower()]
            else:
                # Linear scan over all existing names; first match wins.
                for existing_name in existing_names:
                    if self._fuzzy_match(incoming_name, existing_name):
                        matched_name = existing_name
                        break

            if matched_name is not None:
                # Resolve type conflict in favor of the more specific type.
                existing_type = existing_names[matched_name].get("type", "concept")
                resolved_type = self._more_specific_type(existing_type, incoming_type)

                # Add merge provenance only when the names actually differed.
                merge_note = f"merged_from:{incoming_name}"
                merged_descs = descs if incoming_name == matched_name else descs + [merge_note]

                self._store.merge_entity(
                    matched_name, resolved_type, merged_descs, source=entity.get("source")
                )
                target_name = matched_name
            else:
                self._store.merge_entity(
                    incoming_name, incoming_type, descs, source=entity.get("source")
                )
                # Update indexes for subsequent fuzzy matches within this merge
                existing_names[incoming_name] = entity
                name_index[incoming_name.lower()] = incoming_name
                target_name = incoming_name

            # Occurrences follow the entity to its (possibly renamed) target.
            for occ in entity.get("occurrences", []):
                self._store.add_occurrence(
                    target_name,
                    occ.get("source", ""),
                    occ.get("timestamp"),
                    occ.get("text"),
                )

        # NOTE(review): relationships are copied with their original endpoint
        # names — edges touching an entity that was fuzzy-merged under a
        # different canonical name keep the incoming name. Confirm whether the
        # store reconciles this or dangling endpoints are acceptable.
        for rel in other._store.get_all_relationships():
            self._store.add_relationship(
                rel.get("source", ""),
                rel.get("target", ""),
                rel.get("type", "related_to"),
                content_source=rel.get("content_source"),
                timestamp=rel.get("timestamp"),
            )
0981a08… noreply 452
0981a08… noreply 453 def classify_for_planning(self):
0981a08… noreply 454 """Classify entities in this knowledge graph into planning taxonomy types."""
0981a08… noreply 455 from video_processor.integrators.taxonomy import TaxonomyClassifier
0981a08… noreply 456
0981a08… noreply 457 classifier = TaxonomyClassifier(provider_manager=self.pm)
0981a08… noreply 458 entities = self._store.get_all_entities()
0981a08… noreply 459 relationships = self._store.get_all_relationships()
0981a08… noreply 460 return classifier.classify_entities(entities, relationships)
321f2f5… leo 461
321f2f5… leo 462 def generate_mermaid(self, max_nodes: int = 30) -> str:
321f2f5… leo 463 """Generate Mermaid visualization code."""
0ad36b7… noreply 464 nodes = self.nodes
0ad36b7… noreply 465 rels = self.relationships
0ad36b7… noreply 466
321f2f5… leo 467 node_importance = {}
0ad36b7… noreply 468 for node_id in nodes:
0ad36b7… noreply 469 count = sum(1 for rel in rels if rel["source"] == node_id or rel["target"] == node_id)
321f2f5… leo 470 node_importance[node_id] = count
321f2f5… leo 471
321f2f5… leo 472 important = sorted(node_importance.items(), key=lambda x: x[1], reverse=True)
321f2f5… leo 473 important_ids = [n[0] for n in important[:max_nodes]]
321f2f5… leo 474
321f2f5… leo 475 mermaid = ["graph LR"]
321f2f5… leo 476
321f2f5… leo 477 for nid in important_ids:
0ad36b7… noreply 478 node = nodes[nid]
321f2f5… leo 479 ntype = node.get("type", "concept")
321f2f5… leo 480 # Sanitize id for mermaid (alphanumeric + underscore only)
321f2f5… leo 481 safe_id = "".join(c if c.isalnum() or c == "_" else "_" for c in nid)
321f2f5… leo 482 safe_name = node["name"].replace('"', "'")
321f2f5… leo 483 mermaid.append(f' {safe_id}["{safe_name}"]:::{ntype}')
321f2f5… leo 484
321f2f5… leo 485 added = set()
0ad36b7… noreply 486 for rel in rels:
321f2f5… leo 487 src, tgt = rel["source"], rel["target"]
321f2f5… leo 488 if src in important_ids and tgt in important_ids:
321f2f5… leo 489 rtype = rel.get("type", "related_to")
321f2f5… leo 490 key = f"{src}|{tgt}|{rtype}"
321f2f5… leo 491 if key not in added:
321f2f5… leo 492 safe_src = "".join(c if c.isalnum() or c == "_" else "_" for c in src)
321f2f5… leo 493 safe_tgt = "".join(c if c.isalnum() or c == "_" else "_" for c in tgt)
321f2f5… leo 494 mermaid.append(f' {safe_src} -- "{rtype}" --> {safe_tgt}')
321f2f5… leo 495 added.add(key)
321f2f5… leo 496
321f2f5… leo 497 mermaid.append(" classDef person fill:#f9d5e5,stroke:#333,stroke-width:1px")
321f2f5… leo 498 mermaid.append(" classDef concept fill:#eeeeee,stroke:#333,stroke-width:1px")
321f2f5… leo 499 mermaid.append(" classDef diagram fill:#d5f9e5,stroke:#333,stroke-width:1px")
321f2f5… leo 500 mermaid.append(" classDef time fill:#e5d5f9,stroke:#333,stroke-width:1px")
321f2f5… leo 501
321f2f5… leo 502 return "\n".join(mermaid)

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button