PlanOpticon

planopticon / video_processor / integrators / graph_query.py
Source Blame History 601 lines
b363c5b… noreply 1 """Query engine for PlanOpticon knowledge graphs."""
b363c5b… noreply 2
b363c5b… noreply 3 import json
b363c5b… noreply 4 import logging
b363c5b… noreply 5 from dataclasses import dataclass
b363c5b… noreply 6 from pathlib import Path
b363c5b… noreply 7 from typing import Any, Dict, Optional
b363c5b… noreply 8
b363c5b… noreply 9 from video_processor.integrators.graph_store import (
b363c5b… noreply 10 GraphStore,
b363c5b… noreply 11 InMemoryStore,
b363c5b… noreply 12 create_store,
b363c5b… noreply 13 )
b363c5b… noreply 14
b363c5b… noreply 15 logger = logging.getLogger(__name__)
b363c5b… noreply 16
b363c5b… noreply 17
@dataclass
class QueryResult:
    """Uniform wrapper for query results.

    Bundles the payload of a query with the query that produced it and a
    short human-readable explanation, and renders it as plain text, JSON or
    a Mermaid flowchart.
    """

    # Payload: a dict (stats / single entity), a list of entity and/or
    # relationship dicts, or any other value (rendered with ``str``).
    data: Any
    query_type: str  # e.g. "cypher", "filter", "sql", "agentic"
    raw_query: str = ""
    explanation: str = ""

    def to_text(self) -> str:
        """Return a human-readable, line-oriented rendering of the result.

        The explanation (if any) comes first, followed by a blank line and
        the payload.  A ``None`` payload renders as the explanation alone
        instead of the literal text ``"None"``.
        """
        if self.data is None:
            return self.explanation

        lines = []
        if self.explanation:
            lines.append(self.explanation)
            lines.append("")

        if isinstance(self.data, dict):
            # Stats or single entity: one "key: value" line per entry, with
            # nested dicts expanded one level.
            for key, value in self.data.items():
                if isinstance(value, dict):
                    lines.append(f"{key}:")
                    lines.extend(f" {k}: {v}" for k, v in value.items())
                else:
                    lines.append(f"{key}: {value}")
        elif isinstance(self.data, list):
            if not self.data:
                lines.append("No results found.")
            lines.extend(self._format_list_item(item) for item in self.data)
        else:
            lines.append(str(self.data))

        return "\n".join(lines)

    @staticmethod
    def _format_list_item(item: Any) -> str:
        """Format one list entry: relationship, entity, or anything else."""
        if not isinstance(item, dict):
            return f" {item}"

        if "source" in item and "target" in item:
            rtype = item.get("type", "related_to")
            return f" {item['source']} --[{rtype}]--> {item['target']}"

        if item.get("name") and item.get("type"):
            descs = item.get("descriptions") or []
            if isinstance(descs, str):
                # A bare string would otherwise be sliced character-wise.
                descs = [descs]
            elif isinstance(descs, (set, frozenset)):
                # Sets have no stable order; sort so output is reproducible.
                descs = sorted(descs, key=str)
            # str() keeps non-string descriptions from breaking the join.
            desc_str = "; ".join(str(d) for d in list(descs)[:3])
            line = f" [{item['type']}] {item['name']}"
            if desc_str:
                line += f" — {desc_str}"
            return line

        return f" {item}"

    def to_json(self) -> str:
        """Return the result as an indented JSON string.

        Values that ``json`` cannot serialise natively are converted with
        ``str``.
        """
        payload = {
            "query_type": self.query_type,
            "raw_query": self.raw_query,
            "explanation": self.explanation,
            "data": self.data,
        }
        return json.dumps(payload, indent=2, default=str)

    def to_mermaid(self) -> str:
        """Return a Mermaid ``graph LR`` diagram built from the result data.

        Dicts with ``name`` and ``type`` become styled nodes; dicts with
        ``source`` and ``target`` become edges (their endpoints are declared
        as plain nodes when not already seen).  Non-dict items are ignored.
        """

        def quoted(text: Any) -> str:
            # Labels are wrapped in double quotes, so embedded ones must go.
            return str(text).replace('"', "'")

        lines = ["graph LR"]
        seen_nodes = set()
        edges = []

        items = self.data if isinstance(self.data, list) else [self.data]

        for item in items:
            if not isinstance(item, dict):
                continue
            # Entity node
            if "name" in item and "type" in item:
                name = str(item["name"])
                if name not in seen_nodes:
                    # The class name is emitted unquoted, so it gets the same
                    # sanitising as node ids (e.g. "time period").
                    css_class = _mermaid_id(str(item["type"]))
                    lines.append(
                        f' {_mermaid_id(name)}["{quoted(name)}"]:::{css_class}'
                    )
                    seen_nodes.add(name)
            # Relationship edge
            if "source" in item and "target" in item:
                src = str(item["source"])
                tgt = str(item["target"])
                rtype = item.get("type", "related_to")
                for endpoint in (src, tgt):
                    if endpoint not in seen_nodes:
                        lines.append(f' {_mermaid_id(endpoint)}["{quoted(endpoint)}"]')
                        seen_nodes.add(endpoint)
                edges.append((src, tgt, rtype))

        # Edges are emitted after all node declarations.
        for src, tgt, rtype in edges:
            lines.append(
                f' {_mermaid_id(src)} -- "{quoted(rtype)}" --> {_mermaid_id(tgt)}'
            )

        lines.append(" classDef person fill:#f9d5e5,stroke:#333")
        lines.append(" classDef concept fill:#eeeeee,stroke:#333")
        lines.append(" classDef technology fill:#d5e5f9,stroke:#333")
        lines.append(" classDef organization fill:#f9e5d5,stroke:#333")

        return "\n".join(lines)
b363c5b… noreply 120
b363c5b… noreply 121
def _mermaid_id(name: str) -> str:
    """Turn an arbitrary name into an identifier usable as a Mermaid node id.

    Every character that is not alphanumeric or ``_`` is replaced by ``_``.
    Non-string input is converted with ``str`` first.  Two edge cases are
    adjusted so the result is always usable as an id:

    * an empty name yields ``"_"`` rather than an empty id;
    * the lowercase word ``end`` gets a trailing underscore, because Mermaid
      flowcharts treat it as a keyword.

    Distinct names can still map to the same id (e.g. ``"a b"`` and
    ``"a_b"``).
    """
    text = name if isinstance(name, str) else str(name)
    safe = "".join(ch if ch.isalnum() or ch == "_" else "_" for ch in text)
    if not safe:
        return "_"
    if safe == "end":
        return "end_"
    return safe
b363c5b… noreply 124
b363c5b… noreply 125
class GraphQueryEngine:
    """Query engine with direct (no-LLM) and agentic (LLM) modes.

    Direct-mode methods read from ``self.store`` and return a
    :class:`QueryResult`.  :meth:`ask` additionally needs ``self.pm``, an
    object exposing ``chat(messages, max_tokens=..., temperature=...)``.
    """

    def __init__(self, store: GraphStore, provider_manager=None):
        self.store = store
        self.pm = provider_manager

    @classmethod
    def from_db_path(cls, path: Path, provider_manager=None) -> "GraphQueryEngine":
        """Open a .db file and create a query engine."""
        store = create_store(path)
        return cls(store, provider_manager)

    @classmethod
    def from_json_path(cls, path: Path, provider_manager=None) -> "GraphQueryEngine":
        """Load a .json knowledge graph file and create a query engine.

        The file is expected to hold an object with optional ``nodes`` and
        ``relationships`` lists; missing fields fall back to defaults.
        """
        # Read as UTF-8 explicitly instead of the platform default encoding.
        data = json.loads(Path(path).read_text(encoding="utf-8"))
        store = InMemoryStore()
        for node in data.get("nodes", []):
            node_name = node.get("name", "")
            store.merge_entity(
                node_name,
                node.get("type", "concept"),
                node.get("descriptions", []),
            )
            for occ in node.get("occurrences", []):
                store.add_occurrence(
                    node_name,
                    occ.get("source", ""),
                    occ.get("timestamp"),
                    occ.get("text"),
                )
        for rel in data.get("relationships", []):
            store.add_relationship(
                rel.get("source", ""),
                rel.get("target", ""),
                rel.get("type", "related_to"),
                content_source=rel.get("content_source"),
                timestamp=rel.get("timestamp"),
            )
        return cls(store, provider_manager)

    # ── Direct mode methods (no LLM required) ──

    def entities(
        self,
        name: Optional[str] = None,
        entity_type: Optional[str] = None,
        limit: int = 50,
    ) -> QueryResult:
        """Filter entities by name substring and/or exact type.

        Both comparisons are case-insensitive.  At most *limit* entities are
        returned; a *limit* of zero or less returns none.
        """
        name_l = name.lower() if name else None
        type_l = entity_type.lower() if entity_type else None
        results = []
        if limit > 0:
            for e in self.store.get_all_entities():
                if name_l and name_l not in (e.get("name") or "").lower():
                    continue
                if type_l and type_l != (e.get("type") or "").lower():
                    continue
                results.append(e)
                if len(results) >= limit:
                    break

        raw = f"entities(name={name!r}, entity_type={entity_type!r}, limit={limit})"
        return QueryResult(
            data=results,
            query_type="filter",
            raw_query=raw,
            explanation=f"Found {len(results)} entities",
        )

    def relationships(
        self,
        source: Optional[str] = None,
        target: Optional[str] = None,
        rel_type: Optional[str] = None,
        limit: int = 50,
    ) -> QueryResult:
        """Filter relationships by source, target, and/or type.

        All three filters are case-insensitive substring matches.  At most
        *limit* relationships are returned; a *limit* of zero or less
        returns none.
        """
        wanted = [
            (key, value.lower())
            for key, value in (("source", source), ("target", target), ("type", rel_type))
            if value
        ]
        results = []
        if limit > 0:
            for r in self.store.get_all_relationships():
                if any(needle not in (r.get(key) or "").lower() for key, needle in wanted):
                    continue
                results.append(r)
                if len(results) >= limit:
                    break

        raw = f"relationships(source={source!r}, target={target!r}, rel_type={rel_type!r})"
        return QueryResult(
            data=results,
            query_type="filter",
            raw_query=raw,
            explanation=f"Found {len(results)} relationships",
        )

    def neighbors(self, entity_name: str, depth: int = 1) -> QueryResult:
        """Get an entity and its connected nodes (up to *depth* hops).

        Relationships are followed in both directions.  The result data is
        the list of entities found (starting entity first) followed by the
        relationships that connect them, each relationship listed once.
        """
        raw = f"neighbors({entity_name!r}, depth={depth})"
        entity = self.store.get_entity(entity_name)
        if not entity:
            return QueryResult(
                data=[],
                query_type="filter",
                raw_query=raw,
                explanation=f"Entity '{entity_name}' not found",
            )

        start_key = entity_name.lower()
        visited = {start_key}
        frontier = {start_key}
        result_entities = [entity]
        result_rels = []
        # Indices of relationships already collected: a relationship touching
        # the frontier of hop N also touches the frontier of hop N+1 and must
        # not be reported twice.
        collected = set()

        all_rels = list(self.store.get_all_relationships())

        for _ in range(depth):
            if not frontier:
                break
            next_frontier = set()
            for idx, rel in enumerate(all_rels):
                src_lower = rel["source"].lower()
                tgt_lower = rel["target"].lower()
                if src_lower not in frontier and tgt_lower not in frontier:
                    continue
                if idx not in collected:
                    collected.add(idx)
                    result_rels.append(rel)
                for n in (src_lower, tgt_lower):
                    if n in visited:
                        continue
                    visited.add(n)
                    next_frontier.add(n)
                    e = self.store.get_entity(n)
                    if e:
                        result_entities.append(e)
            frontier = next_frontier

        # Combine entities + relationships into output
        return QueryResult(
            data=result_entities + result_rels,
            query_type="filter",
            raw_query=raw,
            explanation=(
                f"Found {len(result_entities)} entities and {len(result_rels)} relationships"
            ),
        )

    def stats(self) -> QueryResult:
        """Return entity count, relationship count, type breakdown."""
        type_breakdown = {}
        for e in self.store.get_all_entities():
            t = e.get("type", "concept")
            type_breakdown[t] = type_breakdown.get(t, 0) + 1

        data = {
            "entity_count": self.store.get_entity_count(),
            "relationship_count": self.store.get_relationship_count(),
            "entity_types": type_breakdown,
        }
        return QueryResult(
            data=data,
            query_type="filter",
            raw_query="stats()",
            explanation="Knowledge graph statistics",
        )

    def sources(self) -> QueryResult:
        """Return all registered content sources."""
        all_sources = self.store.get_sources()
        return QueryResult(
            data=all_sources,
            query_type="filter",
            raw_query="sources()",
            explanation=f"Found {len(all_sources)} registered sources",
        )

    def provenance(self, entity_name: str) -> QueryResult:
        """Return source locations for a given entity."""
        raw = f"provenance({entity_name!r})"
        locations = self.store.get_entity_provenance(entity_name)
        if not locations:
            return QueryResult(
                data=[],
                query_type="filter",
                raw_query=raw,
                explanation=f"No provenance records found for '{entity_name}'",
            )
        return QueryResult(
            data=locations,
            query_type="filter",
            raw_query=raw,
            explanation=f"Found {len(locations)} provenance records for '{entity_name}'",
        )

    def shortest_path(self, start: str, end: str, max_depth: int = 6) -> QueryResult:
        """Find the shortest path between two entities via BFS.

        Relationships are treated as undirected and names are compared
        case-insensitively.  On success the result data alternates entities
        and relationships in path order, from the start entity to the end
        entity; an intermediate entity the store cannot resolve is omitted.
        Paths longer than *max_depth* hops are not considered.
        """
        raw = f"shortest_path({start!r}, {end!r})"
        start_entity = self.store.get_entity(start)
        end_entity = self.store.get_entity(end)
        if not start_entity:
            return QueryResult(
                data=[],
                query_type="filter",
                raw_query=raw,
                explanation=f"Entity '{start}' not found",
            )
        if not end_entity:
            return QueryResult(
                data=[],
                query_type="filter",
                raw_query=raw,
                explanation=f"Entity '{end}' not found",
            )

        start_l = start.lower()
        end_l = end.lower()
        if start_l == end_l:
            return QueryResult(
                data=[start_entity],
                query_type="filter",
                raw_query=raw,
                explanation="Start and end are the same entity",
            )

        from collections import deque

        # Build adjacency list: node -> [(neighbour, relationship), ...]
        adj = {}
        for rel in self.store.get_all_relationships():
            src_l = rel["source"].lower()
            tgt_l = rel["target"].lower()
            adj.setdefault(src_l, []).append((tgt_l, rel))
            adj.setdefault(tgt_l, []).append((src_l, rel))

        # BFS with parent pointers: node -> (previous node, relationship used).
        # Membership in ``parents`` doubles as the visited set.
        parents = {start_l: None}
        queue = deque([(start_l, 0)])
        found = False

        while queue and not found:
            current, hops = queue.popleft()
            if hops >= max_depth:
                continue
            for neighbor, rel in adj.get(current, []):
                if neighbor in parents:
                    continue
                parents[neighbor] = (current, rel)
                if neighbor == end_l:
                    found = True
                    break
                queue.append((neighbor, hops + 1))

        if not found:
            return QueryResult(
                data=[],
                query_type="filter",
                raw_query=raw,
                explanation=f"No path found between '{start}' and '{end}' within {max_depth} hops",
            )

        # Walk the parent pointers back from the end, then reverse, so every
        # hop knows which node it arrives at.
        hops_taken = []
        node = end_l
        while parents[node] is not None:
            previous, rel = parents[node]
            hops_taken.append((rel, node))
            node = previous
        hops_taken.reverse()

        path_items = [start_entity]
        for rel, arrived_at in hops_taken:
            path_items.append(rel)
            if arrived_at == end_l:
                path_items.append(end_entity)
                continue
            # Look the entity up by the name as written in the relationship.
            if rel["target"].lower() == arrived_at:
                arrived_name = rel["target"]
            else:
                arrived_name = rel["source"]
            e = self.store.get_entity(arrived_name)
            if e:
                path_items.append(e)

        return QueryResult(
            data=path_items,
            query_type="filter",
            raw_query=raw,
            explanation=f"Path found: {len(hops_taken)} hops",
        )

    def clusters(self) -> QueryResult:
        """Find connected components (clusters) in the graph.

        Each cluster is reported as ``{"cluster_id", "size", "members"}``
        where members are lower-cased entity names in sorted order.  Clusters
        are ordered by size, largest first.
        """
        # Build adjacency; entities without relationships still get a node.
        adj = {}
        for e in self.store.get_all_entities():
            adj.setdefault(e["name"].lower(), set())
        for r in self.store.get_all_relationships():
            src_l = r["source"].lower()
            tgt_l = r["target"].lower()
            adj.setdefault(src_l, set()).add(tgt_l)
            adj.setdefault(tgt_l, set()).add(src_l)

        visited = set()
        components = []

        for node in adj:
            if node in visited:
                continue
            # Iterative depth-first walk of one component.
            component = []
            stack = [node]
            while stack:
                n = stack.pop()
                if n in visited:
                    continue
                visited.add(n)
                component.append(n)
                stack.extend(adj.get(n, set()) - visited)
            components.append(sorted(component))

        # Sort by size descending
        components.sort(key=len, reverse=True)

        result = [
            {"cluster_id": i, "size": len(c), "members": c}
            for i, c in enumerate(components)
        ]

        return QueryResult(
            data=result,
            query_type="filter",
            raw_query="clusters()",
            explanation=f"Found {len(components)} clusters",
        )

    def sql(self, query: str) -> QueryResult:
        """Execute a raw SQL query (SQLite only).

        The query text is handed to ``store.raw_query`` unchanged.
        """
        # NOTE(review): no validation happens here — confirm callers never
        # pass untrusted query text.
        result = self.store.raw_query(query)
        row_count = len(result) if isinstance(result, list) else 1
        return QueryResult(
            data=result,
            query_type="sql",
            raw_query=query,
            explanation=f"SQL query returned {row_count} rows",
        )

    # ── Agentic mode (requires LLM) ──

    def ask(self, question: str) -> QueryResult:
        """Answer a natural language question using LLM-guided query planning.

        The LLM picks from known direct-mode actions (never generates arbitrary code),
        the engine executes them, then the LLM synthesizes a natural language answer.

        Failures at any stage are reported through the returned result's
        ``explanation`` rather than raised.
        """
        if not self.pm:
            return QueryResult(
                data=None,
                query_type="agentic",
                raw_query=question,
                explanation="Agentic mode requires a configured LLM provider. "
                "Pass --provider/--chat-model or set an API key.",
            )

        # Step 1: Ask LLM to generate a query plan
        stats = self.stats().data
        # Only the two f-string pieces interpolate values.  The action
        # examples are plain literals, so their braces are written singly —
        # doubling them would show "{{...}}" to the model.
        plan_prompt = (
            "You are a knowledge graph query planner. Given a user question and graph stats, "
            "choose ONE action to answer it.\n\n"
            f"Graph stats: {json.dumps(stats)}\n\n"
            "Available actions (pick exactly one):\n"
            '- {"action": "entities", "name": "...", "entity_type": "..."}\n'
            '- {"action": "relationships", "source": "...", "target": "...", "rel_type": "..."}\n'
            '- {"action": "neighbors", "entity_name": "...", "depth": 1}\n'
            '- {"action": "shortest_path", "start": "...", "end": "..."}\n'
            '- {"action": "clusters"}\n'
            '- {"action": "stats"}\n\n'
            f"User question: {question}\n\n"
            "Return ONLY a JSON object with the action. Omit optional fields you don't need."
        )

        try:
            plan_raw = self.pm.chat(
                [{"role": "user", "content": plan_prompt}],
                max_tokens=256,
                temperature=0.1,
            )
        except Exception as e:
            return QueryResult(
                data=None,
                query_type="agentic",
                raw_query=question,
                explanation=f"LLM query planning failed: {e}",
            )

        # Parse the plan; anything that is not a JSON object with an
        # "action" key is rejected before it is indexed.
        plan = _parse_json(plan_raw) if isinstance(plan_raw, str) else None
        if not isinstance(plan, dict) or "action" not in plan:
            return QueryResult(
                data=None,
                query_type="agentic",
                raw_query=question,
                explanation="Could not parse LLM query plan from response.",
            )

        # Step 2: Execute the planned action
        action = plan["action"]
        try:
            if action == "entities":
                result = self.entities(
                    name=plan.get("name"),
                    entity_type=plan.get("entity_type"),
                )
            elif action == "relationships":
                result = self.relationships(
                    source=plan.get("source"),
                    target=plan.get("target"),
                    rel_type=plan.get("rel_type"),
                )
            elif action == "neighbors":
                # The model may send the depth as a string or as null.
                raw_depth = plan.get("depth", 1)
                result = self.neighbors(
                    entity_name=plan.get("entity_name") or "",
                    depth=1 if raw_depth is None else int(raw_depth),
                )
            elif action == "shortest_path":
                result = self.shortest_path(
                    start=plan.get("start") or "",
                    end=plan.get("end") or "",
                )
            elif action == "clusters":
                result = self.clusters()
            elif action == "stats":
                result = self.stats()
            else:
                return QueryResult(
                    data=None,
                    query_type="agentic",
                    raw_query=question,
                    explanation=f"Unknown action in plan: {action}",
                )
        except Exception as e:
            return QueryResult(
                data=None,
                query_type="agentic",
                raw_query=question,
                explanation=f"Action execution failed: {e}",
            )

        # Step 3: Synthesize a natural language answer
        synth_prompt = (
            "You are a helpful assistant answering questions about a knowledge graph.\n\n"
            f"User question: {question}\n\n"
            f"Query result:\n{result.to_text()}\n\n"
            "Provide a concise, natural language answer based on the data above."
        )

        try:
            answer = self.pm.chat(
                [{"role": "user", "content": synth_prompt}],
                max_tokens=1024,
                temperature=0.3,
            )
        except Exception as e:
            # Return the raw result if synthesis fails
            result.query_type = "agentic"
            result.explanation = f"LLM synthesis failed ({e}), showing raw results"
            return result

        return QueryResult(
            data=result.data,
            query_type="agentic",
            raw_query=question,
            explanation=str(answer).strip(),
        )
b363c5b… noreply 583
b363c5b… noreply 584
def _parse_json(text: str) -> Optional[Dict]:
    """Try to extract a JSON object from LLM output.

    The whole text is tried first.  If that does not yield an object, each
    ``{`` in the text is tried in turn as the start of an embedded object, so
    surrounding prose, code fences, or stray braces after the object do not
    prevent extraction.

    Returns the first JSON object found as a dict, or ``None`` when the input
    is not a string or contains no decodable object.  JSON values that are
    not objects (lists, strings, numbers) are never returned.
    """
    if not isinstance(text, str):
        return None
    text = text.strip()

    # Try direct parse first
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        parsed = None
    if isinstance(parsed, dict):
        return parsed

    # Scan for an embedded object.  raw_decode stops at the end of the first
    # complete JSON value, so trailing text is ignored.
    decoder = json.JSONDecoder()
    pos = text.find("{")
    while pos != -1:
        try:
            candidate, _ = decoder.raw_decode(text, pos)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict):
            return candidate
        pos = text.find("{", pos + 1)
    return None

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button