|
b363c5b…
|
noreply
|
1 |
"""Query engine for PlanOpticon knowledge graphs.""" |
|
b363c5b…
|
noreply
|
2 |
|
|
b363c5b…
|
noreply
|
3 |
import json
import logging
from collections import deque
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Optional

from video_processor.integrators.graph_store import (
    GraphStore,
    InMemoryStore,
    create_store,
)
|
b363c5b…
|
noreply
|
14 |
|
|
b363c5b…
|
noreply
|
15 |
logger = logging.getLogger(__name__) |
|
b363c5b…
|
noreply
|
16 |
|
|
b363c5b…
|
noreply
|
17 |
|
|
b363c5b…
|
noreply
|
18 |
@dataclass
class QueryResult:
    """Uniform wrapper for query results.

    Attributes:
        data: Result payload — a dict (stats / single entity), a list of
            entity/relationship dicts, or any other value.
        query_type: One of "cypher", "filter", "agentic" (also "sql").
        raw_query: The literal query or call that produced this result.
        explanation: Human-readable summary of what the query found.
    """

    data: Any
    query_type: str  # "cypher", "filter", "agentic"
    raw_query: str = ""
    explanation: str = ""

    def to_text(self) -> str:
        """Human-readable text output."""
        lines = []
        if self.explanation:
            lines.append(self.explanation)
            lines.append("")

        if isinstance(self.data, dict):
            # Stats or single entity: render key/value pairs, one level deep.
            for key, value in self.data.items():
                if isinstance(value, dict):
                    lines.append(f"{key}:")
                    for k, v in value.items():
                        lines.append(f"  {k}: {v}")
                else:
                    lines.append(f"{key}: {value}")
        elif isinstance(self.data, list):
            if not self.data:
                lines.append("No results found.")
            for item in self.data:
                if isinstance(item, dict):
                    if "source" in item and "target" in item:
                        # Relationship record.
                        rtype = item.get("type", "related_to")
                        lines.append(f"  {item['source']} --[{rtype}]--> {item['target']}")
                    elif item.get("name") and item.get("type"):
                        # Entity record; show at most three descriptions.
                        descs = item.get("descriptions", [])
                        if isinstance(descs, set):
                            # Sets don't support slicing; normalize first.
                            descs = list(descs)
                        desc_str = "; ".join(descs[:3]) if descs else ""
                        line = f"  [{item['type']}] {item['name']}"
                        if desc_str:
                            line += f" — {desc_str}"
                        lines.append(line)
                    else:
                        lines.append(f"  {item}")
                else:
                    lines.append(f"  {item}")
        else:
            lines.append(str(self.data))

        return "\n".join(lines)

    def to_json(self) -> str:
        """JSON string output."""
        payload = {
            "query_type": self.query_type,
            "raw_query": self.raw_query,
            "explanation": self.explanation,
            "data": self.data,
        }
        # default=str handles non-JSON-native values (e.g. sets of
        # descriptions) by stringifying them.
        return json.dumps(payload, indent=2, default=str)

    def to_mermaid(self) -> str:
        """Mermaid diagram output from result data."""
        lines = ["graph LR"]
        seen_nodes = set()
        edges = []

        items = self.data if isinstance(self.data, list) else [self.data]

        for item in items:
            if not isinstance(item, dict):
                continue
            # Entity node
            if "name" in item and "type" in item:
                name = item["name"]
                if name not in seen_nodes:
                    safe_id = _mermaid_id(name)
                    safe_name = name.replace('"', "'")
                    # The type is used as a Mermaid class name (`:::type`),
                    # which cannot contain spaces or punctuation — sanitize it
                    # the same way as node ids.
                    ntype = _mermaid_id(item.get("type", "concept"))
                    lines.append(f'    {safe_id}["{safe_name}"]:::{ntype}')
                    seen_nodes.add(name)
            # Relationship edge
            if "source" in item and "target" in item:
                src = item["source"]
                tgt = item["target"]
                rtype = item.get("type", "related_to")
                for n in (src, tgt):
                    if n not in seen_nodes:
                        safe_id = _mermaid_id(n)
                        safe_label = n.replace('"', "'")
                        lines.append(f'    {safe_id}["{safe_label}"]')
                        seen_nodes.add(n)
                edges.append((src, tgt, rtype))

        for src, tgt, rtype in edges:
            lines.append(f'    {_mermaid_id(src)} -- "{rtype}" --> {_mermaid_id(tgt)}')

        lines.append("    classDef person fill:#f9d5e5,stroke:#333")
        lines.append("    classDef concept fill:#eeeeee,stroke:#333")
        lines.append("    classDef technology fill:#d5e5f9,stroke:#333")
        lines.append("    classDef organization fill:#f9e5d5,stroke:#333")

        return "\n".join(lines)
|
b363c5b…
|
noreply
|
120 |
|
|
b363c5b…
|
noreply
|
121 |
|
|
b363c5b…
|
noreply
|
122 |
def _mermaid_id(name: str) -> str: |
|
b363c5b…
|
noreply
|
123 |
return "".join(c if c.isalnum() or c == "_" else "_" for c in name) |
|
b363c5b…
|
noreply
|
124 |
|
|
b363c5b…
|
noreply
|
125 |
|
|
b363c5b…
|
noreply
|
126 |
class GraphQueryEngine:
    """Query engine with direct (no-LLM) and agentic (LLM) modes.

    Direct-mode methods run purely against the store; :meth:`ask` uses an
    LLM to pick one direct action and synthesize a natural-language answer.
    """

    def __init__(self, store: GraphStore, provider_manager=None):
        """Create an engine over *store*.

        Args:
            store: Backing graph store implementation.
            provider_manager: Optional LLM provider manager; required only
                for agentic mode (:meth:`ask`).
        """
        self.store = store
        self.pm = provider_manager

    @classmethod
    def from_db_path(cls, path: Path, provider_manager=None) -> "GraphQueryEngine":
        """Open a .db file and create a query engine."""
        store = create_store(path)
        return cls(store, provider_manager)

    @classmethod
    def from_json_path(cls, path: Path, provider_manager=None) -> "GraphQueryEngine":
        """Load a .json knowledge graph file and create a query engine."""
        data = json.loads(Path(path).read_text())
        store = InMemoryStore()
        for node in data.get("nodes", []):
            store.merge_entity(
                node.get("name", ""),
                node.get("type", "concept"),
                node.get("descriptions", []),
            )
            for occ in node.get("occurrences", []):
                store.add_occurrence(
                    node.get("name", ""),
                    occ.get("source", ""),
                    occ.get("timestamp"),
                    occ.get("text"),
                )
        for rel in data.get("relationships", []):
            store.add_relationship(
                rel.get("source", ""),
                rel.get("target", ""),
                rel.get("type", "related_to"),
                content_source=rel.get("content_source"),
                timestamp=rel.get("timestamp"),
            )
        return cls(store, provider_manager)

    # ── Direct mode methods (no LLM required) ──

    def entities(
        self,
        name: Optional[str] = None,
        entity_type: Optional[str] = None,
        limit: int = 50,
    ) -> QueryResult:
        """Filter entities by name substring and/or type.

        Args:
            name: Case-insensitive substring to match against entity names.
            entity_type: Case-insensitive exact type match.
            limit: Maximum number of results returned.
        """
        all_entities = self.store.get_all_entities()
        results = []
        for e in all_entities:
            if name and name.lower() not in e.get("name", "").lower():
                continue
            if entity_type and entity_type.lower() != e.get("type", "").lower():
                continue
            results.append(e)
            if len(results) >= limit:
                break

        raw = f"entities(name={name!r}, entity_type={entity_type!r}, limit={limit})"
        return QueryResult(
            data=results,
            query_type="filter",
            raw_query=raw,
            explanation=f"Found {len(results)} entities",
        )

    def relationships(
        self,
        source: Optional[str] = None,
        target: Optional[str] = None,
        rel_type: Optional[str] = None,
        limit: int = 50,
    ) -> QueryResult:
        """Filter relationships by source, target, and/or type.

        All three filters are case-insensitive substring matches (NOTE:
        unlike ``entities``, ``rel_type`` is also a substring match, not an
        exact match).
        """
        all_rels = self.store.get_all_relationships()
        results = []
        for r in all_rels:
            if source and source.lower() not in r.get("source", "").lower():
                continue
            if target and target.lower() not in r.get("target", "").lower():
                continue
            if rel_type and rel_type.lower() not in r.get("type", "").lower():
                continue
            results.append(r)
            if len(results) >= limit:
                break

        raw = f"relationships(source={source!r}, target={target!r}, rel_type={rel_type!r})"
        return QueryResult(
            data=results,
            query_type="filter",
            raw_query=raw,
            explanation=f"Found {len(results)} relationships",
        )

    def neighbors(self, entity_name: str, depth: int = 1) -> QueryResult:
        """Get an entity and its connected nodes (up to *depth* hops)."""
        entity = self.store.get_entity(entity_name)
        if not entity:
            return QueryResult(
                data=[],
                query_type="filter",
                raw_query=f"neighbors({entity_name!r}, depth={depth})",
                explanation=f"Entity '{entity_name}' not found",
            )

        visited = {entity_name.lower()}
        result_entities = [entity]
        result_rels = []
        frontier = {entity_name.lower()}

        all_rels = self.store.get_all_relationships()

        # Expand one hop per iteration; frontier holds the nodes discovered
        # in the previous hop (lowercase names).
        for _ in range(depth):
            next_frontier = set()
            for rel in all_rels:
                src_lower = rel["source"].lower()
                tgt_lower = rel["target"].lower()
                if src_lower in frontier or tgt_lower in frontier:
                    result_rels.append(rel)
                    for n in (src_lower, tgt_lower):
                        if n not in visited:
                            visited.add(n)
                            next_frontier.add(n)
                            # NOTE(review): looks up by lowercased name —
                            # assumes store lookups are case-insensitive;
                            # confirm against the GraphStore implementation.
                            e = self.store.get_entity(n)
                            if e:
                                result_entities.append(e)
            frontier = next_frontier

        # Combine entities + relationships into output
        combined = result_entities + result_rels
        return QueryResult(
            data=combined,
            query_type="filter",
            raw_query=f"neighbors({entity_name!r}, depth={depth})",
            explanation=(
                f"Found {len(result_entities)} entities and {len(result_rels)} relationships"
            ),
        )

    def stats(self) -> QueryResult:
        """Return entity count, relationship count, type breakdown."""
        all_entities = self.store.get_all_entities()
        type_breakdown = {}
        for e in all_entities:
            t = e.get("type", "concept")
            type_breakdown[t] = type_breakdown.get(t, 0) + 1

        data = {
            "entity_count": self.store.get_entity_count(),
            "relationship_count": self.store.get_relationship_count(),
            "entity_types": type_breakdown,
        }
        return QueryResult(
            data=data,
            query_type="filter",
            raw_query="stats()",
            explanation="Knowledge graph statistics",
        )

    def sources(self) -> QueryResult:
        """Return all registered content sources."""
        all_sources = self.store.get_sources()
        return QueryResult(
            data=all_sources,
            query_type="filter",
            raw_query="sources()",
            explanation=f"Found {len(all_sources)} registered sources",
        )

    def provenance(self, entity_name: str) -> QueryResult:
        """Return source locations for a given entity."""
        locations = self.store.get_entity_provenance(entity_name)
        if not locations:
            return QueryResult(
                data=[],
                query_type="filter",
                raw_query=f"provenance({entity_name!r})",
                explanation=f"No provenance records found for '{entity_name}'",
            )
        return QueryResult(
            data=locations,
            query_type="filter",
            raw_query=f"provenance({entity_name!r})",
            explanation=f"Found {len(locations)} provenance records for '{entity_name}'",
        )

    def shortest_path(self, start: str, end: str, max_depth: int = 6) -> QueryResult:
        """Find the shortest path between two entities via BFS.

        Returns a QueryResult whose data interleaves the entities and
        relationships along the path, or is empty when an endpoint is
        missing or no path exists within *max_depth* hops.
        """
        start_entity = self.store.get_entity(start)
        end_entity = self.store.get_entity(end)
        if not start_entity:
            return QueryResult(
                data=[],
                query_type="filter",
                raw_query=f"shortest_path({start!r}, {end!r})",
                explanation=f"Entity '{start}' not found",
            )
        if not end_entity:
            return QueryResult(
                data=[],
                query_type="filter",
                raw_query=f"shortest_path({start!r}, {end!r})",
                explanation=f"Entity '{end}' not found",
            )

        all_rels = self.store.get_all_relationships()
        # Build an undirected adjacency list keyed by lowercase name.
        adj: dict[str, list[tuple[str, dict]]] = {}
        for rel in all_rels:
            src_l = rel["source"].lower()
            tgt_l = rel["target"].lower()
            adj.setdefault(src_l, []).append((tgt_l, rel))
            adj.setdefault(tgt_l, []).append((src_l, rel))

        # BFS
        start_l = start.lower()
        end_l = end.lower()
        if start_l == end_l:
            return QueryResult(
                data=[start_entity],
                query_type="filter",
                raw_query=f"shortest_path({start!r}, {end!r})",
                explanation="Start and end are the same entity",
            )

        queue: deque[tuple[str, list[dict]]] = deque([(start_l, [])])
        visited = {start_l}

        while queue:
            current, path = queue.popleft()
            if len(path) >= max_depth:
                continue
            for neighbor, rel in adj.get(current, []):
                if neighbor in visited:
                    continue
                new_path = path + [rel]
                if neighbor == end_l:
                    # Build result: entities + relationships along the path.
                    # BUGFIX: walk hop by hop from the start so each
                    # relationship is paired with the entity it leads to.
                    # The old code compared every hop against the BFS
                    # `current` node, which fetched the wrong intermediate
                    # entities on multi-hop paths.
                    path_entities = [start_entity]
                    walk = start_l
                    for r in new_path:
                        path_entities.append(r)
                        next_name = r["target"] if r["source"].lower() == walk else r["source"]
                        walk = next_name.lower()
                        e = self.store.get_entity(next_name)
                        if e:
                            path_entities.append(e)
                    path_entities.append(end_entity)
                    # Deduplicate while preserving order.
                    seen = set()
                    deduped = []
                    for item in path_entities:
                        key = str(item)
                        if key not in seen:
                            seen.add(key)
                            deduped.append(item)
                    return QueryResult(
                        data=deduped,
                        query_type="filter",
                        raw_query=f"shortest_path({start!r}, {end!r})",
                        explanation=f"Path found: {len(new_path)} hops",
                    )
                visited.add(neighbor)
                queue.append((neighbor, new_path))

        return QueryResult(
            data=[],
            query_type="filter",
            raw_query=f"shortest_path({start!r}, {end!r})",
            explanation=f"No path found between '{start}' and '{end}' within {max_depth} hops",
        )

    def clusters(self) -> QueryResult:
        """Find connected components (clusters) in the graph."""
        all_entities = self.store.get_all_entities()
        all_rels = self.store.get_all_relationships()

        # Build adjacency (undirected, lowercase names); isolated entities
        # get an empty neighbor set so they form singleton clusters.
        adj: dict[str, set[str]] = {}
        for e in all_entities:
            adj.setdefault(e["name"].lower(), set())
        for r in all_rels:
            adj.setdefault(r["source"].lower(), set()).add(r["target"].lower())
            adj.setdefault(r["target"].lower(), set()).add(r["source"].lower())

        visited: set[str] = set()
        components: list[list[str]] = []

        # Iterative DFS per unvisited node.
        for node in adj:
            if node in visited:
                continue
            component: list[str] = []
            stack = [node]
            while stack:
                n = stack.pop()
                if n in visited:
                    continue
                visited.add(n)
                component.append(n)
                stack.extend(adj.get(n, set()) - visited)
            components.append(sorted(component))

        # Sort by size descending
        components.sort(key=len, reverse=True)

        result = [{"cluster_id": i, "size": len(c), "members": c} for i, c in enumerate(components)]

        return QueryResult(
            data=result,
            query_type="filter",
            raw_query="clusters()",
            explanation=f"Found {len(components)} clusters",
        )

    def sql(self, query: str) -> QueryResult:
        """Execute a raw SQL query (SQLite only)."""
        result = self.store.raw_query(query)
        return QueryResult(
            data=result,
            query_type="sql",
            raw_query=query,
            explanation=(
                f"SQL query returned {len(result) if isinstance(result, list) else 1} rows"
            ),
        )

    # ── Agentic mode (requires LLM) ──

    def ask(self, question: str) -> QueryResult:
        """Answer a natural language question using LLM-guided query planning.

        The LLM picks from known direct-mode actions (never generates arbitrary code),
        the engine executes them, then the LLM synthesizes a natural language answer.
        """
        if not self.pm:
            return QueryResult(
                data=None,
                query_type="agentic",
                raw_query=question,
                explanation="Agentic mode requires a configured LLM provider. "
                "Pass --provider/--chat-model or set an API key.",
            )

        # Step 1: Ask LLM to generate a query plan.
        # BUGFIX: the action templates below are plain (non-f) strings, so
        # the old doubled braces `{{...}}` reached the LLM literally;
        # single braces are the intended JSON examples.
        stats = self.stats().data
        plan_prompt = (
            "You are a knowledge graph query planner. Given a user question and graph stats, "
            "choose ONE action to answer it.\n\n"
            f"Graph stats: {json.dumps(stats)}\n\n"
            "Available actions (pick exactly one):\n"
            '- {"action": "entities", "name": "...", "entity_type": "..."}\n'
            '- {"action": "relationships", "source": "...", "target": "...", "rel_type": "..."}\n'
            '- {"action": "neighbors", "entity_name": "...", "depth": 1}\n'
            '- {"action": "shortest_path", "start": "...", "end": "..."}\n'
            '- {"action": "clusters"}\n'
            '- {"action": "stats"}\n\n'
            f"User question: {question}\n\n"
            "Return ONLY a JSON object with the action. Omit optional fields you don't need."
        )

        try:
            plan_raw = self.pm.chat(
                [{"role": "user", "content": plan_prompt}],
                max_tokens=256,
                temperature=0.1,
            )
        except Exception as e:
            return QueryResult(
                data=None,
                query_type="agentic",
                raw_query=question,
                explanation=f"LLM query planning failed: {e}",
            )

        # Parse the plan
        plan = _parse_json(plan_raw)
        if not plan or "action" not in plan:
            return QueryResult(
                data=None,
                query_type="agentic",
                raw_query=question,
                explanation="Could not parse LLM query plan from response.",
            )

        # Step 2: Execute the planned action
        action = plan["action"]
        try:
            if action == "entities":
                result = self.entities(
                    name=plan.get("name"),
                    entity_type=plan.get("entity_type"),
                )
            elif action == "relationships":
                result = self.relationships(
                    source=plan.get("source"),
                    target=plan.get("target"),
                    rel_type=plan.get("rel_type"),
                )
            elif action == "neighbors":
                result = self.neighbors(
                    entity_name=plan.get("entity_name", ""),
                    depth=plan.get("depth", 1),
                )
            elif action == "shortest_path":
                result = self.shortest_path(
                    start=plan.get("start", ""),
                    end=plan.get("end", ""),
                )
            elif action == "clusters":
                result = self.clusters()
            elif action == "stats":
                result = self.stats()
            else:
                return QueryResult(
                    data=None,
                    query_type="agentic",
                    raw_query=question,
                    explanation=f"Unknown action in plan: {action}",
                )
        except Exception as e:
            return QueryResult(
                data=None,
                query_type="agentic",
                raw_query=question,
                explanation=f"Action execution failed: {e}",
            )

        # Step 3: Synthesize a natural language answer
        synth_prompt = (
            "You are a helpful assistant answering questions about a knowledge graph.\n\n"
            f"User question: {question}\n\n"
            f"Query result:\n{result.to_text()}\n\n"
            "Provide a concise, natural language answer based on the data above."
        )

        try:
            answer = self.pm.chat(
                [{"role": "user", "content": synth_prompt}],
                max_tokens=1024,
                temperature=0.3,
            )
        except Exception as e:
            # Return the raw result if synthesis fails
            result.query_type = "agentic"
            result.explanation = f"LLM synthesis failed ({e}), showing raw results"
            return result

        return QueryResult(
            data=result.data,
            query_type="agentic",
            raw_query=question,
            explanation=answer.strip(),
        )
|
b363c5b…
|
noreply
|
583 |
|
|
b363c5b…
|
noreply
|
584 |
|
|
b363c5b…
|
noreply
|
585 |
def _parse_json(text: str) -> Optional[Dict]: |
|
b363c5b…
|
noreply
|
586 |
"""Try to extract a JSON object from LLM output.""" |
|
b363c5b…
|
noreply
|
587 |
text = text.strip() |
|
b363c5b…
|
noreply
|
588 |
# Try direct parse first |
|
b363c5b…
|
noreply
|
589 |
try: |
|
b363c5b…
|
noreply
|
590 |
return json.loads(text) |
|
b363c5b…
|
noreply
|
591 |
except json.JSONDecodeError: |
|
b363c5b…
|
noreply
|
592 |
pass |
|
b363c5b…
|
noreply
|
593 |
# Try to find JSON between braces |
|
b363c5b…
|
noreply
|
594 |
start = text.find("{") |
|
b363c5b…
|
noreply
|
595 |
end = text.rfind("}") |
|
b363c5b…
|
noreply
|
596 |
if start >= 0 and end > start: |
|
b363c5b…
|
noreply
|
597 |
try: |
|
b363c5b…
|
noreply
|
598 |
return json.loads(text[start : end + 1]) |
|
b363c5b…
|
noreply
|
599 |
except json.JSONDecodeError: |
|
b363c5b…
|
noreply
|
600 |
pass |
|
b363c5b…
|
noreply
|
601 |
return None |