PlanOpticon

planopticon / video_processor / integrators / graph_store.py
Source Blame History 757 lines
0ad36b7… noreply 1 """Graph storage backends for PlanOpticon knowledge graphs."""
0ad36b7… noreply 2
0981a08… noreply 3 import json
0ad36b7… noreply 4 import logging
0981a08… noreply 5 import sqlite3
0ad36b7… noreply 6 from abc import ABC, abstractmethod
0ad36b7… noreply 7 from pathlib import Path
0ad36b7… noreply 8 from typing import Any, Dict, List, Optional, Union
0ad36b7… noreply 9
0ad36b7… noreply 10 logger = logging.getLogger(__name__)
0ad36b7… noreply 11
0ad36b7… noreply 12
class GraphStore(ABC):
    """Abstract interface for knowledge graph storage backends.

    Concrete backends must implement every ``@abstractmethod`` below. The
    source/provenance methods and ``raw_query`` have defaults (no-op, empty
    result, or ``NotImplementedError``) so that backends without that
    capability only need to implement the core graph operations.
    """

    @abstractmethod
    def merge_entity(
        self,
        name: str,
        entity_type: str,
        descriptions: List[str],
        source: Optional[str] = None,
    ) -> None:
        """Upsert an entity by case-insensitive name."""
        ...

    @abstractmethod
    def add_occurrence(
        self,
        entity_name: str,
        source: str,
        timestamp: Optional[float] = None,
        text: Optional[str] = None,
    ) -> None:
        """Add an occurrence record to an existing entity."""
        ...

    @abstractmethod
    def add_relationship(
        self,
        source: str,
        target: str,
        rel_type: str,
        content_source: Optional[str] = None,
        timestamp: Optional[float] = None,
    ) -> None:
        """Add a relationship between two entities (both must already exist)."""
        ...

    @abstractmethod
    def get_entity(self, name: str) -> Optional[Dict[str, Any]]:
        """Get an entity by case-insensitive name, or None."""
        ...

    @abstractmethod
    def get_all_entities(self) -> List[Dict[str, Any]]:
        """Return all entities as dicts."""
        ...

    @abstractmethod
    def get_all_relationships(self) -> List[Dict[str, Any]]:
        """Return all relationships as dicts."""
        ...

    @abstractmethod
    def get_entity_count(self) -> int:
        """Return the number of stored entities."""
        ...

    @abstractmethod
    def get_relationship_count(self) -> int:
        """Return the number of stored relationships."""
        ...

    @abstractmethod
    def has_entity(self, name: str) -> bool:
        """Check if an entity exists (case-insensitive)."""
        ...

    @abstractmethod
    def add_typed_relationship(
        self,
        source: str,
        target: str,
        edge_label: str,
        properties: Optional[Dict[str, Any]] = None,
    ) -> None:
        """Add a relationship with a custom edge label (e.g. DEPENDS_ON, USES_SYSTEM).

        Unlike add_relationship which always uses RELATED_TO, this creates edges
        with the specified label for richer graph semantics.
        """
        ...

    @abstractmethod
    def set_entity_properties(
        self,
        name: str,
        properties: Dict[str, Any],
    ) -> bool:
        """Set arbitrary key/value properties on an existing entity.

        Returns True if the entity was found and updated, False otherwise.
        """
        ...

    @abstractmethod
    def has_relationship(
        self,
        source: str,
        target: str,
        edge_label: Optional[str] = None,
    ) -> bool:
        """Check if a relationship exists between two entities.

        If edge_label is None, checks for any relationship type.
        """
        ...

    def register_source(self, source: Dict[str, Any]) -> None:
        """Register a content source. Default no-op for backends that don't support it."""
        pass

    def get_sources(self) -> List[Dict[str, Any]]:
        """Return all registered sources (empty list by default)."""
        return []

    def get_source(self, source_id: str) -> Optional[Dict[str, Any]]:
        """Get a source by ID (None by default)."""
        return None

    def add_source_location(
        self,
        source_id: str,
        entity_name_lower: Optional[str] = None,
        relationship_id: Optional[int] = None,
        **kwargs,
    ) -> None:
        """Link a source to an entity or relationship with location details.

        Default no-op for backends that don't support provenance.
        """
        pass

    def get_entity_provenance(self, name: str) -> List[Dict[str, Any]]:
        """Get all source locations for an entity (empty list by default)."""
        return []

    def raw_query(self, query_string: str) -> Any:
        """Execute a raw query against the backend (e.g. SQL for SQLite).

        Not supported by all backends — raises NotImplementedError by default.
        """
        raise NotImplementedError(f"{type(self).__name__} does not support raw queries")

    def to_dict(self) -> Dict[str, Any]:
        """Export to JSON-compatible dict matching knowledge_graph.json format.

        Returns:
            A dict with ``"nodes"`` and ``"relationships"`` keys, plus a
            ``"sources"`` key when the backend reports at least one source.
        """
        nodes = []
        for entity in self.get_all_entities():
            descs = entity.get("descriptions", [])
            if isinstance(descs, (set, frozenset)):
                # Sets are not JSON-serialisable and iterate in hash order,
                # which varies between runs; sort so the export is stable.
                descs = sorted(descs, key=str)
            nodes.append(
                {
                    "id": entity.get("id", entity["name"]),
                    "name": entity["name"],
                    "type": entity.get("type", "concept"),
                    "descriptions": descs,
                    "occurrences": entity.get("occurrences", []),
                }
            )
        result: Dict[str, Any] = {
            "nodes": nodes,
            "relationships": self.get_all_relationships(),
        }
        sources = self.get_sources()
        if sources:
            result["sources"] = sources
        return result
0ad36b7… noreply 171
0ad36b7… noreply 172
class InMemoryStore(GraphStore):
    """In-memory graph store using Python dicts. Default fallback.

    Entities are kept in a dict keyed by lower-cased name; relationships,
    sources and source locations are kept in plain lists/dicts. Nothing is
    persisted.
    """

    def __init__(self) -> None:
        self._nodes: Dict[str, Dict[str, Any]] = {}  # keyed by name.lower()
        self._relationships: List[Dict[str, Any]] = []
        self._sources: Dict[str, Dict[str, Any]] = {}  # keyed by source_id
        self._source_locations: List[Dict[str, Any]] = []

    def merge_entity(
        self,
        name: str,
        entity_type: str,
        descriptions: List[str],
        source: Optional[str] = None,
    ) -> None:
        """Create the entity, or merge descriptions/type into an existing one.

        On an existing entity the new descriptions are added to its set and
        the type is replaced only when a non-empty, different type is given.
        ``source`` is recorded only when the entity is first created.
        """
        key = name.lower()
        node = self._nodes.get(key)
        if node is None:
            self._nodes[key] = {
                "id": name,
                "name": name,
                "type": entity_type,
                # Tolerate descriptions=None instead of raising in set().
                "descriptions": set(descriptions or ()),
                "occurrences": [],
                "source": source,
            }
            return
        if descriptions:
            node["descriptions"].update(descriptions)
        if entity_type and entity_type != node["type"]:
            node["type"] = entity_type

    def add_occurrence(
        self,
        entity_name: str,
        source: str,
        timestamp: Optional[float] = None,
        text: Optional[str] = None,
    ) -> None:
        """Append an occurrence to an entity; silently ignored if it is unknown."""
        node = self._nodes.get(entity_name.lower())
        if node is not None:
            node["occurrences"].append({"source": source, "timestamp": timestamp, "text": text})

    def add_relationship(
        self,
        source: str,
        target: str,
        rel_type: str,
        content_source: Optional[str] = None,
        timestamp: Optional[float] = None,
    ) -> None:
        """Append a relationship record; entity existence is not checked here."""
        self._relationships.append(
            {
                "source": source,
                "target": target,
                "type": rel_type,
                "content_source": content_source,
                "timestamp": timestamp,
            }
        )

    def get_entity(self, name: str) -> Optional[Dict[str, Any]]:
        """Return the stored entity dict (not a copy) for ``name``, or None."""
        return self._nodes.get(name.lower())

    def get_all_entities(self) -> List[Dict[str, Any]]:
        """Return a new list holding every stored entity dict."""
        return list(self._nodes.values())

    def get_all_relationships(self) -> List[Dict[str, Any]]:
        """Return a new list holding every stored relationship dict."""
        return list(self._relationships)

    def get_entity_count(self) -> int:
        """Return the number of stored entities."""
        return len(self._nodes)

    def get_relationship_count(self) -> int:
        """Return the number of stored relationships."""
        return len(self._relationships)

    def has_entity(self, name: str) -> bool:
        """Check if an entity exists (case-insensitive)."""
        return name.lower() in self._nodes

    def add_typed_relationship(
        self,
        source: str,
        target: str,
        edge_label: str,
        properties: Optional[Dict[str, Any]] = None,
    ) -> None:
        """Append a relationship whose ``type`` is ``edge_label``.

        ``properties`` are stored as extra keys on the relationship dict.
        They never replace ``source``, ``target`` or ``type``: letting a
        property named e.g. ``"type"`` win would silently change the edge
        label and break ``has_relationship`` lookups.
        """
        entry: Dict[str, Any] = {
            "source": source,
            "target": target,
            "type": edge_label,
        }
        if properties:
            for key, value in properties.items():
                entry.setdefault(key, value)
        self._relationships.append(entry)

    def set_entity_properties(
        self,
        name: str,
        properties: Dict[str, Any],
    ) -> bool:
        """Merge ``properties`` into the entity dict; False if the entity is unknown."""
        node = self._nodes.get(name.lower())
        if node is None:
            return False
        node.update(properties)
        return True

    def register_source(self, source: Dict[str, Any]) -> None:
        """Store a copy of ``source`` under its ``source_id`` ("" if missing)."""
        source_id = source.get("source_id", "")
        self._sources[source_id] = dict(source)

    def get_sources(self) -> List[Dict[str, Any]]:
        """Return all registered sources."""
        return list(self._sources.values())

    def get_source(self, source_id: str) -> Optional[Dict[str, Any]]:
        """Get a source by ID, or None."""
        return self._sources.get(source_id)

    def add_source_location(
        self,
        source_id: str,
        entity_name_lower: Optional[str] = None,
        relationship_id: Optional[int] = None,
        **kwargs,
    ) -> None:
        """Record a source location; extra keyword arguments are stored as-is."""
        entry: Dict[str, Any] = {
            "source_id": source_id,
            "entity_name_lower": entity_name_lower,
            "relationship_id": relationship_id,
        }
        entry.update(kwargs)
        self._source_locations.append(entry)

    def get_entity_provenance(self, name: str) -> List[Dict[str, Any]]:
        """Return copies of the entity's source locations.

        Each result gets a ``"source"`` key holding the registered source
        dict when that source is known.
        """
        name_lower = name.lower()
        results = []
        for loc in self._source_locations:
            if loc.get("entity_name_lower") != name_lower:
                continue
            entry = dict(loc)
            src = self._sources.get(loc.get("source_id", ""))
            if src:
                entry["source"] = src
            results.append(entry)
        return results

    def has_relationship(
        self,
        source: str,
        target: str,
        edge_label: Optional[str] = None,
    ) -> bool:
        """Check for a relationship between two names (case-insensitive).

        If edge_label is None, any relationship type matches.
        """
        src_lower = source.lower()
        tgt_lower = target.lower()
        return any(
            rel["source"].lower() == src_lower
            and rel["target"].lower() == tgt_lower
            and (edge_label is None or rel.get("type") == edge_label)
            for rel in self._relationships
        )
f4e202a… noreply 331
f4e202a… noreply 332
class SQLiteStore(GraphStore):
    """SQLite-backed graph store. Uses Python's built-in sqlite3 module.

    Every write runs inside ``with self._conn:`` so it is committed on
    success and rolled back if the statement raises. Case-insensitive
    matching is done with Python's ``str.lower`` on both sides (a
    ``py_lower`` SQL function is registered on the connection) because
    SQLite's built-in ``LOWER()`` only folds ASCII characters.
    """

    _SCHEMA = """
    CREATE TABLE IF NOT EXISTS entities (
        name TEXT NOT NULL,
        name_lower TEXT NOT NULL UNIQUE,
        type TEXT NOT NULL DEFAULT 'concept',
        descriptions TEXT NOT NULL DEFAULT '[]',
        source TEXT,
        properties TEXT NOT NULL DEFAULT '{}'
    );
    CREATE TABLE IF NOT EXISTS occurrences (
        entity_name_lower TEXT NOT NULL,
        source TEXT NOT NULL,
        timestamp REAL,
        text TEXT,
        FOREIGN KEY (entity_name_lower) REFERENCES entities(name_lower)
    );
    CREATE TABLE IF NOT EXISTS relationships (
        source TEXT NOT NULL,
        target TEXT NOT NULL,
        type TEXT NOT NULL DEFAULT 'related_to',
        content_source TEXT,
        timestamp REAL,
        properties TEXT NOT NULL DEFAULT '{}'
    );
    CREATE INDEX IF NOT EXISTS idx_entities_name_lower ON entities(name_lower);
    CREATE INDEX IF NOT EXISTS idx_entities_type ON entities(type);
    CREATE INDEX IF NOT EXISTS idx_occurrences_entity ON occurrences(entity_name_lower);
    CREATE INDEX IF NOT EXISTS idx_relationships_source ON relationships(source);
    CREATE INDEX IF NOT EXISTS idx_relationships_target ON relationships(target);

    CREATE TABLE IF NOT EXISTS sources (
        source_id TEXT PRIMARY KEY,
        source_type TEXT NOT NULL,
        title TEXT NOT NULL,
        path TEXT,
        url TEXT,
        mime_type TEXT,
        ingested_at TEXT NOT NULL,
        metadata TEXT NOT NULL DEFAULT '{}'
    );
    CREATE TABLE IF NOT EXISTS source_locations (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        source_id TEXT NOT NULL REFERENCES sources(source_id),
        entity_name_lower TEXT,
        relationship_id INTEGER,
        timestamp REAL,
        page INTEGER,
        section TEXT,
        line_start INTEGER,
        line_end INTEGER,
        text_snippet TEXT
    );
    CREATE INDEX IF NOT EXISTS idx_source_locations_source ON source_locations(source_id);
    CREATE INDEX IF NOT EXISTS idx_source_locations_entity
        ON source_locations(entity_name_lower);
    """

    # Column list shared by every SELECT that reads a full source row.
    _SOURCE_COLUMNS = (
        "source_id, source_type, title, path, url, mime_type, ingested_at, metadata"
    )

    def __init__(self, db_path: Union[str, Path]) -> None:
        """Open (or create) the database at ``db_path`` and ensure the schema exists.

        Raises:
            sqlite3.Error: if the database cannot be opened or initialised;
                the connection is closed before the error propagates.
        """
        self._db_path = str(db_path)
        self._conn = sqlite3.connect(self._db_path)
        try:
            # Unicode-aware lower-casing for use inside SQL (see has_relationship).
            self._conn.create_function("py_lower", 1, self._py_lower)
            self._conn.execute("PRAGMA journal_mode=WAL")
            self._conn.execute("PRAGMA foreign_keys=ON")
            self._conn.executescript(self._SCHEMA)
            self._conn.commit()
        except Exception:
            # Don't leak the connection when setup fails half-way.
            self._conn.close()
            self._conn = None
            raise

    def __enter__(self) -> "SQLiteStore":
        """Allow ``with SQLiteStore(path) as store:`` usage."""
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        """Close the connection when leaving the ``with`` block."""
        self.close()

    @staticmethod
    def _py_lower(value: Any) -> Any:
        """Lower-case ``value`` with Python semantics; non-strings pass through."""
        return value.lower() if isinstance(value, str) else value

    @staticmethod
    def _loads(text: Optional[str], default: Any) -> Any:
        """Decode a JSON column, returning ``default`` for NULL/empty text."""
        return json.loads(text) if text else default

    @classmethod
    def _source_from_row(cls, row: Any) -> Dict[str, Any]:
        """Build a source dict from a row selected with ``_SOURCE_COLUMNS``."""
        return {
            "source_id": row[0],
            "source_type": row[1],
            "title": row[2],
            "path": row[3],
            "url": row[4],
            "mime_type": row[5],
            "ingested_at": row[6],
            "metadata": cls._loads(row[7], {}),
        }

    @classmethod
    def _entity_from_row(
        cls,
        name: str,
        entity_type: Optional[str],
        descriptions: Optional[str],
        source: Optional[str],
        properties: Optional[str],
        occurrences: List[Dict[str, Any]],
    ) -> Dict[str, Any]:
        """Build an entity dict from raw column values.

        Stored properties are exposed as extra keys but never replace the
        core keys (id, name, type, descriptions, occurrences, source).
        """
        entity: Dict[str, Any] = {
            "id": name,
            "name": name,
            "type": entity_type or "concept",
            "descriptions": cls._loads(descriptions, []),
            "occurrences": occurrences,
            "source": source,
        }
        for key, value in cls._loads(properties, {}).items():
            entity.setdefault(key, value)
        return entity

    def merge_entity(
        self,
        name: str,
        entity_type: str,
        descriptions: List[str],
        source: Optional[str] = None,
    ) -> None:
        """Create the entity, or merge descriptions/type into an existing row.

        Descriptions are de-duplicated while keeping first-seen order. On an
        existing row the type is replaced only when a non-empty type is
        given, matching InMemoryStore; ``source`` is written only on insert.
        A falsy type on insert is stored as ``'concept'``, which is what the
        read methods report for an empty type anyway.
        """
        name_lower = name.lower()
        incoming = list(descriptions or [])
        with self._conn:
            row = self._conn.execute(
                "SELECT descriptions, type FROM entities WHERE name_lower = ?",
                (name_lower,),
            ).fetchone()
            if row is None:
                self._conn.execute(
                    "INSERT INTO entities (name, name_lower, type, descriptions, source) "
                    "VALUES (?, ?, ?, ?, ?)",
                    (
                        name,
                        name_lower,
                        entity_type or "concept",
                        json.dumps(list(dict.fromkeys(incoming))),
                        source,
                    ),
                )
                return
            existing = self._loads(row[0], [])
            # dict.fromkeys de-duplicates without the reordering set() causes.
            merged = list(dict.fromkeys([*existing, *incoming]))
            self._conn.execute(
                "UPDATE entities SET descriptions = ?, type = ? WHERE name_lower = ?",
                (json.dumps(merged), entity_type or row[1], name_lower),
            )

    def add_occurrence(
        self,
        entity_name: str,
        source: str,
        timestamp: Optional[float] = None,
        text: Optional[str] = None,
    ) -> None:
        """Insert an occurrence row; silently ignored if the entity is unknown."""
        if not self.has_entity(entity_name):
            return
        with self._conn:
            self._conn.execute(
                "INSERT INTO occurrences (entity_name_lower, source, timestamp, text) "
                "VALUES (?, ?, ?, ?)",
                (entity_name.lower(), source, timestamp, text),
            )

    def add_relationship(
        self,
        source: str,
        target: str,
        rel_type: str,
        content_source: Optional[str] = None,
        timestamp: Optional[float] = None,
    ) -> None:
        """Insert a relationship row; entity existence is not checked here."""
        with self._conn:
            self._conn.execute(
                "INSERT INTO relationships (source, target, type, content_source, timestamp) "
                "VALUES (?, ?, ?, ?, ?)",
                (source, target, rel_type, content_source, timestamp),
            )

    def get_entity(self, name: str) -> Optional[Dict[str, Any]]:
        """Get an entity (with occurrences and stored properties) by name, or None."""
        name_lower = name.lower()
        row = self._conn.execute(
            "SELECT name, type, descriptions, source, properties "
            "FROM entities WHERE name_lower = ?",
            (name_lower,),
        ).fetchone()
        if row is None:
            return None
        occ_rows = self._conn.execute(
            "SELECT source, timestamp, text FROM occurrences WHERE entity_name_lower = ?",
            (name_lower,),
        ).fetchall()
        occurrences = [{"source": o[0], "timestamp": o[1], "text": o[2]} for o in occ_rows]
        return self._entity_from_row(row[0], row[1], row[2], row[3], row[4], occurrences)

    def get_all_entities(self) -> List[Dict[str, Any]]:
        """Return every entity with its occurrences and stored properties."""
        # One pass over occurrences instead of one query per entity.
        by_entity: Dict[str, List[Dict[str, Any]]] = {}
        for key, src, ts, text in self._conn.execute(
            "SELECT entity_name_lower, source, timestamp, text "
            "FROM occurrences ORDER BY rowid"
        ):
            by_entity.setdefault(key, []).append(
                {"source": src, "timestamp": ts, "text": text}
            )
        rows = self._conn.execute(
            "SELECT name, name_lower, type, descriptions, source, properties FROM entities"
        ).fetchall()
        return [
            self._entity_from_row(
                row[0], row[2], row[3], row[4], row[5], by_entity.get(row[1], [])
            )
            for row in rows
        ]

    def get_all_relationships(self) -> List[Dict[str, Any]]:
        """Return every relationship as a dict.

        Properties stored by ``add_typed_relationship`` are exposed as extra
        keys. A property only fills a core key (e.g. ``timestamp``) when the
        column itself is NULL, so ``source``/``target``/``type`` are never
        overridden.
        """
        rows = self._conn.execute(
            "SELECT source, target, type, content_source, timestamp, properties "
            "FROM relationships"
        ).fetchall()
        relationships = []
        for row in rows:
            rel: Dict[str, Any] = {
                "source": row[0],
                "target": row[1],
                "type": row[2] or "related_to",
                "content_source": row[3],
                "timestamp": row[4],
            }
            for key, value in self._loads(row[5], {}).items():
                if rel.get(key) is None:
                    rel[key] = value
            relationships.append(rel)
        return relationships

    def get_entity_count(self) -> int:
        """Return the number of rows in ``entities``."""
        row = self._conn.execute("SELECT COUNT(*) FROM entities").fetchone()
        return row[0] if row else 0

    def get_relationship_count(self) -> int:
        """Return the number of rows in ``relationships``."""
        row = self._conn.execute("SELECT COUNT(*) FROM relationships").fetchone()
        return row[0] if row else 0

    def has_entity(self, name: str) -> bool:
        """Check if an entity exists (case-insensitive)."""
        row = self._conn.execute(
            "SELECT 1 FROM entities WHERE name_lower = ?", (name.lower(),)
        ).fetchone()
        return row is not None

    def raw_query(self, query_string: str) -> Any:
        """Execute a raw SQL query and return all rows.

        The string is passed to SQLite verbatim with no parameter binding,
        so it must never be built from untrusted input. No commit is issued
        here.
        """
        cursor = self._conn.execute(query_string)
        return cursor.fetchall()

    def add_typed_relationship(
        self,
        source: str,
        target: str,
        edge_label: str,
        properties: Optional[Dict[str, Any]] = None,
    ) -> None:
        """Insert a relationship of type ``edge_label`` with JSON-encoded properties."""
        with self._conn:
            self._conn.execute(
                "INSERT INTO relationships (source, target, type, properties) "
                "VALUES (?, ?, ?, ?)",
                (source, target, edge_label, json.dumps(properties or {})),
            )

    def set_entity_properties(
        self,
        name: str,
        properties: Dict[str, Any],
    ) -> bool:
        """Merge ``properties`` into the entity's stored JSON properties.

        Returns True if the entity exists (even when ``properties`` is
        empty and nothing is written), False otherwise.
        """
        name_lower = name.lower()
        row = self._conn.execute(
            "SELECT properties FROM entities WHERE name_lower = ?", (name_lower,)
        ).fetchone()
        if row is None:
            return False
        if not properties:
            return True
        merged = self._loads(row[0], {})
        merged.update(properties)
        with self._conn:
            self._conn.execute(
                "UPDATE entities SET properties = ? WHERE name_lower = ?",
                (json.dumps(merged), name_lower),
            )
        return True

    def has_relationship(
        self,
        source: str,
        target: str,
        edge_label: Optional[str] = None,
    ) -> bool:
        """Check for a relationship between two names (case-insensitive).

        If edge_label is None, any relationship type matches.
        """
        # py_lower (Python str.lower) is used instead of SQL LOWER(): the
        # built-in only folds ASCII, so names such as "Éclair" would never
        # equal the Python-lowered parameter.
        sql = (
            "SELECT 1 FROM relationships "
            "WHERE py_lower(source) = ? AND py_lower(target) = ?"
        )
        params: List[Any] = [source.lower(), target.lower()]
        if edge_label is not None:
            sql += " AND type = ?"
            params.append(edge_label)
        row = self._conn.execute(sql + " LIMIT 1", params).fetchone()
        return row is not None

    def register_source(self, source: Dict[str, Any]) -> None:
        """Insert the source, or overwrite every column of an existing one.

        Missing ``source_id``/``source_type``/``title``/``ingested_at`` keys
        default to ""; missing or None ``metadata`` is stored as ``{}``.
        """
        source_id = source.get("source_id", "")
        fields = (
            source.get("source_type", ""),
            source.get("title", ""),
            source.get("path"),
            source.get("url"),
            source.get("mime_type"),
            source.get("ingested_at", ""),
            json.dumps(source.get("metadata") or {}),
        )
        with self._conn:
            existing = self._conn.execute(
                "SELECT 1 FROM sources WHERE source_id = ?", (source_id,)
            ).fetchone()
            if existing:
                self._conn.execute(
                    "UPDATE sources SET source_type = ?, title = ?, path = ?, url = ?, "
                    "mime_type = ?, ingested_at = ?, metadata = ? WHERE source_id = ?",
                    fields + (source_id,),
                )
            else:
                self._conn.execute(
                    "INSERT INTO sources (source_id, source_type, title, path, url, "
                    "mime_type, ingested_at, metadata) VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
                    (source_id,) + fields,
                )

    def get_sources(self) -> List[Dict[str, Any]]:
        """Return all registered sources."""
        rows = self._conn.execute(
            f"SELECT {self._SOURCE_COLUMNS} FROM sources"
        ).fetchall()
        return [self._source_from_row(row) for row in rows]

    def get_source(self, source_id: str) -> Optional[Dict[str, Any]]:
        """Get a source by ID, or None."""
        row = self._conn.execute(
            f"SELECT {self._SOURCE_COLUMNS} FROM sources WHERE source_id = ?",
            (source_id,),
        ).fetchone()
        return self._source_from_row(row) if row else None

    def add_source_location(
        self,
        source_id: str,
        entity_name_lower: Optional[str] = None,
        relationship_id: Optional[int] = None,
        **kwargs,
    ) -> None:
        """Insert a source location row.

        Recognised keyword arguments: ``timestamp``, ``page``, ``section``,
        ``line_start``, ``line_end``, ``text_snippet``. Others are ignored.
        """
        with self._conn:
            self._conn.execute(
                "INSERT INTO source_locations (source_id, entity_name_lower, relationship_id, "
                "timestamp, page, section, line_start, line_end, text_snippet) "
                "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
                (
                    source_id,
                    entity_name_lower,
                    relationship_id,
                    kwargs.get("timestamp"),
                    kwargs.get("page"),
                    kwargs.get("section"),
                    kwargs.get("line_start"),
                    kwargs.get("line_end"),
                    kwargs.get("text_snippet"),
                ),
            )

    def get_entity_provenance(self, name: str) -> List[Dict[str, Any]]:
        """Return the entity's source locations, each with its source under ``"source"``."""
        rows = self._conn.execute(
            "SELECT sl.source_id, sl.entity_name_lower, sl.relationship_id, "
            "sl.timestamp, sl.page, sl.section, sl.line_start, sl.line_end, "
            "sl.text_snippet, s.source_type, s.title, s.path, s.url, s.mime_type, "
            "s.ingested_at, s.metadata "
            "FROM source_locations sl "
            "JOIN sources s ON sl.source_id = s.source_id "
            "WHERE sl.entity_name_lower = ?",
            (name.lower(),),
        ).fetchall()
        return [
            {
                "source_id": r[0],
                "entity_name_lower": r[1],
                "relationship_id": r[2],
                "timestamp": r[3],
                "page": r[4],
                "section": r[5],
                "line_start": r[6],
                "line_end": r[7],
                "text_snippet": r[8],
                # Columns 9.. are the joined source row minus its id (r[0]).
                "source": self._source_from_row((r[0],) + tuple(r[9:16])),
            }
            for r in rows
        ]

    def close(self) -> None:
        """Close the SQLite connection. Safe to call more than once."""
        if self._conn is not None:
            self._conn.close()
            self._conn = None
0ad36b7… noreply 744
0ad36b7… noreply 745
def create_store(db_path: Optional[Union[str, Path]] = None) -> GraphStore:
    """Build a graph store for the given location.

    With no ``db_path`` an InMemoryStore is returned. Otherwise a
    SQLiteStore is opened at that path; if opening it raises, a warning is
    logged and an InMemoryStore is returned instead.
    """
    if db_path is None:
        return InMemoryStore()
    try:
        store = SQLiteStore(db_path)
    except Exception as e:
        logger.warning(f"Failed to initialize SQLite at {db_path}: {e}. Using in-memory store.")
        return InMemoryStore()
    return store

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button