|
0ad36b7…
|
noreply
|
1 |
"""Graph storage backends for PlanOpticon knowledge graphs.""" |
|
0ad36b7…
|
noreply
|
2 |
|
|
0981a08…
|
noreply
|
3 |
import json |
|
0ad36b7…
|
noreply
|
4 |
import logging |
|
0981a08…
|
noreply
|
5 |
import sqlite3 |
|
0ad36b7…
|
noreply
|
6 |
from abc import ABC, abstractmethod |
|
0ad36b7…
|
noreply
|
7 |
from pathlib import Path |
|
0ad36b7…
|
noreply
|
8 |
from typing import Any, Dict, List, Optional, Union |
|
0ad36b7…
|
noreply
|
9 |
|
|
0ad36b7…
|
noreply
|
10 |
logger = logging.getLogger(__name__) |
|
0ad36b7…
|
noreply
|
11 |
|
|
0ad36b7…
|
noreply
|
12 |
|
|
0ad36b7…
|
noreply
|
13 |
class GraphStore(ABC):
    """Abstract interface for knowledge graph storage backends.

    Concrete backends must implement every ``@abstractmethod``. The
    source/provenance methods and ``raw_query`` have defaults here (no-op,
    empty result, or ``NotImplementedError``) so a backend can opt out.
    """

    @abstractmethod
    def merge_entity(
        self,
        name: str,
        entity_type: str,
        descriptions: List[str],
        source: Optional[str] = None,
    ) -> None:
        """Upsert an entity by case-insensitive name.

        Args:
            name: Display name of the entity.
            entity_type: Type label for the entity.
            descriptions: Description strings to attach to the entity.
            source: Optional identifier of where the entity came from.
        """
        ...

    @abstractmethod
    def add_occurrence(
        self,
        entity_name: str,
        source: str,
        timestamp: Optional[float] = None,
        text: Optional[str] = None,
    ) -> None:
        """Add an occurrence record to an existing entity.

        Args:
            entity_name: Name of the entity the occurrence belongs to.
            source: Identifier of the content the occurrence was seen in.
            timestamp: Optional position of the occurrence.
            text: Optional text snippet for the occurrence.
        """
        ...

    @abstractmethod
    def add_relationship(
        self,
        source: str,
        target: str,
        rel_type: str,
        content_source: Optional[str] = None,
        timestamp: Optional[float] = None,
    ) -> None:
        """Add a relationship between two entities (both must already exist).

        Args:
            source: Name of the entity the relationship starts from.
            target: Name of the entity the relationship points to.
            rel_type: Relationship type label.
            content_source: Optional identifier of the originating content.
            timestamp: Optional position within that content.
        """
        ...

    @abstractmethod
    def get_entity(self, name: str) -> Optional[Dict[str, Any]]:
        """Get an entity by case-insensitive name, or None."""
        ...

    @abstractmethod
    def get_all_entities(self) -> List[Dict[str, Any]]:
        """Return all entities as dicts."""
        ...

    @abstractmethod
    def get_all_relationships(self) -> List[Dict[str, Any]]:
        """Return all relationships as dicts."""
        ...

    @abstractmethod
    def get_entity_count(self) -> int:
        """Return the number of stored entities."""
        ...

    @abstractmethod
    def get_relationship_count(self) -> int:
        """Return the number of stored relationships."""
        ...

    @abstractmethod
    def has_entity(self, name: str) -> bool:
        """Check if an entity exists (case-insensitive)."""
        ...

    @abstractmethod
    def add_typed_relationship(
        self,
        source: str,
        target: str,
        edge_label: str,
        properties: Optional[Dict[str, Any]] = None,
    ) -> None:
        """Add a relationship with a custom edge label (e.g. DEPENDS_ON, USES_SYSTEM).

        Unlike add_relationship which always uses RELATED_TO, this creates edges
        with the specified label for richer graph semantics.

        Args:
            source: Name of the entity the edge starts from.
            target: Name of the entity the edge points to.
            edge_label: Label to store for the edge.
            properties: Optional extra key/value data for the edge.
        """
        ...

    @abstractmethod
    def set_entity_properties(
        self,
        name: str,
        properties: Dict[str, Any],
    ) -> bool:
        """Set arbitrary key/value properties on an existing entity.

        Returns True if the entity was found and updated, False otherwise.
        """
        ...

    @abstractmethod
    def has_relationship(
        self,
        source: str,
        target: str,
        edge_label: Optional[str] = None,
    ) -> bool:
        """Check if a relationship exists between two entities.

        If edge_label is None, checks for any relationship type.
        """
        ...

    def register_source(self, source: Dict[str, Any]) -> None:
        """Register a content source. Default no-op for backends that don't support it."""
        pass

    def get_sources(self) -> List[Dict[str, Any]]:
        """Return all registered sources (the default returns an empty list)."""
        return []

    def get_source(self, source_id: str) -> Optional[Dict[str, Any]]:
        """Get a source by ID (the default always returns None)."""
        return None

    def add_source_location(
        self,
        source_id: str,
        entity_name_lower: Optional[str] = None,
        relationship_id: Optional[int] = None,
        **kwargs,
    ) -> None:
        """Link a source to an entity or relationship with location details.

        Default no-op; extra location details are accepted via ``**kwargs``.
        """
        pass

    def get_entity_provenance(self, name: str) -> List[Dict[str, Any]]:
        """Get all source locations for an entity (the default returns an empty list)."""
        return []

    def raw_query(self, query_string: str) -> Any:
        """Execute a raw query against the backend (e.g. SQL for SQLite).

        Not supported by all backends — raises NotImplementedError by default.

        Raises:
            NotImplementedError: Always, unless a subclass overrides this method.
        """
        raise NotImplementedError(f"{type(self).__name__} does not support raw queries")

    def to_dict(self) -> Dict[str, Any]:
        """Export to JSON-compatible dict matching knowledge_graph.json format.

        Returns:
            A dict with ``"nodes"`` (one dict per entity with ``id``, ``name``,
            ``type``, ``descriptions`` and ``occurrences``) and
            ``"relationships"``. A ``"sources"`` key is added only when
            ``get_sources()`` returns a non-empty list.
        """
        nodes = []
        for entity in self.get_all_entities():
            descs = entity.get("descriptions", [])
            if isinstance(descs, (set, frozenset)):
                # Sets are not JSON-serialisable and have no stable iteration
                # order; sort so repeated exports of the same graph are identical.
                descs = sorted(descs, key=str)
            nodes.append(
                {
                    "id": entity.get("id", entity["name"]),
                    "name": entity["name"],
                    "type": entity.get("type", "concept"),
                    "descriptions": descs,
                    "occurrences": entity.get("occurrences", []),
                }
            )
        result: Dict[str, Any] = {
            "nodes": nodes,
            "relationships": self.get_all_relationships(),
        }
        sources = self.get_sources()
        if sources:
            result["sources"] = sources
        return result
|
0ad36b7…
|
noreply
|
171 |
|
|
0ad36b7…
|
noreply
|
172 |
|
|
0ad36b7…
|
noreply
|
173 |
class InMemoryStore(GraphStore):
    """In-memory graph store using Python dicts. Default fallback.

    Entities live in a dict keyed by lower-cased name, relationships and
    source locations in plain lists, and sources in a dict keyed by
    ``source_id``. Nothing is persisted.
    """

    def __init__(self) -> None:
        """Create an empty store."""
        self._nodes: Dict[str, Dict[str, Any]] = {}  # keyed by name.lower()
        self._relationships: List[Dict[str, Any]] = []
        self._sources: Dict[str, Dict[str, Any]] = {}  # keyed by source_id
        self._source_locations: List[Dict[str, Any]] = []

    def merge_entity(
        self,
        name: str,
        entity_type: str,
        descriptions: List[str],
        source: Optional[str] = None,
    ) -> None:
        """Create the entity, or merge into the one with the same lower-cased name.

        On merge, new descriptions are added to the existing set and the type
        is replaced only when ``entity_type`` is non-empty and different. The
        original ``name`` and ``source`` are kept.
        """
        key = name.lower()
        node = self._nodes.get(key)
        if node is None:
            self._nodes[key] = {
                "id": name,
                "name": name,
                "type": entity_type,
                # ``or ()`` tolerates descriptions=None, as the merge branch does.
                "descriptions": set(descriptions or ()),
                "occurrences": [],
                "source": source,
            }
            return
        if descriptions:
            node["descriptions"].update(descriptions)
        if entity_type and entity_type != node["type"]:
            node["type"] = entity_type

    def add_occurrence(
        self,
        entity_name: str,
        source: str,
        timestamp: Optional[float] = None,
        text: Optional[str] = None,
    ) -> None:
        """Append an occurrence to the entity; silently ignored if it does not exist."""
        node = self._nodes.get(entity_name.lower())
        if node is not None:
            node["occurrences"].append(
                {"source": source, "timestamp": timestamp, "text": text}
            )

    def add_relationship(
        self,
        source: str,
        target: str,
        rel_type: str,
        content_source: Optional[str] = None,
        timestamp: Optional[float] = None,
    ) -> None:
        """Append a relationship record; endpoint existence is not checked here."""
        self._relationships.append(
            {
                "source": source,
                "target": target,
                "type": rel_type,
                "content_source": content_source,
                "timestamp": timestamp,
            }
        )

    def get_entity(self, name: str) -> Optional[Dict[str, Any]]:
        """Return the stored entity dict (not a copy) for ``name``, or None."""
        return self._nodes.get(name.lower())

    def get_all_entities(self) -> List[Dict[str, Any]]:
        """Return a new list holding the stored entity dicts."""
        return list(self._nodes.values())

    def get_all_relationships(self) -> List[Dict[str, Any]]:
        """Return a shallow copy of the relationship list."""
        return list(self._relationships)

    def get_entity_count(self) -> int:
        """Return the number of stored entities."""
        return len(self._nodes)

    def get_relationship_count(self) -> int:
        """Return the number of stored relationships."""
        return len(self._relationships)

    def has_entity(self, name: str) -> bool:
        """Return True if an entity with this name exists (case-insensitive)."""
        return name.lower() in self._nodes

    def add_typed_relationship(
        self,
        source: str,
        target: str,
        edge_label: str,
        properties: Optional[Dict[str, Any]] = None,
    ) -> None:
        """Append a relationship whose ``type`` is ``edge_label``.

        Extra ``properties`` are flattened into the relationship record. Keys
        named ``source``, ``target`` or ``type`` are skipped so they cannot
        overwrite the endpoints or label that ``has_relationship`` matches on.
        """
        entry: Dict[str, Any] = {
            "source": source,
            "target": target,
            "type": edge_label,
        }
        if properties:
            entry.update(
                (key, value) for key, value in properties.items() if key not in entry
            )
        self._relationships.append(entry)

    def set_entity_properties(
        self,
        name: str,
        properties: Dict[str, Any],
    ) -> bool:
        """Merge ``properties`` into the entity dict.

        Returns True if the entity exists, False otherwise.
        """
        node = self._nodes.get(name.lower())
        if node is None:
            return False
        node.update(properties)
        return True

    def register_source(self, source: Dict[str, Any]) -> None:
        """Store a copy of ``source`` under its ``source_id`` (``""`` if absent)."""
        self._sources[source.get("source_id", "")] = dict(source)

    def get_sources(self) -> List[Dict[str, Any]]:
        """Return all registered source dicts."""
        return list(self._sources.values())

    def get_source(self, source_id: str) -> Optional[Dict[str, Any]]:
        """Return the source registered under ``source_id``, or None."""
        return self._sources.get(source_id)

    def add_source_location(
        self,
        source_id: str,
        entity_name_lower: Optional[str] = None,
        relationship_id: Optional[int] = None,
        **kwargs,
    ) -> None:
        """Record a source location; ``kwargs`` are merged into the record."""
        entry: Dict[str, Any] = {
            "source_id": source_id,
            "entity_name_lower": entity_name_lower,
            "relationship_id": relationship_id,
        }
        entry.update(kwargs)
        self._source_locations.append(entry)

    def get_entity_provenance(self, name: str) -> List[Dict[str, Any]]:
        """Return copies of the source locations recorded for ``name``.

        Each copy gains a ``"source"`` key holding the registered source dict
        when one exists for the location's ``source_id``.
        """
        name_lower = name.lower()
        results = []
        for loc in self._source_locations:
            if loc.get("entity_name_lower") != name_lower:
                continue
            entry = dict(loc)
            src = self._sources.get(loc.get("source_id", ""))
            if src:
                entry["source"] = src
            results.append(entry)
        return results

    def has_relationship(
        self,
        source: str,
        target: str,
        edge_label: Optional[str] = None,
    ) -> bool:
        """Return True if a relationship source -> target exists.

        Endpoints are compared case-insensitively. When ``edge_label`` is not
        None, the relationship ``type`` must also equal it exactly.
        """
        src_lower = source.lower()
        tgt_lower = target.lower()
        return any(
            rel["source"].lower() == src_lower
            and rel["target"].lower() == tgt_lower
            and (edge_label is None or rel.get("type") == edge_label)
            for rel in self._relationships
        )
|
f4e202a…
|
noreply
|
331 |
|
|
f4e202a…
|
noreply
|
332 |
|
|
0981a08…
|
noreply
|
333 |
class SQLiteStore(GraphStore): |
|
0981a08…
|
noreply
|
334 |
"""SQLite-backed graph store. Uses Python's built-in sqlite3 module.""" |
|
0981a08…
|
noreply
|
335 |
|
|
0981a08…
|
noreply
|
336 |
# DDL script for the store's tables and indexes.
# Every statement uses IF NOT EXISTS, so running the script against a database
# that already has these objects does not fail.
#   entities         - one row per entity; name_lower is UNIQUE.
#   occurrences      - references entities(name_lower) via a foreign key.
#   relationships    - source/target/type edges with optional content_source,
#                      timestamp and a properties text column (default '{}').
#   sources          - registered content sources keyed by source_id.
#   source_locations - links a source to an entity name and/or relationship id
#                      with optional timestamp/page/section/line/snippet details.
_SCHEMA = """
CREATE TABLE IF NOT EXISTS entities (
name TEXT NOT NULL,
name_lower TEXT NOT NULL UNIQUE,
type TEXT NOT NULL DEFAULT 'concept',
descriptions TEXT NOT NULL DEFAULT '[]',
source TEXT,
properties TEXT NOT NULL DEFAULT '{}'
);
CREATE TABLE IF NOT EXISTS occurrences (
entity_name_lower TEXT NOT NULL,
source TEXT NOT NULL,
timestamp REAL,
text TEXT,
FOREIGN KEY (entity_name_lower) REFERENCES entities(name_lower)
);
CREATE TABLE IF NOT EXISTS relationships (
source TEXT NOT NULL,
target TEXT NOT NULL,
type TEXT NOT NULL DEFAULT 'related_to',
content_source TEXT,
timestamp REAL,
properties TEXT NOT NULL DEFAULT '{}'
);
CREATE INDEX IF NOT EXISTS idx_entities_name_lower ON entities(name_lower);
CREATE INDEX IF NOT EXISTS idx_entities_type ON entities(type);
CREATE INDEX IF NOT EXISTS idx_occurrences_entity ON occurrences(entity_name_lower);
CREATE INDEX IF NOT EXISTS idx_relationships_source ON relationships(source);
CREATE INDEX IF NOT EXISTS idx_relationships_target ON relationships(target);

CREATE TABLE IF NOT EXISTS sources (
source_id TEXT PRIMARY KEY,
source_type TEXT NOT NULL,
title TEXT NOT NULL,
path TEXT,
url TEXT,
mime_type TEXT,
ingested_at TEXT NOT NULL,
metadata TEXT NOT NULL DEFAULT '{}'
);
CREATE TABLE IF NOT EXISTS source_locations (
id INTEGER PRIMARY KEY AUTOINCREMENT,
source_id TEXT NOT NULL REFERENCES sources(source_id),
entity_name_lower TEXT,
relationship_id INTEGER,
timestamp REAL,
page INTEGER,
section TEXT,
line_start INTEGER,
line_end INTEGER,
text_snippet TEXT
);
CREATE INDEX IF NOT EXISTS idx_source_locations_source ON source_locations(source_id);
CREATE INDEX IF NOT EXISTS idx_source_locations_entity
ON source_locations(entity_name_lower);
"""
|
0ad36b7…
|
noreply
|
392 |
|
|
0ad36b7…
|
noreply
|
393 |
def __init__(self, db_path: Union[str, Path]) -> None:
    """Open the SQLite database at ``db_path`` and make sure the schema exists.

    The path is stored as a string, WAL journaling and foreign-key
    enforcement are requested through PRAGMAs, the schema script is run,
    and the result is committed.
    """
    self._db_path = str(db_path)
    # Bind the attribute first so it is set even if a later step raises.
    self._conn = connection = sqlite3.connect(self._db_path)
    for pragma in ("PRAGMA journal_mode=WAL", "PRAGMA foreign_keys=ON"):
        connection.execute(pragma)
    connection.executescript(self._SCHEMA)
    connection.commit()
|
0ad36b7…
|
noreply
|
400 |
|
|
0ad36b7…
|
noreply
|
401 |
def merge_entity(
    self,
    name: str,
    entity_type: str,
    descriptions: List[str],
    source: Optional[str] = None,
) -> None:
    """Insert the entity, or merge into the row with the same lower-cased name.

    Descriptions are stored as a JSON list. Duplicates are dropped while
    keeping first-seen order, so repeated merges give a stable result.
    On merge, the stored type is replaced only when ``entity_type`` is
    non-empty; the original ``name`` and ``source`` are kept. A new entity
    with an empty type is stored as ``"concept"`` because the column is
    NOT NULL.

    Args:
        name: Display name; matching uses ``name.lower()``.
        entity_type: Type label for the entity.
        descriptions: Description strings to add (None is treated as empty).
        source: Optional origin identifier, stored only on insert.
    """
    name_lower = name.lower()
    incoming = list(descriptions or [])
    row = self._conn.execute(
        "SELECT type, descriptions FROM entities WHERE name_lower = ?",
        (name_lower,),
    ).fetchone()

    if row:
        current_type, raw_descriptions = row
        existing = json.loads(raw_descriptions) if raw_descriptions else []
        # dict.fromkeys de-duplicates while preserving insertion order.
        merged = list(dict.fromkeys(existing + incoming))
        self._conn.execute(
            "UPDATE entities SET descriptions = ?, type = ? WHERE name_lower = ?",
            (json.dumps(merged), entity_type or current_type, name_lower),
        )
    else:
        self._conn.execute(
            "INSERT INTO entities (name, name_lower, type, descriptions, source) "
            "VALUES (?, ?, ?, ?, ?)",
            (
                name,
                name_lower,
                entity_type or "concept",
                json.dumps(list(dict.fromkeys(incoming))),
                source,
            ),
        )
    self._conn.commit()
|
0ad36b7…
|
noreply
|
428 |
|
|
0ad36b7…
|
noreply
|
429 |
def add_occurrence(
    self,
    entity_name: str,
    source: str,
    timestamp: Optional[float] = None,
    text: Optional[str] = None,
) -> None:
    """Record an occurrence for an entity that is already stored.

    The entity is looked up by lower-cased name. If no such row exists
    nothing is written; otherwise one occurrence row is inserted and
    committed.
    """
    key = entity_name.lower()
    found = self._conn.execute(
        "SELECT 1 FROM entities WHERE name_lower = ?", (key,)
    ).fetchone()
    if found:
        self._conn.execute(
            "INSERT INTO occurrences (entity_name_lower, source, timestamp, text) "
            "VALUES (?, ?, ?, ?)",
            (key, source, timestamp, text),
        )
        self._conn.commit()
|
0ad36b7…
|
noreply
|
448 |
|
|
0ad36b7…
|
noreply
|
449 |
def add_relationship(
    self,
    source: str,
    target: str,
    rel_type: str,
    content_source: Optional[str] = None,
    timestamp: Optional[float] = None,
) -> None:
    """Insert one relationship row and commit.

    The values are stored exactly as given; no check is made here that the
    endpoints exist as entities.
    """
    values = (source, target, rel_type, content_source, timestamp)
    statement = (
        "INSERT INTO relationships (source, target, type, content_source, timestamp) "
        "VALUES (?, ?, ?, ?, ?)"
    )
    self._conn.execute(statement, values)
    self._conn.commit()
|
0ad36b7…
|
noreply
|
463 |
|
|
0ad36b7…
|
noreply
|
464 |
def get_entity(self, name: str) -> Optional[Dict[str, Any]]:
    """Load one entity and its occurrences by case-insensitive name.

    Returns:
        None when no row matches; otherwise a dict with ``id`` and ``name``
        (both the stored display name), ``type`` (``"concept"`` when the
        stored value is empty), ``descriptions`` (decoded JSON list, or
        ``[]`` when empty), ``occurrences`` and ``source``.
    """
    key = name.lower()
    conn = self._conn
    entity_row = conn.execute(
        "SELECT name, type, descriptions, source FROM entities WHERE name_lower = ?",
        (key,),
    ).fetchone()
    if entity_row is None:
        return None

    display_name, entity_type, raw_descriptions, origin = entity_row
    occurrence_rows = conn.execute(
        "SELECT source, timestamp, text FROM occurrences WHERE entity_name_lower = ?",
        (key,),
    ).fetchall()

    result: Dict[str, Any] = {"id": display_name, "name": display_name}
    result["type"] = entity_type or "concept"
    result["descriptions"] = json.loads(raw_descriptions) if raw_descriptions else []
    result["occurrences"] = [
        {"source": occ_source, "timestamp": occ_time, "text": occ_text}
        for occ_source, occ_time, occ_text in occurrence_rows
    ]
    result["source"] = origin
    return result
|
0ad36b7…
|
noreply
|
487 |
|
|
0ad36b7…
|
noreply
|
488 |
def get_all_entities(self) -> List[Dict[str, Any]]:
    """Return every entity together with its occurrences.

    Occurrences are read in one query and grouped in memory, instead of
    issuing one query per entity. Within each entity they are ordered by
    insertion (rowid).

    Returns:
        One dict per entity with ``id`` and ``name`` (the stored display
        name), ``type`` (``"concept"`` when the stored value is empty),
        ``descriptions`` (decoded JSON list, or ``[]`` when empty),
        ``occurrences`` and ``source``.
    """
    occurrences_by_entity: Dict[str, List[Dict[str, Any]]] = {}
    occurrence_rows = self._conn.execute(
        "SELECT entity_name_lower, source, timestamp, text FROM occurrences ORDER BY rowid"
    ).fetchall()
    for entity_key, occ_source, occ_time, occ_text in occurrence_rows:
        occurrences_by_entity.setdefault(entity_key, []).append(
            {"source": occ_source, "timestamp": occ_time, "text": occ_text}
        )

    entity_rows = self._conn.execute(
        "SELECT name, name_lower, type, descriptions, source FROM entities"
    ).fetchall()
    return [
        {
            "id": display_name,
            "name": display_name,
            "type": entity_type or "concept",
            "descriptions": json.loads(raw_descriptions) if raw_descriptions else [],
            "occurrences": occurrences_by_entity.get(name_lower, []),
            "source": origin,
        }
        for display_name, name_lower, entity_type, raw_descriptions, origin in entity_rows
    ]
|
0ad36b7…
|
noreply
|
511 |
|
|
0ad36b7…
|
noreply
|
512 |
def get_all_relationships(self) -> List[Dict[str, Any]]:
    """Return every relationship row as a dict.

    Each dict has ``source``, ``target``, ``type``, ``content_source`` and
    ``timestamp``. An empty stored type is reported as ``"related_to"``.
    The ``properties`` column is not part of the result.
    """
    columns = ("source", "target", "type", "content_source", "timestamp")
    cursor = self._conn.execute(
        "SELECT source, target, type, content_source, timestamp FROM relationships"
    )
    relationships: List[Dict[str, Any]] = []
    for record in cursor.fetchall():
        relationship = dict(zip(columns, record))
        if not relationship["type"]:
            relationship["type"] = "related_to"
        relationships.append(relationship)
    return relationships
|
0ad36b7…
|
noreply
|
526 |
|
|
0ad36b7…
|
noreply
|
527 |
def get_entity_count(self) -> int:
    """Return the number of rows in the ``entities`` table."""
    result = self._conn.execute("SELECT COUNT(*) FROM entities").fetchone()
    if not result:
        return 0
    return result[0]
|
0ad36b7…
|
noreply
|
530 |
|
|
0ad36b7…
|
noreply
|
531 |
def get_relationship_count(self) -> int:
    """Return the number of rows in the ``relationships`` table."""
    result = self._conn.execute("SELECT COUNT(*) FROM relationships").fetchone()
    if not result:
        return 0
    return result[0]
|
0ad36b7…
|
noreply
|
534 |
|
|
0ad36b7…
|
noreply
|
535 |
def has_entity(self, name: str) -> bool:
    """Return True when an entity row matches ``name`` lower-cased."""
    lookup = self._conn.execute(
        "SELECT 1 FROM entities WHERE name_lower = ?", (name.lower(),)
    )
    return lookup.fetchone() is not None
|
b363c5b…
|
noreply
|
540 |
|
|
b363c5b…
|
noreply
|
541 |
def raw_query(self, query_string: str) -> Any:
    """Run ``query_string`` as SQL on the store's connection and return all rows.

    The statement is executed verbatim with no parameters, and no commit is
    issued here.
    """
    return self._conn.execute(query_string).fetchall()
|
f4e202a…
|
noreply
|
545 |
|
|
f4e202a…
|
noreply
|
546 |
def add_typed_relationship(
    self,
    source: str,
    target: str,
    edge_label: str,
    properties: Optional[Dict[str, Any]] = None,
) -> None:
    """Insert a relationship whose ``type`` column is ``edge_label`` and commit.

    ``properties`` is stored JSON-encoded in the ``properties`` column; a
    missing or empty mapping is stored as ``{}``.
    """
    encoded_properties = json.dumps(properties if properties else {})
    row = (source, target, edge_label, encoded_properties)
    self._conn.execute(
        "INSERT INTO relationships (source, target, type, properties) VALUES (?, ?, ?, ?)",
        row,
    )
    self._conn.commit()
|
f4e202a…
|
noreply
|
558 |
|
|
f4e202a…
|
noreply
|
559 |
def set_entity_properties(
    self,
    name: str,
    properties: Dict[str, Any],
) -> bool:
    """Merge ``properties`` into the entity's JSON ``properties`` column.

    Returns:
        False when the entity does not exist; True otherwise. An empty
        ``properties`` mapping returns True without writing anything.
    """
    key = name.lower()
    if not self.has_entity(name):
        return False
    if not properties:
        return True

    stored = self._conn.execute(
        "SELECT properties FROM entities WHERE name_lower = ?", (key,)
    ).fetchone()
    merged: Dict[str, Any] = {}
    if stored and stored[0]:
        merged = json.loads(stored[0])
    # New values win over stored ones for the same key.
    merged.update(properties)

    self._conn.execute(
        "UPDATE entities SET properties = ? WHERE name_lower = ?",
        (json.dumps(merged), key),
    )
    self._conn.commit()
    return True
|
f4e202a…
|
noreply
|
580 |
|
|
f4e202a…
|
noreply
|
581 |
def has_relationship(
    self,
    source: str,
    target: str,
    edge_label: Optional[str] = None,
) -> bool:
    """Report whether an edge from ``source`` to ``target`` is stored.

    Endpoint names are compared case-insensitively using Python's
    ``str.lower()``. When ``edge_label`` is truthy the edge's ``type`` must
    also equal it exactly; a ``None`` or empty label matches any type.

    Args:
        source: Name of the edge's source endpoint.
        target: Name of the edge's target endpoint.
        edge_label: Optional relationship type to require.

    Returns:
        ``True`` if at least one matching row exists in ``relationships``.
    """
    source_key = source.lower()
    target_key = target.lower()

    # Fast path: let SQLite compare. Its built-in LOWER() folds ASCII
    # letters only, so this is exact for ASCII names.
    sql = "SELECT 1 FROM relationships WHERE LOWER(source) = ? AND LOWER(target) = ?"
    params = [source_key, target_key]
    if edge_label:
        sql += " AND type = ?"
        params.append(edge_label)
    if self._conn.execute(sql, params).fetchone() is not None:
        return True

    # With pure-ASCII keys the native comparison above is authoritative.
    if all(ord(ch) < 128 for ch in source_key + target_key):
        return False

    # Non-ASCII keys: SQLite leaves characters such as "É" untouched while
    # str.lower() folds them, so a stored "Émile" never equals the key
    # "émile" in SQL. Redo the comparison in Python for these names.
    scan_sql = "SELECT source, target FROM relationships"
    scan_params = []
    if edge_label:
        scan_sql += " WHERE type = ?"
        scan_params.append(edge_label)
    for stored_source, stored_target in self._conn.execute(scan_sql, scan_params):
        if (
            isinstance(stored_source, str)
            and isinstance(stored_target, str)
            and stored_source.lower() == source_key
            and stored_target.lower() == target_key
        ):
            return True
    return False
|
0981a08…
|
noreply
|
599 |
|
|
0981a08…
|
noreply
|
600 |
def register_source(self, source: Dict[str, Any]) -> None:
    """Insert or refresh the ``sources`` row described by ``source``.

    The row is identified by ``source["source_id"]`` (empty string when the
    key is absent). If a row with that id exists its other columns are
    overwritten; otherwise a new row is inserted. ``source_type``, ``title``
    and ``ingested_at`` default to ``""``; ``path``, ``url`` and
    ``mime_type`` default to ``None``; ``metadata`` is stored as JSON and
    defaults to ``{}``. The change is committed before returning.
    """
    sid = source.get("source_id", "")
    already_there = self._conn.execute(
        "SELECT 1 FROM sources WHERE source_id = ?", (sid,)
    ).fetchone()

    # Values for every column except source_id, in table column order; both
    # statements bind exactly these and differ only in where the id goes.
    details = (
        source.get("source_type", ""),
        source.get("title", ""),
        source.get("path"),
        source.get("url"),
        source.get("mime_type"),
        source.get("ingested_at", ""),
        json.dumps(source.get("metadata", {})),
    )

    if already_there:
        statement = (
            "UPDATE sources SET source_type = ?, title = ?, path = ?, url = ?, "
            "mime_type = ?, ingested_at = ?, metadata = ? WHERE source_id = ?"
        )
        bound = details + (sid,)
    else:
        statement = (
            "INSERT INTO sources (source_id, source_type, title, path, url, "
            "mime_type, ingested_at, metadata) VALUES (?, ?, ?, ?, ?, ?, ?, ?)"
        )
        bound = (sid,) + details

    self._conn.execute(statement, bound)
    self._conn.commit()
|
0981a08…
|
noreply
|
636 |
|
|
0981a08…
|
noreply
|
637 |
def get_sources(self) -> List[Dict[str, Any]]:
    """Return every row of the ``sources`` table as a dictionary.

    Each dictionary carries the keys ``source_id``, ``source_type``,
    ``title``, ``path``, ``url``, ``mime_type``, ``ingested_at`` and
    ``metadata``; the stored metadata JSON is decoded, and an empty or NULL
    column yields ``{}``.
    """
    cursor = self._conn.execute(
        "SELECT source_id, source_type, title, path, url, mime_type, "
        "ingested_at, metadata FROM sources"
    )
    sources = []
    for (
        source_id,
        source_type,
        title,
        path,
        url,
        mime_type,
        ingested_at,
        raw_metadata,
    ) in cursor.fetchall():
        if raw_metadata:
            metadata = json.loads(raw_metadata)
        else:
            metadata = {}
        sources.append(
            {
                "source_id": source_id,
                "source_type": source_type,
                "title": title,
                "path": path,
                "url": url,
                "mime_type": mime_type,
                "ingested_at": ingested_at,
                "metadata": metadata,
            }
        )
    return sources
|
0981a08…
|
noreply
|
655 |
|
|
0981a08…
|
noreply
|
656 |
def get_source(self, source_id: str) -> Optional[Dict[str, Any]]:
    """Fetch a single ``sources`` row by its ``source_id``.

    Returns:
        A dictionary with the keys ``source_id``, ``source_type``, ``title``,
        ``path``, ``url``, ``mime_type``, ``ingested_at`` and ``metadata``
        (decoded from JSON, ``{}`` when empty or NULL), or ``None`` when no
        row has that id.
    """
    record = self._conn.execute(
        "SELECT source_id, source_type, title, path, url, mime_type, "
        "ingested_at, metadata FROM sources WHERE source_id = ?",
        (source_id,),
    ).fetchone()
    if not record:
        return None

    # The first seven selected columns map one-to-one onto result keys;
    # zip() stops there, and metadata is decoded separately.
    plain_columns = (
        "source_id",
        "source_type",
        "title",
        "path",
        "url",
        "mime_type",
        "ingested_at",
    )
    found = dict(zip(plain_columns, record))
    raw_metadata = record[7]
    found["metadata"] = json.loads(raw_metadata) if raw_metadata else {}
    return found
|
0981a08…
|
noreply
|
674 |
|
|
0981a08…
|
noreply
|
675 |
def add_source_location(
    self,
    source_id: str,
    entity_name_lower: Optional[str] = None,
    relationship_id: Optional[int] = None,
    **kwargs,
) -> None:
    """Insert one row into ``source_locations`` and commit.

    Args:
        source_id: Value for the ``source_id`` column.
        entity_name_lower: Optional value for the ``entity_name_lower`` column.
        relationship_id: Optional value for the ``relationship_id`` column.
        **kwargs: Optional ``timestamp``, ``page``, ``section``,
            ``line_start``, ``line_end`` and ``text_snippet`` values; any
            that are omitted are stored as NULL. Other keyword names are
            not read.
    """
    optional_columns = (
        "timestamp",
        "page",
        "section",
        "line_start",
        "line_end",
        "text_snippet",
    )
    bound_values = [source_id, entity_name_lower, relationship_id]
    bound_values.extend(kwargs.get(column) for column in optional_columns)
    self._conn.execute(
        "INSERT INTO source_locations (source_id, entity_name_lower, relationship_id, "
        "timestamp, page, section, line_start, line_end, text_snippet) "
        "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
        tuple(bound_values),
    )
    self._conn.commit()
|
0981a08…
|
noreply
|
699 |
|
|
0981a08…
|
noreply
|
700 |
def get_entity_provenance(self, name: str) -> List[Dict[str, Any]]:
    """List the source locations recorded for an entity, with their sources.

    ``name`` is lower-cased and matched against
    ``source_locations.entity_name_lower``; each location is joined to its
    row in ``sources``.

    Returns:
        One dictionary per location holding the location columns plus a
        nested ``"source"`` dictionary describing the originating source
        (its metadata JSON decoded, ``{}`` when empty or NULL).
    """
    lookup_key = name.lower()
    matches = self._conn.execute(
        "SELECT sl.source_id, sl.entity_name_lower, sl.relationship_id, "
        "sl.timestamp, sl.page, sl.section, sl.line_start, sl.line_end, "
        "sl.text_snippet, s.source_type, s.title, s.path, s.url, s.mime_type, "
        "s.ingested_at, s.metadata "
        "FROM source_locations sl "
        "JOIN sources s ON sl.source_id = s.source_id "
        "WHERE sl.entity_name_lower = ?",
        (lookup_key,),
    ).fetchall()

    # Unpack each 16-column row by name: nine location columns first, then
    # the seven columns taken from the joined source.
    return [
        {
            "source_id": source_id,
            "entity_name_lower": entity_key,
            "relationship_id": relationship_id,
            "timestamp": timestamp,
            "page": page,
            "section": section,
            "line_start": line_start,
            "line_end": line_end,
            "text_snippet": text_snippet,
            "source": {
                "source_id": source_id,
                "source_type": source_type,
                "title": title,
                "path": path,
                "url": url,
                "mime_type": mime_type,
                "ingested_at": ingested_at,
                "metadata": json.loads(raw_metadata) if raw_metadata else {},
            },
        }
        for (
            source_id,
            entity_key,
            relationship_id,
            timestamp,
            page,
            section,
            line_start,
            line_end,
            text_snippet,
            source_type,
            title,
            path,
            url,
            mime_type,
            ingested_at,
            raw_metadata,
        ) in matches
    ]
|
0ad36b7…
|
noreply
|
738 |
|
|
0ad36b7…
|
noreply
|
739 |
def close(self) -> None:
    """Close the SQLite connection, if one is held, and drop the reference.

    Calling this again after the connection has been released is a no-op.
    """
    connection = self._conn
    if not connection:
        return
    connection.close()
    self._conn = None
|
0ad36b7…
|
noreply
|
744 |
|
|
0ad36b7…
|
noreply
|
745 |
|
|
0ad36b7…
|
noreply
|
746 |
def create_store(db_path: Optional[Union[str, Path]] = None) -> GraphStore:
    """Build a graph store suited to the requested persistence.

    Args:
        db_path: Location of the SQLite database. ``None`` requests a purely
            in-memory store.

    Returns:
        A ``SQLiteStore`` opened on ``db_path`` when one is given and can be
        constructed; otherwise an ``InMemoryStore``. A failure to construct
        the SQLite store is logged as a warning rather than raised.
    """
    if db_path is None:
        return InMemoryStore()
    try:
        persistent_store = SQLiteStore(db_path)
    except Exception as e:
        # Best-effort fallback: keep the caller working without persistence.
        logger.warning(f"Failed to initialize SQLite at {db_path}: {e}. Using in-memory store.")
        return InMemoryStore()
    return persistent_store