Navegador

navegador / navegador / multirepo.py
Blame History Raw 317 lines
1
"""
2
Multi-repo support — index and query across multiple repositories.
3
4
Issue: #62 adds WorkspaceMode (UNIFIED / FEDERATED) and WorkspaceManager.
5
6
Usage::
7
8
from navegador.multirepo import MultiRepoManager, WorkspaceMode, WorkspaceManager
9
10
# Legacy: single shared graph
11
mgr = MultiRepoManager(store)
12
mgr.add_repo("backend", "/path/to/backend")
13
mgr.add_repo("frontend", "/path/to/frontend")
14
stats = mgr.ingest_all()
15
results = mgr.cross_repo_search("authenticate")
16
17
# v0.4: workspace with explicit mode
18
ws = WorkspaceManager(store, mode=WorkspaceMode.UNIFIED)
19
ws.add_repo("backend", "/path/to/backend")
20
ws.add_repo("frontend", "/path/to/frontend")
21
stats = ws.ingest_all()
22
results = ws.search("authenticate")
23
24
# Federated: each repo gets its own graph; cross-repo queries merge results
25
ws_fed = WorkspaceManager(store, mode=WorkspaceMode.FEDERATED)
26
ws_fed.add_repo("backend", "/path/to/backend")
27
results = ws_fed.search("authenticate")
28
"""
29
30
from __future__ import annotations
31
32
import logging
33
from enum import Enum
34
from pathlib import Path
35
from typing import Any
36
37
from navegador.graph.schema import NodeLabel
38
from navegador.graph.store import GraphStore
39
40
logger = logging.getLogger(__name__)
41
42
# Node label reserved for storing the repo registry as a special node in the graph
43
_REGISTRY_LABEL = "RepoRegistry"
44
45
46
# ── WorkspaceMode ─────────────────────────────────────────────────────────────
47
48
49
class WorkspaceMode(str, Enum):
    """
    Storage strategy for a multi-repo workspace.

    Each member is also a plain ``str`` (its value), so a mode can be
    compared with, or built from, the literal ``"unified"`` / ``"federated"``.

    UNIFIED
        One graph is shared by every repository.  Traversing from one repo
        into another is trivial, but nothing keeps the repos isolated from
        each other.

    FEDERATED
        Every repository is given its own named graph.  A cross-repo query
        runs against each graph in turn and the per-graph results are merged.
        This gives namespace isolation: nodes of repo A cannot accidentally
        collide with nodes of repo B.
    """

    UNIFIED = "unified"
    FEDERATED = "federated"
66
67
68
# ── WorkspaceManager ──────────────────────────────────────────────────────────
69
70
71
class WorkspaceManager:
    """
    Multi-repo workspace with explicit UNIFIED or FEDERATED mode.

    In UNIFIED mode every repository is ingested into, and searched through,
    the single shared :class:`~navegador.graph.store.GraphStore` handed to the
    constructor.

    In FEDERATED mode each repo is tracked with its own graph name
    (``navegador_<repo name>``).  Queries fan out across all per-repo graphs
    and merge the result lists.

    Registrations live in memory for the lifetime of the instance and are
    additionally written to the shared store as ``Repository`` nodes.
    """

    def __init__(self, store: GraphStore, mode: WorkspaceMode = WorkspaceMode.UNIFIED) -> None:
        self.store = store
        self.mode = mode
        # repo name → {"path": str, "graph_name": str}
        self._repos: dict[str, dict[str, str]] = {}

    # ── Registration ──────────────────────────────────────────────────────────

    def add_repo(self, name: str, path: str | Path) -> None:
        """
        Register a repository by name and filesystem path.

        The path is resolved to an absolute path.  Registering the same name
        again replaces the in-memory entry.
        """
        resolved = str(Path(path).resolve())
        graph_name = f"navegador_{name}" if self.mode == WorkspaceMode.FEDERATED else "navegador"
        self._repos[name] = {"path": resolved, "graph_name": graph_name}

        # Persist registration as a Repository node in the shared store
        self._persist_registration(name, resolved)
        logger.info("WorkspaceManager (%s): registered %s → %s", self.mode.value, name, resolved)

    def list_repos(self) -> list[dict[str, str]]:
        """Return all registered repositories as ``{name, path, graph_name}`` dicts."""
        return [
            {"name": name, "path": info["path"], "graph_name": info["graph_name"]}
            for name, info in self._repos.items()
        ]

    # ── Ingestion ─────────────────────────────────────────────────────────────

    def ingest_all(self, clear: bool = False) -> dict[str, Any]:
        """
        Ingest every registered repository according to the workspace mode.

        In UNIFIED mode all repos are ingested into the shared store.
        In FEDERATED mode each repo is ingested into its own named graph.

        When *clear* is true the shared store is cleared first and the
        registration nodes are written back afterwards; in FEDERATED mode
        each per-repo graph is cleared as well, right before it is ingested.

        A failure while preparing or ingesting one repo is logged and recorded
        as ``{"error": "<message>"}`` for that repo; remaining repos still run.

        Returns
        -------
        dict keyed by repo name → ingestion stats
        """
        from navegador.ingestion.parser import RepoIngester

        if not self._repos:
            logger.warning("WorkspaceManager: no repositories registered")
            return {}

        federated = self.mode == WorkspaceMode.FEDERATED

        if clear:
            self.store.clear()
            # NOTE(review): clear() is expected to wipe the whole shared graph,
            # which includes the Repository nodes written by add_repo — put
            # them back so the persisted registry matches self._repos.
            for name, info in self._repos.items():
                self._persist_registration(name, info["path"])

        summary: dict[str, Any] = {}

        for name, info in self._repos.items():
            logger.info("WorkspaceManager (%s): ingesting %s", self.mode.value, name)

            try:
                if federated:
                    # Each repo uses its own graph — create a per-repo store
                    target_store = self._federated_store(info["graph_name"])
                    if clear:
                        # Clearing self.store does not touch the per-repo graphs.
                        target_store.clear()
                else:
                    target_store = self.store

                ingester = RepoIngester(target_store)
                summary[name] = ingester.ingest(info["path"], clear=False)
            except Exception as exc:  # noqa: BLE001
                logger.error("WorkspaceManager: failed to ingest %s: %s", name, exc)
                summary[name] = {"error": str(exc)}

        return summary

    # ── Search ────────────────────────────────────────────────────────────────

    def search(self, query: str, limit: int = 20) -> list[dict[str, Any]]:
        """
        Search across all repositories.

        In UNIFIED mode queries the single shared graph.
        In FEDERATED mode fans out across each per-repo graph and merges.
        Hits are de-duplicated per repo on (label, name, file_path), so
        same-named nodes from different repos or files are all reported.

        Returns
        -------
        list of dicts with keys: label, name, file_path, repo
        (``repo`` is the registered repo name in FEDERATED mode and ``""``
        in UNIFIED mode); at most *limit* entries
        """
        if self.mode == WorkspaceMode.UNIFIED:
            return self._search_store(self.store, query, limit)

        # Federated: merge results from each repo's graph
        merged: list[dict[str, Any]] = []
        seen: set[tuple[str, str, str, str]] = set()

        for name, info in self._repos.items():
            if len(merged) >= limit:
                # Already enough hits — no need to query the remaining graphs.
                break
            try:
                target_store = self._federated_store(info["graph_name"])
            except Exception:  # noqa: BLE001
                logger.debug("WorkspaceManager: search failed for repo %s", name, exc_info=True)
                continue

            for hit in self._search_store(target_store, query, limit):
                key = (name, hit["label"], hit["name"], hit["file_path"])
                if key in seen:
                    continue
                seen.add(key)
                hit["repo"] = name
                merged.append(hit)

        return merged[:limit]

    # ── Helpers ───────────────────────────────────────────────────────────────

    def _persist_registration(self, name: str, resolved: str) -> None:
        """Write one repo registration to the shared store as a Repository node."""
        self.store.create_node(
            NodeLabel.Repository,
            {
                "name": name,
                "path": resolved,
                "description": f"workspace:{self.mode.value}",
                "language": "",
                "file_path": resolved,
            },
        )

    def _federated_store(self, graph_name: str) -> GraphStore:
        """
        Return a GraphStore that uses the per-repo graph name.

        Shares the underlying DB client from self.store but selects a
        different named graph.  ``GraphStore.__init__`` is bypassed, so only
        ``_client`` and ``_graph`` are set on the returned object.
        """
        store = GraphStore.__new__(GraphStore)
        store._client = self.store._client
        store._graph = self.store._client.select_graph(graph_name)
        return store

    @staticmethod
    def _search_store(store: GraphStore, query: str, limit: int) -> list[dict[str, Any]]:
        """
        Run a case-insensitive substring match on node names in *store*.

        Best-effort: a failing query is logged at debug level and yields an
        empty list.  Missing label/name/file_path values come back as ``""``
        and ``repo`` is always ``""`` (callers fill it in when they know it).
        """
        cypher = (
            "MATCH (n) "
            "WHERE toLower(n.name) CONTAINS toLower($q) "
            "RETURN labels(n)[0] AS label, n.name AS name, "
            "       coalesce(n.file_path, n.path, '') AS file_path "
            # limit is coerced to int before interpolation, so it cannot inject Cypher
            f"LIMIT {int(limit)}"
        )
        try:
            result = store.query(cypher, {"q": query})
            rows = result.result_set or []
        except Exception:  # noqa: BLE001
            logger.debug("WorkspaceManager: search query failed", exc_info=True)
            return []
        return [
            {"label": row[0] or "", "name": row[1] or "", "file_path": row[2] or "", "repo": ""}
            for row in rows
        ]
227
228
229
class MultiRepoManager:
    """
    Register, ingest, and query across multiple repositories.

    Repos are persisted as Repository nodes in the graph so they survive
    process restarts.  The graph is the only registry: every call to
    :meth:`list_repos` reads it back from the store.
    """

    def __init__(self, store: GraphStore) -> None:
        self.store = store

    # ── Registration ──────────────────────────────────────────────────────────

    def add_repo(self, name: str, path: str | Path) -> None:
        """Register a repository by name and filesystem path (resolved to absolute)."""
        resolved = str(Path(path).resolve())
        self.store.create_node(
            NodeLabel.Repository,
            {
                "name": name,
                "path": resolved,
                "description": "",
                "file_path": resolved,
            },
        )
        logger.info("MultiRepo: registered %s → %s", name, resolved)

    # ── Query ─────────────────────────────────────────────────────────────────

    def list_repos(self) -> list[dict[str, Any]]:
        """Return all registered repositories as ``{name, path}`` dicts, ordered by name."""
        result = self.store.query("MATCH (r:Repository) RETURN r.name, r.path ORDER BY r.name")
        rows = result.result_set or []
        return [{"name": row[0], "path": row[1]} for row in rows]

    # ── Ingestion ─────────────────────────────────────────────────────────────

    def ingest_all(self, clear: bool = False) -> dict[str, Any]:
        """
        Ingest every registered repository.

        When *clear* is true the store is cleared first; the repositories
        that were registered before the clear are then registered again so
        the registry is not lost.

        Returns a summary dict keyed by repo name, each value being the
        ingestion stats returned by RepoIngester, or ``{"error": "<message>"}``
        when ingesting that repo raised.
        """
        from navegador.ingestion.parser import RepoIngester

        # Snapshot the registry before any clear: it lives in the graph itself.
        repos = self.list_repos()
        if not repos:
            logger.warning("MultiRepo: no repositories registered")
            return {}

        if clear:
            self.store.clear()
            # NOTE(review): clear() is expected to wipe the Repository nodes
            # too; write them back so list_repos() still works afterwards.
            for repo in repos:
                if repo["path"]:
                    self.add_repo(repo["name"], repo["path"])

        summary: dict[str, Any] = {}
        for repo in repos:
            name = repo["name"]
            path = repo["path"]
            logger.info("MultiRepo: ingesting %s from %s", name, path)
            try:
                ingester = RepoIngester(self.store)
                summary[name] = ingester.ingest(path, clear=False)
            except Exception as exc:  # noqa: BLE001
                logger.error("MultiRepo: failed to ingest %s: %s", name, exc)
                summary[name] = {"error": str(exc)}

        return summary

    # ── Search ────────────────────────────────────────────────────────────────

    def cross_repo_search(self, query: str, limit: int = 20) -> list[dict[str, Any]]:
        """
        Case-insensitive substring search on node names across all node types.

        Returns a list of dicts with keys: label, name, file_path.  Missing
        values are returned as ``""`` rather than ``None``.
        """
        cypher = (
            "MATCH (n) "
            "WHERE toLower(n.name) CONTAINS toLower($q) "
            "RETURN labels(n)[0] AS label, n.name AS name, "
            "       coalesce(n.file_path, n.path, '') AS file_path "
            # limit is coerced to int before interpolation, so it cannot inject Cypher
            f"LIMIT {int(limit)}"
        )
        result = self.store.query(cypher, {"q": query})
        rows = result.result_set or []
        return [
            {"label": row[0] or "", "name": row[1] or "", "file_path": row[2] or ""}
            for row in rows
        ]
317

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button