Navegador

feat: incremental ingestion with content hashing and watch mode. Files are tracked by SHA-256 content hash on File nodes. With --incremental, unchanged files are skipped and changed files have their subgraph cleared before re-parsing. The --watch flag enables polling-based continuous re-ingestion. Closes #21

lmata 2026-03-23 04:49 trunk
Commit df3d6fa8b5d1133a8671472c424cd4dac096f1802d4fe00ae597017db4cfe302
--- navegador/cli/commands.py
+++ navegador/cli/commands.py
@@ -104,24 +104,46 @@
104104
105105
@main.command()
106106
@click.argument("repo_path", type=click.Path(exists=True))
107107
@DB_OPTION
108108
@click.option("--clear", is_flag=True, help="Clear existing graph before ingesting.")
109
+@click.option("--incremental", is_flag=True, help="Only re-parse changed files.")
110
+@click.option("--watch", is_flag=True, help="Watch for changes and re-ingest incrementally.")
111
+@click.option("--interval", default=2.0, show_default=True, help="Watch poll interval (seconds).")
109112
@click.option("--json", "as_json", is_flag=True, help="Output stats as JSON.")
110
-def ingest(repo_path: str, db: str, clear: bool, as_json: bool):
113
+def ingest(repo_path: str, db: str, clear: bool, incremental: bool, watch: bool,
114
+ interval: float, as_json: bool):
111115
"""Ingest a repository's code into the graph (AST + call graph)."""
112116
from navegador.ingestion import RepoIngester
113117
114118
store = _get_store(db)
115119
ingester = RepoIngester(store)
116120
121
+ if watch:
122
+ console.print(f"[bold]Watching[/bold] {repo_path} (interval={interval}s, Ctrl-C to stop)")
123
+
124
+ def _on_cycle(stats):
125
+ changed = stats["files"]
126
+ skipped = stats["skipped"]
127
+ if changed:
128
+ console.print(
129
+ f" [green]{changed} changed[/green], {skipped} unchanged"
130
+ )
131
+ return True # keep watching
132
+
133
+ try:
134
+ ingester.watch(repo_path, interval=interval, callback=_on_cycle)
135
+ except KeyboardInterrupt:
136
+ console.print("\n[yellow]Watch stopped.[/yellow]")
137
+ return
138
+
117139
if as_json:
118
- stats = ingester.ingest(repo_path, clear=clear)
140
+ stats = ingester.ingest(repo_path, clear=clear, incremental=incremental)
119141
click.echo(json.dumps(stats, indent=2))
120142
else:
121143
with console.status(f"[bold]Ingesting[/bold] {repo_path}..."):
122
- stats = ingester.ingest(repo_path, clear=clear)
144
+ stats = ingester.ingest(repo_path, clear=clear, incremental=incremental)
123145
table = Table(title="Ingestion complete")
124146
table.add_column("Metric", style="cyan")
125147
table.add_column("Count", justify="right", style="green")
126148
for k, v in stats.items():
127149
table.add_row(k.capitalize(), str(v))
128150
--- navegador/cli/commands.py
+++ navegador/cli/commands.py
@@ -104,24 +104,46 @@
104
105 @main.command()
106 @click.argument("repo_path", type=click.Path(exists=True))
107 @DB_OPTION
108 @click.option("--clear", is_flag=True, help="Clear existing graph before ingesting.")
 
 
 
109 @click.option("--json", "as_json", is_flag=True, help="Output stats as JSON.")
110 def ingest(repo_path: str, db: str, clear: bool, as_json: bool):
 
111 """Ingest a repository's code into the graph (AST + call graph)."""
112 from navegador.ingestion import RepoIngester
113
114 store = _get_store(db)
115 ingester = RepoIngester(store)
116
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117 if as_json:
118 stats = ingester.ingest(repo_path, clear=clear)
119 click.echo(json.dumps(stats, indent=2))
120 else:
121 with console.status(f"[bold]Ingesting[/bold] {repo_path}..."):
122 stats = ingester.ingest(repo_path, clear=clear)
123 table = Table(title="Ingestion complete")
124 table.add_column("Metric", style="cyan")
125 table.add_column("Count", justify="right", style="green")
126 for k, v in stats.items():
127 table.add_row(k.capitalize(), str(v))
128
--- navegador/cli/commands.py
+++ navegador/cli/commands.py
@@ -104,24 +104,46 @@
104
105 @main.command()
106 @click.argument("repo_path", type=click.Path(exists=True))
107 @DB_OPTION
108 @click.option("--clear", is_flag=True, help="Clear existing graph before ingesting.")
109 @click.option("--incremental", is_flag=True, help="Only re-parse changed files.")
110 @click.option("--watch", is_flag=True, help="Watch for changes and re-ingest incrementally.")
111 @click.option("--interval", default=2.0, show_default=True, help="Watch poll interval (seconds).")
112 @click.option("--json", "as_json", is_flag=True, help="Output stats as JSON.")
113 def ingest(repo_path: str, db: str, clear: bool, incremental: bool, watch: bool,
114 interval: float, as_json: bool):
115 """Ingest a repository's code into the graph (AST + call graph)."""
116 from navegador.ingestion import RepoIngester
117
118 store = _get_store(db)
119 ingester = RepoIngester(store)
120
121 if watch:
122 console.print(f"[bold]Watching[/bold] {repo_path} (interval={interval}s, Ctrl-C to stop)")
123
124 def _on_cycle(stats):
125 changed = stats["files"]
126 skipped = stats["skipped"]
127 if changed:
128 console.print(
129 f" [green]{changed} changed[/green], {skipped} unchanged"
130 )
131 return True # keep watching
132
133 try:
134 ingester.watch(repo_path, interval=interval, callback=_on_cycle)
135 except KeyboardInterrupt:
136 console.print("\n[yellow]Watch stopped.[/yellow]")
137 return
138
139 if as_json:
140 stats = ingester.ingest(repo_path, clear=clear, incremental=incremental)
141 click.echo(json.dumps(stats, indent=2))
142 else:
143 with console.status(f"[bold]Ingesting[/bold] {repo_path}..."):
144 stats = ingester.ingest(repo_path, clear=clear, incremental=incremental)
145 table = Table(title="Ingestion complete")
146 table.add_column("Metric", style="cyan")
147 table.add_column("Count", justify="right", style="green")
148 for k, v in stats.items():
149 table.add_row(k.capitalize(), str(v))
150
--- navegador/graph/queries.py
+++ navegador/graph/queries.py
@@ -209,10 +209,22 @@
209209
MATCH (n)-[:ASSIGNED_TO]->(p:Person)
210210
WHERE n.name = $name AND ($file_path = '' OR n.file_path = $file_path)
211211
RETURN labels(n)[0] AS node_type, n.name AS node_name,
212212
p.name AS owner, p.email AS email, p.role AS role, p.team AS team
213213
"""
214
+
215
+# ── Incremental ingestion ─────────────────────────────────────────────────────
216
+
217
+FILE_HASH = """
218
+MATCH (f:File {path: $path})
219
+RETURN f.content_hash AS hash
220
+"""
221
+
222
+DELETE_FILE_SUBGRAPH = """
223
+MATCH (f:File {path: $path})-[:CONTAINS]->(child)
224
+DETACH DELETE child
225
+"""
214226
215227
# ── Stats ─────────────────────────────────────────────────────────────────────
216228
217229
NODE_TYPE_COUNTS = """
218230
MATCH (n)
219231
--- navegador/graph/queries.py
+++ navegador/graph/queries.py
@@ -209,10 +209,22 @@
209 MATCH (n)-[:ASSIGNED_TO]->(p:Person)
210 WHERE n.name = $name AND ($file_path = '' OR n.file_path = $file_path)
211 RETURN labels(n)[0] AS node_type, n.name AS node_name,
212 p.name AS owner, p.email AS email, p.role AS role, p.team AS team
213 """
 
 
 
 
 
 
 
 
 
 
 
 
214
215 # ── Stats ─────────────────────────────────────────────────────────────────────
216
217 NODE_TYPE_COUNTS = """
218 MATCH (n)
219
--- navegador/graph/queries.py
+++ navegador/graph/queries.py
@@ -209,10 +209,22 @@
209 MATCH (n)-[:ASSIGNED_TO]->(p:Person)
210 WHERE n.name = $name AND ($file_path = '' OR n.file_path = $file_path)
211 RETURN labels(n)[0] AS node_type, n.name AS node_name,
212 p.name AS owner, p.email AS email, p.role AS role, p.team AS team
213 """
214
215 # ── Incremental ingestion ─────────────────────────────────────────────────────
216
217 FILE_HASH = """
218 MATCH (f:File {path: $path})
219 RETURN f.content_hash AS hash
220 """
221
222 DELETE_FILE_SUBGRAPH = """
223 MATCH (f:File {path: $path})-[:CONTAINS]->(child)
224 DETACH DELETE child
225 """
226
227 # ── Stats ─────────────────────────────────────────────────────────────────────
228
229 NODE_TYPE_COUNTS = """
230 MATCH (n)
231
--- navegador/graph/schema.py
+++ navegador/graph/schema.py
@@ -60,11 +60,11 @@
6060
# ── Property keys per node label ──────────────────────────────────────────────
6161
6262
NODE_PROPS = {
6363
# Code layer
6464
NodeLabel.Repository: ["name", "path", "language", "description"],
65
- NodeLabel.File: ["name", "path", "language", "size", "line_count"],
65
+ NodeLabel.File: ["name", "path", "language", "size", "line_count", "content_hash"],
6666
NodeLabel.Module: ["name", "file_path", "docstring"],
6767
NodeLabel.Class: ["name", "file_path", "line_start", "line_end", "docstring", "source"],
6868
NodeLabel.Function: [
6969
"name",
7070
"file_path",
7171
--- navegador/graph/schema.py
+++ navegador/graph/schema.py
@@ -60,11 +60,11 @@
60 # ── Property keys per node label ──────────────────────────────────────────────
61
62 NODE_PROPS = {
63 # Code layer
64 NodeLabel.Repository: ["name", "path", "language", "description"],
65 NodeLabel.File: ["name", "path", "language", "size", "line_count"],
66 NodeLabel.Module: ["name", "file_path", "docstring"],
67 NodeLabel.Class: ["name", "file_path", "line_start", "line_end", "docstring", "source"],
68 NodeLabel.Function: [
69 "name",
70 "file_path",
71
--- navegador/graph/schema.py
+++ navegador/graph/schema.py
@@ -60,11 +60,11 @@
60 # ── Property keys per node label ──────────────────────────────────────────────
61
62 NODE_PROPS = {
63 # Code layer
64 NodeLabel.Repository: ["name", "path", "language", "description"],
65 NodeLabel.File: ["name", "path", "language", "size", "line_count", "content_hash"],
66 NodeLabel.Module: ["name", "file_path", "docstring"],
67 NodeLabel.Class: ["name", "file_path", "line_start", "line_end", "docstring", "source"],
68 NodeLabel.Function: [
69 "name",
70 "file_path",
71
--- navegador/ingestion/parser.py
+++ navegador/ingestion/parser.py
@@ -9,13 +9,16 @@
99
Go .go
1010
Rust .rs
1111
Java .java
1212
"""
1313
14
+import hashlib
1415
import logging
16
+import time
1517
from pathlib import Path
1618
19
+from navegador.graph import queries
1720
from navegador.graph.schema import NodeLabel
1821
from navegador.graph.store import GraphStore
1922
2023
logger = logging.getLogger(__name__)
2124
@@ -44,20 +47,26 @@
4447
4548
def __init__(self, store: GraphStore) -> None:
4649
self.store = store
4750
self._parsers: dict[str, "LanguageParser"] = {}
4851
49
- def ingest(self, repo_path: str | Path, clear: bool = False) -> dict[str, int]:
52
+ def ingest(
53
+ self,
54
+ repo_path: str | Path,
55
+ clear: bool = False,
56
+ incremental: bool = False,
57
+ ) -> dict[str, int]:
5058
"""
5159
Ingest a repository into the graph.
5260
5361
Args:
5462
repo_path: Path to the repository root.
5563
clear: If True, wipe the graph before ingesting.
64
+ incremental: If True, skip files whose content hash hasn't changed.
5665
5766
Returns:
58
- Dict with counts: files, functions, classes, edges.
67
+ Dict with counts: files, functions, classes, edges, skipped.
5968
"""
6069
repo_path = Path(repo_path).resolve()
6170
if not repo_path.exists():
6271
raise FileNotFoundError(f"Repository not found: {repo_path}")
6372
@@ -71,34 +80,100 @@
7180
"name": repo_path.name,
7281
"path": str(repo_path),
7382
},
7483
)
7584
76
- stats: dict[str, int] = {"files": 0, "functions": 0, "classes": 0, "edges": 0}
85
+ stats: dict[str, int] = {
86
+ "files": 0,
87
+ "functions": 0,
88
+ "classes": 0,
89
+ "edges": 0,
90
+ "skipped": 0,
91
+ }
7792
7893
for source_file in self._iter_source_files(repo_path):
7994
language = LANGUAGE_MAP.get(source_file.suffix)
8095
if not language:
8196
continue
97
+
98
+ rel_path = str(source_file.relative_to(repo_path))
99
+ content_hash = _file_hash(source_file)
100
+
101
+ if incremental and self._file_unchanged(rel_path, content_hash):
102
+ stats["skipped"] += 1
103
+ continue
104
+
105
+ if incremental:
106
+ self._clear_file_subgraph(rel_path)
107
+
82108
try:
83109
parser = self._get_parser(language)
84110
file_stats = parser.parse_file(source_file, repo_path, self.store)
85111
stats["files"] += 1
86112
stats["functions"] += file_stats.get("functions", 0)
87113
stats["classes"] += file_stats.get("classes", 0)
88114
stats["edges"] += file_stats.get("edges", 0)
115
+
116
+ self._store_file_hash(rel_path, content_hash)
89117
except Exception:
90118
logger.exception("Failed to parse %s", source_file)
91119
92120
logger.info(
93
- "Ingested %s: %d files, %d functions, %d classes",
121
+ "Ingested %s: %d files, %d functions, %d classes, %d skipped",
94122
repo_path.name,
95123
stats["files"],
96124
stats["functions"],
97125
stats["classes"],
126
+ stats["skipped"],
98127
)
99128
return stats
129
+
130
+ def watch(
131
+ self,
132
+ repo_path: str | Path,
133
+ interval: float = 2.0,
134
+ callback=None,
135
+ ) -> None:
136
+ """
137
+ Watch a repo for changes and re-ingest incrementally.
138
+
139
+ Args:
140
+ repo_path: Path to the repository root.
141
+ interval: Seconds between polls.
142
+ callback: Optional callable receiving stats dict after each cycle.
143
+ If callback returns False, the watch loop stops.
144
+ """
145
+ repo_path = Path(repo_path).resolve()
146
+ if not repo_path.exists():
147
+ raise FileNotFoundError(f"Repository not found: {repo_path}")
148
+
149
+ # Initial full ingest
150
+ stats = self.ingest(repo_path, incremental=True)
151
+ if callback and callback(stats) is False:
152
+ return
153
+
154
+ while True:
155
+ time.sleep(interval)
156
+ stats = self.ingest(repo_path, incremental=True)
157
+ if callback and callback(stats) is False:
158
+ return
159
+
160
+ def _file_unchanged(self, rel_path: str, content_hash: str) -> bool:
161
+ result = self.store.query(queries.FILE_HASH, {"path": rel_path})
162
+ rows = result.result_set or []
163
+ if not rows or rows[0][0] is None:
164
+ return False
165
+ return rows[0][0] == content_hash
166
+
167
+ def _clear_file_subgraph(self, rel_path: str) -> None:
168
+ self.store.query(queries.DELETE_FILE_SUBGRAPH, {"path": rel_path})
169
+
170
+ def _store_file_hash(self, rel_path: str, content_hash: str) -> None:
171
+ self.store.query(
172
+ "MATCH (f:File {path: $path}) SET f.content_hash = $hash",
173
+ {"path": rel_path, "hash": content_hash},
174
+ )
100175
101176
def _iter_source_files(self, repo_path: Path):
102177
skip_dirs = {
103178
".git",
104179
".venv",
@@ -141,11 +216,18 @@
141216
self._parsers[language] = JavaParser()
142217
else:
143218
raise ValueError(f"Unsupported language: {language}")
144219
return self._parsers[language]
145220
221
+
222
+def _file_hash(path: Path) -> str:
223
+ """SHA-256 content hash for a file."""
224
+ h = hashlib.sha256()
225
+ h.update(path.read_bytes())
226
+ return h.hexdigest()
227
+
146228
147229
class LanguageParser:
148230
"""Base class for language-specific AST parsers."""
149231
150232
def parse_file(self, path: Path, repo_root: Path, store: GraphStore) -> dict[str, int]:
151233
raise NotImplementedError
152234
--- navegador/ingestion/parser.py
+++ navegador/ingestion/parser.py
@@ -9,13 +9,16 @@
9 Go .go
10 Rust .rs
11 Java .java
12 """
13
 
14 import logging
 
15 from pathlib import Path
16
 
17 from navegador.graph.schema import NodeLabel
18 from navegador.graph.store import GraphStore
19
20 logger = logging.getLogger(__name__)
21
@@ -44,20 +47,26 @@
44
45 def __init__(self, store: GraphStore) -> None:
46 self.store = store
47 self._parsers: dict[str, "LanguageParser"] = {}
48
49 def ingest(self, repo_path: str | Path, clear: bool = False) -> dict[str, int]:
 
 
 
 
 
50 """
51 Ingest a repository into the graph.
52
53 Args:
54 repo_path: Path to the repository root.
55 clear: If True, wipe the graph before ingesting.
 
56
57 Returns:
58 Dict with counts: files, functions, classes, edges.
59 """
60 repo_path = Path(repo_path).resolve()
61 if not repo_path.exists():
62 raise FileNotFoundError(f"Repository not found: {repo_path}")
63
@@ -71,34 +80,100 @@
71 "name": repo_path.name,
72 "path": str(repo_path),
73 },
74 )
75
76 stats: dict[str, int] = {"files": 0, "functions": 0, "classes": 0, "edges": 0}
 
 
 
 
 
 
77
78 for source_file in self._iter_source_files(repo_path):
79 language = LANGUAGE_MAP.get(source_file.suffix)
80 if not language:
81 continue
 
 
 
 
 
 
 
 
 
 
 
82 try:
83 parser = self._get_parser(language)
84 file_stats = parser.parse_file(source_file, repo_path, self.store)
85 stats["files"] += 1
86 stats["functions"] += file_stats.get("functions", 0)
87 stats["classes"] += file_stats.get("classes", 0)
88 stats["edges"] += file_stats.get("edges", 0)
 
 
89 except Exception:
90 logger.exception("Failed to parse %s", source_file)
91
92 logger.info(
93 "Ingested %s: %d files, %d functions, %d classes",
94 repo_path.name,
95 stats["files"],
96 stats["functions"],
97 stats["classes"],
 
98 )
99 return stats
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
101 def _iter_source_files(self, repo_path: Path):
102 skip_dirs = {
103 ".git",
104 ".venv",
@@ -141,11 +216,18 @@
141 self._parsers[language] = JavaParser()
142 else:
143 raise ValueError(f"Unsupported language: {language}")
144 return self._parsers[language]
145
 
 
 
 
 
 
 
146
147 class LanguageParser:
148 """Base class for language-specific AST parsers."""
149
150 def parse_file(self, path: Path, repo_root: Path, store: GraphStore) -> dict[str, int]:
151 raise NotImplementedError
152
--- navegador/ingestion/parser.py
+++ navegador/ingestion/parser.py
@@ -9,13 +9,16 @@
9 Go .go
10 Rust .rs
11 Java .java
12 """
13
14 import hashlib
15 import logging
16 import time
17 from pathlib import Path
18
19 from navegador.graph import queries
20 from navegador.graph.schema import NodeLabel
21 from navegador.graph.store import GraphStore
22
23 logger = logging.getLogger(__name__)
24
@@ -44,20 +47,26 @@
47
48 def __init__(self, store: GraphStore) -> None:
49 self.store = store
50 self._parsers: dict[str, "LanguageParser"] = {}
51
52 def ingest(
53 self,
54 repo_path: str | Path,
55 clear: bool = False,
56 incremental: bool = False,
57 ) -> dict[str, int]:
58 """
59 Ingest a repository into the graph.
60
61 Args:
62 repo_path: Path to the repository root.
63 clear: If True, wipe the graph before ingesting.
64 incremental: If True, skip files whose content hash hasn't changed.
65
66 Returns:
67 Dict with counts: files, functions, classes, edges, skipped.
68 """
69 repo_path = Path(repo_path).resolve()
70 if not repo_path.exists():
71 raise FileNotFoundError(f"Repository not found: {repo_path}")
72
@@ -71,34 +80,100 @@
80 "name": repo_path.name,
81 "path": str(repo_path),
82 },
83 )
84
85 stats: dict[str, int] = {
86 "files": 0,
87 "functions": 0,
88 "classes": 0,
89 "edges": 0,
90 "skipped": 0,
91 }
92
93 for source_file in self._iter_source_files(repo_path):
94 language = LANGUAGE_MAP.get(source_file.suffix)
95 if not language:
96 continue
97
98 rel_path = str(source_file.relative_to(repo_path))
99 content_hash = _file_hash(source_file)
100
101 if incremental and self._file_unchanged(rel_path, content_hash):
102 stats["skipped"] += 1
103 continue
104
105 if incremental:
106 self._clear_file_subgraph(rel_path)
107
108 try:
109 parser = self._get_parser(language)
110 file_stats = parser.parse_file(source_file, repo_path, self.store)
111 stats["files"] += 1
112 stats["functions"] += file_stats.get("functions", 0)
113 stats["classes"] += file_stats.get("classes", 0)
114 stats["edges"] += file_stats.get("edges", 0)
115
116 self._store_file_hash(rel_path, content_hash)
117 except Exception:
118 logger.exception("Failed to parse %s", source_file)
119
120 logger.info(
121 "Ingested %s: %d files, %d functions, %d classes, %d skipped",
122 repo_path.name,
123 stats["files"],
124 stats["functions"],
125 stats["classes"],
126 stats["skipped"],
127 )
128 return stats
129
130 def watch(
131 self,
132 repo_path: str | Path,
133 interval: float = 2.0,
134 callback=None,
135 ) -> None:
136 """
137 Watch a repo for changes and re-ingest incrementally.
138
139 Args:
140 repo_path: Path to the repository root.
141 interval: Seconds between polls.
142 callback: Optional callable receiving stats dict after each cycle.
143 If callback returns False, the watch loop stops.
144 """
145 repo_path = Path(repo_path).resolve()
146 if not repo_path.exists():
147 raise FileNotFoundError(f"Repository not found: {repo_path}")
148
149 # Initial full ingest
150 stats = self.ingest(repo_path, incremental=True)
151 if callback and callback(stats) is False:
152 return
153
154 while True:
155 time.sleep(interval)
156 stats = self.ingest(repo_path, incremental=True)
157 if callback and callback(stats) is False:
158 return
159
160 def _file_unchanged(self, rel_path: str, content_hash: str) -> bool:
161 result = self.store.query(queries.FILE_HASH, {"path": rel_path})
162 rows = result.result_set or []
163 if not rows or rows[0][0] is None:
164 return False
165 return rows[0][0] == content_hash
166
167 def _clear_file_subgraph(self, rel_path: str) -> None:
168 self.store.query(queries.DELETE_FILE_SUBGRAPH, {"path": rel_path})
169
170 def _store_file_hash(self, rel_path: str, content_hash: str) -> None:
171 self.store.query(
172 "MATCH (f:File {path: $path}) SET f.content_hash = $hash",
173 {"path": rel_path, "hash": content_hash},
174 )
175
176 def _iter_source_files(self, repo_path: Path):
177 skip_dirs = {
178 ".git",
179 ".venv",
@@ -141,11 +216,18 @@
216 self._parsers[language] = JavaParser()
217 else:
218 raise ValueError(f"Unsupported language: {language}")
219 return self._parsers[language]
220
221
222 def _file_hash(path: Path) -> str:
223 """SHA-256 content hash for a file."""
224 h = hashlib.sha256()
225 h.update(path.read_bytes())
226 return h.hexdigest()
227
228
229 class LanguageParser:
230 """Base class for language-specific AST parsers."""
231
232 def parse_file(self, path: Path, repo_root: Path, store: GraphStore) -> dict[str, int]:
233 raise NotImplementedError
234
--- tests/test_cli.py
+++ tests/test_cli.py
@@ -73,10 +73,48 @@
7373
MockRI.return_value.ingest.return_value = {"files": 5}
7474
result = runner.invoke(main, ["ingest", "src", "--json"])
7575
assert result.exit_code == 0
7676
data = json.loads(result.output)
7777
assert data["files"] == 5
78
+
79
+ def test_incremental_flag_passes_through(self):
80
+ runner = CliRunner()
81
+ with runner.isolated_filesystem():
82
+ Path("src").mkdir()
83
+ with patch("navegador.cli.commands._get_store", return_value=_mock_store()), \
84
+ patch("navegador.ingestion.RepoIngester") as MockRI:
85
+ MockRI.return_value.ingest.return_value = {
86
+ "files": 2, "functions": 5, "classes": 1, "edges": 3, "skipped": 8
87
+ }
88
+ result = runner.invoke(main, ["ingest", "src", "--incremental"])
89
+ assert result.exit_code == 0
90
+ MockRI.return_value.ingest.assert_called_once()
91
+ _, kwargs = MockRI.return_value.ingest.call_args
92
+ assert kwargs["incremental"] is True
93
+
94
+ def test_watch_flag_calls_watch(self):
95
+ runner = CliRunner()
96
+ with runner.isolated_filesystem():
97
+ Path("src").mkdir()
98
+ with patch("navegador.cli.commands._get_store", return_value=_mock_store()), \
99
+ patch("navegador.ingestion.RepoIngester") as MockRI:
100
+ # watch should be called, simulate immediate stop
101
+ MockRI.return_value.watch.side_effect = KeyboardInterrupt()
102
+ result = runner.invoke(main, ["ingest", "src", "--watch", "--interval", "0.1"])
103
+ assert result.exit_code == 0
104
+ MockRI.return_value.watch.assert_called_once()
105
+
106
+ def test_watch_with_interval(self):
107
+ runner = CliRunner()
108
+ with runner.isolated_filesystem():
109
+ Path("src").mkdir()
110
+ with patch("navegador.cli.commands._get_store", return_value=_mock_store()), \
111
+ patch("navegador.ingestion.RepoIngester") as MockRI:
112
+ MockRI.return_value.watch.side_effect = KeyboardInterrupt()
113
+ runner.invoke(main, ["ingest", "src", "--watch", "--interval", "5.0"])
114
+ _, kwargs = MockRI.return_value.watch.call_args
115
+ assert kwargs["interval"] == 5.0
78116
79117
80118
# ── context ───────────────────────────────────────────────────────────────────
81119
82120
class TestContextCommand:
83121
--- tests/test_cli.py
+++ tests/test_cli.py
@@ -73,10 +73,48 @@
73 MockRI.return_value.ingest.return_value = {"files": 5}
74 result = runner.invoke(main, ["ingest", "src", "--json"])
75 assert result.exit_code == 0
76 data = json.loads(result.output)
77 assert data["files"] == 5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
79
80 # ── context ───────────────────────────────────────────────────────────────────
81
82 class TestContextCommand:
83
--- tests/test_cli.py
+++ tests/test_cli.py
@@ -73,10 +73,48 @@
73 MockRI.return_value.ingest.return_value = {"files": 5}
74 result = runner.invoke(main, ["ingest", "src", "--json"])
75 assert result.exit_code == 0
76 data = json.loads(result.output)
77 assert data["files"] == 5
78
79 def test_incremental_flag_passes_through(self):
80 runner = CliRunner()
81 with runner.isolated_filesystem():
82 Path("src").mkdir()
83 with patch("navegador.cli.commands._get_store", return_value=_mock_store()), \
84 patch("navegador.ingestion.RepoIngester") as MockRI:
85 MockRI.return_value.ingest.return_value = {
86 "files": 2, "functions": 5, "classes": 1, "edges": 3, "skipped": 8
87 }
88 result = runner.invoke(main, ["ingest", "src", "--incremental"])
89 assert result.exit_code == 0
90 MockRI.return_value.ingest.assert_called_once()
91 _, kwargs = MockRI.return_value.ingest.call_args
92 assert kwargs["incremental"] is True
93
94 def test_watch_flag_calls_watch(self):
95 runner = CliRunner()
96 with runner.isolated_filesystem():
97 Path("src").mkdir()
98 with patch("navegador.cli.commands._get_store", return_value=_mock_store()), \
99 patch("navegador.ingestion.RepoIngester") as MockRI:
100 # watch should be called, simulate immediate stop
101 MockRI.return_value.watch.side_effect = KeyboardInterrupt()
102 result = runner.invoke(main, ["ingest", "src", "--watch", "--interval", "0.1"])
103 assert result.exit_code == 0
104 MockRI.return_value.watch.assert_called_once()
105
106 def test_watch_with_interval(self):
107 runner = CliRunner()
108 with runner.isolated_filesystem():
109 Path("src").mkdir()
110 with patch("navegador.cli.commands._get_store", return_value=_mock_store()), \
111 patch("navegador.ingestion.RepoIngester") as MockRI:
112 MockRI.return_value.watch.side_effect = KeyboardInterrupt()
113 runner.invoke(main, ["ingest", "src", "--watch", "--interval", "5.0"])
114 _, kwargs = MockRI.return_value.watch.call_args
115 assert kwargs["interval"] == 5.0
116
117
118 # ── context ───────────────────────────────────────────────────────────────────
119
120 class TestContextCommand:
121
--- tests/test_ingestion_code.py
+++ tests/test_ingestion_code.py
@@ -345,10 +345,158 @@
345345
stats = ingester.ingest(tmpdir)
346346
assert stats["files"] == 0
347347
348348
349349
# ── LanguageParser base class ─────────────────────────────────────────────────
350
+
351
+# ── Incremental ingestion ─────────────────────────────────────────────────────
352
+
353
+class TestIncrementalIngestion:
354
+ def test_incremental_returns_skipped_count(self):
355
+ store = _make_store()
356
+ ingester = RepoIngester(store)
357
+ with tempfile.TemporaryDirectory() as tmpdir:
358
+ stats = ingester.ingest(tmpdir, incremental=True)
359
+ assert "skipped" in stats
360
+
361
+ def test_incremental_skips_unchanged_file(self):
362
+ store = _make_store()
363
+ ingester = RepoIngester(store)
364
+ mock_parser = MagicMock()
365
+ mock_parser.parse_file.return_value = {"functions": 1, "classes": 0, "edges": 0}
366
+ ingester._parsers["python"] = mock_parser
367
+
368
+ with tempfile.TemporaryDirectory() as tmpdir:
369
+ py_file = Path(tmpdir) / "app.py"
370
+ py_file.write_text("def foo(): pass")
371
+
372
+ # First ingest: file is new, should be parsed
373
+ stats1 = ingester.ingest(tmpdir, incremental=True)
374
+ assert stats1["files"] == 1
375
+ assert stats1["skipped"] == 0
376
+
377
+ # Simulate stored hash matching
378
+ from navegador.ingestion.parser import _file_hash
379
+ current_hash = _file_hash(py_file)
380
+ rel_path = "app.py"
381
+
382
+ # Mock _file_unchanged to return True
383
+ ingester._file_unchanged = MagicMock(return_value=True)
384
+ stats2 = ingester.ingest(tmpdir, incremental=True)
385
+ assert stats2["files"] == 0
386
+ assert stats2["skipped"] == 1
387
+
388
+ def test_incremental_reparses_changed_file(self):
389
+ store = _make_store()
390
+ ingester = RepoIngester(store)
391
+ mock_parser = MagicMock()
392
+ mock_parser.parse_file.return_value = {"functions": 1, "classes": 0, "edges": 0}
393
+ ingester._parsers["python"] = mock_parser
394
+
395
+ with tempfile.TemporaryDirectory() as tmpdir:
396
+ py_file = Path(tmpdir) / "app.py"
397
+ py_file.write_text("def foo(): pass")
398
+
399
+ ingester._file_unchanged = MagicMock(return_value=False)
400
+ ingester._clear_file_subgraph = MagicMock()
401
+ stats = ingester.ingest(tmpdir, incremental=True)
402
+ assert stats["files"] == 1
403
+ ingester._clear_file_subgraph.assert_called_once()
404
+
405
+ def test_non_incremental_does_not_check_hash(self):
406
+ store = _make_store()
407
+ ingester = RepoIngester(store)
408
+ mock_parser = MagicMock()
409
+ mock_parser.parse_file.return_value = {"functions": 1, "classes": 0, "edges": 0}
410
+ ingester._parsers["python"] = mock_parser
411
+
412
+ with tempfile.TemporaryDirectory() as tmpdir:
413
+ (Path(tmpdir) / "app.py").write_text("def foo(): pass")
414
+ ingester._file_unchanged = MagicMock()
415
+ ingester.ingest(tmpdir, incremental=False)
416
+ ingester._file_unchanged.assert_not_called()
417
+
418
+ def test_file_hash_is_deterministic(self):
419
+ from navegador.ingestion.parser import _file_hash
420
+ with tempfile.TemporaryDirectory() as tmpdir:
421
+ f = Path(tmpdir) / "test.py"
422
+ f.write_text("x = 1")
423
+ h1 = _file_hash(f)
424
+ h2 = _file_hash(f)
425
+ assert h1 == h2
426
+ assert len(h1) == 64 # SHA-256 hex
427
+
428
+ def test_file_hash_changes_on_content_change(self):
429
+ from navegador.ingestion.parser import _file_hash
430
+ with tempfile.TemporaryDirectory() as tmpdir:
431
+ f = Path(tmpdir) / "test.py"
432
+ f.write_text("x = 1")
433
+ h1 = _file_hash(f)
434
+ f.write_text("x = 2")
435
+ h2 = _file_hash(f)
436
+ assert h1 != h2
437
+
438
+
439
+class TestFileUnchanged:
440
+ def test_returns_false_for_new_file(self):
441
+ store = _make_store()
442
+ store.query.return_value = MagicMock(result_set=[])
443
+ ingester = RepoIngester(store)
444
+ assert ingester._file_unchanged("app.py", "abc123") is False
445
+
446
+ def test_returns_false_for_null_hash(self):
447
+ store = _make_store()
448
+ store.query.return_value = MagicMock(result_set=[[None]])
449
+ ingester = RepoIngester(store)
450
+ assert ingester._file_unchanged("app.py", "abc123") is False
451
+
452
+ def test_returns_true_when_hash_matches(self):
453
+ store = _make_store()
454
+ store.query.return_value = MagicMock(result_set=[["abc123"]])
455
+ ingester = RepoIngester(store)
456
+ assert ingester._file_unchanged("app.py", "abc123") is True
457
+
458
+ def test_returns_false_when_hash_differs(self):
459
+ store = _make_store()
460
+ store.query.return_value = MagicMock(result_set=[["old_hash"]])
461
+ ingester = RepoIngester(store)
462
+ assert ingester._file_unchanged("app.py", "new_hash") is False
463
+
464
+
465
+class TestWatch:
466
+ def test_watch_raises_on_missing_dir(self):
467
+ store = _make_store()
468
+ ingester = RepoIngester(store)
469
+ with pytest.raises(FileNotFoundError):
470
+ ingester.watch("/nonexistent/repo")
471
+
472
+ def test_watch_calls_callback_and_stops_on_false(self):
473
+ store = _make_store()
474
+ ingester = RepoIngester(store)
475
+ call_count = [0]
476
+
477
+ def callback(stats):
478
+ call_count[0] += 1
479
+ return False # stop immediately
480
+
481
+ with tempfile.TemporaryDirectory() as tmpdir:
482
+ ingester.watch(tmpdir, interval=0.01, callback=callback)
483
+ assert call_count[0] == 1
484
+
485
+ def test_watch_runs_multiple_cycles(self):
486
+ store = _make_store()
487
+ ingester = RepoIngester(store)
488
+ call_count = [0]
489
+
490
+ def callback(stats):
491
+ call_count[0] += 1
492
+ return call_count[0] < 3 # run 3 times then stop
493
+
494
+ with tempfile.TemporaryDirectory() as tmpdir:
495
+ ingester.watch(tmpdir, interval=0.01, callback=callback)
496
+ assert call_count[0] == 3
497
+
350 498
351 499
class TestLanguageParserBase:
352 500
def test_parse_file_raises_not_implemented(self):
353 501
from pathlib import Path
354 502
355 503
--- tests/test_ingestion_code.py
+++ tests/test_ingestion_code.py
@@ -345,10 +345,158 @@
345 stats = ingester.ingest(tmpdir)
346 assert stats["files"] == 0
347
348
349 # ── LanguageParser base class ─────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
350
351 class TestLanguageParserBase:
352 def test_parse_file_raises_not_implemented(self):
353 from pathlib import Path
354
355
--- tests/test_ingestion_code.py
+++ tests/test_ingestion_code.py
@@ -345,10 +345,158 @@
345 stats = ingester.ingest(tmpdir)
346 assert stats["files"] == 0
347
348
349 # ── LanguageParser base class ─────────────────────────────────────────────────
350
351 # ── Incremental ingestion ─────────────────────────────────────────────────────
352
353 class TestIncrementalIngestion:
354 def test_incremental_returns_skipped_count(self):
355 store = _make_store()
356 ingester = RepoIngester(store)
357 with tempfile.TemporaryDirectory() as tmpdir:
358 stats = ingester.ingest(tmpdir, incremental=True)
359 assert "skipped" in stats
360
361 def test_incremental_skips_unchanged_file(self):
362 store = _make_store()
363 ingester = RepoIngester(store)
364 mock_parser = MagicMock()
365 mock_parser.parse_file.return_value = {"functions": 1, "classes": 0, "edges": 0}
366 ingester._parsers["python"] = mock_parser
367
368 with tempfile.TemporaryDirectory() as tmpdir:
369 py_file = Path(tmpdir) / "app.py"
370 py_file.write_text("def foo(): pass")
371
372 # First ingest: file is new, should be parsed
373 stats1 = ingester.ingest(tmpdir, incremental=True)
374 assert stats1["files"] == 1
375 assert stats1["skipped"] == 0
376
377 # Simulate stored hash matching
378 from navegador.ingestion.parser import _file_hash
379 current_hash = _file_hash(py_file)
380 rel_path = "app.py"
381
382 # Mock _file_unchanged to return True
383 ingester._file_unchanged = MagicMock(return_value=True)
384 stats2 = ingester.ingest(tmpdir, incremental=True)
385 assert stats2["files"] == 0
386 assert stats2["skipped"] == 1
387
388 def test_incremental_reparses_changed_file(self):
389 store = _make_store()
390 ingester = RepoIngester(store)
391 mock_parser = MagicMock()
392 mock_parser.parse_file.return_value = {"functions": 1, "classes": 0, "edges": 0}
393 ingester._parsers["python"] = mock_parser
394
395 with tempfile.TemporaryDirectory() as tmpdir:
396 py_file = Path(tmpdir) / "app.py"
397 py_file.write_text("def foo(): pass")
398
399 ingester._file_unchanged = MagicMock(return_value=False)
400 ingester._clear_file_subgraph = MagicMock()
401 stats = ingester.ingest(tmpdir, incremental=True)
402 assert stats["files"] == 1
403 ingester._clear_file_subgraph.assert_called_once()
404
405 def test_non_incremental_does_not_check_hash(self):
406 store = _make_store()
407 ingester = RepoIngester(store)
408 mock_parser = MagicMock()
409 mock_parser.parse_file.return_value = {"functions": 1, "classes": 0, "edges": 0}
410 ingester._parsers["python"] = mock_parser
411
412 with tempfile.TemporaryDirectory() as tmpdir:
413 (Path(tmpdir) / "app.py").write_text("def foo(): pass")
414 ingester._file_unchanged = MagicMock()
415 ingester.ingest(tmpdir, incremental=False)
416 ingester._file_unchanged.assert_not_called()
417
418 def test_file_hash_is_deterministic(self):
419 from navegador.ingestion.parser import _file_hash
420 with tempfile.TemporaryDirectory() as tmpdir:
421 f = Path(tmpdir) / "test.py"
422 f.write_text("x = 1")
423 h1 = _file_hash(f)
424 h2 = _file_hash(f)
425 assert h1 == h2
426 assert len(h1) == 64 # SHA-256 hex
427
428 def test_file_hash_changes_on_content_change(self):
429 from navegador.ingestion.parser import _file_hash
430 with tempfile.TemporaryDirectory() as tmpdir:
431 f = Path(tmpdir) / "test.py"
432 f.write_text("x = 1")
433 h1 = _file_hash(f)
434 f.write_text("x = 2")
435 h2 = _file_hash(f)
436 assert h1 != h2
437
438
439 class TestFileUnchanged:
440 def test_returns_false_for_new_file(self):
441 store = _make_store()
442 store.query.return_value = MagicMock(result_set=[])
443 ingester = RepoIngester(store)
444 assert ingester._file_unchanged("app.py", "abc123") is False
445
446 def test_returns_false_for_null_hash(self):
447 store = _make_store()
448 store.query.return_value = MagicMock(result_set=[[None]])
449 ingester = RepoIngester(store)
450 assert ingester._file_unchanged("app.py", "abc123") is False
451
452 def test_returns_true_when_hash_matches(self):
453 store = _make_store()
454 store.query.return_value = MagicMock(result_set=[["abc123"]])
455 ingester = RepoIngester(store)
456 assert ingester._file_unchanged("app.py", "abc123") is True
457
458 def test_returns_false_when_hash_differs(self):
459 store = _make_store()
460 store.query.return_value = MagicMock(result_set=[["old_hash"]])
461 ingester = RepoIngester(store)
462 assert ingester._file_unchanged("app.py", "new_hash") is False
463
464
465 class TestWatch:
466 def test_watch_raises_on_missing_dir(self):
467 store = _make_store()
468 ingester = RepoIngester(store)
469 with pytest.raises(FileNotFoundError):
470 ingester.watch("/nonexistent/repo")
471
472 def test_watch_calls_callback_and_stops_on_false(self):
473 store = _make_store()
474 ingester = RepoIngester(store)
475 call_count = [0]
476
477 def callback(stats):
478 call_count[0] += 1
479 return False # stop immediately
480
481 with tempfile.TemporaryDirectory() as tmpdir:
482 ingester.watch(tmpdir, interval=0.01, callback=callback)
483 assert call_count[0] == 1
484
485 def test_watch_runs_multiple_cycles(self):
486 store = _make_store()
487 ingester = RepoIngester(store)
488 call_count = [0]
489
490 def callback(stats):
491 call_count[0] += 1
492 return call_count[0] < 3 # run 3 times then stop
493
494 with tempfile.TemporaryDirectory() as tmpdir:
495 ingester.watch(tmpdir, interval=0.01, callback=callback)
496 assert call_count[0] == 3
497
498
499 class TestLanguageParserBase:
500 def test_parse_file_raises_not_implemented(self):
501 from pathlib import Path
502
503

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button