Navegador
feat: incremental ingestion with content hashing and watch mode

Files are tracked by SHA-256 content hash on File nodes. With
--incremental, unchanged files are skipped and changed files have their
subgraph cleared before re-parsing. --watch enables polling-based
continuous re-ingestion.

Closes #21
Commit
df3d6fa8b5d1133a8671472c424cd4dac096f1802d4fe00ae597017db4cfe302
Parent
ece88cab8dc2f26…
6 files changed
+25
-3
+12
+1
-1
+86
-4
+38
+148
+25
-3
| --- navegador/cli/commands.py | ||
| +++ navegador/cli/commands.py | ||
| @@ -104,24 +104,46 @@ | ||
| 104 | 104 | |
| 105 | 105 | @main.command() |
| 106 | 106 | @click.argument("repo_path", type=click.Path(exists=True)) |
| 107 | 107 | @DB_OPTION |
| 108 | 108 | @click.option("--clear", is_flag=True, help="Clear existing graph before ingesting.") |
| 109 | +@click.option("--incremental", is_flag=True, help="Only re-parse changed files.") | |
| 110 | +@click.option("--watch", is_flag=True, help="Watch for changes and re-ingest incrementally.") | |
| 111 | +@click.option("--interval", default=2.0, show_default=True, help="Watch poll interval (seconds).") | |
| 109 | 112 | @click.option("--json", "as_json", is_flag=True, help="Output stats as JSON.") |
| 110 | -def ingest(repo_path: str, db: str, clear: bool, as_json: bool): | |
| 113 | +def ingest(repo_path: str, db: str, clear: bool, incremental: bool, watch: bool, | |
| 114 | + interval: float, as_json: bool): | |
| 111 | 115 | """Ingest a repository's code into the graph (AST + call graph).""" |
| 112 | 116 | from navegador.ingestion import RepoIngester |
| 113 | 117 | |
| 114 | 118 | store = _get_store(db) |
| 115 | 119 | ingester = RepoIngester(store) |
| 116 | 120 | |
| 121 | + if watch: | |
| 122 | + console.print(f"[bold]Watching[/bold] {repo_path} (interval={interval}s, Ctrl-C to stop)") | |
| 123 | + | |
| 124 | + def _on_cycle(stats): | |
| 125 | + changed = stats["files"] | |
| 126 | + skipped = stats["skipped"] | |
| 127 | + if changed: | |
| 128 | + console.print( | |
| 129 | + f" [green]{changed} changed[/green], {skipped} unchanged" | |
| 130 | + ) | |
| 131 | + return True # keep watching | |
| 132 | + | |
| 133 | + try: | |
| 134 | + ingester.watch(repo_path, interval=interval, callback=_on_cycle) | |
| 135 | + except KeyboardInterrupt: | |
| 136 | + console.print("\n[yellow]Watch stopped.[/yellow]") | |
| 137 | + return | |
| 138 | + | |
| 117 | 139 | if as_json: |
| 118 | - stats = ingester.ingest(repo_path, clear=clear) | |
| 140 | + stats = ingester.ingest(repo_path, clear=clear, incremental=incremental) | |
| 119 | 141 | click.echo(json.dumps(stats, indent=2)) |
| 120 | 142 | else: |
| 121 | 143 | with console.status(f"[bold]Ingesting[/bold] {repo_path}..."): |
| 122 | - stats = ingester.ingest(repo_path, clear=clear) | |
| 144 | + stats = ingester.ingest(repo_path, clear=clear, incremental=incremental) | |
| 123 | 145 | table = Table(title="Ingestion complete") |
| 124 | 146 | table.add_column("Metric", style="cyan") |
| 125 | 147 | table.add_column("Count", justify="right", style="green") |
| 126 | 148 | for k, v in stats.items(): |
| 127 | 149 | table.add_row(k.capitalize(), str(v)) |
| 128 | 150 |
| --- navegador/cli/commands.py | |
| +++ navegador/cli/commands.py | |
| @@ -104,24 +104,46 @@ | |
| 104 | |
| 105 | @main.command() |
| 106 | @click.argument("repo_path", type=click.Path(exists=True)) |
| 107 | @DB_OPTION |
| 108 | @click.option("--clear", is_flag=True, help="Clear existing graph before ingesting.") |
| 109 | @click.option("--json", "as_json", is_flag=True, help="Output stats as JSON.") |
| 110 | def ingest(repo_path: str, db: str, clear: bool, as_json: bool): |
| 111 | """Ingest a repository's code into the graph (AST + call graph).""" |
| 112 | from navegador.ingestion import RepoIngester |
| 113 | |
| 114 | store = _get_store(db) |
| 115 | ingester = RepoIngester(store) |
| 116 | |
| 117 | if as_json: |
| 118 | stats = ingester.ingest(repo_path, clear=clear) |
| 119 | click.echo(json.dumps(stats, indent=2)) |
| 120 | else: |
| 121 | with console.status(f"[bold]Ingesting[/bold] {repo_path}..."): |
| 122 | stats = ingester.ingest(repo_path, clear=clear) |
| 123 | table = Table(title="Ingestion complete") |
| 124 | table.add_column("Metric", style="cyan") |
| 125 | table.add_column("Count", justify="right", style="green") |
| 126 | for k, v in stats.items(): |
| 127 | table.add_row(k.capitalize(), str(v)) |
| 128 |
| --- navegador/cli/commands.py | |
| +++ navegador/cli/commands.py | |
| @@ -104,24 +104,46 @@ | |
| 104 | |
| 105 | @main.command() |
| 106 | @click.argument("repo_path", type=click.Path(exists=True)) |
| 107 | @DB_OPTION |
| 108 | @click.option("--clear", is_flag=True, help="Clear existing graph before ingesting.") |
| 109 | @click.option("--incremental", is_flag=True, help="Only re-parse changed files.") |
| 110 | @click.option("--watch", is_flag=True, help="Watch for changes and re-ingest incrementally.") |
| 111 | @click.option("--interval", default=2.0, show_default=True, help="Watch poll interval (seconds).") |
| 112 | @click.option("--json", "as_json", is_flag=True, help="Output stats as JSON.") |
| 113 | def ingest(repo_path: str, db: str, clear: bool, incremental: bool, watch: bool, |
| 114 | interval: float, as_json: bool): |
| 115 | """Ingest a repository's code into the graph (AST + call graph).""" |
| 116 | from navegador.ingestion import RepoIngester |
| 117 | |
| 118 | store = _get_store(db) |
| 119 | ingester = RepoIngester(store) |
| 120 | |
| 121 | if watch: |
| 122 | console.print(f"[bold]Watching[/bold] {repo_path} (interval={interval}s, Ctrl-C to stop)") |
| 123 | |
| 124 | def _on_cycle(stats): |
| 125 | changed = stats["files"] |
| 126 | skipped = stats["skipped"] |
| 127 | if changed: |
| 128 | console.print( |
| 129 | f" [green]{changed} changed[/green], {skipped} unchanged" |
| 130 | ) |
| 131 | return True # keep watching |
| 132 | |
| 133 | try: |
| 134 | ingester.watch(repo_path, interval=interval, callback=_on_cycle) |
| 135 | except KeyboardInterrupt: |
| 136 | console.print("\n[yellow]Watch stopped.[/yellow]") |
| 137 | return |
| 138 | |
| 139 | if as_json: |
| 140 | stats = ingester.ingest(repo_path, clear=clear, incremental=incremental) |
| 141 | click.echo(json.dumps(stats, indent=2)) |
| 142 | else: |
| 143 | with console.status(f"[bold]Ingesting[/bold] {repo_path}..."): |
| 144 | stats = ingester.ingest(repo_path, clear=clear, incremental=incremental) |
| 145 | table = Table(title="Ingestion complete") |
| 146 | table.add_column("Metric", style="cyan") |
| 147 | table.add_column("Count", justify="right", style="green") |
| 148 | for k, v in stats.items(): |
| 149 | table.add_row(k.capitalize(), str(v)) |
| 150 |
| --- navegador/graph/queries.py | ||
| +++ navegador/graph/queries.py | ||
| @@ -209,10 +209,22 @@ | ||
| 209 | 209 | MATCH (n)-[:ASSIGNED_TO]->(p:Person) |
| 210 | 210 | WHERE n.name = $name AND ($file_path = '' OR n.file_path = $file_path) |
| 211 | 211 | RETURN labels(n)[0] AS node_type, n.name AS node_name, |
| 212 | 212 | p.name AS owner, p.email AS email, p.role AS role, p.team AS team |
| 213 | 213 | """ |
| 214 | + | |
| 215 | +# ── Incremental ingestion ───────────────────────────────────────────────────── | |
| 216 | + | |
| 217 | +FILE_HASH = """ | |
| 218 | +MATCH (f:File {path: $path}) | |
| 219 | +RETURN f.content_hash AS hash | |
| 220 | +""" | |
| 221 | + | |
| 222 | +DELETE_FILE_SUBGRAPH = """ | |
| 223 | +MATCH (f:File {path: $path})-[:CONTAINS]->(child) | |
| 224 | +DETACH DELETE child | |
| 225 | +""" | |
| 214 | 226 | |
| 215 | 227 | # ── Stats ───────────────────────────────────────────────────────────────────── |
| 216 | 228 | |
| 217 | 229 | NODE_TYPE_COUNTS = """ |
| 218 | 230 | MATCH (n) |
| 219 | 231 |
| --- navegador/graph/queries.py | |
| +++ navegador/graph/queries.py | |
| @@ -209,10 +209,22 @@ | |
| 209 | MATCH (n)-[:ASSIGNED_TO]->(p:Person) |
| 210 | WHERE n.name = $name AND ($file_path = '' OR n.file_path = $file_path) |
| 211 | RETURN labels(n)[0] AS node_type, n.name AS node_name, |
| 212 | p.name AS owner, p.email AS email, p.role AS role, p.team AS team |
| 213 | """ |
| 214 | |
| 215 | # ── Stats ───────────────────────────────────────────────────────────────────── |
| 216 | |
| 217 | NODE_TYPE_COUNTS = """ |
| 218 | MATCH (n) |
| 219 |
| --- navegador/graph/queries.py | |
| +++ navegador/graph/queries.py | |
| @@ -209,10 +209,22 @@ | |
| 209 | MATCH (n)-[:ASSIGNED_TO]->(p:Person) |
| 210 | WHERE n.name = $name AND ($file_path = '' OR n.file_path = $file_path) |
| 211 | RETURN labels(n)[0] AS node_type, n.name AS node_name, |
| 212 | p.name AS owner, p.email AS email, p.role AS role, p.team AS team |
| 213 | """ |
| 214 | |
| 215 | # ── Incremental ingestion ───────────────────────────────────────────────────── |
| 216 | |
| 217 | FILE_HASH = """ |
| 218 | MATCH (f:File {path: $path}) |
| 219 | RETURN f.content_hash AS hash |
| 220 | """ |
| 221 | |
| 222 | DELETE_FILE_SUBGRAPH = """ |
| 223 | MATCH (f:File {path: $path})-[:CONTAINS]->(child) |
| 224 | DETACH DELETE child |
| 225 | """ |
| 226 | |
| 227 | # ── Stats ───────────────────────────────────────────────────────────────────── |
| 228 | |
| 229 | NODE_TYPE_COUNTS = """ |
| 230 | MATCH (n) |
| 231 |
+1
-1
| --- navegador/graph/schema.py | ||
| +++ navegador/graph/schema.py | ||
| @@ -60,11 +60,11 @@ | ||
| 60 | 60 | # ── Property keys per node label ────────────────────────────────────────────── |
| 61 | 61 | |
| 62 | 62 | NODE_PROPS = { |
| 63 | 63 | # Code layer |
| 64 | 64 | NodeLabel.Repository: ["name", "path", "language", "description"], |
| 65 | - NodeLabel.File: ["name", "path", "language", "size", "line_count"], | |
| 65 | + NodeLabel.File: ["name", "path", "language", "size", "line_count", "content_hash"], | |
| 66 | 66 | NodeLabel.Module: ["name", "file_path", "docstring"], |
| 67 | 67 | NodeLabel.Class: ["name", "file_path", "line_start", "line_end", "docstring", "source"], |
| 68 | 68 | NodeLabel.Function: [ |
| 69 | 69 | "name", |
| 70 | 70 | "file_path", |
| 71 | 71 |
| --- navegador/graph/schema.py | |
| +++ navegador/graph/schema.py | |
| @@ -60,11 +60,11 @@ | |
| 60 | # ── Property keys per node label ────────────────────────────────────────────── |
| 61 | |
| 62 | NODE_PROPS = { |
| 63 | # Code layer |
| 64 | NodeLabel.Repository: ["name", "path", "language", "description"], |
| 65 | NodeLabel.File: ["name", "path", "language", "size", "line_count"], |
| 66 | NodeLabel.Module: ["name", "file_path", "docstring"], |
| 67 | NodeLabel.Class: ["name", "file_path", "line_start", "line_end", "docstring", "source"], |
| 68 | NodeLabel.Function: [ |
| 69 | "name", |
| 70 | "file_path", |
| 71 |
| --- navegador/graph/schema.py | |
| +++ navegador/graph/schema.py | |
| @@ -60,11 +60,11 @@ | |
| 60 | # ── Property keys per node label ────────────────────────────────────────────── |
| 61 | |
| 62 | NODE_PROPS = { |
| 63 | # Code layer |
| 64 | NodeLabel.Repository: ["name", "path", "language", "description"], |
| 65 | NodeLabel.File: ["name", "path", "language", "size", "line_count", "content_hash"], |
| 66 | NodeLabel.Module: ["name", "file_path", "docstring"], |
| 67 | NodeLabel.Class: ["name", "file_path", "line_start", "line_end", "docstring", "source"], |
| 68 | NodeLabel.Function: [ |
| 69 | "name", |
| 70 | "file_path", |
| 71 |
+86
-4
| --- navegador/ingestion/parser.py | ||
| +++ navegador/ingestion/parser.py | ||
| @@ -9,13 +9,16 @@ | ||
| 9 | 9 | Go .go |
| 10 | 10 | Rust .rs |
| 11 | 11 | Java .java |
| 12 | 12 | """ |
| 13 | 13 | |
| 14 | +import hashlib | |
| 14 | 15 | import logging |
| 16 | +import time | |
| 15 | 17 | from pathlib import Path |
| 16 | 18 | |
| 19 | +from navegador.graph import queries | |
| 17 | 20 | from navegador.graph.schema import NodeLabel |
| 18 | 21 | from navegador.graph.store import GraphStore |
| 19 | 22 | |
| 20 | 23 | logger = logging.getLogger(__name__) |
| 21 | 24 | |
| @@ -44,20 +47,26 @@ | ||
| 44 | 47 | |
| 45 | 48 | def __init__(self, store: GraphStore) -> None: |
| 46 | 49 | self.store = store |
| 47 | 50 | self._parsers: dict[str, "LanguageParser"] = {} |
| 48 | 51 | |
| 49 | - def ingest(self, repo_path: str | Path, clear: bool = False) -> dict[str, int]: | |
| 52 | + def ingest( | |
| 53 | + self, | |
| 54 | + repo_path: str | Path, | |
| 55 | + clear: bool = False, | |
| 56 | + incremental: bool = False, | |
| 57 | + ) -> dict[str, int]: | |
| 50 | 58 | """ |
| 51 | 59 | Ingest a repository into the graph. |
| 52 | 60 | |
| 53 | 61 | Args: |
| 54 | 62 | repo_path: Path to the repository root. |
| 55 | 63 | clear: If True, wipe the graph before ingesting. |
| 64 | + incremental: If True, skip files whose content hash hasn't changed. | |
| 56 | 65 | |
| 57 | 66 | Returns: |
| 58 | - Dict with counts: files, functions, classes, edges. | |
| 67 | + Dict with counts: files, functions, classes, edges, skipped. | |
| 59 | 68 | """ |
| 60 | 69 | repo_path = Path(repo_path).resolve() |
| 61 | 70 | if not repo_path.exists(): |
| 62 | 71 | raise FileNotFoundError(f"Repository not found: {repo_path}") |
| 63 | 72 | |
| @@ -71,34 +80,100 @@ | ||
| 71 | 80 | "name": repo_path.name, |
| 72 | 81 | "path": str(repo_path), |
| 73 | 82 | }, |
| 74 | 83 | ) |
| 75 | 84 | |
| 76 | - stats: dict[str, int] = {"files": 0, "functions": 0, "classes": 0, "edges": 0} | |
| 85 | + stats: dict[str, int] = { | |
| 86 | + "files": 0, | |
| 87 | + "functions": 0, | |
| 88 | + "classes": 0, | |
| 89 | + "edges": 0, | |
| 90 | + "skipped": 0, | |
| 91 | + } | |
| 77 | 92 | |
| 78 | 93 | for source_file in self._iter_source_files(repo_path): |
| 79 | 94 | language = LANGUAGE_MAP.get(source_file.suffix) |
| 80 | 95 | if not language: |
| 81 | 96 | continue |
| 97 | + | |
| 98 | + rel_path = str(source_file.relative_to(repo_path)) | |
| 99 | + content_hash = _file_hash(source_file) | |
| 100 | + | |
| 101 | + if incremental and self._file_unchanged(rel_path, content_hash): | |
| 102 | + stats["skipped"] += 1 | |
| 103 | + continue | |
| 104 | + | |
| 105 | + if incremental: | |
| 106 | + self._clear_file_subgraph(rel_path) | |
| 107 | + | |
| 82 | 108 | try: |
| 83 | 109 | parser = self._get_parser(language) |
| 84 | 110 | file_stats = parser.parse_file(source_file, repo_path, self.store) |
| 85 | 111 | stats["files"] += 1 |
| 86 | 112 | stats["functions"] += file_stats.get("functions", 0) |
| 87 | 113 | stats["classes"] += file_stats.get("classes", 0) |
| 88 | 114 | stats["edges"] += file_stats.get("edges", 0) |
| 115 | + | |
| 116 | + self._store_file_hash(rel_path, content_hash) | |
| 89 | 117 | except Exception: |
| 90 | 118 | logger.exception("Failed to parse %s", source_file) |
| 91 | 119 | |
| 92 | 120 | logger.info( |
| 93 | - "Ingested %s: %d files, %d functions, %d classes", | |
| 121 | + "Ingested %s: %d files, %d functions, %d classes, %d skipped", | |
| 94 | 122 | repo_path.name, |
| 95 | 123 | stats["files"], |
| 96 | 124 | stats["functions"], |
| 97 | 125 | stats["classes"], |
| 126 | + stats["skipped"], | |
| 98 | 127 | ) |
| 99 | 128 | return stats |
| 129 | + | |
| 130 | + def watch( | |
| 131 | + self, | |
| 132 | + repo_path: str | Path, | |
| 133 | + interval: float = 2.0, | |
| 134 | + callback=None, | |
| 135 | + ) -> None: | |
| 136 | + """ | |
| 137 | + Watch a repo for changes and re-ingest incrementally. | |
| 138 | + | |
| 139 | + Args: | |
| 140 | + repo_path: Path to the repository root. | |
| 141 | + interval: Seconds between polls. | |
| 142 | + callback: Optional callable receiving stats dict after each cycle. | |
| 143 | + If callback returns False, the watch loop stops. | |
| 144 | + """ | |
| 145 | + repo_path = Path(repo_path).resolve() | |
| 146 | + if not repo_path.exists(): | |
| 147 | + raise FileNotFoundError(f"Repository not found: {repo_path}") | |
| 148 | + | |
| 149 | + # Initial full ingest | |
| 150 | + stats = self.ingest(repo_path, incremental=True) | |
| 151 | + if callback and callback(stats) is False: | |
| 152 | + return | |
| 153 | + | |
| 154 | + while True: | |
| 155 | + time.sleep(interval) | |
| 156 | + stats = self.ingest(repo_path, incremental=True) | |
| 157 | + if callback and callback(stats) is False: | |
| 158 | + return | |
| 159 | + | |
| 160 | + def _file_unchanged(self, rel_path: str, content_hash: str) -> bool: | |
| 161 | + result = self.store.query(queries.FILE_HASH, {"path": rel_path}) | |
| 162 | + rows = result.result_set or [] | |
| 163 | + if not rows or rows[0][0] is None: | |
| 164 | + return False | |
| 165 | + return rows[0][0] == content_hash | |
| 166 | + | |
| 167 | + def _clear_file_subgraph(self, rel_path: str) -> None: | |
| 168 | + self.store.query(queries.DELETE_FILE_SUBGRAPH, {"path": rel_path}) | |
| 169 | + | |
| 170 | + def _store_file_hash(self, rel_path: str, content_hash: str) -> None: | |
| 171 | + self.store.query( | |
| 172 | + "MATCH (f:File {path: $path}) SET f.content_hash = $hash", | |
| 173 | + {"path": rel_path, "hash": content_hash}, | |
| 174 | + ) | |
| 100 | 175 | |
| 101 | 176 | def _iter_source_files(self, repo_path: Path): |
| 102 | 177 | skip_dirs = { |
| 103 | 178 | ".git", |
| 104 | 179 | ".venv", |
| @@ -141,11 +216,18 @@ | ||
| 141 | 216 | self._parsers[language] = JavaParser() |
| 142 | 217 | else: |
| 143 | 218 | raise ValueError(f"Unsupported language: {language}") |
| 144 | 219 | return self._parsers[language] |
| 145 | 220 | |
| 221 | + | |
| 222 | +def _file_hash(path: Path) -> str: | |
| 223 | + """SHA-256 content hash for a file.""" | |
| 224 | + h = hashlib.sha256() | |
| 225 | + h.update(path.read_bytes()) | |
| 226 | + return h.hexdigest() | |
| 227 | + | |
| 146 | 228 | |
| 147 | 229 | class LanguageParser: |
| 148 | 230 | """Base class for language-specific AST parsers.""" |
| 149 | 231 | |
| 150 | 232 | def parse_file(self, path: Path, repo_root: Path, store: GraphStore) -> dict[str, int]: |
| 151 | 233 | raise NotImplementedError |
| 152 | 234 |
| --- navegador/ingestion/parser.py | |
| +++ navegador/ingestion/parser.py | |
| @@ -9,13 +9,16 @@ | |
| 9 | Go .go |
| 10 | Rust .rs |
| 11 | Java .java |
| 12 | """ |
| 13 | |
| 14 | import logging |
| 15 | from pathlib import Path |
| 16 | |
| 17 | from navegador.graph.schema import NodeLabel |
| 18 | from navegador.graph.store import GraphStore |
| 19 | |
| 20 | logger = logging.getLogger(__name__) |
| 21 | |
| @@ -44,20 +47,26 @@ | |
| 44 | |
| 45 | def __init__(self, store: GraphStore) -> None: |
| 46 | self.store = store |
| 47 | self._parsers: dict[str, "LanguageParser"] = {} |
| 48 | |
| 49 | def ingest(self, repo_path: str | Path, clear: bool = False) -> dict[str, int]: |
| 50 | """ |
| 51 | Ingest a repository into the graph. |
| 52 | |
| 53 | Args: |
| 54 | repo_path: Path to the repository root. |
| 55 | clear: If True, wipe the graph before ingesting. |
| 56 | |
| 57 | Returns: |
| 58 | Dict with counts: files, functions, classes, edges. |
| 59 | """ |
| 60 | repo_path = Path(repo_path).resolve() |
| 61 | if not repo_path.exists(): |
| 62 | raise FileNotFoundError(f"Repository not found: {repo_path}") |
| 63 | |
| @@ -71,34 +80,100 @@ | |
| 71 | "name": repo_path.name, |
| 72 | "path": str(repo_path), |
| 73 | }, |
| 74 | ) |
| 75 | |
| 76 | stats: dict[str, int] = {"files": 0, "functions": 0, "classes": 0, "edges": 0} |
| 77 | |
| 78 | for source_file in self._iter_source_files(repo_path): |
| 79 | language = LANGUAGE_MAP.get(source_file.suffix) |
| 80 | if not language: |
| 81 | continue |
| 82 | try: |
| 83 | parser = self._get_parser(language) |
| 84 | file_stats = parser.parse_file(source_file, repo_path, self.store) |
| 85 | stats["files"] += 1 |
| 86 | stats["functions"] += file_stats.get("functions", 0) |
| 87 | stats["classes"] += file_stats.get("classes", 0) |
| 88 | stats["edges"] += file_stats.get("edges", 0) |
| 89 | except Exception: |
| 90 | logger.exception("Failed to parse %s", source_file) |
| 91 | |
| 92 | logger.info( |
| 93 | "Ingested %s: %d files, %d functions, %d classes", |
| 94 | repo_path.name, |
| 95 | stats["files"], |
| 96 | stats["functions"], |
| 97 | stats["classes"], |
| 98 | ) |
| 99 | return stats |
| 100 | |
| 101 | def _iter_source_files(self, repo_path: Path): |
| 102 | skip_dirs = { |
| 103 | ".git", |
| 104 | ".venv", |
| @@ -141,11 +216,18 @@ | |
| 141 | self._parsers[language] = JavaParser() |
| 142 | else: |
| 143 | raise ValueError(f"Unsupported language: {language}") |
| 144 | return self._parsers[language] |
| 145 | |
| 146 | |
| 147 | class LanguageParser: |
| 148 | """Base class for language-specific AST parsers.""" |
| 149 | |
| 150 | def parse_file(self, path: Path, repo_root: Path, store: GraphStore) -> dict[str, int]: |
| 151 | raise NotImplementedError |
| 152 |
| --- navegador/ingestion/parser.py | |
| +++ navegador/ingestion/parser.py | |
| @@ -9,13 +9,16 @@ | |
| 9 | Go .go |
| 10 | Rust .rs |
| 11 | Java .java |
| 12 | """ |
| 13 | |
| 14 | import hashlib |
| 15 | import logging |
| 16 | import time |
| 17 | from pathlib import Path |
| 18 | |
| 19 | from navegador.graph import queries |
| 20 | from navegador.graph.schema import NodeLabel |
| 21 | from navegador.graph.store import GraphStore |
| 22 | |
| 23 | logger = logging.getLogger(__name__) |
| 24 | |
| @@ -44,20 +47,26 @@ | |
| 47 | |
| 48 | def __init__(self, store: GraphStore) -> None: |
| 49 | self.store = store |
| 50 | self._parsers: dict[str, "LanguageParser"] = {} |
| 51 | |
| 52 | def ingest( |
| 53 | self, |
| 54 | repo_path: str | Path, |
| 55 | clear: bool = False, |
| 56 | incremental: bool = False, |
| 57 | ) -> dict[str, int]: |
| 58 | """ |
| 59 | Ingest a repository into the graph. |
| 60 | |
| 61 | Args: |
| 62 | repo_path: Path to the repository root. |
| 63 | clear: If True, wipe the graph before ingesting. |
| 64 | incremental: If True, skip files whose content hash hasn't changed. |
| 65 | |
| 66 | Returns: |
| 67 | Dict with counts: files, functions, classes, edges, skipped. |
| 68 | """ |
| 69 | repo_path = Path(repo_path).resolve() |
| 70 | if not repo_path.exists(): |
| 71 | raise FileNotFoundError(f"Repository not found: {repo_path}") |
| 72 | |
| @@ -71,34 +80,100 @@ | |
| 80 | "name": repo_path.name, |
| 81 | "path": str(repo_path), |
| 82 | }, |
| 83 | ) |
| 84 | |
| 85 | stats: dict[str, int] = { |
| 86 | "files": 0, |
| 87 | "functions": 0, |
| 88 | "classes": 0, |
| 89 | "edges": 0, |
| 90 | "skipped": 0, |
| 91 | } |
| 92 | |
| 93 | for source_file in self._iter_source_files(repo_path): |
| 94 | language = LANGUAGE_MAP.get(source_file.suffix) |
| 95 | if not language: |
| 96 | continue |
| 97 | |
| 98 | rel_path = str(source_file.relative_to(repo_path)) |
| 99 | content_hash = _file_hash(source_file) |
| 100 | |
| 101 | if incremental and self._file_unchanged(rel_path, content_hash): |
| 102 | stats["skipped"] += 1 |
| 103 | continue |
| 104 | |
| 105 | if incremental: |
| 106 | self._clear_file_subgraph(rel_path) |
| 107 | |
| 108 | try: |
| 109 | parser = self._get_parser(language) |
| 110 | file_stats = parser.parse_file(source_file, repo_path, self.store) |
| 111 | stats["files"] += 1 |
| 112 | stats["functions"] += file_stats.get("functions", 0) |
| 113 | stats["classes"] += file_stats.get("classes", 0) |
| 114 | stats["edges"] += file_stats.get("edges", 0) |
| 115 | |
| 116 | self._store_file_hash(rel_path, content_hash) |
| 117 | except Exception: |
| 118 | logger.exception("Failed to parse %s", source_file) |
| 119 | |
| 120 | logger.info( |
| 121 | "Ingested %s: %d files, %d functions, %d classes, %d skipped", |
| 122 | repo_path.name, |
| 123 | stats["files"], |
| 124 | stats["functions"], |
| 125 | stats["classes"], |
| 126 | stats["skipped"], |
| 127 | ) |
| 128 | return stats |
| 129 | |
| 130 | def watch( |
| 131 | self, |
| 132 | repo_path: str | Path, |
| 133 | interval: float = 2.0, |
| 134 | callback=None, |
| 135 | ) -> None: |
| 136 | """ |
| 137 | Watch a repo for changes and re-ingest incrementally. |
| 138 | |
| 139 | Args: |
| 140 | repo_path: Path to the repository root. |
| 141 | interval: Seconds between polls. |
| 142 | callback: Optional callable receiving stats dict after each cycle. |
| 143 | If callback returns False, the watch loop stops. |
| 144 | """ |
| 145 | repo_path = Path(repo_path).resolve() |
| 146 | if not repo_path.exists(): |
| 147 | raise FileNotFoundError(f"Repository not found: {repo_path}") |
| 148 | |
| 149 | # Initial full ingest |
| 150 | stats = self.ingest(repo_path, incremental=True) |
| 151 | if callback and callback(stats) is False: |
| 152 | return |
| 153 | |
| 154 | while True: |
| 155 | time.sleep(interval) |
| 156 | stats = self.ingest(repo_path, incremental=True) |
| 157 | if callback and callback(stats) is False: |
| 158 | return |
| 159 | |
| 160 | def _file_unchanged(self, rel_path: str, content_hash: str) -> bool: |
| 161 | result = self.store.query(queries.FILE_HASH, {"path": rel_path}) |
| 162 | rows = result.result_set or [] |
| 163 | if not rows or rows[0][0] is None: |
| 164 | return False |
| 165 | return rows[0][0] == content_hash |
| 166 | |
| 167 | def _clear_file_subgraph(self, rel_path: str) -> None: |
| 168 | self.store.query(queries.DELETE_FILE_SUBGRAPH, {"path": rel_path}) |
| 169 | |
| 170 | def _store_file_hash(self, rel_path: str, content_hash: str) -> None: |
| 171 | self.store.query( |
| 172 | "MATCH (f:File {path: $path}) SET f.content_hash = $hash", |
| 173 | {"path": rel_path, "hash": content_hash}, |
| 174 | ) |
| 175 | |
| 176 | def _iter_source_files(self, repo_path: Path): |
| 177 | skip_dirs = { |
| 178 | ".git", |
| 179 | ".venv", |
| @@ -141,11 +216,18 @@ | |
| 216 | self._parsers[language] = JavaParser() |
| 217 | else: |
| 218 | raise ValueError(f"Unsupported language: {language}") |
| 219 | return self._parsers[language] |
| 220 | |
| 221 | |
| 222 | def _file_hash(path: Path) -> str: |
| 223 | """SHA-256 content hash for a file.""" |
| 224 | h = hashlib.sha256() |
| 225 | h.update(path.read_bytes()) |
| 226 | return h.hexdigest() |
| 227 | |
| 228 | |
| 229 | class LanguageParser: |
| 230 | """Base class for language-specific AST parsers.""" |
| 231 | |
| 232 | def parse_file(self, path: Path, repo_root: Path, store: GraphStore) -> dict[str, int]: |
| 233 | raise NotImplementedError |
| 234 |
+38
| --- tests/test_cli.py | ||
| +++ tests/test_cli.py | ||
| @@ -73,10 +73,48 @@ | ||
| 73 | 73 | MockRI.return_value.ingest.return_value = {"files": 5} |
| 74 | 74 | result = runner.invoke(main, ["ingest", "src", "--json"]) |
| 75 | 75 | assert result.exit_code == 0 |
| 76 | 76 | data = json.loads(result.output) |
| 77 | 77 | assert data["files"] == 5 |
| 78 | + | |
| 79 | + def test_incremental_flag_passes_through(self): | |
| 80 | + runner = CliRunner() | |
| 81 | + with runner.isolated_filesystem(): | |
| 82 | + Path("src").mkdir() | |
| 83 | + with patch("navegador.cli.commands._get_store", return_value=_mock_store()), \ | |
| 84 | + patch("navegador.ingestion.RepoIngester") as MockRI: | |
| 85 | + MockRI.return_value.ingest.return_value = { | |
| 86 | + "files": 2, "functions": 5, "classes": 1, "edges": 3, "skipped": 8 | |
| 87 | + } | |
| 88 | + result = runner.invoke(main, ["ingest", "src", "--incremental"]) | |
| 89 | + assert result.exit_code == 0 | |
| 90 | + MockRI.return_value.ingest.assert_called_once() | |
| 91 | + _, kwargs = MockRI.return_value.ingest.call_args | |
| 92 | + assert kwargs["incremental"] is True | |
| 93 | + | |
| 94 | + def test_watch_flag_calls_watch(self): | |
| 95 | + runner = CliRunner() | |
| 96 | + with runner.isolated_filesystem(): | |
| 97 | + Path("src").mkdir() | |
| 98 | + with patch("navegador.cli.commands._get_store", return_value=_mock_store()), \ | |
| 99 | + patch("navegador.ingestion.RepoIngester") as MockRI: | |
| 100 | + # watch should be called, simulate immediate stop | |
| 101 | + MockRI.return_value.watch.side_effect = KeyboardInterrupt() | |
| 102 | + result = runner.invoke(main, ["ingest", "src", "--watch", "--interval", "0.1"]) | |
| 103 | + assert result.exit_code == 0 | |
| 104 | + MockRI.return_value.watch.assert_called_once() | |
| 105 | + | |
| 106 | + def test_watch_with_interval(self): | |
| 107 | + runner = CliRunner() | |
| 108 | + with runner.isolated_filesystem(): | |
| 109 | + Path("src").mkdir() | |
| 110 | + with patch("navegador.cli.commands._get_store", return_value=_mock_store()), \ | |
| 111 | + patch("navegador.ingestion.RepoIngester") as MockRI: | |
| 112 | + MockRI.return_value.watch.side_effect = KeyboardInterrupt() | |
| 113 | + runner.invoke(main, ["ingest", "src", "--watch", "--interval", "5.0"]) | |
| 114 | + _, kwargs = MockRI.return_value.watch.call_args | |
| 115 | + assert kwargs["interval"] == 5.0 | |
| 78 | 116 | |
| 79 | 117 | |
| 80 | 118 | # ── context ─────────────────────────────────────────────────────────────────── |
| 81 | 119 | |
| 82 | 120 | class TestContextCommand: |
| 83 | 121 |
| --- tests/test_cli.py | |
| +++ tests/test_cli.py | |
| @@ -73,10 +73,48 @@ | |
| 73 | MockRI.return_value.ingest.return_value = {"files": 5} |
| 74 | result = runner.invoke(main, ["ingest", "src", "--json"]) |
| 75 | assert result.exit_code == 0 |
| 76 | data = json.loads(result.output) |
| 77 | assert data["files"] == 5 |
| 78 | |
| 79 | |
| 80 | # ── context ─────────────────────────────────────────────────────────────────── |
| 81 | |
| 82 | class TestContextCommand: |
| 83 |
| --- tests/test_cli.py | |
| +++ tests/test_cli.py | |
| @@ -73,10 +73,48 @@ | |
| 73 | MockRI.return_value.ingest.return_value = {"files": 5} |
| 74 | result = runner.invoke(main, ["ingest", "src", "--json"]) |
| 75 | assert result.exit_code == 0 |
| 76 | data = json.loads(result.output) |
| 77 | assert data["files"] == 5 |
| 78 | |
| 79 | def test_incremental_flag_passes_through(self): |
| 80 | runner = CliRunner() |
| 81 | with runner.isolated_filesystem(): |
| 82 | Path("src").mkdir() |
| 83 | with patch("navegador.cli.commands._get_store", return_value=_mock_store()), \ |
| 84 | patch("navegador.ingestion.RepoIngester") as MockRI: |
| 85 | MockRI.return_value.ingest.return_value = { |
| 86 | "files": 2, "functions": 5, "classes": 1, "edges": 3, "skipped": 8 |
| 87 | } |
| 88 | result = runner.invoke(main, ["ingest", "src", "--incremental"]) |
| 89 | assert result.exit_code == 0 |
| 90 | MockRI.return_value.ingest.assert_called_once() |
| 91 | _, kwargs = MockRI.return_value.ingest.call_args |
| 92 | assert kwargs["incremental"] is True |
| 93 | |
| 94 | def test_watch_flag_calls_watch(self): |
| 95 | runner = CliRunner() |
| 96 | with runner.isolated_filesystem(): |
| 97 | Path("src").mkdir() |
| 98 | with patch("navegador.cli.commands._get_store", return_value=_mock_store()), \ |
| 99 | patch("navegador.ingestion.RepoIngester") as MockRI: |
| 100 | # watch should be called, simulate immediate stop |
| 101 | MockRI.return_value.watch.side_effect = KeyboardInterrupt() |
| 102 | result = runner.invoke(main, ["ingest", "src", "--watch", "--interval", "0.1"]) |
| 103 | assert result.exit_code == 0 |
| 104 | MockRI.return_value.watch.assert_called_once() |
| 105 | |
| 106 | def test_watch_with_interval(self): |
| 107 | runner = CliRunner() |
| 108 | with runner.isolated_filesystem(): |
| 109 | Path("src").mkdir() |
| 110 | with patch("navegador.cli.commands._get_store", return_value=_mock_store()), \ |
| 111 | patch("navegador.ingestion.RepoIngester") as MockRI: |
| 112 | MockRI.return_value.watch.side_effect = KeyboardInterrupt() |
| 113 | runner.invoke(main, ["ingest", "src", "--watch", "--interval", "5.0"]) |
| 114 | _, kwargs = MockRI.return_value.watch.call_args |
| 115 | assert kwargs["interval"] == 5.0 |
| 116 | |
| 117 | |
| 118 | # ── context ─────────────────────────────────────────────────────────────────── |
| 119 | |
| 120 | class TestContextCommand: |
| 121 |
+148
| --- tests/test_ingestion_code.py | ||
| +++ tests/test_ingestion_code.py | ||
| @@ -345,10 +345,158 @@ | ||
| 345 | 345 | stats = ingester.ingest(tmpdir) |
| 346 | 346 | assert stats["files"] == 0 |
| 347 | 347 | |
| 348 | 348 | |
| 349 | 349 | # ── LanguageParser base class ───────────────────────────────────────────────── |
| 350 | + | |
| 351 | +# ── Incremental ingestion ───────────────────────────────────────────────────── | |
| 352 | + | |
class TestIncrementalIngestion:
    """RepoIngester.ingest(..., incremental=True): hash-gated skip/re-parse behavior.

    Fix: ``test_incremental_skips_unchanged_file`` computed ``current_hash`` and
    ``rel_path`` (and imported ``_file_hash`` for them) but never used either —
    the dead locals and their import are removed.
    """

    def test_incremental_returns_skipped_count(self):
        """Incremental stats always carry a 'skipped' counter, even on empty repos."""
        store = _make_store()
        ingester = RepoIngester(store)
        with tempfile.TemporaryDirectory() as tmpdir:
            stats = ingester.ingest(tmpdir, incremental=True)
            assert "skipped" in stats

    def test_incremental_skips_unchanged_file(self):
        """A file whose stored hash still matches is counted as skipped, not parsed."""
        store = _make_store()
        ingester = RepoIngester(store)
        mock_parser = MagicMock()
        mock_parser.parse_file.return_value = {"functions": 1, "classes": 0, "edges": 0}
        ingester._parsers["python"] = mock_parser

        with tempfile.TemporaryDirectory() as tmpdir:
            py_file = Path(tmpdir) / "app.py"
            py_file.write_text("def foo(): pass")

            # First ingest: file is new, should be parsed
            stats1 = ingester.ingest(tmpdir, incremental=True)
            assert stats1["files"] == 1
            assert stats1["skipped"] == 0

            # Second ingest: force the hash comparison to report "unchanged"
            ingester._file_unchanged = MagicMock(return_value=True)
            stats2 = ingester.ingest(tmpdir, incremental=True)
            assert stats2["files"] == 0
            assert stats2["skipped"] == 1

    def test_incremental_reparses_changed_file(self):
        """A changed file is re-parsed, and its stale subgraph is cleared first."""
        store = _make_store()
        ingester = RepoIngester(store)
        mock_parser = MagicMock()
        mock_parser.parse_file.return_value = {"functions": 1, "classes": 0, "edges": 0}
        ingester._parsers["python"] = mock_parser

        with tempfile.TemporaryDirectory() as tmpdir:
            py_file = Path(tmpdir) / "app.py"
            py_file.write_text("def foo(): pass")

            ingester._file_unchanged = MagicMock(return_value=False)
            ingester._clear_file_subgraph = MagicMock()
            stats = ingester.ingest(tmpdir, incremental=True)
            assert stats["files"] == 1
            ingester._clear_file_subgraph.assert_called_once()

    def test_non_incremental_does_not_check_hash(self):
        """A full (non-incremental) ingest never consults stored hashes."""
        store = _make_store()
        ingester = RepoIngester(store)
        mock_parser = MagicMock()
        mock_parser.parse_file.return_value = {"functions": 1, "classes": 0, "edges": 0}
        ingester._parsers["python"] = mock_parser

        with tempfile.TemporaryDirectory() as tmpdir:
            (Path(tmpdir) / "app.py").write_text("def foo(): pass")
            ingester._file_unchanged = MagicMock()
            ingester.ingest(tmpdir, incremental=False)
            ingester._file_unchanged.assert_not_called()

    def test_file_hash_is_deterministic(self):
        """_file_hash is a pure function of content: same bytes, same digest."""
        from navegador.ingestion.parser import _file_hash
        with tempfile.TemporaryDirectory() as tmpdir:
            f = Path(tmpdir) / "test.py"
            f.write_text("x = 1")
            h1 = _file_hash(f)
            h2 = _file_hash(f)
            assert h1 == h2
            assert len(h1) == 64  # SHA-256 hex digest length

    def test_file_hash_changes_on_content_change(self):
        """Any content change must produce a different digest."""
        from navegador.ingestion.parser import _file_hash
        with tempfile.TemporaryDirectory() as tmpdir:
            f = Path(tmpdir) / "test.py"
            f.write_text("x = 1")
            h1 = _file_hash(f)
            f.write_text("x = 2")
            h2 = _file_hash(f)
            assert h1 != h2
| 438 | + | |
class TestFileUnchanged:
    """_file_unchanged: only an exact stored-hash match reports 'unchanged'."""

    @staticmethod
    def _ingester_with_result(rows):
        # Build an ingester whose store.query returns the given result rows.
        store = _make_store()
        store.query.return_value = MagicMock(result_set=rows)
        return RepoIngester(store)

    def test_returns_false_for_new_file(self):
        # No row at all: the file has never been ingested.
        ing = self._ingester_with_result([])
        assert ing._file_unchanged("app.py", "abc123") is False

    def test_returns_false_for_null_hash(self):
        # A File node exists but carries no hash yet.
        ing = self._ingester_with_result([[None]])
        assert ing._file_unchanged("app.py", "abc123") is False

    def test_returns_true_when_hash_matches(self):
        ing = self._ingester_with_result([["abc123"]])
        assert ing._file_unchanged("app.py", "abc123") is True

    def test_returns_false_when_hash_differs(self):
        ing = self._ingester_with_result([["old_hash"]])
        assert ing._file_unchanged("app.py", "new_hash") is False
| 464 | + | |
class TestWatch:
    """RepoIngester.watch: a polling loop whose callback's return value drives it."""

    def test_watch_raises_on_missing_dir(self):
        ingester = RepoIngester(_make_store())
        with pytest.raises(FileNotFoundError):
            ingester.watch("/nonexistent/repo")

    def test_watch_calls_callback_and_stops_on_false(self):
        ingester = RepoIngester(_make_store())
        calls = []

        def stop_immediately(stats):
            calls.append(stats)
            return False  # a False return must end the loop after this cycle

        with tempfile.TemporaryDirectory() as tmpdir:
            ingester.watch(tmpdir, interval=0.01, callback=stop_immediately)
            assert len(calls) == 1

    def test_watch_runs_multiple_cycles(self):
        ingester = RepoIngester(_make_store())
        calls = []

        def three_cycles(stats):
            calls.append(stats)
            return len(calls) < 3  # keep polling until the third cycle completes

        with tempfile.TemporaryDirectory() as tmpdir:
            ingester.watch(tmpdir, interval=0.01, callback=three_cycles)
            assert len(calls) == 3
| 350 | 498 | |
| 351 | 499 | class TestLanguageParserBase: |
| 352 | 500 | def test_parse_file_raises_not_implemented(self): |
| 353 | 501 | from pathlib import Path |
| 354 | 502 | |
| 355 | 503 |
| --- tests/test_ingestion_code.py | |
| +++ tests/test_ingestion_code.py | |
| @@ -345,10 +345,158 @@ | |
| 345 | stats = ingester.ingest(tmpdir) |
| 346 | assert stats["files"] == 0 |
| 347 | |
| 348 | |
| 349 | # ── LanguageParser base class ───────────────────────────────────────────────── |
| 350 | |
| 351 | class TestLanguageParserBase: |
| 352 | def test_parse_file_raises_not_implemented(self): |
| 353 | from pathlib import Path |
| 354 | |
| 355 |
| --- tests/test_ingestion_code.py | |
| +++ tests/test_ingestion_code.py | |
| @@ -345,10 +345,158 @@ | |
| 345 | stats = ingester.ingest(tmpdir) |
| 346 | assert stats["files"] == 0 |
| 347 | |
| 348 | |
| 349 | # ── LanguageParser base class ───────────────────────────────────────────────── |
| 350 | |
| 351 | # ── Incremental ingestion ───────────────────────────────────────────────────── |
| 352 | |
class TestIncrementalIngestion:
    """RepoIngester.ingest(..., incremental=True): hash-gated skip/re-parse behavior.

    Fix: ``test_incremental_skips_unchanged_file`` computed ``current_hash`` and
    ``rel_path`` (and imported ``_file_hash`` for them) but never used either —
    the dead locals and their import are removed.
    """

    def test_incremental_returns_skipped_count(self):
        """Incremental stats always carry a 'skipped' counter, even on empty repos."""
        store = _make_store()
        ingester = RepoIngester(store)
        with tempfile.TemporaryDirectory() as tmpdir:
            stats = ingester.ingest(tmpdir, incremental=True)
            assert "skipped" in stats

    def test_incremental_skips_unchanged_file(self):
        """A file whose stored hash still matches is counted as skipped, not parsed."""
        store = _make_store()
        ingester = RepoIngester(store)
        mock_parser = MagicMock()
        mock_parser.parse_file.return_value = {"functions": 1, "classes": 0, "edges": 0}
        ingester._parsers["python"] = mock_parser

        with tempfile.TemporaryDirectory() as tmpdir:
            py_file = Path(tmpdir) / "app.py"
            py_file.write_text("def foo(): pass")

            # First ingest: file is new, should be parsed
            stats1 = ingester.ingest(tmpdir, incremental=True)
            assert stats1["files"] == 1
            assert stats1["skipped"] == 0

            # Second ingest: force the hash comparison to report "unchanged"
            ingester._file_unchanged = MagicMock(return_value=True)
            stats2 = ingester.ingest(tmpdir, incremental=True)
            assert stats2["files"] == 0
            assert stats2["skipped"] == 1

    def test_incremental_reparses_changed_file(self):
        """A changed file is re-parsed, and its stale subgraph is cleared first."""
        store = _make_store()
        ingester = RepoIngester(store)
        mock_parser = MagicMock()
        mock_parser.parse_file.return_value = {"functions": 1, "classes": 0, "edges": 0}
        ingester._parsers["python"] = mock_parser

        with tempfile.TemporaryDirectory() as tmpdir:
            py_file = Path(tmpdir) / "app.py"
            py_file.write_text("def foo(): pass")

            ingester._file_unchanged = MagicMock(return_value=False)
            ingester._clear_file_subgraph = MagicMock()
            stats = ingester.ingest(tmpdir, incremental=True)
            assert stats["files"] == 1
            ingester._clear_file_subgraph.assert_called_once()

    def test_non_incremental_does_not_check_hash(self):
        """A full (non-incremental) ingest never consults stored hashes."""
        store = _make_store()
        ingester = RepoIngester(store)
        mock_parser = MagicMock()
        mock_parser.parse_file.return_value = {"functions": 1, "classes": 0, "edges": 0}
        ingester._parsers["python"] = mock_parser

        with tempfile.TemporaryDirectory() as tmpdir:
            (Path(tmpdir) / "app.py").write_text("def foo(): pass")
            ingester._file_unchanged = MagicMock()
            ingester.ingest(tmpdir, incremental=False)
            ingester._file_unchanged.assert_not_called()

    def test_file_hash_is_deterministic(self):
        """_file_hash is a pure function of content: same bytes, same digest."""
        from navegador.ingestion.parser import _file_hash
        with tempfile.TemporaryDirectory() as tmpdir:
            f = Path(tmpdir) / "test.py"
            f.write_text("x = 1")
            h1 = _file_hash(f)
            h2 = _file_hash(f)
            assert h1 == h2
            assert len(h1) == 64  # SHA-256 hex digest length

    def test_file_hash_changes_on_content_change(self):
        """Any content change must produce a different digest."""
        from navegador.ingestion.parser import _file_hash
        with tempfile.TemporaryDirectory() as tmpdir:
            f = Path(tmpdir) / "test.py"
            f.write_text("x = 1")
            h1 = _file_hash(f)
            f.write_text("x = 2")
            h2 = _file_hash(f)
            assert h1 != h2
| 438 | |
class TestFileUnchanged:
    """_file_unchanged: only an exact stored-hash match reports 'unchanged'."""

    @staticmethod
    def _ingester_with_result(rows):
        # Build an ingester whose store.query returns the given result rows.
        store = _make_store()
        store.query.return_value = MagicMock(result_set=rows)
        return RepoIngester(store)

    def test_returns_false_for_new_file(self):
        # No row at all: the file has never been ingested.
        ing = self._ingester_with_result([])
        assert ing._file_unchanged("app.py", "abc123") is False

    def test_returns_false_for_null_hash(self):
        # A File node exists but carries no hash yet.
        ing = self._ingester_with_result([[None]])
        assert ing._file_unchanged("app.py", "abc123") is False

    def test_returns_true_when_hash_matches(self):
        ing = self._ingester_with_result([["abc123"]])
        assert ing._file_unchanged("app.py", "abc123") is True

    def test_returns_false_when_hash_differs(self):
        ing = self._ingester_with_result([["old_hash"]])
        assert ing._file_unchanged("app.py", "new_hash") is False
| 464 | |
class TestWatch:
    """RepoIngester.watch: a polling loop whose callback's return value drives it."""

    def test_watch_raises_on_missing_dir(self):
        ingester = RepoIngester(_make_store())
        with pytest.raises(FileNotFoundError):
            ingester.watch("/nonexistent/repo")

    def test_watch_calls_callback_and_stops_on_false(self):
        ingester = RepoIngester(_make_store())
        calls = []

        def stop_immediately(stats):
            calls.append(stats)
            return False  # a False return must end the loop after this cycle

        with tempfile.TemporaryDirectory() as tmpdir:
            ingester.watch(tmpdir, interval=0.01, callback=stop_immediately)
            assert len(calls) == 1

    def test_watch_runs_multiple_cycles(self):
        ingester = RepoIngester(_make_store())
        calls = []

        def three_cycles(stats):
            calls.append(stats)
            return len(calls) < 3  # keep polling until the third cycle completes

        with tempfile.TemporaryDirectory() as tmpdir:
            ingester.watch(tmpdir, interval=0.01, callback=three_cycles)
            assert len(calls) == 3
| 498 | |
| 499 | class TestLanguageParserBase: |
| 500 | def test_parse_file_raises_not_implemented(self): |
| 501 | from pathlib import Path |
| 502 | |
| 503 |