Navegador

navegador / tests / test_ingestion_code.py
Source Blame History 577 lines
b663b12… lmata 1 """Tests for navegador.ingestion.parser — RepoIngester orchestration."""
b663b12… lmata 2
b663b12… lmata 3 import tempfile
b663b12… lmata 4 from pathlib import Path
b663b12… lmata 5 from unittest.mock import MagicMock, patch
b663b12… lmata 6
b663b12… lmata 7 import pytest
b663b12… lmata 8
b663b12… lmata 9 from navegador.graph.schema import NodeLabel
b663b12… lmata 10 from navegador.ingestion.parser import LANGUAGE_MAP, RepoIngester
b663b12… lmata 11
b663b12… lmata 12
b663b12… lmata 13 def _make_store():
b663b12… lmata 14 store = MagicMock()
b663b12… lmata 15 store.query.return_value = MagicMock(result_set=[])
b663b12… lmata 16 return store
b663b12… lmata 17
b663b12… lmata 18
b663b12… lmata 19 # ── LANGUAGE_MAP ──────────────────────────────────────────────────────────────
b663b12… lmata 20
class TestLanguageMap:
    """Checks the extension-to-language table exported by the parser module."""

    def test_python_extension(self):
        """``.py`` is mapped to ``python``."""
        language = LANGUAGE_MAP[".py"]
        assert language == "python"

    def test_typescript_extensions(self):
        """Both TypeScript suffixes are mapped to ``typescript``."""
        for suffix in (".ts", ".tsx"):
            assert LANGUAGE_MAP[suffix] == "typescript"

    def test_javascript_extensions(self):
        """Both JavaScript suffixes are mapped to ``javascript``."""
        for suffix in (".js", ".jsx"):
            assert LANGUAGE_MAP[suffix] == "javascript"

    def test_go_rust_java_extensions(self):
        """Go, Rust and Java suffixes are mapped to their language keys."""
        expected_pairs = ((".go", "go"), (".rs", "rust"), (".java", "java"))
        for suffix, language in expected_pairs:
            assert LANGUAGE_MAP[suffix] == language

    def test_no_entry_for_unknown(self):
        """Non-source suffixes have no entry in the table."""
        for suffix in (".txt", ".md"):
            assert suffix not in LANGUAGE_MAP
b663b12… lmata 41
b663b12… lmata 42
b663b12… lmata 43 # ── ingest() ─────────────────────────────────────────────────────────────────
b663b12… lmata 44
class TestRepoIngester:
    """Drives ``RepoIngester.ingest`` with a mocked store and stub parsers."""

    def test_raises_on_missing_dir(self):
        """Ingesting a path that does not exist raises FileNotFoundError."""
        repo_ingester = RepoIngester(_make_store())
        with pytest.raises(FileNotFoundError):
            repo_ingester.ingest("/nonexistent/repo")

    def test_creates_repository_node(self):
        """Ingesting an empty directory creates one Repository node."""
        graph = _make_store()
        repo_ingester = RepoIngester(graph)
        with tempfile.TemporaryDirectory() as repo_dir:
            repo_ingester.ingest(repo_dir)
        graph.create_node.assert_called_once()
        label, props = graph.create_node.call_args[0]
        assert label == NodeLabel.Repository
        for key in ("name", "path"):
            assert key in props

    def test_returns_stats_dict(self):
        """The returned stats expose the four counter keys."""
        repo_ingester = RepoIngester(_make_store())
        with tempfile.TemporaryDirectory() as repo_dir:
            stats = repo_ingester.ingest(repo_dir)
        for key in ("files", "functions", "classes", "edges"):
            assert key in stats

    def test_empty_dir_returns_zero_counts(self):
        """An empty directory yields zero files and zero functions."""
        repo_ingester = RepoIngester(_make_store())
        with tempfile.TemporaryDirectory() as repo_dir:
            stats = repo_ingester.ingest(repo_dir)
        for key in ("files", "functions"):
            assert stats[key] == 0

    def test_clear_flag_calls_store_clear(self):
        """``clear=True`` calls ``store.clear()`` exactly once."""
        graph = _make_store()
        repo_ingester = RepoIngester(graph)
        with tempfile.TemporaryDirectory() as repo_dir:
            repo_ingester.ingest(repo_dir, clear=True)
        graph.clear.assert_called_once()

    def test_no_clear_by_default(self):
        """Without the flag, ``store.clear()`` is never called."""
        graph = _make_store()
        repo_ingester = RepoIngester(graph)
        with tempfile.TemporaryDirectory() as repo_dir:
            repo_ingester.ingest(repo_dir)
        graph.clear.assert_not_called()

    def test_skips_unsupported_extensions(self):
        """Markdown and YAML files are not counted as ingested files."""
        repo_ingester = RepoIngester(_make_store())
        with tempfile.TemporaryDirectory() as repo_dir:
            root = Path(repo_dir)
            (root / "readme.md").write_text("# Readme")
            (root / "config.yaml").write_text("key: val")
            stats = repo_ingester.ingest(repo_dir)
        assert stats["files"] == 0

    def test_ingests_python_files_with_mock_parser(self):
        """Counts reported by the stub Python parser appear in the stats."""
        fake_parser = MagicMock()
        fake_parser.parse_file.return_value = {"functions": 3, "classes": 1, "edges": 5}
        repo_ingester = RepoIngester(_make_store())
        repo_ingester._parsers["python"] = fake_parser

        with tempfile.TemporaryDirectory() as repo_dir:
            (Path(repo_dir) / "app.py").write_text("def foo(): pass")
            stats = repo_ingester.ingest(repo_dir)

        expected = {"files": 1, "functions": 3, "classes": 1, "edges": 5}
        for key, value in expected.items():
            assert stats[key] == value

    def test_ingests_multiple_python_files(self):
        """Two Python files are both parsed and their functions summed."""
        fake_parser = MagicMock()
        fake_parser.parse_file.return_value = {"functions": 2, "classes": 0, "edges": 1}
        repo_ingester = RepoIngester(_make_store())
        repo_ingester._parsers["python"] = fake_parser

        with tempfile.TemporaryDirectory() as repo_dir:
            root = Path(repo_dir)
            (root / "a.py").write_text("def a(): pass")
            (root / "b.py").write_text("def b(): pass")
            stats = repo_ingester.ingest(repo_dir)

        assert stats["files"] == 2
        assert stats["functions"] == 4

    def test_handles_parse_exception_gracefully(self):
        """A parser that raises does not abort ``ingest()``."""
        failing_parser = MagicMock()
        failing_parser.parse_file.side_effect = Exception("parse error")
        repo_ingester = RepoIngester(_make_store())
        repo_ingester._parsers["python"] = failing_parser

        with tempfile.TemporaryDirectory() as repo_dir:
            (Path(repo_dir) / "broken.py").write_text("invalid python @@@@")
            # ingest() must return normally despite the parser error.
            stats = repo_ingester.ingest(repo_dir)

        # The only file failed to parse, so no functions were recorded.
        assert stats["functions"] == 0

    def test_ingests_typescript_files_with_mock_parser(self):
        """A ``.tsx`` file is counted when a TypeScript parser is registered."""
        fake_parser = MagicMock()
        fake_parser.parse_file.return_value = {"functions": 1, "classes": 1, "edges": 2}
        repo_ingester = RepoIngester(_make_store())
        repo_ingester._parsers["typescript"] = fake_parser

        with tempfile.TemporaryDirectory() as repo_dir:
            (Path(repo_dir) / "comp.tsx").write_text("const App = () => null")
            stats = repo_ingester.ingest(repo_dir)

        assert stats["files"] == 1

    def test_accumulates_stats_across_files(self):
        """Counts from the Python and TypeScript parsers are added together."""
        python_parser = MagicMock()
        python_parser.parse_file.return_value = {"functions": 5, "classes": 2, "edges": 10}
        typescript_parser = MagicMock()
        typescript_parser.parse_file.return_value = {"functions": 3, "classes": 1, "edges": 5}

        repo_ingester = RepoIngester(_make_store())
        repo_ingester._parsers["python"] = python_parser
        repo_ingester._parsers["typescript"] = typescript_parser

        with tempfile.TemporaryDirectory() as repo_dir:
            root = Path(repo_dir)
            (root / "app.py").write_text("x=1")
            (root / "comp.ts").write_text("const x = 1")
            stats = repo_ingester.ingest(repo_dir)

        expected = {"files": 2, "functions": 8, "classes": 3, "edges": 15}
        for key, value in expected.items():
            assert stats[key] == value
b663b12… lmata 177
b663b12… lmata 178
b663b12… lmata 179 # ── _iter_source_files() ──────────────────────────────────────────────────────
b663b12… lmata 180
class TestIterSourceFiles:
    """Checks which paths ``RepoIngester._iter_source_files`` yields."""

    def test_yields_python_files(self):
        """A lone Python file at the root is yielded."""
        repo_ingester = RepoIngester(_make_store())
        with tempfile.TemporaryDirectory() as repo_dir:
            root = Path(repo_dir)
            (root / "app.py").write_text("x=1")
            found = list(repo_ingester._iter_source_files(root))
        assert [path.name for path in found] == ["app.py"]

    def test_skips_git_dir(self):
        """Files under ``.git`` are not yielded."""
        repo_ingester = RepoIngester(_make_store())
        with tempfile.TemporaryDirectory() as repo_dir:
            root = Path(repo_dir)
            hidden = root / ".git"
            hidden.mkdir()
            (hidden / "hook.py").write_text("x=1")
            (root / "main.py").write_text("y=2")
            found = list(repo_ingester._iter_source_files(root))
        assert [path.name for path in found] == ["main.py"]

    def test_skips_node_modules(self):
        """Files under ``node_modules`` are not yielded."""
        repo_ingester = RepoIngester(_make_store())
        with tempfile.TemporaryDirectory() as repo_dir:
            root = Path(repo_dir)
            vendored = root / "node_modules"
            vendored.mkdir()
            (vendored / "dep.js").write_text("module.exports={}")
            (root / "app.ts").write_text("const x=1")
            found = list(repo_ingester._iter_source_files(root))
        assert [path.name for path in found] == ["app.ts"]

    def test_skips_pycache(self):
        """Files under ``__pycache__`` are not yielded; siblings still are."""
        repo_ingester = RepoIngester(_make_store())
        with tempfile.TemporaryDirectory() as repo_dir:
            root = Path(repo_dir)
            bytecode_dir = root / "__pycache__"
            bytecode_dir.mkdir()
            (bytecode_dir / "cached.py").write_text("x=1")
            (root / "real.py").write_text("y=2")
            yielded_names = {
                path.name for path in repo_ingester._iter_source_files(root)
            }
        assert "cached.py" not in yielded_names
        assert "real.py" in yielded_names

    def test_skips_non_source_files(self):
        """Markdown and JSON files are not yielded."""
        repo_ingester = RepoIngester(_make_store())
        with tempfile.TemporaryDirectory() as repo_dir:
            root = Path(repo_dir)
            (root / "readme.md").write_text("# readme")
            (root / "config.json").write_text("{}")
            found = list(repo_ingester._iter_source_files(root))
        assert found == []

    def test_recurses_into_subdirs(self):
        """A source file nested two directories deep is yielded."""
        repo_ingester = RepoIngester(_make_store())
        with tempfile.TemporaryDirectory() as repo_dir:
            root = Path(repo_dir)
            nested = root / "src" / "api"
            nested.mkdir(parents=True)
            (nested / "views.py").write_text("x=1")
            found = list(repo_ingester._iter_source_files(root))
        assert [path.name for path in found] == ["views.py"]
b663b12… lmata 247
b663b12… lmata 248
b663b12… lmata 249 # ── _get_parser() ─────────────────────────────────────────────────────────────
b663b12… lmata 250
class TestGetParser:
    """Checks parser lookup and construction in ``RepoIngester._get_parser``."""

    @staticmethod
    def _assert_lazy_import(language, module_name, class_name, *ctor_args):
        """Check that ``_get_parser(language)`` builds ``class_name`` from ``module_name``.

        A stub module exposing ``class_name`` is placed in ``sys.modules``
        under ``module_name`` for the duration of the call. The parser class
        must be called exactly once with ``ctor_args`` and its instance
        returned.
        """
        parser_instance = MagicMock()
        parser_class = MagicMock(return_value=parser_instance)
        stub_module = MagicMock(**{class_name: parser_class})
        repo_ingester = RepoIngester(_make_store())
        with patch.dict("sys.modules", {module_name: stub_module}):
            built = repo_ingester._get_parser(language)
        assert built is parser_instance
        parser_class.assert_called_once_with(*ctor_args)

    def test_returns_cached_parser(self):
        """A parser already stored in ``_parsers`` is returned as-is."""
        preloaded = MagicMock()
        repo_ingester = RepoIngester(_make_store())
        repo_ingester._parsers["python"] = preloaded
        assert repo_ingester._get_parser("python") is preloaded

    def test_raises_for_unknown_language(self):
        """An unknown language key raises ValueError."""
        repo_ingester = RepoIngester(_make_store())
        with pytest.raises(ValueError, match="Unsupported language"):
            repo_ingester._get_parser("brainfuck")

    def test_creates_python_parser_via_lazy_import(self):
        """``PythonParser`` is constructed with no arguments."""
        self._assert_lazy_import(
            "python", "navegador.ingestion.python", "PythonParser"
        )

    def test_creates_typescript_parser_via_lazy_import(self):
        """``TypeScriptParser`` is constructed with the language name."""
        self._assert_lazy_import(
            "typescript",
            "navegador.ingestion.typescript",
            "TypeScriptParser",
            "typescript",
        )

    def test_creates_go_parser_via_lazy_import(self):
        """``GoParser`` is constructed with no arguments."""
        self._assert_lazy_import("go", "navegador.ingestion.go", "GoParser")

    def test_creates_rust_parser_via_lazy_import(self):
        """``RustParser`` is constructed with no arguments."""
        self._assert_lazy_import("rust", "navegador.ingestion.rust", "RustParser")

    def test_creates_java_parser_via_lazy_import(self):
        """``JavaParser`` is constructed with no arguments."""
        self._assert_lazy_import("java", "navegador.ingestion.java", "JavaParser")
7ae0080… lmata 325
7ae0080… lmata 326
7ae0080… lmata 327 # ── defensive continue branch ─────────────────────────────────────────────────
7ae0080… lmata 328
class TestIngesterContinueBranch:
    """Covers the defensive skip in ``ingest()`` for unmapped extensions."""

    def test_skips_file_when_language_not_in_map(self):
        """
        _iter_source_files filters to LANGUAGE_MAP extensions, but ingest()
        has a defensive `if not language: continue`. Test it by patching
        _iter_source_files to yield a .txt path.
        """
        # tempfile, Path and patch come from the module-level imports; they
        # do not need to be re-imported inside the test.
        store = _make_store()
        ingester = RepoIngester(store)
        with tempfile.TemporaryDirectory() as tmpdir:
            txt_file = Path(tmpdir) / "notes.txt"
            txt_file.write_text("just a text file")
            with patch.object(ingester, "_iter_source_files", return_value=[txt_file]):
                stats = ingester.ingest(tmpdir)
        # The .txt path was handed to ingest() but must not be counted.
        assert stats["files"] == 0
7ae0080… lmata 347
7ae0080… lmata 348
7ae0080… lmata 349 # ── LanguageParser base class ─────────────────────────────────────────────────
df3d6fa… lmata 350
df3d6fa… lmata 351 # ── Incremental ingestion ─────────────────────────────────────────────────────
df3d6fa… lmata 352
class TestIncrementalIngestion:
    """Covers ``ingest(..., incremental=True)`` and the ``_file_hash`` helper."""

    def test_incremental_returns_skipped_count(self):
        """Incremental runs report a ``skipped`` counter in the stats."""
        store = _make_store()
        ingester = RepoIngester(store)
        with tempfile.TemporaryDirectory() as tmpdir:
            stats = ingester.ingest(tmpdir, incremental=True)
        assert "skipped" in stats

    def test_incremental_skips_unchanged_file(self):
        """A file reported as unchanged is skipped instead of re-parsed."""
        store = _make_store()
        ingester = RepoIngester(store)
        mock_parser = MagicMock()
        mock_parser.parse_file.return_value = {"functions": 1, "classes": 0, "edges": 0}
        ingester._parsers["python"] = mock_parser

        with tempfile.TemporaryDirectory() as tmpdir:
            py_file = Path(tmpdir) / "app.py"
            py_file.write_text("def foo(): pass")

            # First ingest: the mocked store returns no rows, so the file is
            # treated as new and parsed.
            stats1 = ingester.ingest(tmpdir, incremental=True)
            assert stats1["files"] == 1
            assert stats1["skipped"] == 0

            # Second ingest: force the unchanged check to succeed so the file
            # is skipped without consulting the store.
            ingester._file_unchanged = MagicMock(return_value=True)
            stats2 = ingester.ingest(tmpdir, incremental=True)
            assert stats2["files"] == 0
            assert stats2["skipped"] == 1

    def test_incremental_reparses_changed_file(self):
        """A changed file has its subgraph cleared once and is parsed again."""
        store = _make_store()
        ingester = RepoIngester(store)
        mock_parser = MagicMock()
        mock_parser.parse_file.return_value = {"functions": 1, "classes": 0, "edges": 0}
        ingester._parsers["python"] = mock_parser

        with tempfile.TemporaryDirectory() as tmpdir:
            py_file = Path(tmpdir) / "app.py"
            py_file.write_text("def foo(): pass")

            ingester._file_unchanged = MagicMock(return_value=False)
            ingester._clear_file_subgraph = MagicMock()
            stats = ingester.ingest(tmpdir, incremental=True)
            assert stats["files"] == 1
            ingester._clear_file_subgraph.assert_called_once()

    def test_non_incremental_does_not_check_hash(self):
        """With ``incremental=False`` the unchanged check is never called."""
        store = _make_store()
        ingester = RepoIngester(store)
        mock_parser = MagicMock()
        mock_parser.parse_file.return_value = {"functions": 1, "classes": 0, "edges": 0}
        ingester._parsers["python"] = mock_parser

        with tempfile.TemporaryDirectory() as tmpdir:
            (Path(tmpdir) / "app.py").write_text("def foo(): pass")
            ingester._file_unchanged = MagicMock()
            ingester.ingest(tmpdir, incremental=False)
            ingester._file_unchanged.assert_not_called()

    def test_file_hash_is_deterministic(self):
        """Hashing the same file twice gives the same 64-character digest."""
        from navegador.ingestion.parser import _file_hash

        with tempfile.TemporaryDirectory() as tmpdir:
            f = Path(tmpdir) / "test.py"
            f.write_text("x = 1")
            h1 = _file_hash(f)
            h2 = _file_hash(f)
        assert h1 == h2
        assert len(h1) == 64  # SHA-256 hex

    def test_file_hash_changes_on_content_change(self):
        """Rewriting the file with different content changes its hash."""
        from navegador.ingestion.parser import _file_hash

        with tempfile.TemporaryDirectory() as tmpdir:
            f = Path(tmpdir) / "test.py"
            f.write_text("x = 1")
            h1 = _file_hash(f)
            f.write_text("x = 2")
            h2 = _file_hash(f)
        assert h1 != h2
df3d6fa… lmata 437
df3d6fa… lmata 438
class TestFileUnchanged:
    """Checks ``RepoIngester._file_unchanged`` against canned query results."""

    @staticmethod
    def _ingester_with_rows(rows):
        """Return an ingester whose store answers every query with ``rows``."""
        graph = _make_store()
        graph.query.return_value = MagicMock(result_set=rows)
        return RepoIngester(graph)

    def test_returns_false_for_new_file(self):
        """No rows in the store means the file is not unchanged."""
        repo_ingester = self._ingester_with_rows([])
        verdict = repo_ingester._file_unchanged("app.py", "abc123")
        assert verdict is False

    def test_returns_false_for_null_hash(self):
        """A row holding ``None`` as the hash means not unchanged."""
        repo_ingester = self._ingester_with_rows([[None]])
        verdict = repo_ingester._file_unchanged("app.py", "abc123")
        assert verdict is False

    def test_returns_true_when_hash_matches(self):
        """A stored hash equal to the given hash means unchanged."""
        repo_ingester = self._ingester_with_rows([["abc123"]])
        verdict = repo_ingester._file_unchanged("app.py", "abc123")
        assert verdict is True

    def test_returns_false_when_hash_differs(self):
        """A stored hash different from the given hash means not unchanged."""
        repo_ingester = self._ingester_with_rows([["old_hash"]])
        verdict = repo_ingester._file_unchanged("app.py", "new_hash")
        assert verdict is False
95549e5… lmata 463
95549e5… lmata 464
95549e5… lmata 465 # ── Redaction integration ─────────────────────────────────────────────────────
95549e5… lmata 466
class TestRedaction:
    """Covers the ``redact`` option and ``_maybe_redact_to_tmp``."""

    def test_constructor_with_redact_true(self):
        """``redact=True`` is stored and a detector is created."""
        store = _make_store()
        ingester = RepoIngester(store, redact=True)
        assert ingester.redact is True
        assert ingester._detector is not None

    def test_constructor_with_redact_false(self):
        """``redact=False`` is stored on the ingester."""
        store = _make_store()
        ingester = RepoIngester(store, redact=False)
        assert ingester.redact is False

    def test_maybe_redact_noop_when_disabled(self):
        """With redaction off, the original path and root come back unchanged."""
        store = _make_store()
        ingester = RepoIngester(store, redact=False)
        with tempfile.TemporaryDirectory() as tmpdir:
            f = Path(tmpdir) / "app.py"
            f.write_text("x = 1")
            parse_path, root = ingester._maybe_redact_to_tmp(f, Path(tmpdir))
            assert parse_path == f
            assert root == Path(tmpdir)

    def test_maybe_redact_returns_original_if_no_sensitive(self):
        """Content with nothing sensitive is parsed from its original path."""
        store = _make_store()
        ingester = RepoIngester(store, redact=True)
        with tempfile.TemporaryDirectory() as tmpdir:
            f = Path(tmpdir) / "app.py"
            f.write_text("def hello(): pass")
            parse_path, root = ingester._maybe_redact_to_tmp(f, Path(tmpdir))
            assert parse_path == f

    def test_maybe_redact_creates_temp_for_sensitive(self):
        """Sensitive content is redirected to a redacted copy under a new root."""
        import shutil

        store = _make_store()
        ingester = RepoIngester(store, redact=True)
        with tempfile.TemporaryDirectory() as tmpdir:
            repo_root = Path(tmpdir)
            f = repo_root / "app.py"
            f.write_text('password = "s3cret123"')
            parse_path, root = ingester._maybe_redact_to_tmp(f, repo_root)
            try:
                assert parse_path != f
                assert root != repo_root
                content = parse_path.read_text()
                assert "[REDACTED]" in content
            finally:
                # Remove the redaction directory even when an assertion above
                # fails. Leave the fixture directory to TemporaryDirectory if
                # no separate root was created.
                if root != repo_root:
                    shutil.rmtree(root, ignore_errors=True)

    def test_maybe_redact_handles_oserror(self):
        """A path that cannot be read falls back to the original path."""
        store = _make_store()
        ingester = RepoIngester(store, redact=True)
        fake_path = Path("/nonexistent/file.py")
        parse_path, root = ingester._maybe_redact_to_tmp(fake_path, Path("/nonexistent"))
        assert parse_path == fake_path

    def test_ingest_with_redact_cleans_up_temp(self):
        """Ingesting a file with a secret still hands it to the parser."""
        # NOTE(review): despite the test name, nothing here checks that the
        # temporary redaction directory is removed; only that parsing ran.
        store = _make_store()
        ingester = RepoIngester(store, redact=True)
        mock_parser = MagicMock()
        mock_parser.parse_file.return_value = {"functions": 1, "classes": 0, "edges": 0}
        ingester._parsers["python"] = mock_parser

        with tempfile.TemporaryDirectory() as tmpdir:
            f = Path(tmpdir) / "app.py"
            f.write_text('api_key = "sk-1234567890abcdef1234567890"')
            ingester.ingest(tmpdir)
        assert mock_parser.parse_file.called
df3d6fa… lmata 532
df3d6fa… lmata 533
class TestWatch:
    """Covers the polling loop in ``RepoIngester.watch``."""

    def test_watch_raises_on_missing_dir(self):
        """Watching a path that does not exist raises FileNotFoundError."""
        repo_ingester = RepoIngester(_make_store())
        with pytest.raises(FileNotFoundError):
            repo_ingester.watch("/nonexistent/repo")

    def test_watch_calls_callback_and_stops_on_false(self):
        """A callback returning False ends the loop after one call."""
        received = []

        def callback(stats):
            received.append(stats)
            return False  # stop immediately

        repo_ingester = RepoIngester(_make_store())
        with tempfile.TemporaryDirectory() as repo_dir:
            repo_ingester.watch(repo_dir, interval=0.01, callback=callback)
        assert len(received) == 1

    def test_watch_runs_multiple_cycles(self):
        """The loop keeps going while the callback returns True."""
        received = []

        def callback(stats):
            received.append(stats)
            return len(received) < 3  # run 3 times then stop

        repo_ingester = RepoIngester(_make_store())
        with tempfile.TemporaryDirectory() as repo_dir:
            repo_ingester.watch(repo_dir, interval=0.01, callback=callback)
        assert len(received) == 3
df3d6fa… lmata 566
7ae0080… lmata 567
class TestLanguageParserBase:
    """Covers the ``LanguageParser`` base class."""

    def test_parse_file_raises_not_implemented(self):
        """Calling ``parse_file`` on the base class raises NotImplementedError."""
        # Path and pytest come from the module-level imports; only
        # LanguageParser, which the module does not import, is pulled in here.
        from navegador.ingestion.parser import LanguageParser

        lp = LanguageParser()
        with pytest.raises(NotImplementedError):
            lp.parse_file(Path("/tmp/x.py"), Path("/tmp"), MagicMock())

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button