Navegador

navegador / tests / test_ingestion_code.py
Blame History Raw 578 lines
1
"""Tests for navegador.ingestion.parser — RepoIngester orchestration."""
2
3
import tempfile
4
from pathlib import Path
5
from unittest.mock import MagicMock, patch
6
7
import pytest
8
9
from navegador.graph.schema import NodeLabel
10
from navegador.ingestion.parser import LANGUAGE_MAP, RepoIngester
11
12
13
def _make_store():
14
store = MagicMock()
15
store.query.return_value = MagicMock(result_set=[])
16
return store
17
18
19
# ── LANGUAGE_MAP ──────────────────────────────────────────────────────────────
20
21
class TestLanguageMap:
    """Checks of the file-extension -> language-name entries in ``LANGUAGE_MAP``."""

    def test_python_extension(self):
        """``.py`` is registered as ``"python"``."""
        assert LANGUAGE_MAP[".py"] == "python"

    def test_typescript_extensions(self):
        """Both TypeScript extensions are registered as ``"typescript"``."""
        for extension in (".ts", ".tsx"):
            assert LANGUAGE_MAP[extension] == "typescript"

    def test_javascript_extensions(self):
        """Both JavaScript extensions are registered as ``"javascript"``."""
        for extension in (".js", ".jsx"):
            assert LANGUAGE_MAP[extension] == "javascript"

    def test_go_rust_java_extensions(self):
        """Go, Rust and Java each have their own extension entry."""
        expected = {".go": "go", ".rs": "rust", ".java": "java"}
        for extension, language in expected.items():
            assert LANGUAGE_MAP[extension] == language

    def test_no_entry_for_unknown(self):
        """Non-source extensions must be absent from the map."""
        for extension in (".txt", ".md"):
            assert extension not in LANGUAGE_MAP
41
42
43
# ── ingest() ─────────────────────────────────────────────────────────────────
44
45
class TestRepoIngester:
    """Checks of ``RepoIngester.ingest`` run against a mocked store."""

    @staticmethod
    def _ingester_with_parsers(**results_by_language):
        """Return an ingester whose parser cache holds stub parsers.

        Each keyword is a language name; its value is the dict that the
        stub's ``parse_file`` returns for every file of that language.
        """
        repo_ingester = RepoIngester(_make_store())
        for language, parse_result in results_by_language.items():
            stub_parser = MagicMock()
            stub_parser.parse_file.return_value = parse_result
            repo_ingester._parsers[language] = stub_parser
        return repo_ingester

    def test_raises_on_missing_dir(self):
        """A repository path that does not exist raises ``FileNotFoundError``."""
        repo_ingester = RepoIngester(_make_store())
        with pytest.raises(FileNotFoundError):
            repo_ingester.ingest("/nonexistent/repo")

    def test_creates_repository_node(self):
        """Ingesting an empty directory creates exactly one Repository node."""
        fake_store = _make_store()
        repo_ingester = RepoIngester(fake_store)
        with tempfile.TemporaryDirectory() as repo_dir:
            repo_ingester.ingest(repo_dir)
        fake_store.create_node.assert_called_once()
        positional_args = fake_store.create_node.call_args[0]
        label, props = positional_args
        assert label == NodeLabel.Repository
        for key in ("name", "path"):
            assert key in props

    def test_returns_stats_dict(self):
        """The returned stats expose the four basic counters."""
        repo_ingester = RepoIngester(_make_store())
        with tempfile.TemporaryDirectory() as repo_dir:
            stats = repo_ingester.ingest(repo_dir)
        for key in ("files", "functions", "classes", "edges"):
            assert key in stats

    def test_empty_dir_returns_zero_counts(self):
        """An empty directory yields zero files and zero functions."""
        repo_ingester = RepoIngester(_make_store())
        with tempfile.TemporaryDirectory() as repo_dir:
            stats = repo_ingester.ingest(repo_dir)
        for key in ("files", "functions"):
            assert stats[key] == 0

    def test_clear_flag_calls_store_clear(self):
        """``clear=True`` triggers exactly one ``store.clear()`` call."""
        fake_store = _make_store()
        repo_ingester = RepoIngester(fake_store)
        with tempfile.TemporaryDirectory() as repo_dir:
            repo_ingester.ingest(repo_dir, clear=True)
        fake_store.clear.assert_called_once()

    def test_no_clear_by_default(self):
        """Without the flag, ``store.clear()`` is never called."""
        fake_store = _make_store()
        repo_ingester = RepoIngester(fake_store)
        with tempfile.TemporaryDirectory() as repo_dir:
            repo_ingester.ingest(repo_dir)
        fake_store.clear.assert_not_called()

    def test_skips_unsupported_extensions(self):
        """Markdown and YAML files are not counted as ingested files."""
        repo_ingester = RepoIngester(_make_store())
        with tempfile.TemporaryDirectory() as repo_dir:
            repo = Path(repo_dir)
            repo.joinpath("readme.md").write_text("# Readme")
            repo.joinpath("config.yaml").write_text("key: val")
            stats = repo_ingester.ingest(repo_dir)
        assert stats["files"] == 0

    def test_ingests_python_files_with_mock_parser(self):
        """One ``.py`` file: the stub parser's counts become the totals."""
        repo_ingester = self._ingester_with_parsers(
            python={"functions": 3, "classes": 1, "edges": 5},
        )
        with tempfile.TemporaryDirectory() as repo_dir:
            Path(repo_dir).joinpath("app.py").write_text("def foo(): pass")
            stats = repo_ingester.ingest(repo_dir)
        expected = {"files": 1, "functions": 3, "classes": 1, "edges": 5}
        for key, value in expected.items():
            assert stats[key] == value

    def test_ingests_multiple_python_files(self):
        """Two ``.py`` files: per-file counts are summed."""
        repo_ingester = self._ingester_with_parsers(
            python={"functions": 2, "classes": 0, "edges": 1},
        )
        with tempfile.TemporaryDirectory() as repo_dir:
            repo = Path(repo_dir)
            repo.joinpath("a.py").write_text("def a(): pass")
            repo.joinpath("b.py").write_text("def b(): pass")
            stats = repo_ingester.ingest(repo_dir)
        assert stats["files"] == 2
        assert stats["functions"] == 4

    def test_handles_parse_exception_gracefully(self):
        """A parser that raises must not make ``ingest`` raise."""
        repo_ingester = RepoIngester(_make_store())
        failing_parser = MagicMock()
        failing_parser.parse_file.side_effect = Exception("parse error")
        repo_ingester._parsers["python"] = failing_parser

        with tempfile.TemporaryDirectory() as repo_dir:
            Path(repo_dir).joinpath("broken.py").write_text("invalid python @@@@")
            # The parser's exception must not propagate out of ingest().
            stats = repo_ingester.ingest(repo_dir)
        # The failing file contributes no functions to the totals.
        assert stats["functions"] == 0

    def test_ingests_typescript_files_with_mock_parser(self):
        """A ``.tsx`` file is routed to the ``"typescript"`` parser slot."""
        repo_ingester = self._ingester_with_parsers(
            typescript={"functions": 1, "classes": 1, "edges": 2},
        )
        with tempfile.TemporaryDirectory() as repo_dir:
            Path(repo_dir).joinpath("comp.tsx").write_text("const App = () => null")
            stats = repo_ingester.ingest(repo_dir)
        assert stats["files"] == 1

    def test_accumulates_stats_across_files(self):
        """Counts from different language parsers are added together."""
        repo_ingester = self._ingester_with_parsers(
            python={"functions": 5, "classes": 2, "edges": 10},
            typescript={"functions": 3, "classes": 1, "edges": 5},
        )
        with tempfile.TemporaryDirectory() as repo_dir:
            repo = Path(repo_dir)
            repo.joinpath("app.py").write_text("x=1")
            repo.joinpath("comp.ts").write_text("const x = 1")
            stats = repo_ingester.ingest(repo_dir)
        expected = {"files": 2, "functions": 8, "classes": 3, "edges": 15}
        for key, value in expected.items():
            assert stats[key] == value
177
178
179
# ── _iter_source_files() ──────────────────────────────────────────────────────
180
181
class TestIterSourceFiles:
    """Checks of which paths ``RepoIngester._iter_source_files`` yields."""

    def test_yields_python_files(self):
        """A lone ``.py`` file at the root is yielded."""
        repo_ingester = RepoIngester(_make_store())
        with tempfile.TemporaryDirectory() as repo_dir:
            repo = Path(repo_dir)
            repo.joinpath("app.py").write_text("x=1")
            found = list(repo_ingester._iter_source_files(repo))
        assert [path.name for path in found] == ["app.py"]

    def test_skips_git_dir(self):
        """Source files under ``.git`` are ignored."""
        repo_ingester = RepoIngester(_make_store())
        with tempfile.TemporaryDirectory() as repo_dir:
            repo = Path(repo_dir)
            git_dir = repo / ".git"
            git_dir.mkdir()
            git_dir.joinpath("hook.py").write_text("x=1")
            repo.joinpath("main.py").write_text("y=2")
            found = list(repo_ingester._iter_source_files(repo))
        assert [path.name for path in found] == ["main.py"]

    def test_skips_node_modules(self):
        """Source files under ``node_modules`` are ignored."""
        repo_ingester = RepoIngester(_make_store())
        with tempfile.TemporaryDirectory() as repo_dir:
            repo = Path(repo_dir)
            vendored = repo / "node_modules"
            vendored.mkdir()
            vendored.joinpath("dep.js").write_text("module.exports={}")
            repo.joinpath("app.ts").write_text("const x=1")
            found = list(repo_ingester._iter_source_files(repo))
        assert [path.name for path in found] == ["app.ts"]

    def test_skips_pycache(self):
        """Files under ``__pycache__`` are ignored; siblings are kept."""
        repo_ingester = RepoIngester(_make_store())
        with tempfile.TemporaryDirectory() as repo_dir:
            repo = Path(repo_dir)
            cache_dir = repo / "__pycache__"
            cache_dir.mkdir()
            cache_dir.joinpath("cached.py").write_text("x=1")
            repo.joinpath("real.py").write_text("y=2")
            found = list(repo_ingester._iter_source_files(repo))
        found_names = [path.name for path in found]
        assert "cached.py" not in found_names
        assert "real.py" in found_names

    def test_skips_non_source_files(self):
        """Markdown and JSON files are not yielded."""
        repo_ingester = RepoIngester(_make_store())
        with tempfile.TemporaryDirectory() as repo_dir:
            repo = Path(repo_dir)
            repo.joinpath("readme.md").write_text("# readme")
            repo.joinpath("config.json").write_text("{}")
            found = list(repo_ingester._iter_source_files(repo))
        assert found == []

    def test_recurses_into_subdirs(self):
        """Files in nested directories are discovered."""
        repo_ingester = RepoIngester(_make_store())
        with tempfile.TemporaryDirectory() as repo_dir:
            repo = Path(repo_dir)
            nested = repo / "src" / "api"
            nested.mkdir(parents=True)
            nested.joinpath("views.py").write_text("x=1")
            found = list(repo_ingester._iter_source_files(repo))
        assert [path.name for path in found] == ["views.py"]
247
248
249
# ── _get_parser() ─────────────────────────────────────────────────────────────
250
251
class TestGetParser:
    """Checks of ``RepoIngester._get_parser``: caching, errors, lazy imports."""

    @staticmethod
    def _lazy_parser_roundtrip(language, module_name, class_name):
        """Ask a fresh ingester for *language* with a stub parser module installed.

        A ``MagicMock`` module exposing *class_name* is placed in
        ``sys.modules`` under *module_name* for the duration of the call.

        Returns:
            ``(result, parser_instance, parser_class)`` where ``result`` is
            what ``_get_parser`` returned and ``parser_class`` is the mock
            that stands in for the parser class.
        """
        repo_ingester = RepoIngester(_make_store())
        parser_instance = MagicMock()
        parser_class = MagicMock(return_value=parser_instance)
        stub_module = MagicMock(**{class_name: parser_class})
        with patch.dict("sys.modules", {module_name: stub_module}):
            result = repo_ingester._get_parser(language)
        return result, parser_instance, parser_class

    def test_returns_cached_parser(self):
        """A parser already present in ``_parsers`` is returned as-is."""
        repo_ingester = RepoIngester(_make_store())
        cached = MagicMock()
        repo_ingester._parsers["python"] = cached
        assert repo_ingester._get_parser("python") is cached

    def test_raises_for_unknown_language(self):
        """An unknown language name raises ``ValueError``."""
        repo_ingester = RepoIngester(_make_store())
        with pytest.raises(ValueError, match="Unsupported language"):
            repo_ingester._get_parser("brainfuck")

    def test_creates_python_parser_via_lazy_import(self):
        """``"python"`` instantiates ``PythonParser`` with no arguments."""
        result, instance, parser_class = self._lazy_parser_roundtrip(
            "python", "navegador.ingestion.python", "PythonParser"
        )
        assert result is instance
        parser_class.assert_called_once_with()

    def test_creates_typescript_parser_via_lazy_import(self):
        """``"typescript"`` instantiates ``TypeScriptParser("typescript")``."""
        result, instance, parser_class = self._lazy_parser_roundtrip(
            "typescript", "navegador.ingestion.typescript", "TypeScriptParser"
        )
        assert result is instance
        parser_class.assert_called_once_with("typescript")

    def test_creates_go_parser_via_lazy_import(self):
        """``"go"`` instantiates ``GoParser`` with no arguments."""
        result, instance, parser_class = self._lazy_parser_roundtrip(
            "go", "navegador.ingestion.go", "GoParser"
        )
        assert result is instance
        parser_class.assert_called_once_with()

    def test_creates_rust_parser_via_lazy_import(self):
        """``"rust"`` instantiates ``RustParser`` with no arguments."""
        result, instance, parser_class = self._lazy_parser_roundtrip(
            "rust", "navegador.ingestion.rust", "RustParser"
        )
        assert result is instance
        parser_class.assert_called_once_with()

    def test_creates_java_parser_via_lazy_import(self):
        """``"java"`` instantiates ``JavaParser`` with no arguments."""
        result, instance, parser_class = self._lazy_parser_roundtrip(
            "java", "navegador.ingestion.java", "JavaParser"
        )
        assert result is instance
        parser_class.assert_called_once_with()
325
326
327
# ── defensive continue branch ─────────────────────────────────────────────────
328
329
class TestIngesterContinueBranch:
    """Covers the branch of ``ingest()`` that skips files with no mapped language."""

    def test_skips_file_when_language_not_in_map(self):
        """
        _iter_source_files filters to LANGUAGE_MAP extensions, but ingest()
        has a defensive `if not language: continue`. Test it by patching
        _iter_source_files to yield a .txt path.
        """
        # tempfile, Path and patch come from the module-level imports; the
        # function-scope re-imports that used to be here were redundant.
        store = _make_store()
        ingester = RepoIngester(store)
        with tempfile.TemporaryDirectory() as tmpdir:
            txt_file = Path(tmpdir) / "notes.txt"
            txt_file.write_text("just a text file")
            # Bypass the extension filter so ingest() itself sees the .txt path.
            with patch.object(ingester, "_iter_source_files", return_value=[txt_file]):
                stats = ingester.ingest(tmpdir)
        assert stats["files"] == 0
347
348
349
# ── LanguageParser base class (see TestLanguageParserBase at the end of this file) ──
350
351
# ── Incremental ingestion ─────────────────────────────────────────────────────
352
353
class TestIncrementalIngestion:
    """Tests for ``ingest(..., incremental=...)`` and the ``_file_hash`` helper."""

    def test_incremental_returns_skipped_count(self):
        """Incremental runs report a ``"skipped"`` counter in the stats."""
        store = _make_store()
        ingester = RepoIngester(store)
        with tempfile.TemporaryDirectory() as tmpdir:
            stats = ingester.ingest(tmpdir, incremental=True)
        assert "skipped" in stats

    def test_incremental_skips_unchanged_file(self):
        """A file reported as unchanged is counted as skipped, not as ingested."""
        store = _make_store()
        ingester = RepoIngester(store)
        mock_parser = MagicMock()
        mock_parser.parse_file.return_value = {"functions": 1, "classes": 0, "edges": 0}
        ingester._parsers["python"] = mock_parser

        with tempfile.TemporaryDirectory() as tmpdir:
            py_file = Path(tmpdir) / "app.py"
            py_file.write_text("def foo(): pass")

            # First ingest: the mocked store holds no hash, so the file is parsed.
            stats1 = ingester.ingest(tmpdir, incremental=True)
            assert stats1["files"] == 1
            assert stats1["skipped"] == 0

            # Second ingest: force the "stored hash matches" answer rather
            # than teaching the mocked store to return a real hash.
            ingester._file_unchanged = MagicMock(return_value=True)
            stats2 = ingester.ingest(tmpdir, incremental=True)
            assert stats2["files"] == 0
            assert stats2["skipped"] == 1

    def test_incremental_reparses_changed_file(self):
        """A changed file is re-ingested after its subgraph is cleared once."""
        store = _make_store()
        ingester = RepoIngester(store)
        mock_parser = MagicMock()
        mock_parser.parse_file.return_value = {"functions": 1, "classes": 0, "edges": 0}
        ingester._parsers["python"] = mock_parser

        with tempfile.TemporaryDirectory() as tmpdir:
            py_file = Path(tmpdir) / "app.py"
            py_file.write_text("def foo(): pass")

            ingester._file_unchanged = MagicMock(return_value=False)
            ingester._clear_file_subgraph = MagicMock()
            stats = ingester.ingest(tmpdir, incremental=True)
        assert stats["files"] == 1
        ingester._clear_file_subgraph.assert_called_once()

    def test_non_incremental_does_not_check_hash(self):
        """With ``incremental=False`` the hash comparison is never consulted."""
        store = _make_store()
        ingester = RepoIngester(store)
        mock_parser = MagicMock()
        mock_parser.parse_file.return_value = {"functions": 1, "classes": 0, "edges": 0}
        ingester._parsers["python"] = mock_parser

        with tempfile.TemporaryDirectory() as tmpdir:
            (Path(tmpdir) / "app.py").write_text("def foo(): pass")
            ingester._file_unchanged = MagicMock()
            ingester.ingest(tmpdir, incremental=False)
        ingester._file_unchanged.assert_not_called()

    def test_file_hash_is_deterministic(self):
        """Hashing the same file twice gives the same 64-character digest."""
        from navegador.ingestion.parser import _file_hash

        with tempfile.TemporaryDirectory() as tmpdir:
            f = Path(tmpdir) / "test.py"
            f.write_text("x = 1")
            h1 = _file_hash(f)
            h2 = _file_hash(f)
        assert h1 == h2
        assert len(h1) == 64  # SHA-256 hex

    def test_file_hash_changes_on_content_change(self):
        """Rewriting the file with different content changes the digest."""
        from navegador.ingestion.parser import _file_hash

        with tempfile.TemporaryDirectory() as tmpdir:
            f = Path(tmpdir) / "test.py"
            f.write_text("x = 1")
            h1 = _file_hash(f)
            f.write_text("x = 2")
            h2 = _file_hash(f)
        assert h1 != h2
437
438
439
class TestFileUnchanged:
    """Checks of ``RepoIngester._file_unchanged`` against canned query results."""

    @staticmethod
    def _ingester_with_rows(rows):
        """Return an ingester whose store answers every query with *rows*."""
        fake_store = _make_store()
        fake_store.query.return_value = MagicMock(result_set=rows)
        return RepoIngester(fake_store)

    def test_returns_false_for_new_file(self):
        """No stored row for the file means it is not considered unchanged."""
        repo_ingester = self._ingester_with_rows([])
        outcome = repo_ingester._file_unchanged("app.py", "abc123")
        assert outcome is False

    def test_returns_false_for_null_hash(self):
        """A stored row whose hash is ``None`` is not considered unchanged."""
        repo_ingester = self._ingester_with_rows([[None]])
        outcome = repo_ingester._file_unchanged("app.py", "abc123")
        assert outcome is False

    def test_returns_true_when_hash_matches(self):
        """A stored hash equal to the current one reports unchanged."""
        repo_ingester = self._ingester_with_rows([["abc123"]])
        outcome = repo_ingester._file_unchanged("app.py", "abc123")
        assert outcome is True

    def test_returns_false_when_hash_differs(self):
        """A stored hash different from the current one reports changed."""
        repo_ingester = self._ingester_with_rows([["old_hash"]])
        outcome = repo_ingester._file_unchanged("app.py", "new_hash")
        assert outcome is False
463
464
465
# ── Redaction integration ─────────────────────────────────────────────────────
466
467
class TestRedaction:
    """Tests for the ``redact`` option of ``RepoIngester``."""

    def test_constructor_with_redact_true(self):
        """``redact=True`` sets the flag and creates a detector."""
        store = _make_store()
        ingester = RepoIngester(store, redact=True)
        assert ingester.redact is True
        assert ingester._detector is not None

    def test_constructor_with_redact_false(self):
        """``redact=False`` leaves the flag off."""
        store = _make_store()
        ingester = RepoIngester(store, redact=False)
        assert ingester.redact is False

    def test_maybe_redact_noop_when_disabled(self):
        """With redaction off, the original path and root are handed back."""
        store = _make_store()
        ingester = RepoIngester(store, redact=False)
        with tempfile.TemporaryDirectory() as tmpdir:
            f = Path(tmpdir) / "app.py"
            f.write_text("x = 1")
            parse_path, root = ingester._maybe_redact_to_tmp(f, Path(tmpdir))
            assert parse_path == f
            assert root == Path(tmpdir)

    def test_maybe_redact_returns_original_if_no_sensitive(self):
        """With redaction on but nothing to redact, the original path is kept."""
        store = _make_store()
        ingester = RepoIngester(store, redact=True)
        with tempfile.TemporaryDirectory() as tmpdir:
            f = Path(tmpdir) / "app.py"
            f.write_text("def hello(): pass")
            parse_path, root = ingester._maybe_redact_to_tmp(f, Path(tmpdir))
            assert parse_path == f

    def test_maybe_redact_creates_temp_for_sensitive(self):
        """A file containing a secret is copied, redacted, to a different root."""
        import shutil

        store = _make_store()
        ingester = RepoIngester(store, redact=True)
        with tempfile.TemporaryDirectory() as tmpdir:
            f = Path(tmpdir) / "app.py"
            f.write_text('password = "s3cret123"')
            parse_path, root = ingester._maybe_redact_to_tmp(f, Path(tmpdir))
            try:
                assert parse_path != f
                assert root != Path(tmpdir)
                content = parse_path.read_text()
                assert "[REDACTED]" in content
            finally:
                # Clean up even when an assertion above fails, so a failing
                # run does not leave the redaction directory behind. The
                # guard leaves tmpdir itself to its own context manager.
                if root != Path(tmpdir):
                    shutil.rmtree(root, ignore_errors=True)

    def test_maybe_redact_handles_oserror(self):
        """An unreadable source path is returned unchanged instead of raising."""
        store = _make_store()
        ingester = RepoIngester(store, redact=True)
        fake_path = Path("/nonexistent/file.py")
        parse_path, root = ingester._maybe_redact_to_tmp(fake_path, Path("/nonexistent"))
        assert parse_path == fake_path

    def test_ingest_with_redact_cleans_up_temp(self):
        """Ingesting a file that contains a secret still reaches the parser."""
        # NOTE(review): despite the test name, nothing here checks that the
        # temporary redaction directory is removed; only that parse_file ran.
        store = _make_store()
        ingester = RepoIngester(store, redact=True)
        mock_parser = MagicMock()
        mock_parser.parse_file.return_value = {"functions": 1, "classes": 0, "edges": 0}
        ingester._parsers["python"] = mock_parser

        with tempfile.TemporaryDirectory() as tmpdir:
            f = Path(tmpdir) / "app.py"
            f.write_text('api_key = "sk-1234567890abcdef1234567890"')
            ingester.ingest(tmpdir)
        assert mock_parser.parse_file.called
532
533
534
class TestWatch:
    """Checks of ``RepoIngester.watch`` and how its callback controls the loop."""

    def test_watch_raises_on_missing_dir(self):
        """Watching a path that does not exist raises ``FileNotFoundError``."""
        repo_ingester = RepoIngester(_make_store())
        with pytest.raises(FileNotFoundError):
            repo_ingester.watch("/nonexistent/repo")

    def test_watch_calls_callback_and_stops_on_false(self):
        """A callback that returns ``False`` is invoked exactly once."""
        repo_ingester = RepoIngester(_make_store())
        received = []

        def callback(stats):
            received.append(stats)
            return False  # stop immediately

        with tempfile.TemporaryDirectory() as repo_dir:
            repo_ingester.watch(repo_dir, interval=0.01, callback=callback)
        assert len(received) == 1

    def test_watch_runs_multiple_cycles(self):
        """The loop keeps going while the callback returns ``True``."""
        repo_ingester = RepoIngester(_make_store())
        received = []

        def callback(stats):
            received.append(stats)
            return len(received) < 3  # run 3 times then stop

        with tempfile.TemporaryDirectory() as repo_dir:
            repo_ingester.watch(repo_dir, interval=0.01, callback=callback)
        assert len(received) == 3
566
567
568
class TestLanguageParserBase:
    """Tests for the ``LanguageParser`` base class."""

    def test_parse_file_raises_not_implemented(self):
        """Calling ``parse_file`` on the base class raises ``NotImplementedError``."""
        # Path and pytest come from the module-level imports; only
        # LanguageParser needs importing here.
        from navegador.ingestion.parser import LanguageParser

        lp = LanguageParser()
        with pytest.raises(NotImplementedError):
            lp.parse_file(Path("/tmp/x.py"), Path("/tmp"), MagicMock())
578

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button