Navegador

navegador / tests / test_optimization.py
Blame History Raw 607 lines
1
"""Tests for navegador.ingestion.optimization (#42 – #45)."""
2
3
from __future__ import annotations
4
5
import tempfile
6
import threading
7
from pathlib import Path
8
from unittest.mock import MagicMock, call, patch
9
10
import pytest
11
12
from navegador.ingestion.optimization import (
13
DiffResult,
14
GraphDiffer,
15
IncrementalParser,
16
NodeDescriptor,
17
ParallelIngester,
18
TreeCache,
19
)
20
21
22
# ── helpers ───────────────────────────────────────────────────────────────────
23
24
25
def _make_store(rows=None):
26
"""Return a MagicMock GraphStore whose query() returns *rows*."""
27
store = MagicMock()
28
store.query.return_value = MagicMock(result_set=rows or [])
29
return store
30
31
32
def _mock_tree(name: str = "tree") -> MagicMock:
33
t = MagicMock()
34
t.__repr__ = lambda self: f"<MockTree {name}>"
35
return t
36
37
38
# ── #42 — TreeCache ───────────────────────────────────────────────────────────
39
40
41
class TestTreeCache:
    """Tests for TreeCache: get/put, LRU eviction, stats, clear and concurrent puts."""

    # ── get / put ──────────────────────────────────────────────────────────────

    def test_get_returns_none_on_cold_cache(self):
        """A lookup on a freshly created cache returns None."""
        fresh = TreeCache()
        assert fresh.get("foo.py", "abc") is None

    def test_put_and_get_roundtrip(self):
        """A stored tree comes back as the very same object."""
        stored = _mock_tree()
        cache = TreeCache()
        cache.put("foo.py", "abc123", stored)
        assert cache.get("foo.py", "abc123") is stored

    def test_get_miss_does_not_return_wrong_hash(self):
        """Same path but a different hash is a miss."""
        cache = TreeCache()
        cache.put("foo.py", "hash-A", _mock_tree())
        assert cache.get("foo.py", "hash-B") is None

    def test_get_miss_does_not_return_wrong_path(self):
        """Same hash but a different path is a miss."""
        cache = TreeCache()
        cache.put("foo.py", "hash-A", _mock_tree())
        assert cache.get("bar.py", "hash-A") is None

    def test_put_overwrites_existing_entry(self):
        """A second put under the same key replaces the first tree."""
        cache = TreeCache()
        original, replacement = _mock_tree("t1"), _mock_tree("t2")
        cache.put("foo.py", "abc", original)
        cache.put("foo.py", "abc", replacement)
        assert cache.get("foo.py", "abc") is replacement

    # ── LRU eviction ──────────────────────────────────────────────────────────

    def test_evicts_lru_entry_when_full(self):
        """Inserting into a full cache drops the least-recently-used entry."""
        first, second, third = (_mock_tree(n) for n in ("t1", "t2", "t3"))
        cache = TreeCache(max_size=2)

        cache.put("a.py", "1", first)
        cache.put("b.py", "2", second)
        # Capacity reached; the next insert is expected to push out `first`.
        cache.put("c.py", "3", third)

        assert cache.get("a.py", "1") is None
        assert cache.get("b.py", "2") is second
        assert cache.get("c.py", "3") is third

    def test_get_promotes_entry_so_it_is_not_evicted(self):
        """Reading an entry refreshes it, so a different entry gets evicted."""
        first, second, third = (_mock_tree(n) for n in ("t1", "t2", "t3"))
        cache = TreeCache(max_size=2)

        cache.put("a.py", "1", first)
        cache.put("b.py", "2", second)
        # Reading `first` makes it the most-recently used entry ...
        cache.get("a.py", "1")
        # ... so `second` is the one expected to go when `third` arrives.
        cache.put("c.py", "3", third)

        assert cache.get("a.py", "1") is first
        assert cache.get("b.py", "2") is None
        assert cache.get("c.py", "3") is third

    def test_size_respects_max_size(self):
        """The entry count never exceeds max_size, however many puts occur."""
        limit = 3
        cache = TreeCache(max_size=limit)
        for index in range(10):
            cache.put(f"file{index}.py", str(index), _mock_tree())
        assert len(cache) <= limit

    def test_constructor_rejects_zero_max_size(self):
        """max_size=0 is rejected with ValueError."""
        with pytest.raises(ValueError):
            TreeCache(max_size=0)

    # ── stats ──────────────────────────────────────────────────────────────────

    def test_stats_initial_state(self):
        """All counters start at zero."""
        snapshot = TreeCache().stats()
        assert snapshot["hits"] == 0
        assert snapshot["misses"] == 0
        assert snapshot["size"] == 0

    def test_stats_records_hits(self):
        """Each successful get counts as one hit."""
        cache = TreeCache()
        cache.put("x.py", "h", _mock_tree())
        for _ in range(2):
            cache.get("x.py", "h")
        assert cache.stats()["hits"] == 2

    def test_stats_records_misses(self):
        """Each failed get counts as one miss."""
        cache = TreeCache()
        for path in ("x.py", "y.py"):
            cache.get(path, "h")
        assert cache.stats()["misses"] == 2

    def test_stats_size_tracks_entries(self):
        """The reported size equals the number of stored entries."""
        cache = TreeCache(max_size=10)
        for path, digest in (("a.py", "1"), ("b.py", "2")):
            cache.put(path, digest, _mock_tree())
        assert cache.stats()["size"] == 2

    def test_stats_max_size_reported(self):
        """stats() echoes the configured max_size."""
        assert TreeCache(max_size=42).stats()["max_size"] == 42

    # ── clear ──────────────────────────────────────────────────────────────────

    def test_clear_removes_all_entries(self):
        """After clear() the cache is empty and earlier keys miss."""
        cache = TreeCache()
        for path, digest in (("a.py", "1"), ("b.py", "2")):
            cache.put(path, digest, _mock_tree())
        cache.clear()
        assert len(cache) == 0
        assert cache.get("a.py", "1") is None

    def test_clear_resets_stats(self):
        """clear() zeroes hits, misses and size."""
        cache = TreeCache()
        cache.put("a.py", "1", _mock_tree())
        cache.get("a.py", "1")  # one hit
        cache.get("a.py", "bad")  # one miss
        cache.clear()
        snapshot = cache.stats()
        assert snapshot["hits"] == 0
        assert snapshot["misses"] == 0
        assert snapshot["size"] == 0

    # ── thread safety ──────────────────────────────────────────────────────────

    def test_concurrent_puts_do_not_corrupt_state(self):
        """Five threads doing 20 puts each raise nothing and respect max_size."""
        cache = TreeCache(max_size=50)
        failures = []

        def worker(worker_id: int) -> None:
            try:
                for index in range(20):
                    cache.put(f"file{worker_id}_{index}.py", str(index), _mock_tree())
            except Exception as exc:  # noqa: BLE001
                # Collect instead of raising so the main thread can assert on it.
                failures.append(exc)

        pool = [threading.Thread(target=worker, args=(wid,)) for wid in range(5)]
        for thread in pool:
            thread.start()
        for thread in pool:
            thread.join()

        assert not failures
        assert len(cache) <= 50
192
193
194
# ── #43 — IncrementalParser ───────────────────────────────────────────────────
195
196
197
class TestIncrementalParser:
    """Tests for IncrementalParser: caching, old-tree reuse and import fallback."""

    def _make_language_and_parser(self):
        """
        Return ``(fake_tree, mock_ts_parser, mock_ts_class, fake_language)``.

        ``mock_ts_class`` is a stand-in for ``tree_sitter.Parser``: calling it
        yields ``mock_ts_parser``, whose ``parse()`` returns ``fake_tree``.
        """
        fake_tree = _mock_tree("parsed")
        fake_language = MagicMock()

        # Patch tree_sitter.Parser so IncrementalParser can instantiate it.
        mock_ts_parser = MagicMock()
        mock_ts_parser.parse.return_value = fake_tree
        mock_ts_class = MagicMock(return_value=mock_ts_parser)

        return fake_tree, mock_ts_parser, mock_ts_class, fake_language

    def test_parse_returns_tree(self):
        """parse() returns whatever the underlying parser produced."""
        cache = TreeCache()
        inc = IncrementalParser(cache)

        fake_tree = _mock_tree()
        mock_ts_parser = MagicMock()
        mock_ts_parser.parse.return_value = fake_tree

        with patch("tree_sitter.Parser", return_value=mock_ts_parser):
            result = inc.parse(b"source", MagicMock(), "foo.py", "hash1")

        assert result is fake_tree

    def test_parse_stores_tree_in_cache(self):
        """A freshly parsed tree is retrievable from the cache afterwards."""
        cache = TreeCache()
        inc = IncrementalParser(cache)

        fake_tree = _mock_tree()
        mock_ts_parser = MagicMock()
        mock_ts_parser.parse.return_value = fake_tree

        with patch("tree_sitter.Parser", return_value=mock_ts_parser):
            inc.parse(b"source", MagicMock(), "foo.py", "hash1")

        assert cache.get("foo.py", "hash1") is fake_tree

    def test_parse_returns_cached_tree_without_calling_parser(self):
        """A cache hit short-circuits parsing entirely."""
        cached_tree = _mock_tree("cached")
        cache = TreeCache()
        cache.put("foo.py", "hash1", cached_tree)

        inc = IncrementalParser(cache)
        mock_ts_parser = MagicMock()

        with patch("tree_sitter.Parser", return_value=mock_ts_parser):
            result = inc.parse(b"source", MagicMock(), "foo.py", "hash1")

        assert result is cached_tree
        mock_ts_parser.parse.assert_not_called()

    def test_cache_hit_increments_hit_count(self):
        """A parse served from the cache is counted as exactly one hit."""
        cache = TreeCache()
        tree = _mock_tree()
        cache.put("foo.py", "hashX", tree)

        inc = IncrementalParser(cache)
        with patch("tree_sitter.Parser", return_value=MagicMock()):
            inc.parse(b"src", MagicMock(), "foo.py", "hashX")

        assert cache.stats()["hits"] == 1

    def test_parse_passes_old_tree_on_rehash(self):
        """When a stale tree exists for the same path, it is passed as old_tree."""
        cache = TreeCache()
        stale_tree = _mock_tree("stale")
        cache.put("bar.py", "old-hash", stale_tree)

        new_tree = _mock_tree("new")
        mock_ts_parser = MagicMock()
        mock_ts_parser.parse.return_value = new_tree

        inc = IncrementalParser(cache)
        with patch("tree_sitter.Parser", return_value=mock_ts_parser):
            result = inc.parse(b"new source", MagicMock(), "bar.py", "new-hash")

        assert result is new_tree
        # old_tree must have been passed as the second positional argument.
        mock_ts_parser.parse.assert_called_once_with(b"new source", stale_tree)

    def test_parse_without_old_tree_calls_parse_with_source_only(self):
        """With no prior tree for the path, parse() receives only the source."""
        cache = TreeCache()
        new_tree = _mock_tree()
        mock_ts_parser = MagicMock()
        mock_ts_parser.parse.return_value = new_tree

        inc = IncrementalParser(cache)
        with patch("tree_sitter.Parser", return_value=mock_ts_parser):
            inc.parse(b"source", MagicMock(), "baz.py", "hash1")

        mock_ts_parser.parse.assert_called_once_with(b"source")

    def test_default_cache_is_created_if_none_given(self):
        """Constructing without arguments provides a TreeCache on ``.cache``."""
        inc = IncrementalParser()
        assert isinstance(inc.cache, TreeCache)

    def test_custom_cache_is_used(self):
        """A cache passed to the constructor is exposed unchanged on ``.cache``."""
        cache = TreeCache(max_size=5)
        inc = IncrementalParser(cache)
        assert inc.cache is cache

    def test_fallback_when_tree_sitter_not_importable(self):
        """When tree_sitter is unavailable, language is used directly as parser."""
        cache = TreeCache()
        fake_tree = _mock_tree()
        fake_language = MagicMock()
        fake_language.parse.return_value = fake_tree

        inc = IncrementalParser(cache)

        import builtins

        real_import = builtins.__import__

        def _block_tree_sitter(name, *args, **kwargs):
            # Fail only the tree_sitter import; delegate everything else.
            if name == "tree_sitter":
                raise ImportError("mocked absence")
            return real_import(name, *args, **kwargs)

        with patch("builtins.__import__", side_effect=_block_tree_sitter):
            result = inc.parse(b"source", fake_language, "x.py", "h1")

        assert result is fake_tree
327
328
329
# ── #44 — GraphDiffer ─────────────────────────────────────────────────────────
330
331
332
def _nd(label: str, name: str, line_start: int, **extra) -> NodeDescriptor:
    """Build a NodeDescriptor, passing any additional keyword arguments as ``extra``."""
    fields = {
        "label": label,
        "name": name,
        "line_start": line_start,
        "extra": extra,
    }
    return NodeDescriptor(**fields)
334
335
336
class TestNodeDescriptor:
    """Tests for NodeDescriptor's identity key and equality comparison."""

    def test_identity_key(self):
        """identity_key() is the (label, name, line_start) triple."""
        descriptor = _nd("Function", "foo", 10)
        expected_key = ("Function", "foo", 10)
        assert descriptor.identity_key() == expected_key

    def test_equality_same(self):
        """Descriptors built from identical fields compare equal."""
        left = _nd("Function", "foo", 10)
        right = _nd("Function", "foo", 10)
        assert left == right

    def test_equality_different_line(self):
        """A differing line_start makes descriptors unequal."""
        left = _nd("Function", "foo", 10)
        right = _nd("Function", "foo", 11)
        assert left != right

    def test_equality_different_extra(self):
        """Differing extra properties make descriptors unequal."""
        hello = _nd("Function", "foo", 10, docstring="hello")
        world = _nd("Function", "foo", 10, docstring="world")
        assert hello != world
351
352
353
class TestGraphDiffer:
    """Tests for GraphDiffer.diff_file() and the DiffResult it returns."""

    def test_diff_empty_new_and_empty_existing(self):
        """No stored rows and no new nodes gives an all-zero result."""
        store = _make_store(rows=[])
        differ = GraphDiffer(store)
        result = differ.diff_file("src/app.py", [])
        assert result == DiffResult(added=0, modified=0, unchanged=0, removed=0)

    def test_diff_all_new_nodes(self):
        """With an empty store, every new node counts as added."""
        store = _make_store(rows=[])
        differ = GraphDiffer(store)
        nodes = [
            _nd("Function", "foo", 1),
            _nd("Class", "Bar", 10),
        ]
        result = differ.diff_file("src/app.py", nodes)
        assert result.added == 2
        assert result.modified == 0
        assert result.unchanged == 0
        assert result.removed == 0

    def test_diff_all_unchanged_nodes(self):
        """Nodes matching the stored rows exactly count as unchanged."""
        store = _make_store(rows=[
            ["Function", "foo", 1],
            ["Class", "Bar", 10],
        ])
        differ = GraphDiffer(store)
        nodes = [
            _nd("Function", "foo", 1),
            _nd("Class", "Bar", 10),
        ]
        result = differ.diff_file("src/app.py", nodes)
        assert result.unchanged == 2
        assert result.added == 0
        assert result.modified == 0
        assert result.removed == 0

    def test_diff_modified_node(self):
        """Same identity key but different extra props counts as modified."""
        store = _make_store(rows=[["Function", "foo", 1]])
        differ = GraphDiffer(store)
        # Existing node in store has no extra; new node has docstring.
        nodes = [_nd("Function", "foo", 1, docstring="now documented")]
        result = differ.diff_file("src/app.py", nodes)
        # The identity key matches but extra differs → modified.
        assert result.modified == 1
        assert result.unchanged == 0
        assert result.added == 0

    def test_diff_removed_nodes(self):
        """Stored rows absent from the new parse count as removed."""
        store = _make_store(rows=[
            ["Function", "foo", 1],
            ["Function", "bar", 5],
        ])
        differ = GraphDiffer(store)
        # Only foo is present in new parse; bar was removed.
        nodes = [_nd("Function", "foo", 1)]
        result = differ.diff_file("src/app.py", nodes)
        assert result.removed == 1
        assert result.unchanged == 1

    def test_diff_mixed_scenario(self):
        """One unchanged, one added and one removed node are each counted once."""
        store = _make_store(rows=[
            ["Function", "old_func", 1],
            ["Class", "MyClass", 20],
        ])
        differ = GraphDiffer(store)
        new_nodes = [
            _nd("Class", "MyClass", 20),  # unchanged
            _nd("Function", "new_func", 5),  # added
        ]
        result = differ.diff_file("src/app.py", new_nodes)
        assert result.unchanged == 1
        assert result.added == 1
        assert result.removed == 1
        assert result.modified == 0

    def test_diff_skips_rows_with_none_name(self):
        """A stored row made of None values is ignored rather than counted."""
        store = _make_store(rows=[[None, None, None]])
        differ = GraphDiffer(store)
        result = differ.diff_file("src/app.py", [_nd("Function", "foo", 1)])
        # The None row is skipped; foo is treated as a new node.
        assert result.added == 1
        assert result.removed == 0

    def test_total_changes_property(self):
        """total_changes is 6 for added=3, modified=1, removed=2 (unchanged=5 excluded)."""
        result = DiffResult(added=3, modified=1, unchanged=5, removed=2)
        assert result.total_changes == 6

    def test_store_is_queried_with_file_path(self):
        """diff_file() queries the store once, passing the file path as a parameter."""
        store = _make_store(rows=[])
        differ = GraphDiffer(store)
        differ.diff_file("src/models.py", [])
        # Ensure the store was actually queried with the right path param.
        store.query.assert_called_once()
        # The second positional arg to store.query should contain file_path.
        call_params = store.query.call_args[0][1]
        assert call_params["file_path"] == "src/models.py"
451
452
453
# ── #45 — ParallelIngester ────────────────────────────────────────────────────
454
455
456
class TestParallelIngester:
    """Tests for ParallelIngester.ingest_parallel() using a mocked Python parser."""

    def _setup_ingester_with_mock_parser(self, store, parse_result=None):
        """
        Return a ParallelIngester whose internal RepoIngester has a mock
        Python parser installed.

        The mock's ``parse_file()`` returns *parse_result*, defaulting to
        ``{"functions": 2, "classes": 1, "edges": 3}``. Returns the tuple
        ``(ingester, mock_parser)``.
        """
        if parse_result is None:
            parse_result = {"functions": 2, "classes": 1, "edges": 3}

        ingester = ParallelIngester(store)
        mock_parser = MagicMock()
        mock_parser.parse_file.return_value = parse_result
        ingester._ingester._parsers["python"] = mock_parser
        return ingester, mock_parser

    def test_raises_on_missing_dir(self):
        """A nonexistent repository path raises FileNotFoundError."""
        store = _make_store()
        ingester = ParallelIngester(store)
        with pytest.raises(FileNotFoundError):
            ingester.ingest_parallel("/nonexistent/path")

    def test_returns_stats_dict_with_all_keys(self):
        """The returned stats contain every expected counter key."""
        store = _make_store()
        ingester, _ = self._setup_ingester_with_mock_parser(store)
        with tempfile.TemporaryDirectory() as tmpdir:
            stats = ingester.ingest_parallel(tmpdir)
        assert {"files", "functions", "classes", "edges", "skipped", "errors"} <= set(stats)

    def test_processes_single_file(self):
        """One Python file yields exactly the parser's reported counts."""
        store = _make_store()
        ingester, _ = self._setup_ingester_with_mock_parser(
            store, {"functions": 3, "classes": 1, "edges": 4}
        )
        with tempfile.TemporaryDirectory() as tmpdir:
            (Path(tmpdir) / "app.py").write_text("def foo(): pass")
            stats = ingester.ingest_parallel(tmpdir)

        assert stats["files"] == 1
        assert stats["functions"] == 3
        assert stats["classes"] == 1
        assert stats["edges"] == 4
        assert stats["errors"] == 0

    def test_processes_multiple_files_concurrently(self):
        """Five files with three workers are all ingested without errors."""
        store = _make_store()
        ingester, _ = self._setup_ingester_with_mock_parser(
            store, {"functions": 1, "classes": 0, "edges": 0}
        )
        with tempfile.TemporaryDirectory() as tmpdir:
            for i in range(5):
                (Path(tmpdir) / f"mod{i}.py").write_text(f"def f{i}(): pass")
            stats = ingester.ingest_parallel(tmpdir, max_workers=3)

        assert stats["files"] == 5
        assert stats["functions"] == 5
        assert stats["errors"] == 0

    def test_aggregates_stats_across_files(self):
        """Per-file parser counts are summed into the overall stats."""
        store = _make_store()
        ingester, _ = self._setup_ingester_with_mock_parser(
            store, {"functions": 2, "classes": 1, "edges": 5}
        )
        with tempfile.TemporaryDirectory() as tmpdir:
            (Path(tmpdir) / "a.py").write_text("x=1")
            (Path(tmpdir) / "b.py").write_text("y=2")
            stats = ingester.ingest_parallel(tmpdir)

        assert stats["files"] == 2
        assert stats["functions"] == 4
        assert stats["classes"] == 2
        assert stats["edges"] == 10

    def test_clear_flag_calls_store_clear(self):
        """clear=True results in exactly one store.clear() call."""
        store = _make_store()
        ingester, _ = self._setup_ingester_with_mock_parser(store)
        with tempfile.TemporaryDirectory() as tmpdir:
            ingester.ingest_parallel(tmpdir, clear=True)
        store.clear.assert_called_once()

    def test_no_clear_by_default(self):
        """Without the clear flag the store is never cleared."""
        store = _make_store()
        ingester, _ = self._setup_ingester_with_mock_parser(store)
        with tempfile.TemporaryDirectory() as tmpdir:
            ingester.ingest_parallel(tmpdir)
        store.clear.assert_not_called()

    def test_empty_repo_returns_zero_counts(self):
        """An empty directory yields zero files and zero functions."""
        store = _make_store()
        ingester, _ = self._setup_ingester_with_mock_parser(store)
        with tempfile.TemporaryDirectory() as tmpdir:
            stats = ingester.ingest_parallel(tmpdir)
        assert stats["files"] == 0
        assert stats["functions"] == 0

    def test_parser_exception_increments_errors_not_files(self):
        """A parser that raises is counted under errors, not files."""
        store = _make_store()
        ingester = ParallelIngester(store)
        broken_parser = MagicMock()
        broken_parser.parse_file.side_effect = RuntimeError("boom")
        ingester._ingester._parsers["python"] = broken_parser

        with tempfile.TemporaryDirectory() as tmpdir:
            (Path(tmpdir) / "broken.py").write_text("def x(): pass")
            stats = ingester.ingest_parallel(tmpdir)

        assert stats["files"] == 0
        assert stats["errors"] == 1

    def test_incremental_skips_unchanged_files(self):
        """In incremental mode, files reported unchanged are skipped, not parsed."""
        store = _make_store()
        ingester, mock_parser = self._setup_ingester_with_mock_parser(store)
        # Force the change check to report every file as unchanged.
        ingester._ingester._file_unchanged = MagicMock(return_value=True)

        with tempfile.TemporaryDirectory() as tmpdir:
            (Path(tmpdir) / "unchanged.py").write_text("x=1")
            stats = ingester.ingest_parallel(tmpdir, incremental=True)

        assert stats["skipped"] == 1
        assert stats["files"] == 0
        mock_parser.parse_file.assert_not_called()

    def test_creates_repository_node(self):
        """Ingestion creates exactly one Repository node with name and path props."""
        store = _make_store()
        ingester, _ = self._setup_ingester_with_mock_parser(store)
        with tempfile.TemporaryDirectory() as tmpdir:
            ingester.ingest_parallel(tmpdir)

        from navegador.graph.schema import NodeLabel

        store.create_node.assert_called_once()
        label, props = store.create_node.call_args[0]
        assert label == NodeLabel.Repository
        assert "name" in props and "path" in props

    def test_max_workers_none_uses_default(self):
        """Passing max_workers=None should not raise."""
        store = _make_store()
        ingester, _ = self._setup_ingester_with_mock_parser(store)
        with tempfile.TemporaryDirectory() as tmpdir:
            stats = ingester.ingest_parallel(tmpdir, max_workers=None)
        assert isinstance(stats, dict)

    def test_skips_non_python_files(self):
        """Markdown and YAML files are not counted and never reach the parser."""
        store = _make_store()
        ingester, mock_parser = self._setup_ingester_with_mock_parser(store)
        with tempfile.TemporaryDirectory() as tmpdir:
            (Path(tmpdir) / "readme.md").write_text("# readme")
            (Path(tmpdir) / "config.yaml").write_text("key: value")
            stats = ingester.ingest_parallel(tmpdir)
        assert stats["files"] == 0
        mock_parser.parse_file.assert_not_called()
607

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button