Navegador

navegador / tests / test_optimization.py
Source Blame History 606 lines
a24deda… lmata 1 """Tests for navegador.ingestion.optimization (#42 – #45)."""
a24deda… lmata 2
a24deda… lmata 3 from __future__ import annotations
a24deda… lmata 4
a24deda… lmata 5 import tempfile
a24deda… lmata 6 import threading
a24deda… lmata 7 from pathlib import Path
a24deda… lmata 8 from unittest.mock import MagicMock, call, patch
a24deda… lmata 9
a24deda… lmata 10 import pytest
a24deda… lmata 11
a24deda… lmata 12 from navegador.ingestion.optimization import (
a24deda… lmata 13 DiffResult,
a24deda… lmata 14 GraphDiffer,
a24deda… lmata 15 IncrementalParser,
a24deda… lmata 16 NodeDescriptor,
a24deda… lmata 17 ParallelIngester,
a24deda… lmata 18 TreeCache,
a24deda… lmata 19 )
a24deda… lmata 20
a24deda… lmata 21
a24deda… lmata 22 # ── helpers ───────────────────────────────────────────────────────────────────
a24deda… lmata 23
a24deda… lmata 24
a24deda… lmata 25 def _make_store(rows=None):
a24deda… lmata 26 """Return a MagicMock GraphStore whose query() returns *rows*."""
a24deda… lmata 27 store = MagicMock()
a24deda… lmata 28 store.query.return_value = MagicMock(result_set=rows or [])
a24deda… lmata 29 return store
a24deda… lmata 30
a24deda… lmata 31
a24deda… lmata 32 def _mock_tree(name: str = "tree") -> MagicMock:
a24deda… lmata 33 t = MagicMock()
a24deda… lmata 34 t.__repr__ = lambda self: f"<MockTree {name}>"
a24deda… lmata 35 return t
a24deda… lmata 36
a24deda… lmata 37
a24deda… lmata 38 # ── #42 — TreeCache ───────────────────────────────────────────────────────────
a24deda… lmata 39
a24deda… lmata 40
class TestTreeCache:
    """Tests for TreeCache: lookups, LRU eviction, statistics, clearing, threading."""

    # ── get / put ──────────────────────────────────────────────────────────────

    def test_get_returns_none_on_cold_cache(self):
        """Looking up anything in a freshly built cache yields None."""
        assert TreeCache().get("foo.py", "abc") is None

    def test_put_and_get_roundtrip(self):
        """The object handed to put() is the very one get() hands back."""
        stored = _mock_tree()
        tree_cache = TreeCache()
        tree_cache.put("foo.py", "abc123", stored)
        assert tree_cache.get("foo.py", "abc123") is stored

    def test_get_miss_does_not_return_wrong_hash(self):
        """Same path but a different hash must miss."""
        tree_cache = TreeCache()
        tree_cache.put("foo.py", "hash-A", _mock_tree())
        assert tree_cache.get("foo.py", "hash-B") is None

    def test_get_miss_does_not_return_wrong_path(self):
        """Same hash but a different path must miss."""
        tree_cache = TreeCache()
        tree_cache.put("foo.py", "hash-A", _mock_tree())
        assert tree_cache.get("bar.py", "hash-A") is None

    def test_put_overwrites_existing_entry(self):
        """A second put() under the same path and hash replaces the first tree."""
        original = _mock_tree("t1")
        replacement = _mock_tree("t2")
        tree_cache = TreeCache()
        tree_cache.put("foo.py", "abc", original)
        tree_cache.put("foo.py", "abc", replacement)
        assert tree_cache.get("foo.py", "abc") is replacement

    # ── LRU eviction ──────────────────────────────────────────────────────────

    def test_evicts_lru_entry_when_full(self):
        """With room for two entries, a third insert drops the oldest one."""
        first, second, third = (_mock_tree(n) for n in ("t1", "t2", "t3"))
        tree_cache = TreeCache(max_size=2)

        tree_cache.put("a.py", "1", first)
        tree_cache.put("b.py", "2", second)
        # Both slots are taken, so this insert has to push out "a.py".
        tree_cache.put("c.py", "3", third)

        assert tree_cache.get("a.py", "1") is None
        assert tree_cache.get("b.py", "2") is second
        assert tree_cache.get("c.py", "3") is third

    def test_get_promotes_entry_so_it_is_not_evicted(self):
        """Reading an entry refreshes it, so the other entry is evicted instead."""
        first, second, third = (_mock_tree(n) for n in ("t1", "t2", "t3"))
        tree_cache = TreeCache(max_size=2)

        tree_cache.put("a.py", "1", first)
        tree_cache.put("b.py", "2", second)
        # Reading "a.py" makes it the most recently used entry ...
        tree_cache.get("a.py", "1")
        # ... which leaves "b.py" as the eviction candidate for this insert.
        tree_cache.put("c.py", "3", third)

        assert tree_cache.get("a.py", "1") is first
        assert tree_cache.get("b.py", "2") is None
        assert tree_cache.get("c.py", "3") is third

    def test_size_respects_max_size(self):
        """Inserting more entries than max_size never grows the cache past it."""
        tree_cache = TreeCache(max_size=3)
        for index in range(10):
            tree_cache.put(f"file{index}.py", str(index), _mock_tree())
        assert len(tree_cache) <= 3

    def test_constructor_rejects_zero_max_size(self):
        """max_size=0 is rejected with ValueError."""
        with pytest.raises(ValueError):
            TreeCache(max_size=0)

    # ── stats ──────────────────────────────────────────────────────────────────

    def test_stats_initial_state(self):
        """A new cache reports zero hits, misses and size."""
        snapshot = TreeCache().stats()
        for counter in ("hits", "misses", "size"):
            assert snapshot[counter] == 0

    def test_stats_records_hits(self):
        """Each successful get() is counted as a hit."""
        tree_cache = TreeCache()
        tree_cache.put("x.py", "h", _mock_tree())
        for _ in range(2):
            tree_cache.get("x.py", "h")
        assert tree_cache.stats()["hits"] == 2

    def test_stats_records_misses(self):
        """Each failed get() is counted as a miss."""
        tree_cache = TreeCache()
        for path in ("x.py", "y.py"):
            tree_cache.get(path, "h")
        assert tree_cache.stats()["misses"] == 2

    def test_stats_size_tracks_entries(self):
        """The reported size equals the number of stored entries."""
        tree_cache = TreeCache(max_size=10)
        for path, digest in (("a.py", "1"), ("b.py", "2")):
            tree_cache.put(path, digest, _mock_tree())
        assert tree_cache.stats()["size"] == 2

    def test_stats_max_size_reported(self):
        """stats() echoes the configured max_size."""
        assert TreeCache(max_size=42).stats()["max_size"] == 42

    # ── clear ──────────────────────────────────────────────────────────────────

    def test_clear_removes_all_entries(self):
        """After clear() the cache is empty and old keys miss."""
        tree_cache = TreeCache()
        for path, digest in (("a.py", "1"), ("b.py", "2")):
            tree_cache.put(path, digest, _mock_tree())
        tree_cache.clear()
        assert len(tree_cache) == 0
        assert tree_cache.get("a.py", "1") is None

    def test_clear_resets_stats(self):
        """clear() zeroes the hit, miss and size counters."""
        tree_cache = TreeCache()
        tree_cache.put("a.py", "1", _mock_tree())
        # One hit followed by one miss, so both counters are non-zero beforehand.
        for digest in ("1", "bad"):
            tree_cache.get("a.py", digest)
        tree_cache.clear()
        snapshot = tree_cache.stats()
        for counter in ("hits", "misses", "size"):
            assert snapshot[counter] == 0

    # ── thread safety ──────────────────────────────────────────────────────────

    def test_concurrent_puts_do_not_corrupt_state(self):
        """Five threads inserting concurrently raise nothing and respect max_size."""
        tree_cache = TreeCache(max_size=50)
        failures = []

        def writer(n: int) -> None:
            # Exceptions are collected rather than raised so the main thread
            # can assert on them after joining.
            try:
                for i in range(20):
                    tree_cache.put(f"file{n}_{i}.py", str(i), _mock_tree())
            except Exception as exc:  # noqa: BLE001
                failures.append(exc)

        workers = []
        for worker_id in range(5):
            workers.append(threading.Thread(target=writer, args=(worker_id,)))
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()

        assert not failures
        assert len(tree_cache) <= 50
a24deda… lmata 192
a24deda… lmata 193
a24deda… lmata 194 # ── #43 — IncrementalParser ───────────────────────────────────────────────────
a24deda… lmata 195
a24deda… lmata 196
class TestIncrementalParser:
    """Tests for IncrementalParser, run with ``tree_sitter.Parser`` patched out."""

    def _make_language_and_parser(self):
        """
        Build the mock objects needed to drive IncrementalParser.

        Returns a 4-tuple ``(fake_tree, mock_ts_parser, mock_ts_class,
        fake_language)`` where ``mock_ts_class()`` yields ``mock_ts_parser``
        and ``mock_ts_parser.parse()`` yields ``fake_tree``.
        """
        fake_tree = _mock_tree("parsed")
        fake_language = MagicMock()

        # Stand-in for tree_sitter.Parser so IncrementalParser can instantiate it.
        mock_ts_parser = MagicMock()
        mock_ts_parser.parse.return_value = fake_tree
        mock_ts_class = MagicMock(return_value=mock_ts_parser)

        return fake_tree, mock_ts_parser, mock_ts_class, fake_language

    def test_parse_returns_tree(self):
        """parse() returns whatever the underlying parser produced."""
        cache = TreeCache()
        inc = IncrementalParser(cache)

        fake_tree = _mock_tree()
        mock_ts_parser = MagicMock()
        mock_ts_parser.parse.return_value = fake_tree

        with patch("tree_sitter.Parser", return_value=mock_ts_parser):
            result = inc.parse(b"source", MagicMock(), "foo.py", "hash1")

        assert result is fake_tree

    def test_parse_stores_tree_in_cache(self):
        """A freshly parsed tree is retrievable from the cache afterwards."""
        cache = TreeCache()
        inc = IncrementalParser(cache)

        fake_tree = _mock_tree()
        mock_ts_parser = MagicMock()
        mock_ts_parser.parse.return_value = fake_tree

        with patch("tree_sitter.Parser", return_value=mock_ts_parser):
            inc.parse(b"source", MagicMock(), "foo.py", "hash1")

        assert cache.get("foo.py", "hash1") is fake_tree

    def test_parse_returns_cached_tree_without_calling_parser(self):
        """On a cache hit the parser's parse() is never invoked."""
        cached_tree = _mock_tree("cached")
        cache = TreeCache()
        cache.put("foo.py", "hash1", cached_tree)

        inc = IncrementalParser(cache)
        mock_ts_parser = MagicMock()

        with patch("tree_sitter.Parser", return_value=mock_ts_parser):
            result = inc.parse(b"source", MagicMock(), "foo.py", "hash1")

        assert result is cached_tree
        mock_ts_parser.parse.assert_not_called()

    def test_cache_hit_increments_hit_count(self):
        """A parse() served from the cache shows up as one hit in the stats."""
        cache = TreeCache()
        tree = _mock_tree()
        cache.put("foo.py", "hashX", tree)

        inc = IncrementalParser(cache)
        with patch("tree_sitter.Parser", return_value=MagicMock()):
            inc.parse(b"src", MagicMock(), "foo.py", "hashX")

        assert cache.stats()["hits"] == 1

    def test_parse_passes_old_tree_on_rehash(self):
        """When a stale tree exists for the same path, it is passed as old_tree."""
        cache = TreeCache()
        stale_tree = _mock_tree("stale")
        cache.put("bar.py", "old-hash", stale_tree)

        new_tree = _mock_tree("new")
        mock_ts_parser = MagicMock()
        mock_ts_parser.parse.return_value = new_tree

        inc = IncrementalParser(cache)
        with patch("tree_sitter.Parser", return_value=mock_ts_parser):
            result = inc.parse(b"new source", MagicMock(), "bar.py", "new-hash")

        assert result is new_tree
        # old_tree must have been passed as the second positional argument.
        mock_ts_parser.parse.assert_called_once_with(b"new source", stale_tree)

    def test_parse_without_old_tree_calls_parse_with_source_only(self):
        """With nothing cached for the path, parse() receives the source alone."""
        cache = TreeCache()
        new_tree = _mock_tree()
        mock_ts_parser = MagicMock()
        mock_ts_parser.parse.return_value = new_tree

        inc = IncrementalParser(cache)
        with patch("tree_sitter.Parser", return_value=mock_ts_parser):
            inc.parse(b"source", MagicMock(), "baz.py", "hash1")

        mock_ts_parser.parse.assert_called_once_with(b"source")

    def test_default_cache_is_created_if_none_given(self):
        """Constructing without arguments provides a TreeCache on ``.cache``."""
        inc = IncrementalParser()
        assert isinstance(inc.cache, TreeCache)

    def test_custom_cache_is_used(self):
        """A cache passed to the constructor is exposed unchanged on ``.cache``."""
        cache = TreeCache(max_size=5)
        inc = IncrementalParser(cache)
        assert inc.cache is cache

    def test_fallback_when_tree_sitter_not_importable(self):
        """When tree_sitter is unavailable, language is used directly as parser."""
        cache = TreeCache()
        fake_tree = _mock_tree()
        fake_language = MagicMock()
        fake_language.parse.return_value = fake_tree

        inc = IncrementalParser(cache)

        import builtins

        # Keep a handle on the real importer so every other import still works.
        real_import = builtins.__import__

        def _block_tree_sitter(name, *args, **kwargs):
            # Simulate a missing package for tree_sitter only.
            if name == "tree_sitter":
                raise ImportError("mocked absence")
            return real_import(name, *args, **kwargs)

        with patch("builtins.__import__", side_effect=_block_tree_sitter):
            result = inc.parse(b"source", fake_language, "x.py", "h1")

        assert result is fake_tree
a24deda… lmata 327
a24deda… lmata 328
a24deda… lmata 329 # ── #44 — GraphDiffer ─────────────────────────────────────────────────────────
a24deda… lmata 330
a24deda… lmata 331
def _nd(label: str, name: str, line_start: int, **extra) -> NodeDescriptor:
    """Shorthand NodeDescriptor factory; extra keyword arguments become ``extra``."""
    descriptor = NodeDescriptor(
        label=label,
        name=name,
        line_start=line_start,
        extra=extra,
    )
    return descriptor
a24deda… lmata 334
a24deda… lmata 335
class TestNodeDescriptor:
    """Tests for NodeDescriptor's identity key and equality comparison."""

    def test_identity_key(self):
        """identity_key() is the (label, name, line_start) triple."""
        descriptor = _nd("Function", "foo", 10)
        expected_key = ("Function", "foo", 10)
        assert descriptor.identity_key() == expected_key

    def test_equality_same(self):
        """Descriptors built from identical arguments compare equal."""
        left = _nd("Function", "foo", 10)
        right = _nd("Function", "foo", 10)
        assert left == right

    def test_equality_different_line(self):
        """A different line_start makes two descriptors unequal."""
        left = _nd("Function", "foo", 10)
        right = _nd("Function", "foo", 11)
        assert left != right

    def test_equality_different_extra(self):
        """Differing extra properties make two descriptors unequal."""
        documented_hello = _nd("Function", "foo", 10, docstring="hello")
        documented_world = _nd("Function", "foo", 10, docstring="world")
        assert documented_hello != documented_world
a24deda… lmata 351
a24deda… lmata 352
class TestGraphDiffer:
    """Tests for GraphDiffer.diff_file() and DiffResult.

    The store is a MagicMock whose ``query()`` result rows mirror the
    positional arguments given to ``_nd`` (label, name, line_start).
    """

    def test_diff_empty_new_and_empty_existing(self):
        """No stored rows and no new nodes gives an all-zero DiffResult."""
        store = _make_store(rows=[])
        differ = GraphDiffer(store)
        result = differ.diff_file("src/app.py", [])
        assert result == DiffResult(added=0, modified=0, unchanged=0, removed=0)

    def test_diff_all_new_nodes(self):
        """With an empty store every new node counts as added."""
        store = _make_store(rows=[])
        differ = GraphDiffer(store)
        nodes = [
            _nd("Function", "foo", 1),
            _nd("Class", "Bar", 10),
        ]
        result = differ.diff_file("src/app.py", nodes)
        assert result.added == 2
        assert result.modified == 0
        assert result.unchanged == 0
        assert result.removed == 0

    def test_diff_all_unchanged_nodes(self):
        """Nodes matching the stored rows exactly count as unchanged."""
        store = _make_store(rows=[
            ["Function", "foo", 1],
            ["Class", "Bar", 10],
        ])
        differ = GraphDiffer(store)
        nodes = [
            _nd("Function", "foo", 1),
            _nd("Class", "Bar", 10),
        ]
        result = differ.diff_file("src/app.py", nodes)
        assert result.unchanged == 2
        assert result.added == 0
        assert result.modified == 0
        assert result.removed == 0

    def test_diff_modified_node(self):
        """Same identity key but different extra props counts as modified."""
        store = _make_store(rows=[["Function", "foo", 1]])
        differ = GraphDiffer(store)
        # Existing node in store has no extra; new node has docstring.
        nodes = [_nd("Function", "foo", 1, docstring="now documented")]
        result = differ.diff_file("src/app.py", nodes)
        # The identity key matches but extra differs → modified.
        assert result.modified == 1
        assert result.unchanged == 0
        assert result.added == 0

    def test_diff_removed_nodes(self):
        """A stored row with no counterpart in the new parse counts as removed."""
        store = _make_store(rows=[
            ["Function", "foo", 1],
            ["Function", "bar", 5],
        ])
        differ = GraphDiffer(store)
        # Only foo is present in new parse; bar was removed.
        nodes = [_nd("Function", "foo", 1)]
        result = differ.diff_file("src/app.py", nodes)
        assert result.removed == 1
        assert result.unchanged == 1

    def test_diff_mixed_scenario(self):
        """One unchanged, one added and one removed node are tallied separately."""
        store = _make_store(rows=[
            ["Function", "old_func", 1],
            ["Class", "MyClass", 20],
        ])
        differ = GraphDiffer(store)
        new_nodes = [
            _nd("Class", "MyClass", 20),       # unchanged
            _nd("Function", "new_func", 5),    # added
        ]
        result = differ.diff_file("src/app.py", new_nodes)
        assert result.unchanged == 1
        assert result.added == 1
        assert result.removed == 1
        assert result.modified == 0

    def test_diff_skips_rows_with_none_name(self):
        """A stored row made up entirely of None values is ignored."""
        store = _make_store(rows=[[None, None, None]])
        differ = GraphDiffer(store)
        result = differ.diff_file("src/app.py", [_nd("Function", "foo", 1)])
        # The None row is skipped; foo is treated as a new node.
        assert result.added == 1
        assert result.removed == 0

    def test_total_changes_property(self):
        """total_changes is added + modified + removed (3 + 1 + 2 here)."""
        result = DiffResult(added=3, modified=1, unchanged=5, removed=2)
        assert result.total_changes == 6

    def test_store_is_queried_with_file_path(self):
        """diff_file() queries the store once, passing the path as ``file_path``."""
        store = _make_store(rows=[])
        differ = GraphDiffer(store)
        differ.diff_file("src/models.py", [])
        # Ensure the store was actually queried with the right path param.
        store.query.assert_called_once()
        # call_args[0] is the positional-args tuple; index 1 is the params dict.
        call_params = store.query.call_args[0][1]
        assert call_params["file_path"] == "src/models.py"
a24deda… lmata 451
a24deda… lmata 452
a24deda… lmata 453 # ── #45 — ParallelIngester ────────────────────────────────────────────────────
a24deda… lmata 454
a24deda… lmata 455
class TestParallelIngester:
    """Tests for ParallelIngester.ingest_parallel() against temporary directories."""

    def _setup_ingester_with_mock_parser(self, store, parse_result=None):
        """
        Return a ParallelIngester whose internal RepoIngester has a mock
        Python parser installed.

        The mock's ``parse_file()`` returns *parse_result*, which defaults to
        ``{"functions": 2, "classes": 1, "edges": 3}``.  Returns the pair
        ``(ingester, mock_parser)``.
        """
        if parse_result is None:
            parse_result = {"functions": 2, "classes": 1, "edges": 3}

        ingester = ParallelIngester(store)
        mock_parser = MagicMock()
        mock_parser.parse_file.return_value = parse_result
        ingester._ingester._parsers["python"] = mock_parser
        return ingester, mock_parser

    def test_raises_on_missing_dir(self):
        """A nonexistent repository path raises FileNotFoundError."""
        store = _make_store()
        ingester = ParallelIngester(store)
        with pytest.raises(FileNotFoundError):
            ingester.ingest_parallel("/nonexistent/path")

    def test_returns_stats_dict_with_all_keys(self):
        """The returned stats contain at least the six expected counters."""
        store = _make_store()
        ingester, _ = self._setup_ingester_with_mock_parser(store)
        with tempfile.TemporaryDirectory() as tmpdir:
            stats = ingester.ingest_parallel(tmpdir)
        assert {"files", "functions", "classes", "edges", "skipped", "errors"} <= set(stats)

    def test_processes_single_file(self):
        """One .py file yields files == 1 and the parser's counts verbatim."""
        store = _make_store()
        ingester, _ = self._setup_ingester_with_mock_parser(
            store, {"functions": 3, "classes": 1, "edges": 4}
        )
        with tempfile.TemporaryDirectory() as tmpdir:
            (Path(tmpdir) / "app.py").write_text("def foo(): pass")
            stats = ingester.ingest_parallel(tmpdir)

        assert stats["files"] == 1
        assert stats["functions"] == 3
        assert stats["classes"] == 1
        assert stats["edges"] == 4
        assert stats["errors"] == 0

    def test_processes_multiple_files_concurrently(self):
        """Five files ingested with max_workers=3 are all counted without errors."""
        store = _make_store()
        ingester, _ = self._setup_ingester_with_mock_parser(
            store, {"functions": 1, "classes": 0, "edges": 0}
        )
        with tempfile.TemporaryDirectory() as tmpdir:
            for i in range(5):
                (Path(tmpdir) / f"mod{i}.py").write_text(f"def f{i}(): pass")
            stats = ingester.ingest_parallel(tmpdir, max_workers=3)

        assert stats["files"] == 5
        assert stats["functions"] == 5
        assert stats["errors"] == 0

    def test_aggregates_stats_across_files(self):
        """Per-file parser counts are summed across the two files."""
        store = _make_store()
        ingester, _ = self._setup_ingester_with_mock_parser(
            store, {"functions": 2, "classes": 1, "edges": 5}
        )
        with tempfile.TemporaryDirectory() as tmpdir:
            (Path(tmpdir) / "a.py").write_text("x=1")
            (Path(tmpdir) / "b.py").write_text("y=2")
            stats = ingester.ingest_parallel(tmpdir)

        assert stats["files"] == 2
        assert stats["functions"] == 4
        assert stats["classes"] == 2
        assert stats["edges"] == 10

    def test_clear_flag_calls_store_clear(self):
        """clear=True triggers exactly one store.clear() call."""
        store = _make_store()
        ingester, _ = self._setup_ingester_with_mock_parser(store)
        with tempfile.TemporaryDirectory() as tmpdir:
            ingester.ingest_parallel(tmpdir, clear=True)
        store.clear.assert_called_once()

    def test_no_clear_by_default(self):
        """Without the clear flag the store is never cleared."""
        store = _make_store()
        ingester, _ = self._setup_ingester_with_mock_parser(store)
        with tempfile.TemporaryDirectory() as tmpdir:
            ingester.ingest_parallel(tmpdir)
        store.clear.assert_not_called()

    def test_empty_repo_returns_zero_counts(self):
        """An empty directory produces zero files and zero functions."""
        store = _make_store()
        ingester, _ = self._setup_ingester_with_mock_parser(store)
        with tempfile.TemporaryDirectory() as tmpdir:
            stats = ingester.ingest_parallel(tmpdir)
        assert stats["files"] == 0
        assert stats["functions"] == 0

    def test_parser_exception_increments_errors_not_files(self):
        """A parser that raises is tallied under errors, not files."""
        store = _make_store()
        ingester = ParallelIngester(store)
        broken_parser = MagicMock()
        broken_parser.parse_file.side_effect = RuntimeError("boom")
        ingester._ingester._parsers["python"] = broken_parser

        with tempfile.TemporaryDirectory() as tmpdir:
            (Path(tmpdir) / "broken.py").write_text("def x(): pass")
            stats = ingester.ingest_parallel(tmpdir)

        assert stats["files"] == 0
        assert stats["errors"] == 1

    def test_incremental_skips_unchanged_files(self):
        """With incremental=True, files reported unchanged are skipped unparsed."""
        store = _make_store()
        ingester, mock_parser = self._setup_ingester_with_mock_parser(store)
        # Force the change detector to report every file as unchanged.
        ingester._ingester._file_unchanged = MagicMock(return_value=True)

        with tempfile.TemporaryDirectory() as tmpdir:
            (Path(tmpdir) / "unchanged.py").write_text("x=1")
            stats = ingester.ingest_parallel(tmpdir, incremental=True)

        assert stats["skipped"] == 1
        assert stats["files"] == 0
        mock_parser.parse_file.assert_not_called()

    def test_creates_repository_node(self):
        """Ingestion creates exactly one Repository node with name and path props."""
        store = _make_store()
        ingester, _ = self._setup_ingester_with_mock_parser(store)
        with tempfile.TemporaryDirectory() as tmpdir:
            ingester.ingest_parallel(tmpdir)

        from navegador.graph.schema import NodeLabel

        store.create_node.assert_called_once()
        label, props = store.create_node.call_args[0]
        assert label == NodeLabel.Repository
        assert "name" in props and "path" in props

    def test_max_workers_none_uses_default(self):
        """Passing max_workers=None should not raise."""
        store = _make_store()
        ingester, _ = self._setup_ingester_with_mock_parser(store)
        with tempfile.TemporaryDirectory() as tmpdir:
            stats = ingester.ingest_parallel(tmpdir, max_workers=None)
        assert isinstance(stats, dict)

    def test_skips_non_python_files(self):
        """Markdown and YAML files are neither counted nor handed to the parser."""
        store = _make_store()
        ingester, mock_parser = self._setup_ingester_with_mock_parser(store)
        with tempfile.TemporaryDirectory() as tmpdir:
            (Path(tmpdir) / "readme.md").write_text("# readme")
            (Path(tmpdir) / "config.yaml").write_text("key: value")
            stats = ingester.ingest_parallel(tmpdir)
        assert stats["files"] == 0
        mock_parser.parse_file.assert_not_called()

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button