| | @@ -0,0 +1,606 @@ |
| 1 | +"""Tests for navegador.ingestion.optimization (#42 – #45)."""
|
| 2 | +
|
| 3 | +from __future__ import annotations
|
| 4 | +
|
| 5 | +import tempfile
|
| 6 | +import threading
|
| 7 | +from pathlib import Path
|
| 8 | +from unittest.mock import MagicMock, call, patch
|
| 9 | +
|
| 10 | +import pytest
|
| 11 | +
|
| 12 | +from navegador.ingestion.optimization import (
|
| 13 | + DiffResult,
|
| 14 | + GraphDiffer,
|
| 15 | + IncrementalParser,
|
| 16 | + NodeDescriptor,
|
| 17 | + ParallelIngester,
|
| 18 | + TreeCache,
|
| 19 | +)
|
| 20 | +
|
| 21 | +
|
| 22 | +# ── helpers ───────────────────────────────────────────────────────────────────
|
| 23 | +
|
| 24 | +
|
| 25 | +def _make_store(rows=None):
|
| 26 | + """Return a MagicMock GraphStore whose query() returns *rows*."""
|
| 27 | + store = MagicMock()
|
| 28 | + store.query.return_value = MagicMock(result_set=rows or [])
|
| 29 | + return store
|
| 30 | +
|
| 31 | +
|
| 32 | +def _mock_tree(name: str = "tree") -> MagicMock:
|
| 33 | + t = MagicMock()
|
| 34 | + t.__repr__ = lambda self: f"<MockTree {name}>"
|
| 35 | + return t
|
| 36 | +
|
| 37 | +
|
| 38 | +# ── #42 — TreeCache ───────────────────────────────────────────────────────────
|
| 39 | +
|
| 40 | +
|
class TestTreeCache:
    """Exercise TreeCache lookups, LRU eviction, statistics, clearing and threading."""

    # ── get / put ──────────────────────────────────────────────────────────────

    def test_get_returns_none_on_cold_cache(self):
        """Nothing has been stored yet, so a lookup yields None."""
        assert TreeCache().get("foo.py", "abc") is None

    def test_put_and_get_roundtrip(self):
        """A stored tree comes back as the very same object."""
        stored = _mock_tree()
        cache = TreeCache()
        cache.put("foo.py", "abc123", stored)
        assert cache.get("foo.py", "abc123") is stored

    def test_get_miss_does_not_return_wrong_hash(self):
        """Same path, different content hash: must miss."""
        cache = TreeCache()
        cache.put("foo.py", "hash-A", _mock_tree())
        assert cache.get("foo.py", "hash-B") is None

    def test_get_miss_does_not_return_wrong_path(self):
        """Same content hash, different path: must miss."""
        cache = TreeCache()
        cache.put("foo.py", "hash-A", _mock_tree())
        assert cache.get("bar.py", "hash-A") is None

    def test_put_overwrites_existing_entry(self):
        """A second put under the same key replaces the first tree."""
        first, second = _mock_tree("t1"), _mock_tree("t2")
        cache = TreeCache()
        for tree in (first, second):
            cache.put("foo.py", "abc", tree)
        assert cache.get("foo.py", "abc") is second

    # ── LRU eviction ──────────────────────────────────────────────────────────

    def test_evicts_lru_entry_when_full(self):
        """With max_size=2, a third insert pushes out the oldest entry."""
        oldest = _mock_tree("t1")
        middle = _mock_tree("t2")
        newest = _mock_tree("t3")
        cache = TreeCache(max_size=2)

        cache.put("a.py", "1", oldest)
        cache.put("b.py", "2", middle)
        # Capacity reached: the next insert is expected to evict "a.py" (LRU).
        cache.put("c.py", "3", newest)

        assert cache.get("a.py", "1") is None
        assert cache.get("b.py", "2") is middle
        assert cache.get("c.py", "3") is newest

    def test_get_promotes_entry_so_it_is_not_evicted(self):
        """Reading an entry refreshes it, so the other entry is evicted instead."""
        touched = _mock_tree("t1")
        untouched = _mock_tree("t2")
        newcomer = _mock_tree("t3")
        cache = TreeCache(max_size=2)

        cache.put("a.py", "1", touched)
        cache.put("b.py", "2", untouched)
        # Reading "a.py" makes it the most-recently used entry ...
        cache.get("a.py", "1")
        # ... so "b.py" is the LRU entry when the third tree arrives.
        cache.put("c.py", "3", newcomer)

        assert cache.get("a.py", "1") is touched
        assert cache.get("b.py", "2") is None
        assert cache.get("c.py", "3") is newcomer

    def test_size_respects_max_size(self):
        """Ten inserts into a cache of capacity three never exceed three entries."""
        cache = TreeCache(max_size=3)
        for idx in range(10):
            cache.put(f"file{idx}.py", str(idx), _mock_tree())
        assert len(cache) <= 3

    def test_constructor_rejects_zero_max_size(self):
        """max_size=0 is rejected with ValueError."""
        with pytest.raises(ValueError):
            TreeCache(max_size=0)

    # ── stats ──────────────────────────────────────────────────────────────────

    def test_stats_initial_state(self):
        """A new cache reports zero hits, misses and size."""
        snapshot = TreeCache().stats()
        for counter in ("hits", "misses", "size"):
            assert snapshot[counter] == 0

    def test_stats_records_hits(self):
        """Two successful lookups are counted as two hits."""
        cache = TreeCache()
        cache.put("x.py", "h", _mock_tree())
        for _ in range(2):
            cache.get("x.py", "h")
        assert cache.stats()["hits"] == 2

    def test_stats_records_misses(self):
        """Two failed lookups are counted as two misses."""
        cache = TreeCache()
        for missing_path in ("x.py", "y.py"):
            cache.get(missing_path, "h")
        assert cache.stats()["misses"] == 2

    def test_stats_size_tracks_entries(self):
        """The reported size equals the number of stored entries."""
        cache = TreeCache(max_size=10)
        for path, digest in (("a.py", "1"), ("b.py", "2")):
            cache.put(path, digest, _mock_tree())
        assert cache.stats()["size"] == 2

    def test_stats_max_size_reported(self):
        """stats() echoes the configured capacity."""
        assert TreeCache(max_size=42).stats()["max_size"] == 42

    # ── clear ──────────────────────────────────────────────────────────────────

    def test_clear_removes_all_entries(self):
        """After clear() the cache is empty and earlier keys miss."""
        cache = TreeCache()
        for path, digest in (("a.py", "1"), ("b.py", "2")):
            cache.put(path, digest, _mock_tree())
        cache.clear()
        assert len(cache) == 0
        assert cache.get("a.py", "1") is None

    def test_clear_resets_stats(self):
        """clear() zeroes the hit/miss counters as well as the size."""
        cache = TreeCache()
        cache.put("a.py", "1", _mock_tree())
        cache.get("a.py", "1")  # one hit
        cache.get("a.py", "bad")  # one miss
        cache.clear()

        snapshot = cache.stats()
        for counter in ("hits", "misses", "size"):
            assert snapshot[counter] == 0

    # ── thread safety ──────────────────────────────────────────────────────────

    def test_concurrent_puts_do_not_corrupt_state(self):
        """Five threads doing twenty puts each raise nothing and respect max_size."""
        cache = TreeCache(max_size=50)
        failures = []

        def writer(worker_id: int) -> None:
            try:
                for seq in range(20):
                    cache.put(f"file{worker_id}_{seq}.py", str(seq), _mock_tree())
            except Exception as exc:  # noqa: BLE001
                # Collected here so the main thread can assert on them.
                failures.append(exc)

        workers = [threading.Thread(target=writer, args=(wid,)) for wid in range(5)]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()

        assert not failures
        assert len(cache) <= 50
|
| 192 | +
|
| 193 | +
|
| 194 | +# ── #43 — IncrementalParser ───────────────────────────────────────────────────
|
| 195 | +
|
| 196 | +
|
class TestIncrementalParser:
    """Tests for IncrementalParser's cache-aware parsing with tree_sitter.Parser patched out."""

    def _make_language_and_parser(self):
        """
        Build the mocks that stand in for tree-sitter.

        Returns a 4-tuple ``(fake_tree, mock_ts_parser, mock_ts_class,
        fake_language)``: calling ``mock_ts_class`` returns ``mock_ts_parser``,
        whose ``parse()`` returns ``fake_tree``; ``fake_language`` is a plain
        MagicMock.
        """
        fake_tree = _mock_tree("parsed")
        fake_language = MagicMock()

        # Stand-in for tree_sitter.Parser so IncrementalParser can instantiate it.
        mock_ts_parser = MagicMock()
        mock_ts_parser.parse.return_value = fake_tree
        mock_ts_class = MagicMock(return_value=mock_ts_parser)

        return fake_tree, mock_ts_parser, mock_ts_class, fake_language

    def test_parse_returns_tree(self):
        """parse() hands back whatever the underlying parser produced."""
        cache = TreeCache()
        inc = IncrementalParser(cache)

        fake_tree = _mock_tree()
        mock_ts_parser = MagicMock()
        mock_ts_parser.parse.return_value = fake_tree

        with patch("tree_sitter.Parser", return_value=mock_ts_parser):
            result = inc.parse(b"source", MagicMock(), "foo.py", "hash1")

        assert result is fake_tree

    def test_parse_stores_tree_in_cache(self):
        """A freshly parsed tree is retrievable from the cache under (path, hash)."""
        cache = TreeCache()
        inc = IncrementalParser(cache)

        fake_tree = _mock_tree()
        mock_ts_parser = MagicMock()
        mock_ts_parser.parse.return_value = fake_tree

        with patch("tree_sitter.Parser", return_value=mock_ts_parser):
            inc.parse(b"source", MagicMock(), "foo.py", "hash1")

        assert cache.get("foo.py", "hash1") is fake_tree

    def test_parse_returns_cached_tree_without_calling_parser(self):
        """An exact (path, hash) cache hit short-circuits the parser entirely."""
        cached_tree = _mock_tree("cached")
        cache = TreeCache()
        cache.put("foo.py", "hash1", cached_tree)

        inc = IncrementalParser(cache)
        mock_ts_parser = MagicMock()

        with patch("tree_sitter.Parser", return_value=mock_ts_parser):
            result = inc.parse(b"source", MagicMock(), "foo.py", "hash1")

        assert result is cached_tree
        mock_ts_parser.parse.assert_not_called()

    def test_cache_hit_increments_hit_count(self):
        """Serving from the cache is reflected in the cache's hit counter."""
        cache = TreeCache()
        tree = _mock_tree()
        cache.put("foo.py", "hashX", tree)

        inc = IncrementalParser(cache)
        with patch("tree_sitter.Parser", return_value=MagicMock()):
            inc.parse(b"src", MagicMock(), "foo.py", "hashX")

        assert cache.stats()["hits"] == 1

    def test_parse_passes_old_tree_on_rehash(self):
        """When a stale tree exists for the same path, it is passed as old_tree."""
        cache = TreeCache()
        stale_tree = _mock_tree("stale")
        cache.put("bar.py", "old-hash", stale_tree)

        new_tree = _mock_tree("new")
        mock_ts_parser = MagicMock()
        mock_ts_parser.parse.return_value = new_tree

        inc = IncrementalParser(cache)
        with patch("tree_sitter.Parser", return_value=mock_ts_parser):
            result = inc.parse(b"new source", MagicMock(), "bar.py", "new-hash")

        assert result is new_tree
        # old_tree must have been passed as the second positional argument.
        mock_ts_parser.parse.assert_called_once_with(b"new source", stale_tree)

    def test_parse_without_old_tree_calls_parse_with_source_only(self):
        """With no earlier tree for the path, the parser receives only the source."""
        cache = TreeCache()
        new_tree = _mock_tree()
        mock_ts_parser = MagicMock()
        mock_ts_parser.parse.return_value = new_tree

        inc = IncrementalParser(cache)
        with patch("tree_sitter.Parser", return_value=mock_ts_parser):
            inc.parse(b"source", MagicMock(), "baz.py", "hash1")

        mock_ts_parser.parse.assert_called_once_with(b"source")

    def test_default_cache_is_created_if_none_given(self):
        """Constructing without arguments yields a parser that owns a TreeCache."""
        inc = IncrementalParser()
        assert isinstance(inc.cache, TreeCache)

    def test_custom_cache_is_used(self):
        """A cache passed to the constructor is exposed unchanged as ``.cache``."""
        cache = TreeCache(max_size=5)
        inc = IncrementalParser(cache)
        assert inc.cache is cache

    def test_fallback_when_tree_sitter_not_importable(self):
        """When tree_sitter is unavailable, language is used directly as parser."""
        cache = TreeCache()
        fake_tree = _mock_tree()
        fake_language = MagicMock()
        fake_language.parse.return_value = fake_tree

        inc = IncrementalParser(cache)

        import builtins

        real_import = builtins.__import__

        def _block_tree_sitter(name, *args, **kwargs):
            # Fail only the tree_sitter import; delegate everything else.
            if name == "tree_sitter":
                raise ImportError("mocked absence")
            return real_import(name, *args, **kwargs)

        with patch("builtins.__import__", side_effect=_block_tree_sitter):
            result = inc.parse(b"source", fake_language, "x.py", "h1")

        assert result is fake_tree
|
| 327 | +
|
| 328 | +
|
| 329 | +# ── #44 — GraphDiffer ─────────────────────────────────────────────────────────
|
| 330 | +
|
| 331 | +
|
def _nd(label: str, name: str, line_start: int, **extra) -> NodeDescriptor:
    """Shorthand NodeDescriptor factory; keyword extras are passed as the ``extra`` dict."""
    descriptor = NodeDescriptor(
        label=label,
        name=name,
        line_start=line_start,
        extra=extra,
    )
    return descriptor
|
| 334 | +
|
| 335 | +
|
class TestNodeDescriptor:
    """Identity-key and equality checks for NodeDescriptor."""

    def test_identity_key(self):
        """identity_key() is the (label, name, line_start) triple."""
        descriptor = _nd("Function", "foo", 10)
        expected_key = ("Function", "foo", 10)
        assert descriptor.identity_key() == expected_key

    def test_equality_same(self):
        """Two descriptors built from identical fields compare equal."""
        left = _nd("Function", "foo", 10)
        right = _nd("Function", "foo", 10)
        assert left == right

    def test_equality_different_line(self):
        """A differing line_start makes descriptors unequal."""
        left = _nd("Function", "foo", 10)
        right = _nd("Function", "foo", 11)
        assert left != right

    def test_equality_different_extra(self):
        """Differing extra properties make descriptors unequal."""
        hello = _nd("Function", "foo", 10, docstring="hello")
        world = _nd("Function", "foo", 10, docstring="world")
        assert hello != world
|
| 351 | +
|
| 352 | +
|
class TestGraphDiffer:
    """Tests for GraphDiffer.diff_file against a mocked store, plus DiffResult.total_changes."""

    def test_diff_empty_new_and_empty_existing(self):
        """No stored rows and no new nodes produce an all-zero DiffResult."""
        store = _make_store(rows=[])
        differ = GraphDiffer(store)
        result = differ.diff_file("src/app.py", [])
        assert result == DiffResult(added=0, modified=0, unchanged=0, removed=0)

    def test_diff_all_new_nodes(self):
        """With an empty store every parsed node counts as added."""
        store = _make_store(rows=[])
        differ = GraphDiffer(store)
        nodes = [
            _nd("Function", "foo", 1),
            _nd("Class", "Bar", 10),
        ]
        result = differ.diff_file("src/app.py", nodes)
        assert result.added == 2
        assert result.modified == 0
        assert result.unchanged == 0
        assert result.removed == 0

    def test_diff_all_unchanged_nodes(self):
        """Nodes matching the stored rows exactly count as unchanged."""
        store = _make_store(rows=[
            ["Function", "foo", 1],
            ["Class", "Bar", 10],
        ])
        differ = GraphDiffer(store)
        nodes = [
            _nd("Function", "foo", 1),
            _nd("Class", "Bar", 10),
        ]
        result = differ.diff_file("src/app.py", nodes)
        assert result.unchanged == 2
        assert result.added == 0
        assert result.modified == 0
        assert result.removed == 0

    def test_diff_modified_node(self):
        """Same identity key but different extra props counts as modified."""
        store = _make_store(rows=[["Function", "foo", 1]])
        differ = GraphDiffer(store)
        # Existing node in store has no extra; new node has docstring.
        nodes = [_nd("Function", "foo", 1, docstring="now documented")]
        result = differ.diff_file("src/app.py", nodes)
        # The identity key matches but extra differs → modified.
        assert result.modified == 1
        assert result.unchanged == 0
        assert result.added == 0

    def test_diff_removed_nodes(self):
        """A stored row with no counterpart in the new parse counts as removed."""
        store = _make_store(rows=[
            ["Function", "foo", 1],
            ["Function", "bar", 5],
        ])
        differ = GraphDiffer(store)
        # Only foo is present in new parse; bar was removed.
        nodes = [_nd("Function", "foo", 1)]
        result = differ.diff_file("src/app.py", nodes)
        assert result.removed == 1
        assert result.unchanged == 1

    def test_diff_mixed_scenario(self):
        """One unchanged, one added and one removed node are tallied independently."""
        store = _make_store(rows=[
            ["Function", "old_func", 1],
            ["Class", "MyClass", 20],
        ])
        differ = GraphDiffer(store)
        new_nodes = [
            _nd("Class", "MyClass", 20),  # unchanged
            _nd("Function", "new_func", 5),  # added
        ]
        result = differ.diff_file("src/app.py", new_nodes)
        assert result.unchanged == 1
        assert result.added == 1
        assert result.removed == 1
        assert result.modified == 0

    def test_diff_skips_rows_with_none_name(self):
        """Store rows made entirely of None are ignored rather than counted as removed."""
        store = _make_store(rows=[[None, None, None]])
        differ = GraphDiffer(store)
        result = differ.diff_file("src/app.py", [_nd("Function", "foo", 1)])
        # The None row is skipped; foo is treated as a new node.
        assert result.added == 1
        assert result.removed == 0

    def test_total_changes_property(self):
        """total_changes sums added + modified + removed (3 + 1 + 2), excluding unchanged."""
        result = DiffResult(added=3, modified=1, unchanged=5, removed=2)
        assert result.total_changes == 6

    def test_store_is_queried_with_file_path(self):
        """diff_file issues exactly one store query whose params carry the file path."""
        store = _make_store(rows=[])
        differ = GraphDiffer(store)
        differ.diff_file("src/models.py", [])
        # Ensure the store was actually queried with the right path param.
        store.query.assert_called_once()
        # call_args[0] is the tuple of positional arguments; the second one
        # is the parameter mapping handed to store.query.
        call_params = store.query.call_args[0][1]
        assert call_params["file_path"] == "src/models.py"
|
| 451 | +
|
| 452 | +
|
| 453 | +# ── #45 — ParallelIngester ────────────────────────────────────────────────────
|
| 454 | +
|
| 455 | +
|
class TestParallelIngester:
    """Tests for ParallelIngester.ingest_parallel using a mocked store and Python parser."""

    def _setup_ingester_with_mock_parser(self, store, parse_result=None):
        """
        Build a ParallelIngester whose inner ingester uses a mock Python parser.

        The mock's ``parse_file`` returns *parse_result*, defaulting to
        ``{"functions": 2, "classes": 1, "edges": 3}``.  Returns the pair
        ``(ingester, mock_parser)``.
        """
        result_payload = (
            {"functions": 2, "classes": 1, "edges": 3}
            if parse_result is None
            else parse_result
        )

        mock_parser = MagicMock()
        mock_parser.parse_file.return_value = result_payload

        ingester = ParallelIngester(store)
        ingester._ingester._parsers["python"] = mock_parser
        return ingester, mock_parser

    def test_raises_on_missing_dir(self):
        """A path that does not exist raises FileNotFoundError."""
        ingester = ParallelIngester(_make_store())
        with pytest.raises(FileNotFoundError):
            ingester.ingest_parallel("/nonexistent/path")

    def test_returns_stats_dict_with_all_keys(self):
        """The returned stats expose every expected counter key."""
        ingester, _ = self._setup_ingester_with_mock_parser(_make_store())
        with tempfile.TemporaryDirectory() as repo_dir:
            stats = ingester.ingest_parallel(repo_dir)
        required = {"files", "functions", "classes", "edges", "skipped", "errors"}
        assert required.issubset(stats)

    def test_processes_single_file(self):
        """One Python file yields exactly the parser's counts."""
        ingester, _ = self._setup_ingester_with_mock_parser(
            _make_store(), {"functions": 3, "classes": 1, "edges": 4}
        )
        with tempfile.TemporaryDirectory() as repo_dir:
            Path(repo_dir, "app.py").write_text("def foo(): pass")
            stats = ingester.ingest_parallel(repo_dir)

        assert stats["files"] == 1
        assert stats["functions"] == 3
        assert stats["classes"] == 1
        assert stats["edges"] == 4
        assert stats["errors"] == 0

    def test_processes_multiple_files_concurrently(self):
        """Five files processed with three workers are all counted, with no errors."""
        ingester, _ = self._setup_ingester_with_mock_parser(
            _make_store(), {"functions": 1, "classes": 0, "edges": 0}
        )
        with tempfile.TemporaryDirectory() as repo_dir:
            root = Path(repo_dir)
            for idx in range(5):
                (root / f"mod{idx}.py").write_text(f"def f{idx}(): pass")
            stats = ingester.ingest_parallel(repo_dir, max_workers=3)

        assert stats["files"] == 5
        assert stats["functions"] == 5
        assert stats["errors"] == 0

    def test_aggregates_stats_across_files(self):
        """Per-file parser counts are summed over the two files."""
        ingester, _ = self._setup_ingester_with_mock_parser(
            _make_store(), {"functions": 2, "classes": 1, "edges": 5}
        )
        with tempfile.TemporaryDirectory() as repo_dir:
            root = Path(repo_dir)
            (root / "a.py").write_text("x=1")
            (root / "b.py").write_text("y=2")
            stats = ingester.ingest_parallel(repo_dir)

        assert stats["files"] == 2
        assert stats["functions"] == 4
        assert stats["classes"] == 2
        assert stats["edges"] == 10

    def test_clear_flag_calls_store_clear(self):
        """clear=True triggers exactly one store.clear() call."""
        store = _make_store()
        ingester, _ = self._setup_ingester_with_mock_parser(store)
        with tempfile.TemporaryDirectory() as repo_dir:
            ingester.ingest_parallel(repo_dir, clear=True)
        store.clear.assert_called_once()

    def test_no_clear_by_default(self):
        """Without the clear flag the store is never cleared."""
        store = _make_store()
        ingester, _ = self._setup_ingester_with_mock_parser(store)
        with tempfile.TemporaryDirectory() as repo_dir:
            ingester.ingest_parallel(repo_dir)
        store.clear.assert_not_called()

    def test_empty_repo_returns_zero_counts(self):
        """An empty directory produces zero files and zero functions."""
        ingester, _ = self._setup_ingester_with_mock_parser(_make_store())
        with tempfile.TemporaryDirectory() as repo_dir:
            stats = ingester.ingest_parallel(repo_dir)
        assert stats["files"] == 0
        assert stats["functions"] == 0

    def test_parser_exception_increments_errors_not_files(self):
        """A parser that raises is tallied under errors, not files."""
        failing_parser = MagicMock()
        failing_parser.parse_file.side_effect = RuntimeError("boom")

        ingester = ParallelIngester(_make_store())
        ingester._ingester._parsers["python"] = failing_parser

        with tempfile.TemporaryDirectory() as repo_dir:
            Path(repo_dir, "broken.py").write_text("def x(): pass")
            stats = ingester.ingest_parallel(repo_dir)

        assert stats["files"] == 0
        assert stats["errors"] == 1

    def test_incremental_skips_unchanged_files(self):
        """In incremental mode a file reported unchanged is skipped without parsing."""
        ingester, mock_parser = self._setup_ingester_with_mock_parser(_make_store())
        # Force the change check to report every file as unchanged.
        ingester._ingester._file_unchanged = MagicMock(return_value=True)

        with tempfile.TemporaryDirectory() as repo_dir:
            Path(repo_dir, "unchanged.py").write_text("x=1")
            stats = ingester.ingest_parallel(repo_dir, incremental=True)

        assert stats["skipped"] == 1
        assert stats["files"] == 0
        mock_parser.parse_file.assert_not_called()

    def test_creates_repository_node(self):
        """Ingestion creates exactly one node, labelled Repository, with name and path props."""
        from navegador.graph.schema import NodeLabel

        store = _make_store()
        ingester, _ = self._setup_ingester_with_mock_parser(store)
        with tempfile.TemporaryDirectory() as repo_dir:
            ingester.ingest_parallel(repo_dir)

        store.create_node.assert_called_once()
        label, props = store.create_node.call_args[0]
        assert label == NodeLabel.Repository
        assert "name" in props
        assert "path" in props

    def test_max_workers_none_uses_default(self):
        """Passing max_workers=None should not raise."""
        ingester, _ = self._setup_ingester_with_mock_parser(_make_store())
        with tempfile.TemporaryDirectory() as repo_dir:
            stats = ingester.ingest_parallel(repo_dir, max_workers=None)
        assert isinstance(stats, dict)

    def test_skips_non_python_files(self):
        """Markdown and YAML files are neither counted nor handed to the Python parser."""
        ingester, mock_parser = self._setup_ingester_with_mock_parser(_make_store())
        with tempfile.TemporaryDirectory() as repo_dir:
            root = Path(repo_dir)
            (root / "readme.md").write_text("# readme")
            (root / "config.yaml").write_text("key: value")
            stats = ingester.ingest_parallel(repo_dir)
        assert stats["files"] == 0
        mock_parser.parse_file.assert_not_called()
|