|
1
|
"""Tests for navegador.ingestion.optimization (#42 – #45).""" |
|
2
|
|
|
3
|
from __future__ import annotations |
|
4
|
|
|
5
|
import tempfile |
|
6
|
import threading |
|
7
|
from pathlib import Path |
|
8
|
from unittest.mock import MagicMock, call, patch |
|
9
|
|
|
10
|
import pytest |
|
11
|
|
|
12
|
from navegador.ingestion.optimization import ( |
|
13
|
DiffResult, |
|
14
|
GraphDiffer, |
|
15
|
IncrementalParser, |
|
16
|
NodeDescriptor, |
|
17
|
ParallelIngester, |
|
18
|
TreeCache, |
|
19
|
) |
|
20
|
|
|
21
|
|
|
22
|
# ── helpers ─────────────────────────────────────────────────────────────────── |
|
23
|
|
|
24
|
|
|
25
|
def _make_store(rows=None): |
|
26
|
"""Return a MagicMock GraphStore whose query() returns *rows*.""" |
|
27
|
store = MagicMock() |
|
28
|
store.query.return_value = MagicMock(result_set=rows or []) |
|
29
|
return store |
|
30
|
|
|
31
|
|
|
32
|
def _mock_tree(name: str = "tree") -> MagicMock: |
|
33
|
t = MagicMock() |
|
34
|
t.__repr__ = lambda self: f"<MockTree {name}>" |
|
35
|
return t |
|
36
|
|
|
37
|
|
|
38
|
# ── #42 — TreeCache ─────────────────────────────────────────────────────────── |
|
39
|
|
|
40
|
|
|
41
|
class TestTreeCache:
    """Behavioural tests for the thread-safe LRU ``TreeCache`` (#42)."""

    # ── get / put ────────────────────────────────────────────────────────────

    def test_get_returns_none_on_cold_cache(self):
        assert TreeCache().get("foo.py", "abc") is None

    def test_put_and_get_roundtrip(self):
        cache = TreeCache()
        stored = _mock_tree()
        cache.put("foo.py", "abc123", stored)
        assert cache.get("foo.py", "abc123") is stored

    def test_get_miss_does_not_return_wrong_hash(self):
        # Same path, different content hash → miss.
        cache = TreeCache()
        cache.put("foo.py", "hash-A", _mock_tree())
        assert cache.get("foo.py", "hash-B") is None

    def test_get_miss_does_not_return_wrong_path(self):
        # Same hash, different path → miss.
        cache = TreeCache()
        cache.put("foo.py", "hash-A", _mock_tree())
        assert cache.get("bar.py", "hash-A") is None

    def test_put_overwrites_existing_entry(self):
        cache = TreeCache()
        first = _mock_tree("t1")
        second = _mock_tree("t2")
        cache.put("foo.py", "abc", first)
        cache.put("foo.py", "abc", second)
        assert cache.get("foo.py", "abc") is second

    # ── LRU eviction ─────────────────────────────────────────────────────────

    def test_evicts_lru_entry_when_full(self):
        cache = TreeCache(max_size=2)
        oldest = _mock_tree("t1")
        kept = _mock_tree("t2")
        newest = _mock_tree("t3")

        cache.put("a.py", "1", oldest)
        cache.put("b.py", "2", kept)
        # Cache is at capacity; a third insert must evict the least recent.
        cache.put("c.py", "3", newest)

        assert cache.get("a.py", "1") is None
        assert cache.get("b.py", "2") is kept
        assert cache.get("c.py", "3") is newest

    def test_get_promotes_entry_so_it_is_not_evicted(self):
        cache = TreeCache(max_size=2)
        promoted = _mock_tree("t1")
        victim = _mock_tree("t2")
        latest = _mock_tree("t3")

        cache.put("a.py", "1", promoted)
        cache.put("b.py", "2", victim)
        # Reading t1 marks it most-recently used, so t2 becomes the LRU.
        cache.get("a.py", "1")
        cache.put("c.py", "3", latest)

        assert cache.get("a.py", "1") is promoted
        assert cache.get("b.py", "2") is None
        assert cache.get("c.py", "3") is latest

    def test_size_respects_max_size(self):
        cache = TreeCache(max_size=3)
        for i in range(10):
            cache.put(f"file{i}.py", str(i), _mock_tree())
        assert len(cache) <= 3

    def test_constructor_rejects_zero_max_size(self):
        with pytest.raises(ValueError):
            TreeCache(max_size=0)

    # ── stats ────────────────────────────────────────────────────────────────

    def test_stats_initial_state(self):
        snapshot = TreeCache().stats()
        assert snapshot["hits"] == 0
        assert snapshot["misses"] == 0
        assert snapshot["size"] == 0

    def test_stats_records_hits(self):
        cache = TreeCache()
        cache.put("x.py", "h", _mock_tree())
        for _ in range(2):
            cache.get("x.py", "h")
        assert cache.stats()["hits"] == 2

    def test_stats_records_misses(self):
        cache = TreeCache()
        cache.get("x.py", "h")
        cache.get("y.py", "h")
        assert cache.stats()["misses"] == 2

    def test_stats_size_tracks_entries(self):
        cache = TreeCache(max_size=10)
        cache.put("a.py", "1", _mock_tree())
        cache.put("b.py", "2", _mock_tree())
        assert cache.stats()["size"] == 2

    def test_stats_max_size_reported(self):
        assert TreeCache(max_size=42).stats()["max_size"] == 42

    # ── clear ────────────────────────────────────────────────────────────────

    def test_clear_removes_all_entries(self):
        cache = TreeCache()
        cache.put("a.py", "1", _mock_tree())
        cache.put("b.py", "2", _mock_tree())
        cache.clear()
        assert len(cache) == 0
        assert cache.get("a.py", "1") is None

    def test_clear_resets_stats(self):
        cache = TreeCache()
        cache.put("a.py", "1", _mock_tree())
        cache.get("a.py", "1")    # one hit
        cache.get("a.py", "bad")  # one miss
        cache.clear()
        snapshot = cache.stats()
        assert snapshot["hits"] == 0
        assert snapshot["misses"] == 0
        assert snapshot["size"] == 0

    # ── thread safety ────────────────────────────────────────────────────────

    def test_concurrent_puts_do_not_corrupt_state(self):
        cache = TreeCache(max_size=50)
        failures = []

        def writer(n: int) -> None:
            # Any exception here indicates a race inside the cache.
            try:
                for i in range(20):
                    cache.put(f"file{n}_{i}.py", str(i), _mock_tree())
            except Exception as exc:  # noqa: BLE001
                failures.append(exc)

        workers = [threading.Thread(target=writer, args=(t,)) for t in range(5)]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()

        assert not failures
        assert len(cache) <= 50
|
192
|
|
|
193
|
|
|
194
|
# ── #43 — IncrementalParser ─────────────────────────────────────────────────── |
|
195
|
|
|
196
|
|
|
197
|
class TestIncrementalParser:
    """Tests for IncrementalParser (#43): cache reuse and tree-sitter fallback."""

    # NOTE(review): this helper appears unused by the tests below — each test
    # builds its own mocks inline. Confirm and consider removing it.
    def _make_language_and_parser(self):
        """
        Return a fake tree-sitter Language object whose parser.parse()
        returns a fresh MagicMock tree.
        """
        fake_tree = _mock_tree("parsed")
        fake_parser = MagicMock()
        fake_parser.parse.return_value = fake_tree
        fake_language = MagicMock()

        # Patch tree_sitter.Parser so IncrementalParser can instantiate it.
        mock_ts_parser = MagicMock()
        mock_ts_parser.parse.return_value = fake_tree
        mock_ts_class = MagicMock(return_value=mock_ts_parser)

        return fake_tree, mock_ts_parser, mock_ts_class, fake_language

    def test_parse_returns_tree(self):
        """A cold parse returns exactly the tree produced by the parser."""
        cache = TreeCache()
        inc = IncrementalParser(cache)

        fake_tree = _mock_tree()
        mock_ts_parser = MagicMock()
        mock_ts_parser.parse.return_value = fake_tree

        # Patch the class so IncrementalParser instantiates our mock parser.
        with patch("tree_sitter.Parser", return_value=mock_ts_parser):
            result = inc.parse(b"source", MagicMock(), "foo.py", "hash1")

        assert result is fake_tree

    def test_parse_stores_tree_in_cache(self):
        """A freshly parsed tree is inserted into the cache under (path, hash)."""
        cache = TreeCache()
        inc = IncrementalParser(cache)

        fake_tree = _mock_tree()
        mock_ts_parser = MagicMock()
        mock_ts_parser.parse.return_value = fake_tree

        with patch("tree_sitter.Parser", return_value=mock_ts_parser):
            inc.parse(b"source", MagicMock(), "foo.py", "hash1")

        assert cache.get("foo.py", "hash1") is fake_tree

    def test_parse_returns_cached_tree_without_calling_parser(self):
        """A cache hit short-circuits parsing entirely."""
        cached_tree = _mock_tree("cached")
        cache = TreeCache()
        cache.put("foo.py", "hash1", cached_tree)

        inc = IncrementalParser(cache)
        mock_ts_parser = MagicMock()

        with patch("tree_sitter.Parser", return_value=mock_ts_parser):
            result = inc.parse(b"source", MagicMock(), "foo.py", "hash1")

        assert result is cached_tree
        mock_ts_parser.parse.assert_not_called()

    def test_cache_hit_increments_hit_count(self):
        """Serving from cache must be reflected in the cache's hit stats."""
        cache = TreeCache()
        tree = _mock_tree()
        cache.put("foo.py", "hashX", tree)

        inc = IncrementalParser(cache)
        with patch("tree_sitter.Parser", return_value=MagicMock()):
            inc.parse(b"src", MagicMock(), "foo.py", "hashX")

        assert cache.stats()["hits"] == 1

    def test_parse_passes_old_tree_on_rehash(self):
        """When a stale tree exists for the same path, it is passed as old_tree."""
        cache = TreeCache()
        stale_tree = _mock_tree("stale")
        # Same path ("bar.py") but an outdated hash → stale entry.
        cache.put("bar.py", "old-hash", stale_tree)

        new_tree = _mock_tree("new")
        mock_ts_parser = MagicMock()
        mock_ts_parser.parse.return_value = new_tree

        inc = IncrementalParser(cache)
        with patch("tree_sitter.Parser", return_value=mock_ts_parser):
            result = inc.parse(b"new source", MagicMock(), "bar.py", "new-hash")

        assert result is new_tree
        # old_tree must have been passed as the second positional argument.
        mock_ts_parser.parse.assert_called_once_with(b"new source", stale_tree)

    def test_parse_without_old_tree_calls_parse_with_source_only(self):
        """With no stale tree for the path, parse() gets only the source bytes."""
        cache = TreeCache()
        new_tree = _mock_tree()
        mock_ts_parser = MagicMock()
        mock_ts_parser.parse.return_value = new_tree

        inc = IncrementalParser(cache)
        with patch("tree_sitter.Parser", return_value=mock_ts_parser):
            inc.parse(b"source", MagicMock(), "baz.py", "hash1")

        mock_ts_parser.parse.assert_called_once_with(b"source")

    def test_default_cache_is_created_if_none_given(self):
        """Constructing without a cache must create a TreeCache internally."""
        inc = IncrementalParser()
        assert isinstance(inc.cache, TreeCache)

    def test_custom_cache_is_used(self):
        """A caller-supplied cache is used as-is, not copied or replaced."""
        cache = TreeCache(max_size=5)
        inc = IncrementalParser(cache)
        assert inc.cache is cache

    def test_fallback_when_tree_sitter_not_importable(self):
        """When tree_sitter is unavailable, language is used directly as parser."""
        cache = TreeCache()
        fake_tree = _mock_tree()
        fake_language = MagicMock()
        fake_language.parse.return_value = fake_tree

        inc = IncrementalParser(cache)

        import builtins

        real_import = builtins.__import__

        # Simulate an environment without tree_sitter: only that one module
        # fails to import; everything else resolves normally.
        def _block_tree_sitter(name, *args, **kwargs):
            if name == "tree_sitter":
                raise ImportError("mocked absence")
            return real_import(name, *args, **kwargs)

        with patch("builtins.__import__", side_effect=_block_tree_sitter):
            result = inc.parse(b"source", fake_language, "x.py", "h1")

        assert result is fake_tree
|
327
|
|
|
328
|
|
|
329
|
# ── #44 — GraphDiffer ───────────────────────────────────────────────────────── |
|
330
|
|
|
331
|
|
|
332
|
def _nd(label: str, name: str, line_start: int, **extra) -> NodeDescriptor:
    """Shorthand factory for NodeDescriptor fixtures; **extra feeds ``extra``."""
    fields = {
        "label": label,
        "name": name,
        "line_start": line_start,
        "extra": extra,
    }
    return NodeDescriptor(**fields)
|
334
|
|
|
335
|
|
|
336
|
class TestNodeDescriptor:
    """Identity-key and equality semantics of ``NodeDescriptor`` (#44)."""

    def test_identity_key(self):
        descriptor = _nd("Function", "foo", 10)
        assert descriptor.identity_key() == ("Function", "foo", 10)

    def test_equality_same(self):
        left = _nd("Function", "foo", 10)
        right = _nd("Function", "foo", 10)
        assert left == right

    def test_equality_different_line(self):
        # A shifted start line breaks equality.
        assert _nd("Function", "foo", 10) != _nd("Function", "foo", 11)

    def test_equality_different_extra(self):
        # Identical identity but diverging extra props breaks equality.
        one = _nd("Function", "foo", 10, docstring="hello")
        other = _nd("Function", "foo", 10, docstring="world")
        assert one != other
|
351
|
|
|
352
|
|
|
353
|
class TestGraphDiffer:
    """Tests for GraphDiffer.diff_file (#44).

    The mock store's ``query()`` rows use the shape ``[label, name, line]``
    that the differ reads back when reconstructing existing nodes.
    """

    def test_diff_empty_new_and_empty_existing(self):
        """Nothing stored and nothing parsed → an all-zero diff."""
        store = _make_store(rows=[])
        differ = GraphDiffer(store)
        result = differ.diff_file("src/app.py", [])
        assert result == DiffResult(added=0, modified=0, unchanged=0, removed=0)

    def test_diff_all_new_nodes(self):
        """Every parsed node is new when the store has no rows for the file."""
        store = _make_store(rows=[])
        differ = GraphDiffer(store)
        nodes = [
            _nd("Function", "foo", 1),
            _nd("Class", "Bar", 10),
        ]
        result = differ.diff_file("src/app.py", nodes)
        assert result.added == 2
        assert result.modified == 0
        assert result.unchanged == 0
        assert result.removed == 0

    def test_diff_all_unchanged_nodes(self):
        """Stored rows matching parsed nodes exactly count as unchanged."""
        store = _make_store(rows=[
            ["Function", "foo", 1],
            ["Class", "Bar", 10],
        ])
        differ = GraphDiffer(store)
        nodes = [
            _nd("Function", "foo", 1),
            _nd("Class", "Bar", 10),
        ]
        result = differ.diff_file("src/app.py", nodes)
        assert result.unchanged == 2
        assert result.added == 0
        assert result.modified == 0
        assert result.removed == 0

    def test_diff_modified_node(self):
        """Same identity key but different extra props counts as modified."""
        store = _make_store(rows=[["Function", "foo", 1]])
        differ = GraphDiffer(store)
        # Existing node in store has no extra; new node has docstring.
        nodes = [_nd("Function", "foo", 1, docstring="now documented")]
        result = differ.diff_file("src/app.py", nodes)
        # The identity key matches but extra differs → modified.
        assert result.modified == 1
        assert result.unchanged == 0
        assert result.added == 0

    def test_diff_removed_nodes(self):
        """Stored nodes missing from the new parse are counted as removed."""
        store = _make_store(rows=[
            ["Function", "foo", 1],
            ["Function", "bar", 5],
        ])
        differ = GraphDiffer(store)
        # Only foo is present in new parse; bar was removed.
        nodes = [_nd("Function", "foo", 1)]
        result = differ.diff_file("src/app.py", nodes)
        assert result.removed == 1
        assert result.unchanged == 1

    def test_diff_mixed_scenario(self):
        """Added, removed, and unchanged can all appear in one diff."""
        store = _make_store(rows=[
            ["Function", "old_func", 1],
            ["Class", "MyClass", 20],
        ])
        differ = GraphDiffer(store)
        new_nodes = [
            _nd("Class", "MyClass", 20),     # unchanged
            _nd("Function", "new_func", 5),  # added
        ]
        result = differ.diff_file("src/app.py", new_nodes)
        assert result.unchanged == 1
        assert result.added == 1
        assert result.removed == 1  # old_func disappeared
        assert result.modified == 0

    def test_diff_skips_rows_with_none_name(self):
        """Malformed all-None store rows are ignored, not diffed."""
        store = _make_store(rows=[[None, None, None]])
        differ = GraphDiffer(store)
        result = differ.diff_file("src/app.py", [_nd("Function", "foo", 1)])
        # The None row is skipped; foo is treated as a new node.
        assert result.added == 1
        assert result.removed == 0

    def test_total_changes_property(self):
        """total_changes sums added + modified + removed (not unchanged)."""
        result = DiffResult(added=3, modified=1, unchanged=5, removed=2)
        assert result.total_changes == 6

    def test_store_is_queried_with_file_path(self):
        """diff_file must query the store scoped to the given file path."""
        store = _make_store(rows=[])
        differ = GraphDiffer(store)
        differ.diff_file("src/models.py", [])
        # Ensure the store was actually queried with the right path param.
        store.query.assert_called_once()
        # The second positional arg to store.query is the params mapping.
        call_params = store.query.call_args[0][1]
        assert call_params["file_path"] == "src/models.py"
|
451
|
|
|
452
|
|
|
453
|
# ── #45 — ParallelIngester ──────────────────────────────────────────────────── |
|
454
|
|
|
455
|
|
|
456
|
class TestParallelIngester:
    """Tests for ParallelIngester.ingest_parallel (#45).

    These are white-box tests: they install mock parsers directly into the
    wrapped RepoIngester's private ``_parsers`` registry so no real parsing
    (or tree-sitter dependency) is exercised.
    """

    def _setup_ingester_with_mock_parser(self, store, parse_result=None):
        """
        Return a ParallelIngester whose internal RepoIngester has a mock
        Python parser installed.
        """
        if parse_result is None:
            # Default per-file stats every mocked parse_file call reports.
            parse_result = {"functions": 2, "classes": 1, "edges": 3}

        ingester = ParallelIngester(store)
        mock_parser = MagicMock()
        mock_parser.parse_file.return_value = parse_result
        # Reach into the private registry so every .py file uses the mock.
        ingester._ingester._parsers["python"] = mock_parser
        return ingester, mock_parser

    def test_raises_on_missing_dir(self):
        """A nonexistent repo path raises FileNotFoundError up front."""
        store = _make_store()
        ingester = ParallelIngester(store)
        with pytest.raises(FileNotFoundError):
            ingester.ingest_parallel("/nonexistent/path")

    def test_returns_stats_dict_with_all_keys(self):
        """The stats dict always carries the full set of counter keys."""
        store = _make_store()
        ingester, _ = self._setup_ingester_with_mock_parser(store)
        with tempfile.TemporaryDirectory() as tmpdir:
            stats = ingester.ingest_parallel(tmpdir)
        # Subset check: extra keys are allowed, these six are required.
        assert {"files", "functions", "classes", "edges", "skipped", "errors"} <= set(stats)

    def test_processes_single_file(self):
        """One parsed file contributes its per-file stats verbatim."""
        store = _make_store()
        ingester, mock_parser = self._setup_ingester_with_mock_parser(
            store, {"functions": 3, "classes": 1, "edges": 4}
        )
        with tempfile.TemporaryDirectory() as tmpdir:
            (Path(tmpdir) / "app.py").write_text("def foo(): pass")
            stats = ingester.ingest_parallel(tmpdir)

        assert stats["files"] == 1
        assert stats["functions"] == 3
        assert stats["classes"] == 1
        assert stats["edges"] == 4
        assert stats["errors"] == 0

    def test_processes_multiple_files_concurrently(self):
        """Five files through a 3-worker pool are all counted exactly once."""
        store = _make_store()
        ingester, mock_parser = self._setup_ingester_with_mock_parser(
            store, {"functions": 1, "classes": 0, "edges": 0}
        )
        with tempfile.TemporaryDirectory() as tmpdir:
            for i in range(5):
                (Path(tmpdir) / f"mod{i}.py").write_text(f"def f{i}(): pass")
            stats = ingester.ingest_parallel(tmpdir, max_workers=3)

        assert stats["files"] == 5
        assert stats["functions"] == 5
        assert stats["errors"] == 0

    def test_aggregates_stats_across_files(self):
        """Per-file counts sum across files (2 files × the mocked stats)."""
        store = _make_store()
        ingester, _ = self._setup_ingester_with_mock_parser(
            store, {"functions": 2, "classes": 1, "edges": 5}
        )
        with tempfile.TemporaryDirectory() as tmpdir:
            (Path(tmpdir) / "a.py").write_text("x=1")
            (Path(tmpdir) / "b.py").write_text("y=2")
            stats = ingester.ingest_parallel(tmpdir)

        assert stats["files"] == 2
        assert stats["functions"] == 4
        assert stats["classes"] == 2
        assert stats["edges"] == 10

    def test_clear_flag_calls_store_clear(self):
        """clear=True wipes the store exactly once before ingesting."""
        store = _make_store()
        ingester, _ = self._setup_ingester_with_mock_parser(store)
        with tempfile.TemporaryDirectory() as tmpdir:
            ingester.ingest_parallel(tmpdir, clear=True)
        store.clear.assert_called_once()

    def test_no_clear_by_default(self):
        """Without the flag, the store must never be cleared."""
        store = _make_store()
        ingester, _ = self._setup_ingester_with_mock_parser(store)
        with tempfile.TemporaryDirectory() as tmpdir:
            ingester.ingest_parallel(tmpdir)
        store.clear.assert_not_called()

    def test_empty_repo_returns_zero_counts(self):
        """An empty directory yields zeroed counters, not an error."""
        store = _make_store()
        ingester, _ = self._setup_ingester_with_mock_parser(store)
        with tempfile.TemporaryDirectory() as tmpdir:
            stats = ingester.ingest_parallel(tmpdir)
        assert stats["files"] == 0
        assert stats["functions"] == 0

    def test_parser_exception_increments_errors_not_files(self):
        """A parser crash is recorded as an error and the file is not counted."""
        store = _make_store()
        ingester = ParallelIngester(store)
        broken_parser = MagicMock()
        broken_parser.parse_file.side_effect = RuntimeError("boom")
        ingester._ingester._parsers["python"] = broken_parser

        with tempfile.TemporaryDirectory() as tmpdir:
            (Path(tmpdir) / "broken.py").write_text("def x(): pass")
            stats = ingester.ingest_parallel(tmpdir)

        assert stats["files"] == 0
        assert stats["errors"] == 1

    def test_incremental_skips_unchanged_files(self):
        """incremental=True consults _file_unchanged and skips without parsing."""
        store = _make_store()
        ingester, mock_parser = self._setup_ingester_with_mock_parser(store)
        # Force the change detector to report "unchanged" for every file.
        ingester._ingester._file_unchanged = MagicMock(return_value=True)

        with tempfile.TemporaryDirectory() as tmpdir:
            (Path(tmpdir) / "unchanged.py").write_text("x=1")
            stats = ingester.ingest_parallel(tmpdir, incremental=True)

        assert stats["skipped"] == 1
        assert stats["files"] == 0
        mock_parser.parse_file.assert_not_called()

    def test_creates_repository_node(self):
        """Every run creates exactly one Repository node with name and path."""
        store = _make_store()
        ingester, _ = self._setup_ingester_with_mock_parser(store)
        with tempfile.TemporaryDirectory() as tmpdir:
            ingester.ingest_parallel(tmpdir)

        from navegador.graph.schema import NodeLabel

        store.create_node.assert_called_once()
        label, props = store.create_node.call_args[0]
        assert label == NodeLabel.Repository
        assert "name" in props and "path" in props

    def test_max_workers_none_uses_default(self):
        """Passing max_workers=None should not raise."""
        store = _make_store()
        ingester, _ = self._setup_ingester_with_mock_parser(store)
        with tempfile.TemporaryDirectory() as tmpdir:
            stats = ingester.ingest_parallel(tmpdir, max_workers=None)
        assert isinstance(stats, dict)

    def test_skips_non_python_files(self):
        """Non-.py files are ignored entirely — no parse calls, no counts."""
        store = _make_store()
        ingester, mock_parser = self._setup_ingester_with_mock_parser(store)
        with tempfile.TemporaryDirectory() as tmpdir:
            (Path(tmpdir) / "readme.md").write_text("# readme")
            (Path(tmpdir) / "config.yaml").write_text("key: value")
            stats = ingester.ingest_parallel(tmpdir)
        assert stats["files"] == 0
        mock_parser.parse_file.assert_not_called()
|
607
|
|