Navegador

navegador / tests / test_churn.py
Blame History Raw 462 lines
1
"""Tests for navegador.churn — ChurnAnalyzer and the `churn` CLI command."""
2
3
from __future__ import annotations
4
5
import json
6
from pathlib import Path
7
from unittest.mock import MagicMock, patch
8
9
import pytest
10
from click.testing import CliRunner
11
12
from navegador.churn import ChurnAnalyzer, ChurnEntry, CouplingPair
13
from navegador.cli.commands import main
14
15
16
# ── Helpers ───────────────────────────────────────────────────────────────────
17
18
# Fake `git log --format=%H --name-only` output.
# Three commits (all-hex hashes); each hash line is followed by a blank line
# and then the paths that commit touched:
#   aaaa... touches a.py, b.py
#   bbbb... touches b.py, c.py
#   cccc... touches a.py, b.py, c.py
# The literal must hold only hashes, blank separators and paths: any stray
# line would be read as one more changed path by the code under test.
GIT_LOG_NAME_ONLY = """\
aaaa111111111111111111111111111111111111

a.py
b.py
bbbb222222222222222222222222222222222222

b.py
c.py
cccc333333333333333333333333333333333333

a.py
b.py
c.py
"""
38
39
# Fake `git log --numstat --format=` output: one "added<TAB>deleted<TAB>path"
# row per file per commit, in the same commit order as GIT_LOG_NAME_ONLY.
# Per-file totals (added + deleted): a.py = 21, b.py = 14, c.py = 6.
# The literal must contain only numstat rows; stray lines would not parse as
# "added/deleted/path" triples.
GIT_LOG_NUMSTAT = """\
10\t2\ta.py
5\t1\tb.py
3\t0\tb.py
2\t2\tc.py
8\t1\ta.py
4\t1\tb.py
1\t1\tc.py
"""
49
50
51
def _make_analyzer(tmp_path: Path) -> ChurnAnalyzer:
    """Build a ChurnAnalyzer rooted at *tmp_path* with ``limit=500``.

    The tests in this module patch ``_run`` on the returned object, so the
    directory does not need to be a real git repository.
    """
    analyzer = ChurnAnalyzer(tmp_path, limit=500)
    return analyzer
54
55
56
def _mock_run(name_only_output: str = GIT_LOG_NAME_ONLY,
              numstat_output: str = GIT_LOG_NUMSTAT):
    """Build a ``side_effect`` callable that stands in for ``ChurnAnalyzer._run``.

    The returned function receives the git argument list and answers with
    *name_only_output* when ``--name-only`` is present, otherwise with
    *numstat_output* when ``--numstat`` is present, otherwise with ``""``.
    """
    # Checked in order, so ``--name-only`` wins if both flags are present.
    canned_outputs = (
        ("--name-only", name_only_output),
        ("--numstat", numstat_output),
    )

    def _dispatch(args: list[str]) -> str:
        for flag, output in canned_outputs:
            if flag in args:
                return output
        return ""

    return _dispatch
69
70
71
# ── ChurnEntry / CouplingPair dataclasses ─────────────────────────────────────
72
73
74
class TestDataclasses:
    """Check that the result dataclasses expose the fields they are built with."""

    def test_churn_entry_fields(self):
        entry = ChurnEntry(file_path="foo.py", commit_count=5, lines_changed=100)
        observed = (entry.file_path, entry.commit_count, entry.lines_changed)
        assert observed == ("foo.py", 5, 100)

    def test_coupling_pair_fields(self):
        pair = CouplingPair(file_a="a.py", file_b="b.py", co_change_count=3, confidence=0.75)
        observed = (pair.file_a, pair.file_b, pair.co_change_count, pair.confidence)
        assert observed == ("a.py", "b.py", 3, 0.75)
87
88
89
# ── file_churn ────────────────────────────────────────────────────────────────
90
91
92
class TestFileChurn:
    """Exercise ``ChurnAnalyzer.file_churn`` with ``_run`` replaced by canned output."""

    def test_returns_list_of_churn_entries(self, tmp_path):
        churn_analyzer = _make_analyzer(tmp_path)
        with patch.object(churn_analyzer, "_run", side_effect=_mock_run()):
            entries = churn_analyzer.file_churn()
        assert isinstance(entries, list)
        for entry in entries:
            assert isinstance(entry, ChurnEntry)

    def test_commit_counts_are_correct(self, tmp_path):
        churn_analyzer = _make_analyzer(tmp_path)
        with patch.object(churn_analyzer, "_run", side_effect=_mock_run()):
            entries = churn_analyzer.file_churn()

        commits_per_file = {entry.file_path: entry.commit_count for entry in entries}
        expected_counts = (
            ("a.py", 2),  # commits aaaa + cccc
            ("b.py", 3),  # commits aaaa + bbbb + cccc
            ("c.py", 2),  # commits bbbb + cccc
        )
        for path, expected in expected_counts:
            assert commits_per_file[path] == expected

    def test_sorted_by_commit_count_descending(self, tmp_path):
        churn_analyzer = _make_analyzer(tmp_path)
        with patch.object(churn_analyzer, "_run", side_effect=_mock_run()):
            entries = churn_analyzer.file_churn()
        commit_counts = [entry.commit_count for entry in entries]
        # Descending order: no element is smaller than its successor.
        assert all(
            earlier >= later
            for earlier, later in zip(commit_counts, commit_counts[1:])
        )

    def test_lines_changed_aggregated(self, tmp_path):
        churn_analyzer = _make_analyzer(tmp_path)
        with patch.object(churn_analyzer, "_run", side_effect=_mock_run()):
            entries = churn_analyzer.file_churn()
        lines_per_file = {entry.file_path: entry.lines_changed for entry in entries}
        expected_lines = (
            ("a.py", 21),  # (10+2) + (8+1)
            ("b.py", 14),  # (5+1) + (3+0) + (4+1)
            ("c.py", 6),   # (2+2) + (1+1)
        )
        for path, expected in expected_lines:
            assert lines_per_file[path] == expected

    def test_empty_git_output_returns_empty_list(self, tmp_path):
        churn_analyzer = _make_analyzer(tmp_path)
        # Every git invocation yields no output at all.
        with patch.object(churn_analyzer, "_run", return_value=""):
            entries = churn_analyzer.file_churn()
        assert entries == []

    def test_binary_files_skipped_in_lines_changed(self, tmp_path):
        # A "-\t-\t<path>" row is included alongside a normal numeric row.
        numstat_with_binary = "-\t-\timage.png\n10\t2\ta.py\n"
        churn_analyzer = _make_analyzer(tmp_path)
        fake_run = _mock_run(numstat_output=numstat_with_binary)
        with patch.object(churn_analyzer, "_run", side_effect=fake_run):
            entries = churn_analyzer.file_churn()
        lines_per_file = {entry.file_path: entry.lines_changed for entry in entries}
        # The non-numeric row must not crash parsing; a.py still totals 10 + 2.
        assert lines_per_file.get("a.py", 0) == 12
149
150
151
# ── coupling_pairs ────────────────────────────────────────────────────────────
152
153
154
class TestCouplingPairs:
    """Exercise ``ChurnAnalyzer.coupling_pairs`` with ``_run`` replaced by canned output."""

    @staticmethod
    def _by_files(pairs):
        """Index *pairs* by their ``(file_a, file_b)`` tuple."""
        return {(pair.file_a, pair.file_b): pair for pair in pairs}

    def test_returns_list_of_coupling_pairs(self, tmp_path):
        churn_analyzer = _make_analyzer(tmp_path)
        with patch.object(churn_analyzer, "_run", side_effect=_mock_run()):
            pairs = churn_analyzer.coupling_pairs(min_co_changes=1, min_confidence=0.0)
        assert isinstance(pairs, list)
        for pair in pairs:
            assert isinstance(pair, CouplingPair)

    def test_ab_pair_co_change_count(self, tmp_path):
        """a.py and b.py appear together in commits aaaa and cccc → co_change=2."""
        churn_analyzer = _make_analyzer(tmp_path)
        with patch.object(churn_analyzer, "_run", side_effect=_mock_run()):
            pairs = churn_analyzer.coupling_pairs(min_co_changes=1, min_confidence=0.0)
        ab_pair = self._by_files(pairs).get(("a.py", "b.py"))
        assert ab_pair is not None
        assert ab_pair.co_change_count == 2

    def test_bc_pair_co_change_count(self, tmp_path):
        """b.py and c.py appear together in commits bbbb and cccc → co_change=2."""
        churn_analyzer = _make_analyzer(tmp_path)
        with patch.object(churn_analyzer, "_run", side_effect=_mock_run()):
            pairs = churn_analyzer.coupling_pairs(min_co_changes=1, min_confidence=0.0)
        bc_pair = self._by_files(pairs).get(("b.py", "c.py"))
        assert bc_pair is not None
        assert bc_pair.co_change_count == 2

    def test_confidence_formula(self, tmp_path):
        """confidence = co_change_count / max(changes_a, changes_b)."""
        churn_analyzer = _make_analyzer(tmp_path)
        with patch.object(churn_analyzer, "_run", side_effect=_mock_run()):
            pairs = churn_analyzer.coupling_pairs(min_co_changes=1, min_confidence=0.0)
        # a.py: 2 commits, b.py: 3 commits, co=2 → 2/3 ≈ 0.6667
        ab_pair = self._by_files(pairs)[("a.py", "b.py")]
        expected_confidence = round(2 / 3, 4)
        assert abs(ab_pair.confidence - expected_confidence) < 0.001

    def test_min_co_changes_filter(self, tmp_path):
        churn_analyzer = _make_analyzer(tmp_path)
        with patch.object(churn_analyzer, "_run", side_effect=_mock_run()):
            # No pair in the fixture co-changes more than twice, so a floor
            # of three must filter everything out.
            pairs = churn_analyzer.coupling_pairs(min_co_changes=3, min_confidence=0.0)
        assert pairs == []

    def test_min_confidence_filter(self, tmp_path):
        # Fixture commits:
        #   aaaa: a.py, b.py
        #   bbbb: b.py, c.py
        #   cccc: a.py, b.py, c.py
        #
        # Commit counts: a=2, b=3, c=2
        #   (a,b): co=2 → confidence = 2/3 ≈ 0.667
        #   (a,c): co=1 → confidence = 1/2 = 0.5
        #   (b,c): co=2 → confidence = 2/3 ≈ 0.667
        #
        # With min_confidence=0.6, a/b and b/c survive while a/c is dropped.
        churn_analyzer = _make_analyzer(tmp_path)
        with patch.object(churn_analyzer, "_run", side_effect=_mock_run()):
            pairs = churn_analyzer.coupling_pairs(min_co_changes=1, min_confidence=0.6)
        indexed = self._by_files(pairs)
        assert ("a.py", "b.py") in indexed
        assert ("b.py", "c.py") in indexed
        # a/c sits at 0.5, under the threshold.
        assert ("a.py", "c.py") not in indexed

    def test_sorted_by_co_change_count_descending(self, tmp_path):
        churn_analyzer = _make_analyzer(tmp_path)
        with patch.object(churn_analyzer, "_run", side_effect=_mock_run()):
            pairs = churn_analyzer.coupling_pairs(min_co_changes=1, min_confidence=0.0)
        co_changes = [pair.co_change_count for pair in pairs]
        # Descending order: no element is smaller than its successor.
        assert all(
            earlier >= later
            for earlier, later in zip(co_changes, co_changes[1:])
        )

    def test_empty_history_returns_empty_list(self, tmp_path):
        churn_analyzer = _make_analyzer(tmp_path)
        # Every git invocation yields no output at all.
        with patch.object(churn_analyzer, "_run", return_value=""):
            pairs = churn_analyzer.coupling_pairs()
        assert pairs == []

    def test_single_file_per_commit_no_pairs(self, tmp_path):
        """Commits touching only one file produce no coupling pairs."""
        first_commit = "abc1111111111111111111111111111111111111\n\na.py\n"
        second_commit = "def2222222222222222222222222222222222222\n\nb.py\n"
        log = first_commit + second_commit
        churn_analyzer = _make_analyzer(tmp_path)
        fake_run = _mock_run(name_only_output=log)
        with patch.object(churn_analyzer, "_run", side_effect=fake_run):
            pairs = churn_analyzer.coupling_pairs(min_co_changes=1, min_confidence=0.0)
        assert pairs == []
243
244
245
# ── store_churn ───────────────────────────────────────────────────────────────
246
247
248
class TestStoreChurn:
    """Exercise ``ChurnAnalyzer.store_churn`` against a mocked graph store."""

    def _make_store(self):
        """Return a MagicMock store whose ``query`` yields a canned result object."""
        query_result = MagicMock(nodes_modified=1, properties_set=2)
        fake_store = MagicMock()
        fake_store.query.return_value = query_result
        return fake_store

    def test_returns_dict_with_expected_keys(self, tmp_path):
        churn_analyzer = _make_analyzer(tmp_path)
        fake_store = self._make_store()
        with patch.object(churn_analyzer, "_run", side_effect=_mock_run()):
            stats = churn_analyzer.store_churn(fake_store)
        for key in ("churn_updated", "couplings_written"):
            assert key in stats

    def test_churn_updated_count(self, tmp_path):
        churn_analyzer = _make_analyzer(tmp_path)
        fake_store = self._make_store()
        with patch.object(churn_analyzer, "_run", side_effect=_mock_run()):
            stats = churn_analyzer.store_churn(fake_store)
        # The fixture mentions three distinct files → three churn updates.
        assert stats["churn_updated"] == 3

    def test_store_query_called_for_each_file(self, tmp_path):
        churn_analyzer = _make_analyzer(tmp_path)
        fake_store = self._make_store()
        with patch.object(churn_analyzer, "_run", side_effect=_mock_run()):
            churn_analyzer.store_churn(fake_store)
        # One query per file at minimum, i.e. at least three calls.
        assert fake_store.query.call_count >= 3

    def test_coupled_with_edges_written(self, tmp_path):
        churn_analyzer = _make_analyzer(tmp_path)
        fake_store = self._make_store()
        with patch.object(churn_analyzer, "_run", side_effect=_mock_run()):
            stats = churn_analyzer.store_churn(fake_store)
        # Default thresholds: min_co_changes=3, min_confidence=0.5.
        # No fixture pair co-changes more than twice, so zero edges are
        # expected; only the value's type is asserted here.
        # NOTE(review): consider asserting == 0 once the defaults are confirmed.
        assert isinstance(stats["couplings_written"], int)

    def test_coupled_with_edges_written_low_threshold(self, tmp_path):
        """With relaxed thresholds coupling edges should be written."""
        churn_analyzer = _make_analyzer(tmp_path)
        fake_store = self._make_store()
        # Force coupling_pairs to report exactly one pair regardless of thresholds.
        fake_pairs = [
            CouplingPair("a.py", "b.py", co_change_count=2, confidence=0.67),
        ]
        with patch.object(churn_analyzer, "_run", side_effect=_mock_run()):
            with patch.object(churn_analyzer, "coupling_pairs", return_value=fake_pairs):
                stats = churn_analyzer.store_churn(fake_store)
        assert stats["couplings_written"] == 1

    def test_cypher_contains_coupled_with(self, tmp_path):
        """Verify the Cypher for edges references COUPLED_WITH."""
        churn_analyzer = _make_analyzer(tmp_path)
        fake_store = self._make_store()
        fake_pairs = [CouplingPair("a.py", "b.py", co_change_count=5, confidence=0.8)]
        with patch.object(churn_analyzer, "_run", side_effect=_mock_run()):
            with patch.object(churn_analyzer, "coupling_pairs", return_value=fake_pairs):
                churn_analyzer.store_churn(fake_store)

        # First positional argument of every store.query call is the Cypher text.
        issued_cypher = [
            positional[0] for positional, _keywords in fake_store.query.call_args_list
        ]
        edge_statements = [text for text in issued_cypher if "COUPLED_WITH" in text]
        assert len(edge_statements) == 1
314
315
316
# ── CLI command ───────────────────────────────────────────────────────────────
317
318
319
class TestChurnCLI:
    """Exercise the ``churn`` CLI command with ``ChurnAnalyzer`` mocked out."""

    def _analyzer_patch(self, churn_entries=None, pairs=None):
        """Return a patcher that replaces ``navegador.churn.ChurnAnalyzer``.

        The replacement class returns a MagicMock analyzer whose
        ``file_churn`` / ``coupling_pairs`` yield *churn_entries* / *pairs*
        (two entries and one pair by default). Passing an empty list is
        honoured; only ``None`` selects the defaults.

        NOTE(review): this patches the name in ``navegador.churn``, so it
        presumably relies on the CLI resolving the class from that module at
        call time — confirm against ``navegador.cli.commands``.
        """
        if churn_entries is None:
            churn_entries = [
                ChurnEntry("foo.py", commit_count=5, lines_changed=100),
                ChurnEntry("bar.py", commit_count=3, lines_changed=40),
            ]
        if pairs is None:
            pairs = [
                CouplingPair("bar.py", "foo.py", co_change_count=3, confidence=0.6),
            ]

        mock_analyzer = MagicMock()
        mock_analyzer.file_churn.return_value = churn_entries
        mock_analyzer.coupling_pairs.return_value = pairs

        return patch("navegador.churn.ChurnAnalyzer", return_value=mock_analyzer)

    def test_basic_invocation_exits_zero(self, tmp_path):
        runner = CliRunner()
        with runner.isolated_filesystem():
            with self._analyzer_patch():
                result = runner.invoke(main, ["churn", str(tmp_path)])
        assert result.exit_code == 0, result.output

    def test_json_output_has_expected_keys(self, tmp_path):
        runner = CliRunner()
        with runner.isolated_filesystem():
            with self._analyzer_patch():
                result = runner.invoke(main, ["churn", str(tmp_path), "--json"])
        assert result.exit_code == 0, result.output
        data = json.loads(result.output)
        assert "churn" in data
        assert "coupling_pairs" in data

    def test_json_churn_entry_shape(self, tmp_path):
        runner = CliRunner()
        with runner.isolated_filesystem():
            with self._analyzer_patch():
                result = runner.invoke(main, ["churn", str(tmp_path), "--json"])
        # Surface the CLI output on failure rather than a JSONDecodeError.
        assert result.exit_code == 0, result.output
        data = json.loads(result.output)
        entry = data["churn"][0]
        for key in ("file_path", "commit_count", "lines_changed"):
            assert key in entry

    def test_json_coupling_pair_shape(self, tmp_path):
        runner = CliRunner()
        with runner.isolated_filesystem():
            with self._analyzer_patch():
                result = runner.invoke(main, ["churn", str(tmp_path), "--json"])
        # Surface the CLI output on failure rather than a JSONDecodeError.
        assert result.exit_code == 0, result.output
        data = json.loads(result.output)
        pair = data["coupling_pairs"][0]
        for key in ("file_a", "file_b", "co_change_count", "confidence"):
            assert key in pair

    def test_limit_option_passed_to_analyzer(self, tmp_path):
        """``--limit`` must reach the ``ChurnAnalyzer`` constructor."""
        runner = CliRunner()
        mock_instance = MagicMock()
        mock_instance.file_churn.return_value = []
        mock_instance.coupling_pairs.return_value = []
        mock_cls = MagicMock(return_value=mock_instance)

        with runner.isolated_filesystem():
            with patch("navegador.churn.ChurnAnalyzer", mock_cls):
                result = runner.invoke(main, ["churn", str(tmp_path), "--limit", "100"])

        # If the command never built an analyzer, fail with the CLI output
        # instead of a TypeError from unpacking ``None``.
        assert mock_cls.call_args is not None, result.output
        args, kwargs = mock_cls.call_args
        # Accept the limit as a keyword or as the second positional argument,
        # without raising IndexError when neither form was used.
        if "limit" in kwargs:
            limit = kwargs["limit"]
        else:
            limit = args[1] if len(args) > 1 else None
        assert limit == 100

    def test_min_confidence_passed_to_coupling_pairs(self, tmp_path):
        """``--min-confidence`` must be forwarded as the ``min_confidence`` keyword."""
        runner = CliRunner()
        mock_instance = MagicMock()
        mock_instance.file_churn.return_value = []
        mock_instance.coupling_pairs.return_value = []
        mock_cls = MagicMock(return_value=mock_instance)

        with runner.isolated_filesystem():
            with patch("navegador.churn.ChurnAnalyzer", mock_cls):
                runner.invoke(main, ["churn", str(tmp_path), "--min-confidence", "0.8"])

        mock_instance.coupling_pairs.assert_called_once()
        _, kwargs = mock_instance.coupling_pairs.call_args
        assert kwargs.get("min_confidence") == 0.8

    def test_store_flag_calls_store_churn(self, tmp_path):
        runner = CliRunner()
        mock_instance = MagicMock()
        mock_instance.store_churn.return_value = {
            "churn_updated": 2,
            "couplings_written": 1,
        }
        mock_cls = MagicMock(return_value=mock_instance)

        with runner.isolated_filesystem():
            # Replace both the analyzer class and the store factory so no
            # real graph store is touched.
            with patch("navegador.churn.ChurnAnalyzer", mock_cls):
                with patch("navegador.cli.commands._get_store", return_value=MagicMock()):
                    result = runner.invoke(main, ["churn", str(tmp_path), "--store"])

        assert result.exit_code == 0, result.output
        mock_instance.store_churn.assert_called_once()

    def test_store_json_flag_outputs_stats(self, tmp_path):
        runner = CliRunner()
        mock_instance = MagicMock()
        mock_instance.store_churn.return_value = {
            "churn_updated": 5,
            "couplings_written": 2,
        }
        mock_cls = MagicMock(return_value=mock_instance)

        with runner.isolated_filesystem():
            with patch("navegador.churn.ChurnAnalyzer", mock_cls):
                with patch("navegador.cli.commands._get_store", return_value=MagicMock()):
                    result = runner.invoke(
                        main, ["churn", str(tmp_path), "--store", "--json"]
                    )

        assert result.exit_code == 0, result.output
        data = json.loads(result.output)
        assert data["churn_updated"] == 5
        assert data["couplings_written"] == 2

    def test_no_pairs_shows_message(self, tmp_path):
        runner = CliRunner()
        with runner.isolated_filesystem():
            with self._analyzer_patch(pairs=[]):
                result = runner.invoke(main, ["churn", str(tmp_path)])
        assert result.exit_code == 0
        assert "No coupling pairs found" in result.output

    def test_table_output_contains_file_names(self, tmp_path):
        runner = CliRunner()
        with runner.isolated_filesystem():
            with self._analyzer_patch():
                result = runner.invoke(main, ["churn", str(tmp_path)])
        # Same invocation as test_basic_invocation_exits_zero; checking the
        # exit code first keeps a crash from showing up as a missing name.
        assert result.exit_code == 0, result.output
        assert "foo.py" in result.output
        assert "bar.py" in result.output
462

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button