Navegador

feat: git diff integration — map uncommitted changes to affected flows and knowledge. DiffAnalyzer maps changed lines to graph symbols and follows knowledge edges. CLI: navegador diff [--format json|markdown]. Closes #15

lmata 2026-03-23 05:14 trunk

Commit b853fc008f2929086bcd1c73c19a94e9047084ec95789148503f2a99715347e9

Parent 8c1f142eaa6ab94…

2 files changed +282 +577

+ navegador/diff.py + tests/test_diff.py

A navegador/diff.py

+282

		--- a/navegador/diff.py
		+++ b/navegador/diff.py
		@@ -0,0 +1,282 @@
	1	+"""
	2	+DiffAnalyzer — maps git diff output to affected knowledge-graph nodes.
	3	+
	4	+Given uncommitted changes in a repository, this module tells you:
	5	+ - which files changed
	6	+ - which functions/classes/methods fall within the changed line ranges
	7	+ - which knowledge nodes (Concept, Rule, Decision, WikiPage) are linked
	8	+ to those symbols via ANNOTATES, IMPLEMENTS, or GOVERNS edges
	9	+ - a single impact_summary dict bundling all of the above
	10	+
	11	+Usage::
	12	+
	13	+ from pathlib import Path
	14	+ from navegador.diff import DiffAnalyzer
	15	+ from navegador.graph.store import GraphStore
	16	+
	17	+ store = GraphStore.sqlite(".navegador/graph.db")
	18	+ analyzer = DiffAnalyzer(store, Path("."))
	19	+ print(analyzer.impact_summary())
	20	+"""
	21	+
	22	+from __future__ import annotations
	23	+
	24	+import json
	25	+import subprocess
	26	+from pathlib import Path
	27	+from typing import Any
	28	+
	29	+from navegador.graph.store import GraphStore
	30	+from navegador.vcs import GitAdapter
	31	+
	32	+
	33	+# ── Cypher helpers ────────────────────────────────────────────────────────────
	34	+
	35	+# All symbols (Function / Class / Method) in a given file with their line ranges
	36	+_SYMBOLS_IN_FILE = """
	37	+MATCH (n)
	38	+WHERE (n:Function OR n:Class OR n:Method)
	39	+ AND n.file_path = $file_path
	40	+RETURN labels(n)[0] AS type,
	41	+ n.name AS name,
	42	+ n.file_path AS file_path,
	43	+ n.line_start AS line_start,
	44	+ n.line_end AS line_end
	45	+ORDER BY n.line_start
	46	+"""
	47	+
	48	+# Knowledge nodes reachable from a code symbol in one hop via cross-layer edges.
	49	+# Handles:
	50	+# (symbol)-[:ANNOTATES\|IMPLEMENTS]->(knowledge) — code → knowledge
	51	+# (knowledge)-[:GOVERNS\|ANNOTATES]->(symbol) — knowledge → code
	52	+_KNOWLEDGE_FOR_SYMBOL = """
	53	+MATCH (sym)
	54	+WHERE (sym:Function OR sym:Class OR sym:Method)
	55	+ AND sym.name = $name
	56	+ AND sym.file_path = $file_path
	57	+OPTIONAL MATCH (sym)-[:ANNOTATES\|IMPLEMENTS]->(k1)
	58	+ WHERE (k1:Concept OR k1:Rule OR k1:Decision OR k1:WikiPage)
	59	+OPTIONAL MATCH (k2)-[:GOVERNS\|ANNOTATES]->(sym)
	60	+ WHERE (k2:Concept OR k2:Rule OR k2:Decision OR k2:WikiPage)
	61	+WITH collect(DISTINCT k1) + collect(DISTINCT k2) AS knowledge_nodes
	62	+UNWIND knowledge_nodes AS k
	63	+RETURN DISTINCT
	64	+ labels(k)[0] AS type,
	65	+ k.name AS name,
	66	+ coalesce(k.description, '') AS description,
	67	+ coale
	68	+ (current_new_start, endhanges in a repository"""
	69	+DiffAnalyzer — maps git diff output to affected knowledge-graph nodes.
	70	+
	71	+Given uncommitted changes in a repository, this module tells you:
	72	+ - which files changed
	73	+ - which functions/classes/methods fall within the changed line ranges
	74	+ - which knowledge nodes (Concept, Rule, Decision, WikiPage) are linked
	75	+ to those symbols via ANNOTATES, IMPLEMENTS, or GOVERNS edges
	76	+ - a single impact_summary dict bundling all of the above
	77	+
	78	+Usage::
	79	+
	80	+ from pathlib import Path
	81	+ from navegador.diff import DiffAnalyzer
	82	+ from navegador.graph.store import GraphStore
	83	+
	84	+ store = GraphStore.sqlite(".navegador/graph.db")
	85	+ analyzer = DiffAnalyzer(store, Path("."))
	86	+ print(analyzer.impact_summary())
	87	+"""
	88	+
	89	+from __future__ import annotations
	90	+
	91	+import json
	92	+import subprocess
	93	+from pathlib import Path
	94	+from typing import Any
	95	+
	96	+from navegador.graph.store import GraphStore
	97	+from navegador.vcs import GitAdapter
	98	+
	99	+# ── Cypher helpers ────────────────────────────────────────────────────────────
	100	+
	101	+# All symbols (Function / Class / Method) in a given file with their line ranges
	102	+_SYMBOLS_IN_FILE = """
	103	+MATCH (n)
	104	+WHERE (n:Function OR n:Class OR n:Method)
	105	+ AND n.file_path = $file_path
	106	+RETURN labels(n)[0] AS type,
	107	+ n.name AS name,
	108	+ n.file_path AS file_path,
	109	+ n.line_start AS line_start,
	110	+ n.line_end AS line_end
	111	+ORDER BY n.line_start
	112	+"""
	113	+
	114	+# Knowledge nodes reachable from a code symbol in one hop via cross-layer edges.
	115	+# Handles:
	116	+# (symbol)-[:ANNOTATES\|IMPLEMENTS]->(knowledge) — code → knowledge
	117	+# (knowledge)-[:GOVERNS\|ANNOTATES]->(symbol) — knowledge → code
	118	+_KNOWLEDGE_FOR_SYMBOL = """
	119	+MATCH (sym)
	120	+WHERE (sym:Function OR sym:Class OR sym:Method)
	121	+ AND sym.name = $name
	122	+ AND sym.file_path = $file_path
	123	+OPTIONAL MATCH (sym)-[:ANNOTATES\|IMPLEMENTS]->(k1)
	124	+ WHERE (k1:Concept OR k1:Rule OR k1:Decision OR k1:WikiPage)
	125	+OPTIONAL MATCH (k2)-[:GOVERNS\|ANNOTATES]->(sym)
	126	+ WHERE (k2:Concept OR k2:Rule OR k2:Decision OR k2:WikiPage)
	127	+WITH collect(DISTINCT k1) + collect(DISTINCT k2) AS knowledge_nodes
	128	+UNWIND knowledge_nodes AS k
	129	+RETURN DISTINCT
	130	+ labels(k)[0] AS type,
	131	+ k.name AS name,
	132	+ coalesce(k.description, '') AS description,
	133	+ coalesce(k.domain, '') AS domain,
	134	+ coalesce(k.status, '') AS status
	135	+"""
	136	+
	137	+
	138	+def _parse_unified_diff_hunks(diff_output: str) -> dict[str, list[tuple[int, int]]]:
	139	+ """
	140	+ Parse the output of ``git diff -U0 HEAD`` and return a mapping of
	141	+ file path → list of (start_line, end_line) changed ranges.
	142	+
	143	+ Only new/modified lines (``+`` prefix) are tracked; deleted-only hunks
	144	+ contribute the surrounding context line instead (line before deletion).
	145	+ """
	146	+ result: dict[str, list[tuple[int, int]]] = {}
	147	+ current_file: str \| None = None
	148	+ current_new_start = 0
	149	+ current_new_count = 0
	150	+
	151	+ for line in diff_output.splitlines():
	152	+ # New file header: +++ b/path/to/file
	153	+ if line.startswith("+++ b/"):
	154	+ current_file = line[6:]
	155	+ if current_file not in result:
	156	+ result[current_file] = []
	157	+ elif line.startswith("+++ /dev/null"):
	158	+ current_file = None # deleted file — skip
	159	+ elif line.startswith("@@ "):
	160	+ # Hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
	161	+ try:
	162	+ new_info = line.split("+")[1].split("@@")[0].strip()
	163	+ if "," in new_info:
	164	+ new_start_str, new_count_str = new_info.split(",", 1)
	165	+ current_new_start = int(new_start_str)
	166	+ current_new_count = int(new_count_str)
	167	+ else:
	168	+ current_new_start = int(new_info)
	169	+ current_new_count = 1
	170	+ if current_file and current_new_count > 0:
	171	+ end = current_new_start + max(current_new_count - 1, 0)
	172	+ result.setdefault(current_file, []).append((current_new_start, end))
	173	+ except (ValueError, IndexError):
	174	+ pass
	175	+
	176	+ return result
	177	+
	178	+
	179	+def _lines_overlap(
	180	+ changed_ranges: list[tuple[int, int]],
	181	+ line_start: int \| None,
	182	+ line_end: int \| None,
	183	+) -> bool:
	184	+ """Return True when any changed range overlaps with [line_start, line_end]."""
	185	+ if line_start is None:
	186	+ return False
	187	+ sym_start = line_start
	188	+ sym_end = line_end if line_end is not None else line_start
	189	+ for r_start, r_end in changed_ranges:
	190	+ if r_start <= sym_end and r_end >= sym_start:
	191	+ return True
	192	+ return False
	193	+
	194	+
	195	+# ── DiffAnalyzer ─────────────────────────────────────────────────────────────
	196	+
	197	+
	198	+class DiffAnalyzer:
	199	+ """Maps git diff output to affected graph nodes.
	200	+
	201	+ Parameters
	202	+ ----------
	203	+ store:
	204	+ An open :class:`~navegador.graph.store.GraphStore`.
	205	+ repo_path:
	206	+ Root of the git repository to inspect.
	207	+ """
	208	+
	209	+ def __init__(self, store: GraphStore, repo_path: Path) -> None:
	210	+ self.store = store
	211	+ self.repo_path = Path(repo_path)
	212	+ self._git = GitAdapter(self.repo_path)
	213	+
	214	+ # ------------------------------------------------------------------
	215	+ # Public API
	216	+ # ------------------------------------------------------------------
	217	+
	218	+ def changed_files(self) -> list[str]:
	219	+ """Return paths of files with uncommitted changes (staged + unstaged).
	220	+
	221	+ Delegates to :class:`~navegador.vcs.GitAdapter` which runs
	222	+ ``git diff HEAD --name-only``.
	223	+ """
	224	+ return self._git.changed_files()
	225	+
	226	+ def changed_lines(self) -> dict[str, list[tuple[int, int]]]:
	227	+ """Return a mapping of file path → list of (start, end) changed line ranges.
	228	+
	229	+ Runs ``git diff -U0 HEAD`` and parses the unified diff hunk headers.
	230	+ Falls back to the whole file (line 1 to a large sentinel) when the
	231	+ diff cannot be parsed precisely — ensuring callers always get a result.
	232	+ """
	233	+ result = subprocess.run(
	234	+ ["git", "diff", "-U0", "HEAD"],
	235	+ cwd=self.repo_path,
	236	+ capture_output=True,
	237	+ text=True,
	238	+ check=False,
	239	+ )
	240	+ if result.returncode != 0 or not result.stdout.strip():
	241	+ # No HEAD yet (initial commit) or empty diff — treat all changed
	242	+ # files as fully-changed using a wide sentinel range.
	243	+ return {f: [(1, 999_999)] for f in self.changed_files()}
	244	+
	245	+ parsed = _parse_unified_diff_hunks(result.stdout)
	246	+
	247	+ # Files returned by changed_files() but missing from the parsed diff
	248	+ # (e.g. binary files, new untracked files added with --intent-to-add)
	249	+ # get a full-file sentinel range.
	250	+ for f in self.changed_files():
	251	+ if f not in parsed:
	252	+ parsed[f] = [(1, 999_999)]
	253	+
	254	+ return parsed
	255	+
	256	+ def affected_symbols(self) -> list[dict[str, Any]]:
	257	+ """Return functions/classes/methods whose line ranges overlap changed lines.
	258	+
	259	+ Each entry is a dict with the keys:
	260	+ ``type``, ``name``, ``file_path``, ``line_start``, ``line_end``.
	261	+ """
	262	+ line_map = self.changed_lines()
	263	+ symbols: list[dict[str, Any]] = []
	264	+ seen: set[tuple[str, str]] = set()
	265	+
	266	+ for file_path, changed_ranges in line_map.items():
	267	+ rows = self.store.query(_SYMBOLS_IN_FILE, {"file_path": file_path})
	268	+ if not rows or not rows.result_set:
	269	+ continue
	270	+ for row in rows.result_set:
	271	+ sym_type, name, fp, line_start, line_end = row
	272	+ key = (name, fp or file_path)
	273	+ if key in seen:
	274	+ continue
	275	+ if _lines_overlap(changed_ranges, line_start, line_end):
	276	+ seen.add(key)
	277	+ symbols.append(
	278	+ {
	279	+ "type": sym_type,
	280	+ "name": name,
	281	+ "file_path": fp or file_path,
	282	+

	--- a/navegador/diff.py
	+++ b/navegador/diff.py
	@@ -0,0 +1,282 @@

	--- a/navegador/diff.py
	+++ b/navegador/diff.py
	@@ -0,0 +1,282 @@
1	"""
2	DiffAnalyzer — maps git diff output to affected knowledge-graph nodes.
3
4	Given uncommitted changes in a repository, this module tells you:
5	- which files changed
6	- which functions/classes/methods fall within the changed line ranges
7	- which knowledge nodes (Concept, Rule, Decision, WikiPage) are linked
8	to those symbols via ANNOTATES, IMPLEMENTS, or GOVERNS edges
9	- a single impact_summary dict bundling all of the above
10
11	Usage::
12
13	from pathlib import Path
14	from navegador.diff import DiffAnalyzer
15	from navegador.graph.store import GraphStore
16
17	store = GraphStore.sqlite(".navegador/graph.db")
18	analyzer = DiffAnalyzer(store, Path("."))
19	print(analyzer.impact_summary())
20	"""
21
22	from __future__ import annotations
23
24	import json
25	import subprocess
26	from pathlib import Path
27	from typing import Any
28
29	from navegador.graph.store import GraphStore
30	from navegador.vcs import GitAdapter
31
32
33	# ── Cypher helpers ────────────────────────────────────────────────────────────
34
35	# All symbols (Function / Class / Method) in a given file with their line ranges
36	_SYMBOLS_IN_FILE = """
37	MATCH (n)
38	WHERE (n:Function OR n:Class OR n:Method)
39	AND n.file_path = $file_path
40	RETURN labels(n)[0] AS type,
41	n.name AS name,
42	n.file_path AS file_path,
43	n.line_start AS line_start,
44	n.line_end AS line_end
45	ORDER BY n.line_start
46	"""
47
48	# Knowledge nodes reachable from a code symbol in one hop via cross-layer edges.
49	# Handles:
50	# (symbol)-[:ANNOTATES\|IMPLEMENTS]->(knowledge) — code → knowledge
51	# (knowledge)-[:GOVERNS\|ANNOTATES]->(symbol) — knowledge → code
52	_KNOWLEDGE_FOR_SYMBOL = """
53	MATCH (sym)
54	WHERE (sym:Function OR sym:Class OR sym:Method)
55	AND sym.name = $name
56	AND sym.file_path = $file_path
57	OPTIONAL MATCH (sym)-[:ANNOTATES\|IMPLEMENTS]->(k1)
58	WHERE (k1:Concept OR k1:Rule OR k1:Decision OR k1:WikiPage)
59	OPTIONAL MATCH (k2)-[:GOVERNS\|ANNOTATES]->(sym)
60	WHERE (k2:Concept OR k2:Rule OR k2:Decision OR k2:WikiPage)
61	WITH collect(DISTINCT k1) + collect(DISTINCT k2) AS knowledge_nodes
62	UNWIND knowledge_nodes AS k
63	RETURN DISTINCT
64	labels(k)[0] AS type,
65	k.name AS name,
66	coalesce(k.description, '') AS description,
67	coale
68	(current_new_start, endhanges in a repository"""
69	DiffAnalyzer — maps git diff output to affected knowledge-graph nodes.
70
71	Given uncommitted changes in a repository, this module tells you:
72	- which files changed
73	- which functions/classes/methods fall within the changed line ranges
74	- which knowledge nodes (Concept, Rule, Decision, WikiPage) are linked
75	to those symbols via ANNOTATES, IMPLEMENTS, or GOVERNS edges
76	- a single impact_summary dict bundling all of the above
77
78	Usage::
79
80	from pathlib import Path
81	from navegador.diff import DiffAnalyzer
82	from navegador.graph.store import GraphStore
83
84	store = GraphStore.sqlite(".navegador/graph.db")
85	analyzer = DiffAnalyzer(store, Path("."))
86	print(analyzer.impact_summary())
87	"""
88
89	from __future__ import annotations
90
91	import json
92	import subprocess
93	from pathlib import Path
94	from typing import Any
95
96	from navegador.graph.store import GraphStore
97	from navegador.vcs import GitAdapter
98
99	# ── Cypher helpers ────────────────────────────────────────────────────────────
100
101	# All symbols (Function / Class / Method) in a given file with their line ranges
102	_SYMBOLS_IN_FILE = """
103	MATCH (n)
104	WHERE (n:Function OR n:Class OR n:Method)
105	AND n.file_path = $file_path
106	RETURN labels(n)[0] AS type,
107	n.name AS name,
108	n.file_path AS file_path,
109	n.line_start AS line_start,
110	n.line_end AS line_end
111	ORDER BY n.line_start
112	"""
113
114	# Knowledge nodes reachable from a code symbol in one hop via cross-layer edges.
115	# Handles:
116	# (symbol)-[:ANNOTATES\|IMPLEMENTS]->(knowledge) — code → knowledge
117	# (knowledge)-[:GOVERNS\|ANNOTATES]->(symbol) — knowledge → code
118	_KNOWLEDGE_FOR_SYMBOL = """
119	MATCH (sym)
120	WHERE (sym:Function OR sym:Class OR sym:Method)
121	AND sym.name = $name
122	AND sym.file_path = $file_path
123	OPTIONAL MATCH (sym)-[:ANNOTATES\|IMPLEMENTS]->(k1)
124	WHERE (k1:Concept OR k1:Rule OR k1:Decision OR k1:WikiPage)
125	OPTIONAL MATCH (k2)-[:GOVERNS\|ANNOTATES]->(sym)
126	WHERE (k2:Concept OR k2:Rule OR k2:Decision OR k2:WikiPage)
127	WITH collect(DISTINCT k1) + collect(DISTINCT k2) AS knowledge_nodes
128	UNWIND knowledge_nodes AS k
129	RETURN DISTINCT
130	labels(k)[0] AS type,
131	k.name AS name,
132	coalesce(k.description, '') AS description,
133	coalesce(k.domain, '') AS domain,
134	coalesce(k.status, '') AS status
135	"""
136
137
138	def _parse_unified_diff_hunks(diff_output: str) -> dict[str, list[tuple[int, int]]]:
139	"""
140	Parse the output of ``git diff -U0 HEAD`` and return a mapping of
141	file path → list of (start_line, end_line) changed ranges.
142
143	Only new/modified lines (``+`` prefix) are tracked; deleted-only hunks
144	contribute the surrounding context line instead (line before deletion).
145	"""
146	result: dict[str, list[tuple[int, int]]] = {}
147	current_file: str \| None = None
148	current_new_start = 0
149	current_new_count = 0
150
151	for line in diff_output.splitlines():
152	# New file header: +++ b/path/to/file
153	if line.startswith("+++ b/"):
154	current_file = line[6:]
155	if current_file not in result:
156	result[current_file] = []
157	elif line.startswith("+++ /dev/null"):
158	current_file = None # deleted file — skip
159	elif line.startswith("@@ "):
160	# Hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
161	try:
162	new_info = line.split("+")[1].split("@@")[0].strip()
163	if "," in new_info:
164	new_start_str, new_count_str = new_info.split(",", 1)
165	current_new_start = int(new_start_str)
166	current_new_count = int(new_count_str)
167	else:
168	current_new_start = int(new_info)
169	current_new_count = 1
170	if current_file and current_new_count > 0:
171	end = current_new_start + max(current_new_count - 1, 0)
172	result.setdefault(current_file, []).append((current_new_start, end))
173	except (ValueError, IndexError):
174	pass
175
176	return result
177
178
179	def _lines_overlap(
180	changed_ranges: list[tuple[int, int]],
181	line_start: int \| None,
182	line_end: int \| None,
183	) -> bool:
184	"""Return True when any changed range overlaps with [line_start, line_end]."""
185	if line_start is None:
186	return False
187	sym_start = line_start
188	sym_end = line_end if line_end is not None else line_start
189	for r_start, r_end in changed_ranges:
190	if r_start <= sym_end and r_end >= sym_start:
191	return True
192	return False
193
194
195	# ── DiffAnalyzer ─────────────────────────────────────────────────────────────
196
197
198	class DiffAnalyzer:
199	"""Maps git diff output to affected graph nodes.
200
201	Parameters
202	----------
203	store:
204	An open :class:`~navegador.graph.store.GraphStore`.
205	repo_path:
206	Root of the git repository to inspect.
207	"""
208
209	def __init__(self, store: GraphStore, repo_path: Path) -> None:
210	self.store = store
211	self.repo_path = Path(repo_path)
212	self._git = GitAdapter(self.repo_path)
213
214	# ------------------------------------------------------------------
215	# Public API
216	# ------------------------------------------------------------------
217
218	def changed_files(self) -> list[str]:
219	"""Return paths of files with uncommitted changes (staged + unstaged).
220
221	Delegates to :class:`~navegador.vcs.GitAdapter` which runs
222	``git diff HEAD --name-only``.
223	"""
224	return self._git.changed_files()
225
226	def changed_lines(self) -> dict[str, list[tuple[int, int]]]:
227	"""Return a mapping of file path → list of (start, end) changed line ranges.
228
229	Runs ``git diff -U0 HEAD`` and parses the unified diff hunk headers.
230	Falls back to the whole file (line 1 to a large sentinel) when the
231	diff cannot be parsed precisely — ensuring callers always get a result.
232	"""
233	result = subprocess.run(
234	["git", "diff", "-U0", "HEAD"],
235	cwd=self.repo_path,
236	capture_output=True,
237	text=True,
238	check=False,
239	)
240	if result.returncode != 0 or not result.stdout.strip():
241	# No HEAD yet (initial commit) or empty diff — treat all changed
242	# files as fully-changed using a wide sentinel range.
243	return {f: [(1, 999_999)] for f in self.changed_files()}
244
245	parsed = _parse_unified_diff_hunks(result.stdout)
246
247	# Files returned by changed_files() but missing from the parsed diff
248	# (e.g. binary files, new untracked files added with --intent-to-add)
249	# get a full-file sentinel range.
250	for f in self.changed_files():
251	if f not in parsed:
252	parsed[f] = [(1, 999_999)]
253
254	return parsed
255
256	def affected_symbols(self) -> list[dict[str, Any]]:
257	"""Return functions/classes/methods whose line ranges overlap changed lines.
258
259	Each entry is a dict with the keys:
260	``type``, ``name``, ``file_path``, ``line_start``, ``line_end``.
261	"""
262	line_map = self.changed_lines()
263	symbols: list[dict[str, Any]] = []
264	seen: set[tuple[str, str]] = set()
265
266	for file_path, changed_ranges in line_map.items():
267	rows = self.store.query(_SYMBOLS_IN_FILE, {"file_path": file_path})
268	if not rows or not rows.result_set:
269	continue
270	for row in rows.result_set:
271	sym_type, name, fp, line_start, line_end = row
272	key = (name, fp or file_path)
273	if key in seen:
274	continue
275	if _lines_overlap(changed_ranges, line_start, line_end):
276	seen.add(key)
277	symbols.append(
278	{
279	"type": sym_type,
280	"name": name,
281	"file_path": fp or file_path,
282

A tests/test_diff.py

+577

		--- a/tests/test_diff.py
		+++ b/tests/test_diff.py
		@@ -0,0 +1,581 @@
	1	+"""Tests for navegador.diff — DiffAnalyzer and the CLI 'diff' command."""
	2	+
	3	+from __future__ import annotations
	4	+
	5	+import json
	6	+import subprocess
	7	+from pathlib import Path
	8	+from unittest.mock import MagicMock, patch
	9	+
	10	+import pytest
	11	+from click.testing import CliRunner
	12	+
	13	+from navegador.cli.commands import main
	14	+from navegador.diff import (
	15	+ DiffAnalyzer,
	16	+ _lines_overlap,
	17	+ _parse_unified_diff_hunks,
	18	+)
	19	+
	20	+
	21	+# ── Helpers ────────────────────────────────────────────────────────────────────
	22	+
	23	+
	24	+def _mock_store(result_set: list \| None = None):
	25	+ """Return a MagicMock GraphStore whose .query() yields result_set."""
	26	+ store = MagicMock()
	27	+ store.query.return_value = MagicMock(result_set=result_set or [])
	28	+ return store
	29	+
	30	+
	31	+def _analyzer(store=None, repo_path: Path \| None = None, changed: list[str] \| None = None):
	32	+ """Build a DiffAnalyzer with the given store, patching GitAdapter.changed_files."""
	33	+ if store is None:
	34	+ store = _mock_store()
	35	+ if repo_path is None:
	36	+ repo_path = Path("/fake/repo")
	37	+ analyzer = DiffAnalyzer(store, repo_path)
	38	+ if changed is not None:
	39	+ analyzer._git = MagicMock()
	40	+ analyzer._git.changed_files.return_value = changed
	41	+ return analyzer
	42	+
	43	+
	44	+# ── _parse_unified_diff_hunks ─────────────────────────────────────────────────
	45	+
	46	+
	47	+class TestParseUnifiedDiffHunks:
	48	+ SAMPLE_DIFF = """\
	49	+diff --git a/foo.py b/foo.py
	50	+index 0000000..1111111 100644
	51	+--- a/foo.py
		++++ b/foo.py
	52	+@@ -10,3 +10,5 @@
	53	+ unchanged
	54	++added line 1
	55	++added line 2
	56	+diff --git a/bar.py b/bar.py
	57	+index 0000000..2222222 100644
	58	+--- a/bar.py
		++++ b/bar.py
	59	+@@ -5 +5,2 @@
	60	+-old line
	61	++new line A
	62	++new line B
	63	+"""
	64	+
	65	+ def test_returns_dict(self):
	66	+ result = _parse_unified_diff_hunks(self.SAMPLE_DIFF)
	67	+ assert isinstance(result, dict)
	68	+
	69	+ def test_detects_both_files(self):
	70	+ result = _parse_unified_diff_hunks(self.SAMPLE_DIFF)
	71	+ assert "foo.py" in result
	72	+ assert "bar.py" in result
	73	+
	74	+ def test_correct_range_for_foo(self):
	75	+ result = _parse_unified_diff_hunks(self.SAMPLE_DIFF)
	76	+ # hunk: +10,5 → start=10, end=14
	77	+ ranges = result["foo.py"]
	78	+ assert len(ranges) == 1
	79	+ start, end = ranges[0]
	80	+ assert start == 10
	81	+ assert end == 14 # 10 + 5 - 1
	82	+
	83	+ def test_correct_range_for_bar(self):
	84	+ result = _parse_unified_diff_hunks(self.SAMPLE_DIFF)
	85	+ # hunk: +5,2 → start=5, end=6
	86	+ ranges = result["bar.py"]
	87	+ assert len(ranges) == 1
	88	+ start, end = ranges[0]
	89	+ assert start == 5
	90	+ assert end == 6
	91	+
	92	+ def test_empty_diff_returns_empty_dict(self):
	93	+ result = _parse_unified_diff_hunks("")
	94	+ assert result == {}
	95	+
	96	+ def test_deleted_file_not_included(self):
	97	+ diff = """\
	98	+--- a/deleted.py
		++++ /dev/null
	99	+@@ -1 +0,0 @@
	100	+-old
	101	+"""
	102	+ result = _parse_unified_diff_hunks(diff)
	103	+ assert "deleted.py" not in result
	104	+
	105	+ def test_multiple_hunks_same_file(self):
	106	+ diff = """\
	107	+diff --git a/multi.py b/multi.py
	108	+--- a/multi.py
		++++ b/multi.py
	109	+@@ -1,2 +1,3 @@
	110	++first
	111	+ unchanged
	112	++second
	113	+@@ -20 +21,2 @@
	114	+-old
	115	++new1
	116	++new2
	117	+"""
	118	+ result = _parse_unified_diff_hunks(diff)
	119	+ assert "multi.py" in result
	120	+ assert len(result["multi.py"]) == 2
	121	+
	122	+
	123	+# ── _lines_overlap ─────────────────────────────────────────────────────────────
	124	+
	125	+
	126	+class TestLinesOverlap:
	127	+ def test_exact_overlap(self):
	128	+ assert _lines_overlap([(10, 20)], 10, 20) is True
	129	+
	130	+ def test_symbol_inside_range(self):
	131	+ assert _lines_overlap([(5, 30)], 10, 15) is True
	132	+
	133	+ def test_range_inside_symbol(self):
	134	+ assert _lines_overlap([(12, 14)], 10, 20) is True
	135	+
	136	+ def test_no_overlap_before(self):
	137	+ assert _lines_overlap([(20, 30)], 5, 10) is False
	138	+
	139	+ def test_no_overlap_after(self):
	140	+ assert _lines_overlap([(1, 5)], 10, 20) is False
	141	+
	142	+ def test_adjacent_not_overlapping(self):
	143	+ assert _lines_overlap([(1, 9)], 10, 20) is False
	144	+
	145	+ def test_none_line_start_returns_false(self):
	146	+ assert _lines_overlap([(1, 100)], None, None) is False
	147	+
	148	+ def test_no_line_end_uses_start(self):
	149	+ # line_end=None → treated as single-line symbol
	150	+ assert _lines_overlap([(10, 20)], 15, None) is True
	151	+
	152	+ def test_empty_ranges_returns_false(self):
	153	+ assert _lines_overlap([], 10, 20) is False
	154	+
	155	+ def test_multiple_ranges_one_hits(self):
	156	+ assert _lines_overlap([(1, 5), (50, 60)], 52, 55) is True
	157	+
	158	+
	159	+# ── DiffAnalyzer.changed_files ────────────────────────────────────────────────
	160	+
	161	+
	162	+class TestDiffAnalyzerChangedFiles:
	163	+ def test_delegates_to_git_adapter(self):
	164	+ analyzer = _analyzer(changed=["a.py", "b.py"])
	165	+ assert analyzer.changed_files() == ["a.py", "b.py"]
	166	+
	167	+ def test_empty_when_no_changes(self):
	168	+ analyzer = _analyzer(changed=[])
	169	+ assert analyzer.changed_files() == []
	170	+
	171	+ def test_returns_list(self):
	172	+ analyzer = _analyzer(changed=["x.py"])
	173	+ assert isinstance(analyzer.changed_files(), list)
	174	+
	175	+ def test_uses_subprocess_via_git_adapter(self, tmp_path):
	176	+ """Verify changed_files() relies on subprocess (through GitAdapter._run)."""
	177	+ repo = tmp_path / "repo"
	178	+ repo.mkdir()
	179	+ store = _mock_store()
	180	+ analyzer = DiffAnalyzer(store, repo)
	181	+
	182	+ fake_result = MagicMock()
	183	+ fake_result.stdout = "changed.py\n"
	184	+ fake_result.returncode = 0
	185	+
	186	+ with patch("subprocess.run", return_value=fake_result):
	187	+ files = analyzer.changed_files()
	188	+
	189	+ assert "changed.py" in files
	190	+
	191	+
	192	+# ── DiffAnalyzer.changed_lines ────────────────────────────────────────────────
	193	+
	194	+
	195	+class TestDiffAnalyzerChangedLines:
	196	+ def test_returns_dict(self, tmp_path):
	197	+ analyzer = _analyzer(changed=["f.py"])
	198	+ fake = MagicMock()
	199	+ fake.returncode = 0
	200	+ fake.stdout = "+++ b/f.py\n@@ -1 +1,3 @@\n+a\n+b\n+c\n"
	201	+ with patch("subprocess.run", return_value=fake):
	202	+ result = analyzer.changed_lines()
	203	+ assert isinstance(result, dict)
	204	+
	205	+ def test_fallback_on_no_output(self):
	206	+ """No diff output → full-file sentinel range for each changed file."""
	207	+ analyzer = _analyzer(changed=["x.py", "y.py"])
	208	+ fake = MagicMock()
	209	+ fake.returncode = 0
	210	+ fake.stdout = ""
	211	+ with patch("subprocess.run", return_value=fake):
	212	+ result = analyzer.changed_lines()
	213	+ assert "x.py" in result
	214	+ assert "y.py" in result
	215	+ assert result["x.py"] == [(1, 999_999)]
	216	+
	217	+ def test_fallback_on_nonzero_exit(self):
	218	+ """Non-zero exit (e.g. no HEAD) → full-file sentinel for all changed files."""
	219	+ analyzer = _analyzer(changed=["z.py"])
	220	+ fake = MagicMock()
	221	+ fake.returncode = 128
	222	+ fake.stdout = ""
	223	+ with patch("subprocess.run", return_value=fake):
	224	+ result = analyzer.changed_lines()
	225	+ assert result["z.py"] == [(1, 999_999)]
	226	+
	227	+ def test_missing_files_get_sentinel(self):
	228	+ """Files in changed_files() but absent from diff get sentinel range."""
	229	+ analyzer = _analyzer(changed=["in_diff.py", "not_in_diff.py"])
	230	+ fake = MagicMock()
	231	+ fake.returncode = 0
	232	+ fake.stdout = "+++ b/in_diff.py\n@@ -5 +5,2 @@ \n+x\n+y\n"
	233	+ with patch("subprocess.run", return_value=fake):
	234	+ result = analyzer.changed_lines()
	235	+ assert "not_in_diff.py" in result
	236	+ assert result["not_in_diff.py"] == [(1, 999_999)]
	237	+
	238	+
	239	+# ── DiffAnalyzer.affected_symbols ─────────────────────────────────────────────
	240	+
	241	+
	242	+class TestDiffAnalyzerAffectedSymbols:
	243	+ def _sym_rows(self):
	244	+ """Return fake graph rows: (type, name, file_path, line_start, line_end)."""
	245	+ return [
	246	+ ("Function", "do_thing", "app.py", 10, 25),
	247	+ ("Class", "MyClass", "app.py", 30, 80),
	248	+ ("Method", "helper", "utils.py", 5, 15),
	249	+ ]
	250	+
	251	+ def test_returns_list(self):
	252	+ store = _mock_store(result_set=self._sym_rows())
	253	+ analyzer = _analyzer(store=store, changed=["app.py"])
	254	+ with patch.object(analyzer, "changed_lines", return_value={"app.py": [(1, 999_999)]}):
	255	+ result = analyzer.affected_symbols()
	256	+ assert isinstance(result, list)
	257	+
	258	+ def test_symbols_overlap_returned(self):
	259	+ store = _mock_store(result_set=self._sym_rows())
	260	+ analyzer = _analyzer(store=store, changed=["app.py"])
	261	+ # Changed lines 15-20 overlap do_thing (10-25)
	262	+ with patch.object(analyzer, "changed_lines", return_value={"app.py": [(15, 20)]}):
	263	+ result = analyzer.affected_symbols()
	264	+ names = [s["name"] for s in result]
	265	+ assert "do_thing" in names
	266	+
	267	+ def test_non_overlapping_symbols_excluded(self):
	268	+ store = _mock_store(result_set=self._sym_rows())
	269	+ analyzer = _analyzer(store=store, changed=["app.py"])
	270	+ # Changed lines 50-60 overlap MyClass (30-80) but not do_thing (10-25)
	271	+ with patch.object(analyzer, "changed_lines", return_value={"app.py": [(50, 60)]}):
	272	+ result = analyzer.affected_symbols()
	273	+ names = [s["name"] for s in result]
	274	+ assert "MyClass" in names
	275	+ assert "do_thing" not in names
	276	+
	277	+ def test_empty_when_no_graph_nodes(self):
	278	+ store = _mock_store(result_set=[])
	279	+ analyzer = _analyzer(store=store, changed=["app.py"])
	280	+ with patch.object(analyzer, "changed_lines", return_value={"app.py": [(1, 50)]}):
	281	+ result = analyzer.affected_symbols()
	282	+ assert result == []
	283	+
	284	+ def test_empty_when_no_changed_files(self):
	285	+ store = _mock_store(result_set=self._sym_rows())
	286	+ analyzer = _analyzer(store=store, changed=[])
	287	+ with patch.object(analyzer, "changed_lines", return_value={}):
	288	+ result = analyzer.affected_symbols()
	289	+ assert result == []
	290	+
	291	+ def test_symbol_dict_has_required_keys(self):
	292	+ store = _mock_store(result_set=[("Function", "foo", "a.py", 1, 10)])
	293	+ analyzer = _analyzer(store=store, changed=["a.py"])
	294	+ with patch.object(analyzer, "changed_lines", return_value={"a.py": [(1, 10)]}):
	295	+ result = analyzer.affected_symbols()
	296	+ assert len(result) == 1
	297	+ sym = result[0]
	298	+ assert "type" in sym
	299	+ assert "name" in sym
	300	+ assert "file_path" in sym
	301	+ assert "line_start" in sym
	302	+ assert "line_end" in sym
	303	+
	304	+ def test_no_duplicate_symbols(self):
	305	+ """Same symbol matched by two hunk ranges must appear only once."""
	306	+ rows = [("Function", "foo", "a.py", 5, 20)]
	307	+ store = _mock_store(result_set=rows)
	308	+ analyzer = _analyzer(store=store, changed=["a.py"])
	309	+ with patch.object(analyzer, "changed_lines", return_value={"a.py": [(5, 10), (15, 20)]}):
	310	+ result = analyzer.affected_symbols()
	311	+ assert len(result) == 1
	312	+
	313	+
	314	+# ── DiffAnalyzer.affected_knowledge ───────────────────────────────────────────
	315	+
	316	+
	317	+class TestDiffAnalyzerAffectedKnowledge:
	318	+ def _k_rows(self):
	319	+ """Fake knowledge rows: (type, name, description, domain, status)."""
	320	+ return [
	321	+ ("Concept", "Billing", "Handles money", "finance", "stable"),
	322	+ ("Rule", "no_refund_after_30d", "30 day rule", "finance", "active"),
	323	+ ]
	324	+
	325	+ def test_returns_list(self):
	326	+ store = _mock_store(result_set=self._k_rows())
	327	+ analyzer = _analyzer(store=store)
	328	+ sym = [{"name": "charge", "file_path": "billing.py"}]
	329	+ with patch.object(analyzer, "affected_symbols", return_value=sym):
	330	+ result = analyzer.affected_knowledge()
	331	+ assert isinstance(result, list)
	332	+
	333	+ def test_knowledge_nodes_returned(self):
	334	+ store = _mock_store(result_set=self._k_rows())
	335	+ analyzer = _analyzer(store=store)
	336	+ sym = [{"name": "charge", "file_path": "billing.py"}]
	337	+ with patch.object(analyzer, "affected_symbols", return_value=sym):
	338	+ result = analyzer.affected_knowledge()
	339	+ names = [k["name"] for k in result]
	340	+ assert "Billing" in names
	341	+ assert "no_refund_after_30d" in names
	342	+
	343	+ def test_empty_when_no_symbols(self):
	344	+ store = _mock_store(result_set=self._k_rows())
	345	+ analyzer = _analyzer(store=store)
	346	+ with patch.object(analyzer, "affected_symbols", return_value=[]):
	347	+ result = analyzer.affected_knowledge()
	348	+ assert result == []
	349	+
	350	+ def test_empty_when_no_graph_knowledge(self):
	351	+ store = _mock_store(result_set=[])
	352	+ analyzer = _analyzer(store=store)
	353	+ sym = [{"name": "foo", "file_path": "a.py"}]
	354	+ with patch.object(analyzer, "affected_symbols", return_value=sym):
	355	+ result = analyzer.affected_knowledge()
	356	+ assert result == []
	357	+
	358	+ def test_no_duplicate_knowledge_nodes(self):
	359	+ """Two symbols linking to the same knowledge node → deduplicated."""
	360	+ rows = [("Concept", "SharedConcept", "desc", "core", "stable")]
	361	+ store = _mock_store(result_set=rows)
	362	+ analyzer = _analyzer(store=store)
	363	+ syms = [
	364	+ {"name": "alpha", "file_path": "a.py"},
	365	+ {"name": "beta", "file_path": "b.py"},
	366	+ ]
	367	+ with patch.object(analyzer, "affected_symbols", return_value=syms):
	368	+ result = analyzer.affected_knowledge()
	369	+ assert len([k for k in result if k["name"] == "SharedConcept"]) == 1
	370	+
	371	+ def test_knowledge_dict_has_required_keys(self):
	372	+ rows = [("Rule", "my_rule", "some desc", "payments", "")]
	373	+ store = _mock_store(result_set=rows)
	374	+ analyzer = _analyzer(store=store)
	375	+ sym = [{"name": "process", "file_path": "pay.py"}]
	376	+ with patch.object(analyzer, "affected_symbols", return_value=sym):
	377	+ result = analyzer.affected_knowledge()
	378	+ assert len(result) == 1
	379	+ k = result[0]
	380	+ assert "type" in k
	381	+ assert "name" in k
	382	+ assert "description" in k
	383	+ assert "domain" in k
	384	+ assert "status" in k
	385	+
	386	+
	387	+# ── DiffAnalyzer.impact_summary ───────────────────────────────────────────────
	388	+
	389	+
	390	+class TestDiffAnalyzerImpactSummary:
	391	+ def _build(self, files=None, symbols=None, knowledge=None):
	392	+ store = _mock_store()
	393	+ analyzer = _analyzer(store=store)
	394	+ with (
	395	+ patch.object(analyzer, "changed_files", return_value=files or []),
	396	+ patch.object(analyzer, "affected_symbols", return_value=symbols or []),
	397	+ patch.object(analyzer, "affected_knowledge", return_value=knowledge or []),
	398	+ ):
	399	+ return analyzer.impact_summary()
	400	+
	401	+ def test_returns_dict(self):
	402	+ result = self._build()
	403	+ assert isinstance(result, dict)
	404	+
	405	+ def test_has_all_top_level_keys(self):
	406	+ result = self._build()
	407	+ assert "files" in result
	408	+ assert "symbols" in result
	409	+ assert "knowledge" in result
	410	+ assert "counts" in result
	411	+
	412	+ def test_counts_match_lengths(self):
	413	+ files = ["a.py", "b.py"]
	414	+ symbols = [{"type": "Function", "name": "f", "file_path": "a.py",
	415	+ "line_start": 1, "line_end": 5}]
	416	+ knowledge = [{"type": "Concept", "name": "X", "description": "",
	417	+ "domain": "", "status": ""}]
	418	+ result = self._build(files=files, symbols=symbols, knowledge=knowledge)
	419	+ assert result["counts"]["files"] == 2
	420	+ assert result["counts"]["symbols"] == 1
	421	+ assert result["counts"]["knowledge"] == 1
	422	+
	423	+ def test_empty_summary_all_zeros(self):
	424	+ result = self._build()
	425	+ assert result["counts"]["files"] == 0
	426	+ assert result["counts"]["symbols"] == 0
	427	+ assert result["counts"]["knowledge"] == 0
	428	+
	429	+ def test_files_list_propagated(self):
	430	+ result = self._build(files=["x.py", "y.py"])
	431	+ assert result["files"] == ["x.py", "y.py"]
	432	+
	433	+
	434	+# ── DiffAnalyzer.to_json ──────────────────────────────────────────────────────
	435	+
	436	+
	437	+class TestDiffAnalyzerToJson:
	438	+ def test_returns_valid_json(self):
	439	+ store = _mock_store()
	440	+ analyzer = _analyzer(store=store)
	441	+ summary = {"files": [], "symbols": [], "knowledge": [], "counts": {"files": 0}}
	442	+ with patch.object(analyzer, "impact_summary", return_value=summary):
	443	+ output = analyzer.to_json()
	444	+ parsed = json.loads(output)
	445	+ assert isinstance(parsed, dict)
	446	+
	447	+ def test_json_contains_summary_keys(self):
	448	+ store = _mock_store()
	449	+ analyzer = _analyzer(store=store)
	450	+ summary = {"files": ["f.py"], "symbols": [], "knowledge": [], "counts": {"files": 1}}
	451	+ with patch.object(analyzer, "impact_summary", return_value=summary):
	452	+ output = analyzer.to_json()
	453	+ parsed = json.loads(output)
	454	+ assert "files" in parsed
	455	+
	456	+
	457	+# ── DiffAnalyzer.to_markdown ──────────────────────────────────────────────────
	458	+
	459	+
	460	+class TestDiffAnalyzerToMarkdown:
	461	+ def _md(self, files=None, symbols=None, knowledge=None):
	462	+ store = _mock_store()
	463	+ analyzer = _analyzer(store=store)
	464	+ summary = {
	465	+ "files": files or [],
	466	+ "symbols": symbols or [],
	467	+ "knowledge": knowledge or [],
	468	+ "counts": {
	469	+ "files": len(files or []),
	470	+ "symbols": len(symbols or []),
	471	+ "knowledge": len(knowledge or []),
	472	+ },
	473	+ }
	474	+ with patch.object(analyzer, "impact_summary", return_value=summary):
	475	+ return analyzer.to_markdown()
	476	+
	477	+ def test_returns_string(self):
	478	+ assert isinstance(self._md(), str)
	479	+
	480	+ def test_contains_heading(self):
	481	+ assert "Diff Impact Summary" in self._md()
	482	+
	483	+ def test_lists_changed_file(self):
	484	+ md = self._md(files=["src/main.py"])
	485	+ assert "src/main.py" in md
	486	+
	487	+ def test_lists_affected_symbol(self):
	488	+ syms = [{"type": "Function", "name": "pay", "file_path": "billing.py",
	489	+ "line_start": 10, "line_end": 20}]
	490	+ md = self._md(symbols=syms)
	491	+ assert "pay" in md
	492	+
	493	+ def test_lists_knowledge_node(self):
	494	+ know = [{"type": "Rule", "name": "no_double_charge",
	495	+ "description": "desc", "domain": "", "status": ""}]
	496	+ md = self._md(knowledge=know)
	497	+ assert "no_double_charge" in md
	498	+
	499	+ def test_empty_sections_show_placeholder(self):
	500	+ md = self._md()
	501	+ assert "No changed files" in md
	502	+ assert "No affected symbols" in md
	503	+ assert "No linked knowledge" in md
	504	+
	505	+
	506	+# ── CLI: navegador diff ────────────────────────────────────────────────────────
	507	+
	508	+
	509	+class TestCLIDiffCommand:
	510	+ def _runner(self):
	511	+ return CliRunner()
	512	+
	513	+ def _mock_analyzer(self, summary=None):
	514	+ """Patch DiffAnalyzer so it never touches git or the graph."""
	515	+ if summary is None:
	516	+ summary = {
	517	+ "files": ["app.py"],
	518	+ "symbols": [{"type": "Function", "name": "run",
	519	+ "file_path": "app.py", "line_start": 1, "line_end": 10}],
	520	+ "knowledge": [],
	521	+ "counts": {"files": 1, "symbols": 1, "knowledge": 0},
	522	+ }
	523	+ mock_inst = MagicMock()
	524	+ mock_inst.impact_summary.return_value = summary
	525	+ mock_inst.to_json.return_value = json.dumps(summary, indent=2)
	526	+ mock_inst.to_markdown.return_value = "# Diff Impact Summary\n\n## Changed Files (1)"
	527	+ return mock_inst
	528	+
	529	+ def test_command_exists(self):
	530	+ runner = self._runner()
	531	+ result = runner.invoke(main, ["diff", "--help"])
	532	+ assert result.exit_code == 0
	533	+
	534	+ def test_markdown_output_by_default(self, tmp_path):
	535	+ runner = self._runner()
	536	+ mock_inst = self._mock_analyzer()
	537	+ with (
	538	+ runner.isolated_filesystem(),
	539	+ patch("navegador.cli.commands._get_store", return_value=_mock_store()),
	540	+ patch("navegador.diff.DiffAnalyzer", return_value=mock_inst),
	541	+ ):
	542	+ result = runner.invoke(main, ["diff", "--repo", str(tmp_path)])
	543	+ assert result.exit_code == 0
	544	+ assert "Diff Impact Summary" in result.output
	545	+
	546	+ def test_json_output_flag(self, tmp_path):
	547	+ runner = self._runner()
	548	+ mock_inst = self._mock_analyzer()
	549	+ with (
	550	+ runner.isolated_filesystem(),
	551	+ patch("navegador.cli.commands._get_store", return_value=_mock_store()),
	552	+ patch("navegador.diff.DiffAnalyzer", return_value=mock_inst),
	553	+ ):
	554	+ result = runner.invoke(main, ["diff", "--format", "json", "--repo", str(tmp_path)])
	555	+ assert result.exit_code == 0
	556	+ parsed = json.loads(result.output)
	557	+ assert "files" in parsed
	558	+
	559	+ def test_json_is_valid(self, tmp_path):
	560	+ runner = self._runner()
	561	+ summary = {
	562	+ "files": ["x.py"],
	563	+ "symbols": [],
	564	+ "knowledge": [],
	565	+ "counts": {"files": 1, "symbols": 0, "knowledge": 0},
	566	+ }
	567	+ mock_inst = self._mock_analyzer(summary=summary)
	568	+ with (
	569	+ runner.isolated_filesystem(),
	570	+ patch("navegador.cli.commands._get_store", return_value=_mock_store()),
	571	+ patch("navegador.diff.DiffAnalyzer", return_value=mock_inst),
	572	+ ):
	573	+ result = runner.invoke(main, ["diff", "--format", "json", "--repo", str(tmp_path)])
	574	+ assert result.exit_code == 0
	575	+ data = json.loads(result.output)
	576	+ assert data["files"] == ["x.py"]
	577	+ assert data["counts"]["files"] == 1

	--- a/tests/test_diff.py
	+++ b/tests/test_diff.py
	@@ -0,0 +1,581 @@



















































	++++ b/foo.py







	++++ b/bar.py








































	++++ /dev/null










	++++ b/multi.py

	--- a/tests/test_diff.py
	+++ b/tests/test_diff.py
	@@ -0,0 +1,581 @@
1	"""Tests for navegador.diff — DiffAnalyzer and the CLI 'diff' command."""
2
3	from __future__ import annotations
4
5	import json
6	import subprocess
7	from pathlib import Path
8	from unittest.mock import MagicMock, patch
9
10	import pytest
11	from click.testing import CliRunner
12
13	from navegador.cli.commands import main
14	from navegador.diff import (
15	DiffAnalyzer,
16	_lines_overlap,
17	_parse_unified_diff_hunks,
18	)
19
20
21	# ── Helpers ────────────────────────────────────────────────────────────────────
22
23
24	def _mock_store(result_set: list \| None = None):
25	"""Return a MagicMock GraphStore whose .query() yields result_set."""
26	store = MagicMock()
27	store.query.return_value = MagicMock(result_set=result_set or [])
28	return store
29
30
def _analyzer(store=None, repo_path: Path | None = None, changed: list[str] | None = None):
    """Build a DiffAnalyzer for tests.

    *store* defaults to an empty ``_mock_store()`` and *repo_path* to
    ``Path("/fake/repo")``. When *changed* is given (an empty list counts),
    the analyzer's ``_git`` attribute is replaced with a MagicMock whose
    ``changed_files()`` returns *changed*; otherwise ``_git`` is left as the
    constructor set it.
    """
    if store is None:
        store = _mock_store()
    if repo_path is None:
        repo_path = Path("/fake/repo")
    analyzer = DiffAnalyzer(store, repo_path)
    if changed is not None:
        # Replace the whole git adapter so the changed-file list is canned.
        analyzer._git = MagicMock()
        analyzer._git.changed_files.return_value = changed
    return analyzer
42
43
44	# ── _parse_unified_diff_hunks ─────────────────────────────────────────────────
45
46
class TestParseUnifiedDiffHunks:
    """Tests for ``_parse_unified_diff_hunks``.

    The tests expect a dict mapping each new-side file path to a list of
    inclusive ``(start, end)`` line ranges taken from the ``+start,count``
    part of each hunk header.
    """

    # Two files with one hunk each; the diff text starts at column 0 because
    # the literal opens with a line continuation.
    SAMPLE_DIFF = """\
diff --git a/foo.py b/foo.py
index 0000000..1111111 100644
--- a/foo.py
+++ b/foo.py
@@ -10,3 +10,5 @@
unchanged
+added line 1
+added line 2
diff --git a/bar.py b/bar.py
index 0000000..2222222 100644
--- a/bar.py
+++ b/bar.py
@@ -5 +5,2 @@
-old line
+new line A
+new line B
"""

    def test_returns_dict(self):
        """The parser returns a dict."""
        result = _parse_unified_diff_hunks(self.SAMPLE_DIFF)
        assert isinstance(result, dict)

    def test_detects_both_files(self):
        """Both files in the sample diff are keys, without the ``b/`` prefix."""
        result = _parse_unified_diff_hunks(self.SAMPLE_DIFF)
        assert "foo.py" in result
        assert "bar.py" in result

    def test_correct_range_for_foo(self):
        """Hunk ``+10,5`` becomes the inclusive range (10, 14)."""
        result = _parse_unified_diff_hunks(self.SAMPLE_DIFF)
        # hunk: +10,5 → start=10, end=14
        ranges = result["foo.py"]
        assert len(ranges) == 1
        start, end = ranges[0]
        assert start == 10
        assert end == 14  # 10 + 5 - 1

    def test_correct_range_for_bar(self):
        """Hunk ``+5,2`` becomes the inclusive range (5, 6)."""
        result = _parse_unified_diff_hunks(self.SAMPLE_DIFF)
        # hunk: +5,2 → start=5, end=6
        ranges = result["bar.py"]
        assert len(ranges) == 1
        start, end = ranges[0]
        assert start == 5
        assert end == 6

    def test_empty_diff_returns_empty_dict(self):
        """An empty diff yields an empty dict."""
        result = _parse_unified_diff_hunks("")
        assert result == {}

    def test_deleted_file_not_included(self):
        """A file whose new side is ``/dev/null`` is not reported."""
        diff = """\
--- a/deleted.py
+++ /dev/null
@@ -1 +0,0 @@
-old
"""
        result = _parse_unified_diff_hunks(diff)
        assert "deleted.py" not in result

    def test_multiple_hunks_same_file(self):
        """Two hunks in one file produce two ranges under the same key."""
        diff = """\
diff --git a/multi.py b/multi.py
--- a/multi.py
+++ b/multi.py
@@ -1,2 +1,3 @@
+first
unchanged
+second
@@ -20 +21,2 @@
-old
+new1
+new2
"""
        result = _parse_unified_diff_hunks(diff)
        assert "multi.py" in result
        assert len(result["multi.py"]) == 2
121
122
123	# ── _lines_overlap ─────────────────────────────────────────────────────────────
124
125
class TestLinesOverlap:
    """Tests for ``_lines_overlap(ranges, line_start, line_end)``.

    *ranges* is a list of inclusive ``(start, end)`` changed-line ranges; the
    other two arguments are a symbol's line span.
    """

    def test_exact_overlap(self):
        """A range identical to the symbol span overlaps."""
        assert _lines_overlap([(10, 20)], 10, 20) is True

    def test_symbol_inside_range(self):
        """A symbol fully inside a changed range overlaps."""
        assert _lines_overlap([(5, 30)], 10, 15) is True

    def test_range_inside_symbol(self):
        """A changed range fully inside the symbol overlaps."""
        assert _lines_overlap([(12, 14)], 10, 20) is True

    def test_no_overlap_before(self):
        """A symbol ending before the range starts does not overlap."""
        assert _lines_overlap([(20, 30)], 5, 10) is False

    def test_no_overlap_after(self):
        """A symbol starting after the range ends does not overlap."""
        assert _lines_overlap([(1, 5)], 10, 20) is False

    def test_adjacent_not_overlapping(self):
        """A range ending on the line just before the symbol does not overlap."""
        assert _lines_overlap([(1, 9)], 10, 20) is False

    def test_none_line_start_returns_false(self):
        """A symbol with no start line never overlaps."""
        assert _lines_overlap([(1, 100)], None, None) is False

    def test_no_line_end_uses_start(self):
        """A missing end line makes the symbol a single line at its start."""
        # line_end=None → treated as single-line symbol
        assert _lines_overlap([(10, 20)], 15, None) is True

    def test_empty_ranges_returns_false(self):
        """No changed ranges means no overlap."""
        assert _lines_overlap([], 10, 20) is False

    def test_multiple_ranges_one_hits(self):
        """One overlapping range among several is enough."""
        assert _lines_overlap([(1, 5), (50, 60)], 52, 55) is True
157
158
159	# ── DiffAnalyzer.changed_files ────────────────────────────────────────────────
160
161
class TestDiffAnalyzerChangedFiles:
    """Tests for ``DiffAnalyzer.changed_files``."""

    def test_delegates_to_git_adapter(self):
        """The list comes from the mocked git adapter's ``changed_files()``."""
        analyzer = _analyzer(changed=["a.py", "b.py"])
        assert analyzer.changed_files() == ["a.py", "b.py"]

    def test_empty_when_no_changes(self):
        """No changed files gives an empty list."""
        analyzer = _analyzer(changed=[])
        assert analyzer.changed_files() == []

    def test_returns_list(self):
        """The result is a list."""
        analyzer = _analyzer(changed=["x.py"])
        assert isinstance(analyzer.changed_files(), list)

    def test_uses_subprocess_via_git_adapter(self, tmp_path):
        """Verify changed_files() relies on subprocess (through GitAdapter._run)."""
        repo = tmp_path / "repo"
        repo.mkdir()
        store = _mock_store()
        # Built directly (not via _analyzer) so the real git adapter is kept.
        analyzer = DiffAnalyzer(store, repo)

        fake_result = MagicMock()
        fake_result.stdout = "changed.py\n"
        fake_result.returncode = 0

        with patch("subprocess.run", return_value=fake_result):
            files = analyzer.changed_files()

        assert "changed.py" in files
190
191
192	# ── DiffAnalyzer.changed_lines ────────────────────────────────────────────────
193
194
class TestDiffAnalyzerChangedLines:
    """Tests for ``DiffAnalyzer.changed_lines``.

    ``subprocess.run`` is patched to return a canned process result, and the
    changed-file list comes from the mocked git adapter. Files with no hunk
    information are expected to get the full-file sentinel ``(1, 999_999)``.
    """

    @staticmethod
    def _completed(stdout="", returncode=0):
        """Return a fake ``subprocess.run`` result with the given fields."""
        fake = MagicMock()
        fake.returncode = returncode
        fake.stdout = stdout
        return fake

    def test_returns_dict(self):
        """A normal diff yields a dict."""
        analyzer = _analyzer(changed=["f.py"])
        fake = self._completed("+++ b/f.py\n@@ -1 +1,3 @@\n+a\n+b\n+c\n")
        with patch("subprocess.run", return_value=fake):
            result = analyzer.changed_lines()
        assert isinstance(result, dict)

    def test_fallback_on_no_output(self):
        """No diff output → full-file sentinel range for each changed file."""
        analyzer = _analyzer(changed=["x.py", "y.py"])
        fake = self._completed("")
        with patch("subprocess.run", return_value=fake):
            result = analyzer.changed_lines()
        assert "x.py" in result
        assert "y.py" in result
        assert result["x.py"] == [(1, 999_999)]

    def test_fallback_on_nonzero_exit(self):
        """Non-zero exit (e.g. no HEAD) → full-file sentinel for all changed files."""
        analyzer = _analyzer(changed=["z.py"])
        fake = self._completed("", returncode=128)
        with patch("subprocess.run", return_value=fake):
            result = analyzer.changed_lines()
        assert result["z.py"] == [(1, 999_999)]

    def test_missing_files_get_sentinel(self):
        """Files in changed_files() but absent from diff get sentinel range."""
        analyzer = _analyzer(changed=["in_diff.py", "not_in_diff.py"])
        fake = self._completed("+++ b/in_diff.py\n@@ -5 +5,2 @@ \n+x\n+y\n")
        with patch("subprocess.run", return_value=fake):
            result = analyzer.changed_lines()
        assert "not_in_diff.py" in result
        assert result["not_in_diff.py"] == [(1, 999_999)]
237
238
239	# ── DiffAnalyzer.affected_symbols ─────────────────────────────────────────────
240
241
class TestDiffAnalyzerAffectedSymbols:
    """Tests for ``DiffAnalyzer.affected_symbols``.

    ``changed_lines`` is patched on the analyzer so each test picks the
    changed ranges; the mock store returns the same symbol rows for every
    query.
    """

    def _sym_rows(self):
        """Return fake graph rows: (type, name, file_path, line_start, line_end)."""
        return [
            ("Function", "do_thing", "app.py", 10, 25),
            ("Class", "MyClass", "app.py", 30, 80),
            ("Method", "helper", "utils.py", 5, 15),
        ]

    def _affected(self, rows, changed, changed_lines):
        """Run ``affected_symbols`` with *rows* in the store and *changed_lines* patched."""
        analyzer = _analyzer(store=_mock_store(result_set=rows), changed=changed)
        with patch.object(analyzer, "changed_lines", return_value=changed_lines):
            return analyzer.affected_symbols()

    def test_returns_list(self):
        """The result is a list."""
        result = self._affected(self._sym_rows(), ["app.py"], {"app.py": [(1, 999_999)]})
        assert isinstance(result, list)

    def test_symbols_overlap_returned(self):
        """A symbol whose span overlaps a changed range is reported."""
        # Changed lines 15-20 overlap do_thing (10-25)
        result = self._affected(self._sym_rows(), ["app.py"], {"app.py": [(15, 20)]})
        names = [s["name"] for s in result]
        assert "do_thing" in names

    def test_non_overlapping_symbols_excluded(self):
        """Symbols outside every changed range are left out."""
        # Changed lines 50-60 overlap MyClass (30-80) but not do_thing (10-25)
        result = self._affected(self._sym_rows(), ["app.py"], {"app.py": [(50, 60)]})
        names = [s["name"] for s in result]
        assert "MyClass" in names
        assert "do_thing" not in names

    def test_empty_when_no_graph_nodes(self):
        """A changed file whose graph query returns no rows yields no symbols."""
        result = self._affected([], ["app.py"], {"app.py": [(1, 50)]})
        assert result == []

    def test_empty_when_no_changed_files(self):
        """With no changed line ranges at all, no symbols are reported."""
        result = self._affected(self._sym_rows(), [], {})
        assert result == []

    def test_symbol_dict_has_required_keys(self):
        """Each affected-symbol dict exposes type, name, file_path and line bounds."""
        rows = [("Function", "foo", "a.py", 1, 10)]
        result = self._affected(rows, ["a.py"], {"a.py": [(1, 10)]})
        assert len(result) == 1
        sym = result[0]
        for key in ("type", "name", "file_path", "line_start", "line_end"):
            assert key in sym

    def test_no_duplicate_symbols(self):
        """Same symbol matched by two hunk ranges must appear only once."""
        rows = [("Function", "foo", "a.py", 5, 20)]
        result = self._affected(rows, ["a.py"], {"a.py": [(5, 10), (15, 20)]})
        assert len(result) == 1
312
313
314	# ── DiffAnalyzer.affected_knowledge ───────────────────────────────────────────
315
316
class TestDiffAnalyzerAffectedKnowledge:
    """Tests for ``DiffAnalyzer.affected_knowledge``.

    ``affected_symbols`` is patched on the analyzer so each test controls the
    symbols directly; the mock store supplies the knowledge rows returned by
    every graph query.
    """

    def _k_rows(self):
        """Fake knowledge rows: (type, name, description, domain, status)."""
        return [
            ("Concept", "Billing", "Handles money", "finance", "stable"),
            ("Rule", "no_refund_after_30d", "30 day rule", "finance", "active"),
        ]

    def _knowledge_for(self, rows, symbols):
        """Run ``affected_knowledge`` with *rows* in the store and *symbols* patched in."""
        analyzer = _analyzer(store=_mock_store(result_set=rows))
        with patch.object(analyzer, "affected_symbols", return_value=symbols):
            return analyzer.affected_knowledge()

    def test_returns_list(self):
        """The result is a list."""
        sym = [{"name": "charge", "file_path": "billing.py"}]
        result = self._knowledge_for(self._k_rows(), sym)
        assert isinstance(result, list)

    def test_knowledge_nodes_returned(self):
        """Every knowledge row returned by the store appears in the result by name."""
        sym = [{"name": "charge", "file_path": "billing.py"}]
        result = self._knowledge_for(self._k_rows(), sym)
        names = [k["name"] for k in result]
        assert "Billing" in names
        assert "no_refund_after_30d" in names

    def test_empty_when_no_symbols(self):
        """No affected symbols means no knowledge, whatever the store would return."""
        result = self._knowledge_for(self._k_rows(), [])
        assert result == []

    def test_empty_when_no_graph_knowledge(self):
        """Symbols with no linked rows in the store yield an empty result."""
        sym = [{"name": "foo", "file_path": "a.py"}]
        result = self._knowledge_for([], sym)
        assert result == []

    def test_no_duplicate_knowledge_nodes(self):
        """Two symbols linking to the same knowledge node → deduplicated."""
        rows = [("Concept", "SharedConcept", "desc", "core", "stable")]
        syms = [
            {"name": "alpha", "file_path": "a.py"},
            {"name": "beta", "file_path": "b.py"},
        ]
        # The mock store returns the same row for both symbols' queries.
        result = self._knowledge_for(rows, syms)
        assert len([k for k in result if k["name"] == "SharedConcept"]) == 1

    def test_knowledge_dict_has_required_keys(self):
        """Each knowledge dict exposes type, name, description, domain and status."""
        rows = [("Rule", "my_rule", "some desc", "payments", "")]
        sym = [{"name": "process", "file_path": "pay.py"}]
        result = self._knowledge_for(rows, sym)
        assert len(result) == 1
        k = result[0]
        for key in ("type", "name", "description", "domain", "status"):
            assert key in k
385
386
387	# ── DiffAnalyzer.impact_summary ───────────────────────────────────────────────
388
389
class TestDiffAnalyzerImpactSummary:
    """Tests for ``DiffAnalyzer.impact_summary``.

    The three methods the summary is built from are patched on the analyzer,
    so only the bundling and counting are exercised.
    """

    def _build(self, files=None, symbols=None, knowledge=None):
        """Return ``impact_summary()`` with its three inputs patched.

        Each argument defaults to an empty list when omitted (or falsy).
        """
        store = _mock_store()
        analyzer = _analyzer(store=store)
        with (
            patch.object(analyzer, "changed_files", return_value=files or []),
            patch.object(analyzer, "affected_symbols", return_value=symbols or []),
            patch.object(analyzer, "affected_knowledge", return_value=knowledge or []),
        ):
            return analyzer.impact_summary()

    def test_returns_dict(self):
        """The summary is a dict."""
        result = self._build()
        assert isinstance(result, dict)

    def test_has_all_top_level_keys(self):
        """The summary has files, symbols, knowledge and counts keys."""
        result = self._build()
        for key in ("files", "symbols", "knowledge", "counts"):
            assert key in result

    def test_counts_match_lengths(self):
        """Each entry in ``counts`` equals the length of the matching list."""
        files = ["a.py", "b.py"]
        symbols = [
            {
                "type": "Function",
                "name": "f",
                "file_path": "a.py",
                "line_start": 1,
                "line_end": 5,
            }
        ]
        knowledge = [
            {"type": "Concept", "name": "X", "description": "", "domain": "", "status": ""}
        ]
        result = self._build(files=files, symbols=symbols, knowledge=knowledge)
        assert result["counts"]["files"] == 2
        assert result["counts"]["symbols"] == 1
        assert result["counts"]["knowledge"] == 1

    def test_empty_summary_all_zeros(self):
        """With nothing changed, every count is zero."""
        result = self._build()
        assert result["counts"]["files"] == 0
        assert result["counts"]["symbols"] == 0
        assert result["counts"]["knowledge"] == 0

    def test_files_list_propagated(self):
        """The changed-file list is passed through unchanged."""
        result = self._build(files=["x.py", "y.py"])
        assert result["files"] == ["x.py", "y.py"]
432
433
434	# ── DiffAnalyzer.to_json ──────────────────────────────────────────────────────
435
436
class TestDiffAnalyzerToJson:
    """Tests for ``DiffAnalyzer.to_json`` with ``impact_summary`` patched."""

    def _to_json(self, summary):
        """Return ``to_json()`` output while ``impact_summary`` returns *summary*."""
        analyzer = _analyzer(store=_mock_store())
        with patch.object(analyzer, "impact_summary", return_value=summary):
            return analyzer.to_json()

    def test_returns_valid_json(self):
        """The output parses as a JSON object."""
        summary = {"files": [], "symbols": [], "knowledge": [], "counts": {"files": 0}}
        parsed = json.loads(self._to_json(summary))
        assert isinstance(parsed, dict)

    def test_json_contains_summary_keys(self):
        """The parsed output keeps the summary's ``files`` key."""
        summary = {"files": ["f.py"], "symbols": [], "knowledge": [], "counts": {"files": 1}}
        parsed = json.loads(self._to_json(summary))
        assert "files" in parsed
455
456
457	# ── DiffAnalyzer.to_markdown ──────────────────────────────────────────────────
458
459
class TestDiffAnalyzerToMarkdown:
    """Tests for ``DiffAnalyzer.to_markdown`` with ``impact_summary`` patched."""

    def _md(self, files=None, symbols=None, knowledge=None):
        """Render markdown for a hand-built summary.

        Omitted (or falsy) arguments become empty lists; ``counts`` is derived
        from the list lengths.
        """
        files = files or []
        symbols = symbols or []
        knowledge = knowledge or []
        store = _mock_store()
        analyzer = _analyzer(store=store)
        summary = {
            "files": files,
            "symbols": symbols,
            "knowledge": knowledge,
            "counts": {
                "files": len(files),
                "symbols": len(symbols),
                "knowledge": len(knowledge),
            },
        }
        with patch.object(analyzer, "impact_summary", return_value=summary):
            return analyzer.to_markdown()

    def test_returns_string(self):
        """The rendered output is a str."""
        assert isinstance(self._md(), str)

    def test_contains_heading(self):
        """The output contains the 'Diff Impact Summary' heading text."""
        assert "Diff Impact Summary" in self._md()

    def test_lists_changed_file(self):
        """A changed file path appears in the output."""
        md = self._md(files=["src/main.py"])
        assert "src/main.py" in md

    def test_lists_affected_symbol(self):
        """An affected symbol's name appears in the output."""
        syms = [
            {
                "type": "Function",
                "name": "pay",
                "file_path": "billing.py",
                "line_start": 10,
                "line_end": 20,
            }
        ]
        md = self._md(symbols=syms)
        assert "pay" in md

    def test_lists_knowledge_node(self):
        """A linked knowledge node's name appears in the output."""
        know = [
            {
                "type": "Rule",
                "name": "no_double_charge",
                "description": "desc",
                "domain": "",
                "status": "",
            }
        ]
        md = self._md(knowledge=know)
        assert "no_double_charge" in md

    def test_empty_sections_show_placeholder(self):
        """Each empty section renders its placeholder text."""
        md = self._md()
        assert "No changed files" in md
        assert "No affected symbols" in md
        assert "No linked knowledge" in md
504
505
506	# ── CLI: navegador diff ────────────────────────────────────────────────────────
507
508
class TestCLIDiffCommand:
    """Tests for the ``navegador diff`` CLI command, run through click's CliRunner."""

    def _runner(self):
        """Return a fresh CliRunner."""
        return CliRunner()

    def _mock_analyzer(self, summary=None):
        """Build a stand-in DiffAnalyzer instance with canned outputs.

        ``impact_summary()`` returns *summary* (a one-file, one-symbol default
        when omitted), ``to_json()`` returns it serialised, and
        ``to_markdown()`` returns a fixed heading string.
        """
        if summary is None:
            summary = {
                "files": ["app.py"],
                "symbols": [
                    {
                        "type": "Function",
                        "name": "run",
                        "file_path": "app.py",
                        "line_start": 1,
                        "line_end": 10,
                    }
                ],
                "knowledge": [],
                "counts": {"files": 1, "symbols": 1, "knowledge": 0},
            }
        mock_inst = MagicMock()
        mock_inst.impact_summary.return_value = summary
        mock_inst.to_json.return_value = json.dumps(summary, indent=2)
        mock_inst.to_markdown.return_value = "# Diff Impact Summary\n\n## Changed Files (1)"
        return mock_inst

    def _invoke(self, mock_inst, args):
        """Invoke the CLI with *args* while the store and DiffAnalyzer are patched.

        NOTE(review): patching ``navegador.diff.DiffAnalyzer`` only takes effect
        if the command looks the class up at call time — confirm against the CLI.
        """
        runner = self._runner()
        with (
            runner.isolated_filesystem(),
            patch("navegador.cli.commands._get_store", return_value=_mock_store()),
            patch("navegador.diff.DiffAnalyzer", return_value=mock_inst),
        ):
            return runner.invoke(main, args)

    def test_command_exists(self):
        """``diff --help`` exits successfully."""
        runner = self._runner()
        result = runner.invoke(main, ["diff", "--help"])
        assert result.exit_code == 0

    def test_markdown_output_by_default(self, tmp_path):
        """Without ``--format`` the markdown rendering is printed."""
        result = self._invoke(self._mock_analyzer(), ["diff", "--repo", str(tmp_path)])
        assert result.exit_code == 0
        assert "Diff Impact Summary" in result.output

    def test_json_output_flag(self, tmp_path):
        """``--format json`` prints parseable JSON containing ``files``."""
        result = self._invoke(
            self._mock_analyzer(),
            ["diff", "--format", "json", "--repo", str(tmp_path)],
        )
        assert result.exit_code == 0
        parsed = json.loads(result.output)
        assert "files" in parsed

    def test_json_is_valid(self, tmp_path):
        """The JSON output round-trips the summary's files and counts."""
        summary = {
            "files": ["x.py"],
            "symbols": [],
            "knowledge": [],
            "counts": {"files": 1, "symbols": 0, "knowledge": 0},
        }
        result = self._invoke(
            self._mock_analyzer(summary=summary),
            ["diff", "--format", "json", "--repo", str(tmp_path)],
        )
        assert result.exit_code == 0
        data = json.loads(result.output)
        assert data["files"] == ["x.py"]
        assert data["counts"]["files"] == 1

Navegador

Keyboard Shortcuts