Navegador

navegador / tests / test_ingestion_wiki.py
Blame History Raw 299 lines
1
"""Tests for navegador.ingestion.wiki — WikiIngester."""
2
3
import tempfile
4
from pathlib import Path
5
from unittest.mock import MagicMock, patch
6
7
import pytest
8
9
from navegador.graph.schema import NodeLabel
10
from navegador.ingestion.wiki import WikiIngester, _extract_terms
11
12
# ── Unit: _extract_terms ──────────────────────────────────────────────────────
13
14
class TestExtractTerms:
    """Unit checks for ``_extract_terms`` on small markdown snippets."""

    def test_extracts_headings(self):
        """Headings at levels one to three each show up in the result."""
        source = "# Introduction\n## Getting Started\n### Deep Dive\n"
        found = _extract_terms(source)
        for heading in ("Introduction", "Getting Started", "Deep Dive"):
            assert heading in found

    def test_extracts_bold_asterisk(self):
        """Text wrapped in ``**`` is returned as a term."""
        found = _extract_terms("Use **GraphStore** for all persistence.")
        assert "GraphStore" in found

    def test_extracts_bold_underscore(self):
        """Text wrapped in ``__`` is returned as a term."""
        found = _extract_terms("The __FalkorDB__ module is required.")
        assert "FalkorDB" in found

    def test_deduplicates(self):
        """A term appearing as both heading and bold text is listed once."""
        source = "# GraphStore\nUse **GraphStore** here too."
        found = _extract_terms(source)
        assert found.count("GraphStore") == 1

    def test_empty_markdown(self):
        """An empty document produces an empty list."""
        assert _extract_terms("") == []

    def test_no_headings_no_bold(self):
        """Plain text without markup produces an empty list."""
        found = _extract_terms("plain text with no markup")
        assert found == []

    def test_preserves_order(self):
        """Terms come back in the order they occur in the document."""
        source = "# Alpha\n# Beta\n**Gamma**"
        expected = ["Alpha", "Beta", "Gamma"]
        assert _extract_terms(source) == expected
48
49
50
# ── Unit: ingest_local ────────────────────────────────────────────────────────
51
52
class TestIngestLocal:
    """Checks for ``WikiIngester.ingest_local`` using a mocked graph store."""

    def _make_store(self):
        """Return a mock store whose ``query`` yields an empty ``result_set``."""
        mock_store = MagicMock()
        mock_store.query.return_value = MagicMock(result_set=[])
        return mock_store

    @staticmethod
    def _ingest_pages(store, pages):
        """Write *pages* into a fresh temp directory and ingest that directory.

        ``pages`` maps a file name to its text content. The stats returned by
        ``ingest_local`` are passed back to the caller.
        """
        ingester = WikiIngester(store)
        with tempfile.TemporaryDirectory() as root:
            for filename, text in pages.items():
                (Path(root) / filename).write_text(text)
            return ingester.ingest_local(root)

    def test_ingests_markdown_files(self):
        """Two markdown files are counted as two pages."""
        stats = self._ingest_pages(
            self._make_store(),
            {
                "home.md": "# Welcome\nThis is home.",
                "guide.md": "## Usage\nSome guide.",
            },
        )
        assert stats["pages"] == 2

    def test_skips_non_markdown(self):
        """A ``.png`` next to a ``.md`` file is not counted as a page."""
        ingester = WikiIngester(self._make_store())
        with tempfile.TemporaryDirectory() as root:
            base = Path(root)
            (base / "readme.md").write_text("# Readme")
            (base / "image.png").write_bytes(b"\x89PNG")
            stats = ingester.ingest_local(root)
        assert stats["pages"] == 1

    def test_raises_if_dir_missing(self):
        """A non-existent directory raises ``FileNotFoundError``."""
        ingester = WikiIngester(self._make_store())
        with pytest.raises(FileNotFoundError):
            ingester.ingest_local("/nonexistent/path")

    def test_creates_wiki_page_node(self):
        """One page yields one ``create_node`` call with WikiPage label and props."""
        store = self._make_store()
        self._ingest_pages(store, {"arch.md": "# Architecture"})
        store.create_node.assert_called_once()
        positional = store.create_node.call_args[0]
        assert positional[0] == NodeLabel.WikiPage
        props = positional[1]
        assert props["name"] == "arch"
        assert props["source"] == "local"

    def test_page_name_normalisation(self):
        """The hyphen in the file stem becomes a space in the page name."""
        store = self._make_store()
        self._ingest_pages(store, {"getting-started.md": "# Hi"})
        props = store.create_node.call_args[0][1]
        assert props["name"] == "getting started"

    def test_creates_documents_edge_when_term_matches(self):
        """When the store query returns a row, at least one edge is created."""
        store = MagicMock()
        store.query.return_value = MagicMock(result_set=[["Concept", "GraphStore"]])
        stats = self._ingest_pages(store, {"page.md": "# GraphStore\nSome text."})
        assert stats["links"] >= 1
        store.create_edge.assert_called()

    def test_no_links_when_no_term_match(self):
        """With an empty query result no edge is created and links stay at 0."""
        store = self._make_store()  # query returns []
        stats = self._ingest_pages(store, {"page.md": "# UnknownTerm\nText."})
        assert stats["links"] == 0
        store.create_edge.assert_not_called()

    def test_content_capped_at_4000_chars(self):
        """Stored page content is at most 4000 characters long."""
        store = self._make_store()
        self._ingest_pages(store, {"long.md": "x" * 10000})
        props = store.create_node.call_args[0][1]
        assert len(props["content"]) <= 4000

    def test_returns_stats_dict(self):
        """An empty directory still returns both stats keys, with zero pages."""
        stats = self._ingest_pages(self._make_store(), {})
        assert "pages" in stats
        assert "links" in stats
        assert stats["pages"] == 0

    def test_recursive_glob(self):
        """A markdown file inside a sub-directory is picked up."""
        ingester = WikiIngester(self._make_store())
        with tempfile.TemporaryDirectory() as root:
            nested_dir = Path(root) / "sub"
            nested_dir.mkdir()
            (nested_dir / "nested.md").write_text("# Nested")
            stats = ingester.ingest_local(root)
        assert stats["pages"] == 1
150
151
152
# ── Unit: _try_link edge-type handling ────────────────────────────────────────
153
154
class TestTryLink:
    """Checks for ``WikiIngester._try_link`` return values and edge creation."""

    @staticmethod
    def _linker(rows):
        """Build a mock store whose query returns *rows*, plus its ingester."""
        mock_store = MagicMock()
        mock_store.query.return_value = MagicMock(result_set=rows)
        return mock_store, WikiIngester(mock_store)

    def test_handles_invalid_label_gracefully(self):
        """A row carrying the label ``InvalidLabel`` yields 0."""
        _, ingester = self._linker([["InvalidLabel", "foo"]])
        outcome = ingester._try_link("page", "foo")
        assert outcome == 0

    def test_creates_edge_for_valid_label(self):
        """A ``Concept`` row yields 1 and exactly one ``create_edge`` call."""
        store, ingester = self._linker([["Concept", "MyService"]])
        outcome = ingester._try_link("wiki page", "MyService")
        assert outcome == 1
        store.create_edge.assert_called_once()

    def test_returns_zero_on_unknown_label(self):
        """A row carrying the label ``UnknownLabel`` yields 0."""
        _, ingester = self._linker([["UnknownLabel", "node"]])
        outcome = ingester._try_link("page", "node")
        assert outcome == 0

    def test_propagates_store_error(self):
        """An exception raised by ``create_edge`` reaches the caller."""
        store, ingester = self._linker([["Concept", "node"]])
        store.create_edge.side_effect = Exception("DB error")
        with pytest.raises(Exception, match="DB error"):
            ingester._try_link("page", "node")
184
185
186
# ── GitHub clone (ingest_github) ──────────────────────────────────────────────
187
188
class TestIngestGithub:
    """Checks for ``WikiIngester.ingest_github`` with ``subprocess.run`` mocked.

    No real ``git clone`` is executed: every test patches ``subprocess.run``
    and inspects the command it was called with or the returned stats.
    """

    def _make_store(self):
        """Return a mock store whose ``query`` yields an empty ``result_set``."""
        store = MagicMock()
        store.query.return_value = MagicMock(result_set=[])
        return store

    def test_clones_wiki_and_ingests_local(self):
        """A successful clone is followed by ingestion of the cloned files."""
        store = self._make_store()
        ingester = WikiIngester(store)

        with tempfile.TemporaryDirectory() as tmpdir:
            wiki_dir = Path(tmpdir)
            (wiki_dir / "home.md").write_text("# Home\nWelcome.")

            mock_result = MagicMock()
            mock_result.returncode = 0

            # mkdtemp is redirected to the pre-populated directory so the
            # ingester finds "home.md" where it expects the clone to be.
            with patch("subprocess.run", return_value=mock_result) as mock_run, \
                    patch("tempfile.mkdtemp", return_value=tmpdir):
                stats = ingester.ingest_github("owner/repo")

            mock_run.assert_called_once()
            cmd = mock_run.call_args[0][0]
            assert "git" in cmd
            assert "clone" in cmd
            assert "https://github.com/owner/repo.wiki.git" in cmd
            assert stats["pages"] == 1

    def test_returns_empty_on_clone_failure(self):
        """A non-zero return code from the clone yields zeroed stats."""
        store = self._make_store()
        ingester = WikiIngester(store)

        mock_result = MagicMock()
        mock_result.returncode = 1
        mock_result.stderr = "fatal: repository not found"

        with patch("subprocess.run", return_value=mock_result):
            stats = ingester.ingest_github("owner/empty-repo")

        assert stats == {"pages": 0, "links": 0}

    def test_uses_token_in_url(self):
        """The token passed to ``ingest_github`` appears in the clone command."""
        store = self._make_store()
        ingester = WikiIngester(store)

        mock_result = MagicMock()
        mock_result.returncode = 1
        mock_result.stderr = "auth error"

        with patch("subprocess.run", return_value=mock_result) as mock_run:
            ingester.ingest_github("owner/repo", token="mytoken")

        cmd = mock_run.call_args[0][0]
        # cmd[3] is presumably the clone URL argument — confirm against
        # ingest_github. The token is expected right before the host.
        assert "mytoken@github.com" in cmd[3]

    def test_uses_explicit_clone_dir(self):
        """Smoke test: passing ``clone_dir`` explicitly does not raise."""
        store = self._make_store()
        ingester = WikiIngester(store)

        with tempfile.TemporaryDirectory() as tmpdir:
            mock_result = MagicMock()
            mock_result.returncode = 0

            with patch("subprocess.run", return_value=mock_result):
                ingester.ingest_github("owner/repo", clone_dir=tmpdir)
            # Should not crash
251
252
253
# ── GitHub API (ingest_github_api) ────────────────────────────────────────────
254
255
class TestIngestGithubApi:
    """Checks for ``WikiIngester.ingest_github_api`` with ``urllib`` mocked."""

    def _make_store(self):
        """Return a mock store whose ``query`` yields an empty ``result_set``."""
        mock_store = MagicMock()
        mock_store.query.return_value = MagicMock(result_set=[])
        return mock_store

    def test_fetches_readme_and_ingests(self):
        """A base64-encoded README payload results in at least one page."""
        import base64
        import json as _json

        ingester = WikiIngester(self._make_store())

        payload = {
            "content": base64.b64encode(b"# README\nSome content").decode(),
            "html_url": "https://github.com/owner/repo/blob/main/README.md",
        }

        # The response mock doubles as its own context manager.
        response = MagicMock()
        response.read.return_value = _json.dumps(payload).encode()
        response.__enter__.return_value = response
        response.__exit__.return_value = False

        with patch("urllib.request.urlopen", return_value=response):
            stats = ingester.ingest_github_api("owner/repo")
        assert stats["pages"] >= 1

    def test_skips_missing_files_gracefully(self):
        """When every ``urlopen`` call raises, the stats come back zeroed."""
        ingester = WikiIngester(self._make_store())

        with patch("urllib.request.urlopen", side_effect=Exception("404")):
            stats = ingester.ingest_github_api("owner/repo")
        assert stats == {"pages": 0, "links": 0}

    def test_uses_auth_header_with_token(self):
        """With a token supplied, ``urllib.request.Request`` is constructed."""
        ingester = WikiIngester(self._make_store())

        with patch("urllib.request.urlopen", side_effect=Exception("skip")), \
                patch("urllib.request.Request") as request_cls:
            ingester.ingest_github_api("owner/repo", token="mytoken")
        # Just verify no crash and token path was exercised
        assert request_cls.called
299

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button