Navegador

navegador / tests / test_security.py

Source Blame History 361 lines

95549e5…	lmata	1	"""Tests for navegador.security — sensitive content detection and redaction."""
95549e5…	lmata	2
95549e5…	lmata	3	import json
95549e5…	lmata	4	from pathlib import Path
95549e5…	lmata	5	from unittest.mock import MagicMock, patch
95549e5…	lmata	6
95549e5…	lmata	7	import pytest
95549e5…	lmata	8	from click.testing import CliRunner
95549e5…	lmata	9
95549e5…	lmata	10	from navegador.security import REDACTED, SensitiveContentDetector, SensitiveMatch
95549e5…	lmata	11
95549e5…	lmata	12
95549e5…	lmata	13	# ---------------------------------------------------------------------------
95549e5…	lmata	14	# Fixtures
95549e5…	lmata	15	# ---------------------------------------------------------------------------
95549e5…	lmata	16
95549e5…	lmata	17
@pytest.fixture()
def detector():
    """Provide a ``SensitiveContentDetector`` constructed with no arguments."""
    fresh_detector = SensitiveContentDetector()
    return fresh_detector
95549e5…	lmata	21
95549e5…	lmata	22
95549e5…	lmata	23	# ---------------------------------------------------------------------------
95549e5…	lmata	24	# Pattern detection tests
95549e5…	lmata	25	# ---------------------------------------------------------------------------
95549e5…	lmata	26
95549e5…	lmata	27
class TestAPIKeyDetection:
    """``scan_content`` checks for API keys and access tokens."""

    def test_aws_akia_key(self, detector):
        """An AKIA-prefixed key is reported as ``aws_access_key``."""
        text = "key = AKIAIOSFODNN7EXAMPLE"
        matches = detector.scan_content(text)
        names = [m.pattern_name for m in matches]
        assert "aws_access_key" in names

    def test_aws_asia_key(self, detector):
        """An ASIA-prefixed key is reported as ``aws_access_key``."""
        # ASIA prefix + exactly 16 uppercase alphanumeric chars = 20-char key
        text = "assume_role_key=ASIAIOSFODNN7EXAMPLE"
        matches = detector.scan_content(text)
        names = [m.pattern_name for m in matches]
        assert "aws_access_key" in names

    def test_github_token_ghp(self, detector):
        """A ``ghp_`` token is reported as ``github_token``."""
        text = "GITHUB_TOKEN=ghp_aBcDeFgHiJkLmNoPqRsTuVwXyZ123456789012"
        matches = detector.scan_content(text)
        names = [m.pattern_name for m in matches]
        assert "github_token" in names

    def test_openai_sk_key(self, detector):
        """An ``sk-`` key is reported as ``api_key_sk``."""
        text = 'api_key = "sk-abcdefghijklmnopqrstuvwxyz12345678901234567890"'
        matches = detector.scan_content(text)
        names = [m.pattern_name for m in matches]
        assert "api_key_sk" in names

    def test_generic_api_key_assignment(self, detector):
        """A quoted value assigned to ``API_KEY`` is reported as ``api_key_assignment``."""
        text = 'API_KEY = "AbCdEfGhIjKlMnOpQrStUvWxYz123456"'
        matches = detector.scan_content(text)
        names = [m.pattern_name for m in matches]
        assert "api_key_assignment" in names

    def test_severity_is_high_for_aws_key(self, detector):
        """At least one match for a bare AWS key has severity ``high``."""
        text = "AKIAIOSFODNN7EXAMPLE"
        matches = detector.scan_content(text)
        assert any(m.severity == "high" for m in matches)

    def test_match_text_is_redacted(self, detector):
        """Every reported match exposes ``REDACTED`` instead of the secret."""
        text = "AKIAIOSFODNN7EXAMPLE"
        matches = detector.scan_content(text)
        # all() is True for an empty iterable, so without this guard the test
        # would pass even if nothing were detected at all.
        assert matches, "expected at least one match for an AWS access key"
        assert all(m.match_text == REDACTED for m in matches)

    def test_line_number_is_correct(self, detector):
        """The reported line number is 1-based: the key on the second line is line 2."""
        text = "# header\nAKIAIOSFODNN7EXAMPLE\n# footer"
        matches = detector.scan_content(text)
        aws_matches = [m for m in matches if m.pattern_name == "aws_access_key"]
        assert len(aws_matches) >= 1
        assert aws_matches[0].line_number == 2
95549e5…	lmata	76
95549e5…	lmata	77
class TestPasswordDetection:
    """``scan_content`` checks for hard-coded password / secret assignments."""

    def test_password_equals_string(self, detector):
        """``password = "<literal>"`` is reported as ``password_assignment``."""
        text = 'password = "super_s3cr3t_pass"'
        matches = detector.scan_content(text)
        names = [m.pattern_name for m in matches]
        assert "password_assignment" in names

    def test_passwd_variant(self, detector):
        """The ``passwd`` spelling is reported as ``password_assignment``."""
        text = "passwd = 'hunter2hunter2'"
        matches = detector.scan_content(text)
        names = [m.pattern_name for m in matches]
        assert "password_assignment" in names

    def test_secret_key_variant(self, detector):
        """A ``secret = "<literal>"`` assignment is reported as ``password_assignment``."""
        text = 'secret = "mysecretvalue123"'
        matches = detector.scan_content(text)
        names = [m.pattern_name for m in matches]
        assert "password_assignment" in names

    def test_severity_high(self, detector):
        """Every ``password_assignment`` match has severity ``high``."""
        text = 'password = "hunter2hunter2"'
        matches = detector.scan_content(text)
        pw = [m for m in matches if m.pattern_name == "password_assignment"]
        # Guard against a vacuous pass: all() over an empty list is True.
        assert pw, "expected at least one password_assignment match"
        assert all(m.severity == "high" for m in pw)
95549e5…	lmata	102
95549e5…	lmata	103
class TestPrivateKeyDetection:
    """``scan_content`` checks for PEM private-key material."""

    def test_rsa_private_key_header(self, detector):
        """An RSA PEM block is reported as ``private_key_pem``."""
        text = "-----BEGIN RSA PRIVATE KEY-----\nMIIEowIBAAKCAQEA...\n-----END RSA PRIVATE KEY-----"
        matches = detector.scan_content(text)
        names = [m.pattern_name for m in matches]
        assert "private_key_pem" in names

    def test_generic_private_key_header(self, detector):
        """A PEM block without an algorithm label is reported as ``private_key_pem``."""
        text = "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w...\n-----END PRIVATE KEY-----"
        matches = detector.scan_content(text)
        names = [m.pattern_name for m in matches]
        assert "private_key_pem" in names

    def test_openssh_private_key_header(self, detector):
        """An OPENSSH PEM block is reported as ``private_key_pem``."""
        text = "-----BEGIN OPENSSH PRIVATE KEY-----\nb3BlbnNzaC1...\n-----END OPENSSH PRIVATE KEY-----"
        matches = detector.scan_content(text)
        names = [m.pattern_name for m in matches]
        assert "private_key_pem" in names

    def test_severity_high(self, detector):
        """Every ``private_key_pem`` match has severity ``high``."""
        text = "-----BEGIN RSA PRIVATE KEY-----"
        matches = detector.scan_content(text)
        pk = [m for m in matches if m.pattern_name == "private_key_pem"]
        # Guard against a vacuous pass: all() over an empty list is True.
        assert pk, "expected at least one private_key_pem match"
        assert all(m.severity == "high" for m in pk)
95549e5…	lmata	128
95549e5…	lmata	129
class TestConnectionStringDetection:
    """``scan_content`` checks for database URIs that embed credentials.

    All credentials and hosts below are fake. Each URI deliberately contains a
    ``user:password@host`` section.
    """

    def test_postgres_with_credentials(self, detector):
        """A PostgreSQL URI with credentials is reported as ``connection_string``."""
        text = 'DATABASE_URL = "postgresql://admin:s3cret@db.example.com:5432/mydb"'
        matches = detector.scan_content(text)
        names = [m.pattern_name for m in matches]
        assert "connection_string" in names

    def test_mysql_with_credentials(self, detector):
        """A MySQL URI with credentials is reported as ``connection_string``."""
        text = "conn = mysql://user:passw0rd@localhost/schema"
        matches = detector.scan_content(text)
        names = [m.pattern_name for m in matches]
        assert "connection_string" in names

    def test_mongodb_with_credentials(self, detector):
        """A MongoDB URI with credentials is reported as ``connection_string``."""
        text = 'uri = "mongodb://root:s3cret@mongo.example.com:27017/db"'
        matches = detector.scan_content(text)
        names = [m.pattern_name for m in matches]
        assert "connection_string" in names

    def test_mongodb_srv_with_credentials(self, detector):
        """A ``mongodb+srv`` URI with credentials is reported as ``connection_string``."""
        text = 'uri = "mongodb+srv://admin:s3cret@cluster0.example.mongodb.net/mydb"'
        matches = detector.scan_content(text)
        names = [m.pattern_name for m in matches]
        assert "connection_string" in names

    def test_severity_high(self, detector):
        """Every ``connection_string`` match has severity ``high``."""
        text = "postgresql://admin:s3cret@db.example.com/mydb"
        matches = detector.scan_content(text)
        cs = [m for m in matches if m.pattern_name == "connection_string"]
        # Guard against a vacuous pass: all() over an empty list is True.
        assert cs, "expected at least one connection_string match"
        assert all(m.severity == "high" for m in cs)
95549e5…	lmata	160
95549e5…	lmata	161
class TestJWTDetection:
    """``scan_content`` checks for JSON Web Tokens."""

    def test_valid_jwt(self, detector):
        """A three-part token after ``Bearer`` is reported as ``jwt_token``."""
        # A real-looking but fake JWT
        header = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9"
        payload = "eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ"
        signature = "SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c"
        jwt = f"{header}.{payload}.{signature}"
        text = f'Authorization: Bearer {jwt}'
        matches = detector.scan_content(text)
        names = [m.pattern_name for m in matches]
        assert "jwt_token" in names

    def test_severity_medium(self, detector):
        """Every ``jwt_token`` match has severity ``medium``."""
        header = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9"
        payload = "eyJzdWIiOiIxMjM0NTY3ODkwIn0"
        sig = "SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c"
        text = f"{header}.{payload}.{sig}"
        matches = detector.scan_content(text)
        jwt = [m for m in matches if m.pattern_name == "jwt_token"]
        # Guard against a vacuous pass: all() over an empty list is True.
        assert jwt, "expected at least one jwt_token match"
        assert all(m.severity == "medium" for m in jwt)
95549e5…	lmata	182
95549e5…	lmata	183
95549e5…	lmata	184	# ---------------------------------------------------------------------------
95549e5…	lmata	185	# Redaction tests
95549e5…	lmata	186	# ---------------------------------------------------------------------------
95549e5…	lmata	187
95549e5…	lmata	188
class TestRedaction:
    """``redact`` replaces detected secrets with ``REDACTED`` and leaves clean text alone."""

    def test_redact_aws_key(self, detector):
        """The AWS key disappears from the output and the marker appears."""
        text = "key = AKIAIOSFODNN7EXAMPLE"
        result = detector.redact(text)
        assert "AKIAIOSFODNN7EXAMPLE" not in result
        assert REDACTED in result

    def test_redact_password(self, detector):
        """The password literal disappears from the output and the marker appears."""
        text = 'password = "hunter2hunter2"'
        result = detector.redact(text)
        assert "hunter2hunter2" not in result
        assert REDACTED in result

    def test_redact_pem_header(self, detector):
        """The PEM ``BEGIN`` header disappears from the output and the marker appears."""
        text = "-----BEGIN RSA PRIVATE KEY-----\nMIIEowIBAAKCAQEA\n-----END RSA PRIVATE KEY-----"
        result = detector.redact(text)
        assert "-----BEGIN RSA PRIVATE KEY-----" not in result
        assert REDACTED in result

    def test_redact_connection_string(self, detector):
        """The password embedded in a database URI disappears from the output."""
        # Fake credentials; the input must really contain "s3cret" so that the
        # "not in result" assertion below is meaningful.
        text = "postgresql://admin:s3cret@db.example.com/mydb"
        result = detector.redact(text)
        assert "s3cret" not in result
        assert REDACTED in result

    def test_redact_jwt(self, detector):
        """The full token disappears from the output and the marker appears."""
        header = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9"
        payload = "eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ"
        sig = "SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c"
        jwt = f"{header}.{payload}.{sig}"
        result = detector.redact(jwt)
        assert jwt not in result
        assert REDACTED in result

    def test_redact_returns_unchanged_clean_text(self, detector):
        """Text without secrets is returned exactly as given."""
        text = "def hello():\n return 'world'\n"
        result = detector.redact(text)
        assert result == text

    def test_redact_multiple_secrets_in_one_string(self, detector):
        """Two different kinds of secret in one input are both removed."""
        text = (
            "AKIAIOSFODNN7EXAMPLE\n"
            'password = "mysecretvalue"\n'
        )
        result = detector.redact(text)
        assert "AKIAIOSFODNN7EXAMPLE" not in result
        assert "mysecretvalue" not in result
95549e5…	lmata	236
95549e5…	lmata	237
95549e5…	lmata	238	# ---------------------------------------------------------------------------
95549e5…	lmata	239	# scan_file tests
95549e5…	lmata	240	# ---------------------------------------------------------------------------
95549e5…	lmata	241
95549e5…	lmata	242
class TestScanFile:
    """``scan_file`` on a file with a secret, a clean file, and a missing path."""

    def test_scan_file_detects_secrets(self, detector, tmp_path):
        """A file containing an AWS key yields an ``aws_access_key`` match."""
        target = tmp_path / "config.py"
        target.write_text('AWS_KEY = "AKIAIOSFODNN7EXAMPLE"\n', encoding="utf-8")
        found = detector.scan_file(target)
        assert len(found) >= 1
        reported_patterns = [hit.pattern_name for hit in found]
        assert "aws_access_key" in reported_patterns

    def test_scan_file_clean_file(self, detector, tmp_path):
        """A file with ordinary code yields an empty list."""
        target = tmp_path / "utils.py"
        target.write_text("def add(a, b):\n return a + b\n", encoding="utf-8")
        found = detector.scan_file(target)
        assert found == []

    def test_scan_file_missing_file_returns_empty(self, detector, tmp_path):
        """A path that does not exist yields an empty list."""
        found = detector.scan_file(tmp_path / "does_not_exist.py")
        assert found == []
95549e5…	lmata	261
95549e5…	lmata	262
95549e5…	lmata	263	# ---------------------------------------------------------------------------
95549e5…	lmata	264	# No false positives on clean code
95549e5…	lmata	265	# ---------------------------------------------------------------------------
95549e5…	lmata	266
95549e5…	lmata	267
class TestNoFalsePositives:
    """Snippets of ordinary code for which ``scan_content`` must report nothing."""

    CLEAN_SNIPPETS = [
        # Identifiers that merely contain the word "password"
        "password_length = 12\npassword_complexity = True\n",
        # Password read from a prompt rather than hard-coded
        "password = input('Enter password: ')\n",
        # Literal too short to count (below minimum length threshold)
        "secret = 'abc'\n",
        # Postgres URL with no user:password part
        "DB_URL = 'postgresql://localhost/mydb'\n",
        # Function whose name mentions an API key
        "def get_api_key_name():\n return 'key_name'\n",
        # Plain URL assignment that resembles an env-var style name
        "API_BASE_URL = 'https://api.example.com'\n",
        # JWT-like shape, but far too short to be a real token
        "token = 'eyJ.x.y'\n",
    ]

    @pytest.mark.parametrize("snippet", CLEAN_SNIPPETS)
    def test_no_false_positive(self, detector, snippet):
        """Scanning a clean snippet returns an empty list."""
        found = detector.scan_content(snippet)
        assert found == [], f"Unexpected match in: {snippet!r} → {found}"
95549e5…	lmata	290
95549e5…	lmata	291
95549e5…	lmata	292	# ---------------------------------------------------------------------------
95549e5…	lmata	293	# SensitiveMatch dataclass
95549e5…	lmata	294	# ---------------------------------------------------------------------------
95549e5…	lmata	295
95549e5…	lmata	296
class TestSensitiveMatch:
    """``SensitiveMatch`` stores the values it is constructed with."""

    def test_fields(self):
        """Each keyword argument is readable back as an attribute of the same name."""
        expected = {
            "pattern_name": "aws_access_key",
            "line_number": 3,
            "match_text": REDACTED,
            "severity": "high",
        }
        record = SensitiveMatch(**expected)
        for field_name, value in expected.items():
            assert getattr(record, field_name) == value
95549e5…	lmata	309
95549e5…	lmata	310
95549e5…	lmata	311	# ---------------------------------------------------------------------------
95549e5…	lmata	312	# CLI --redact flag
95549e5…	lmata	313	# ---------------------------------------------------------------------------
95549e5…	lmata	314
95549e5…	lmata	315
class TestCLIRedactFlag:
    """Wiring of the ``--redact`` option on the ``ingest`` CLI command."""

    @staticmethod
    def _invoke_ingest(*extra_args, stats=None):
        """Run ``ingest src [extra_args...]`` with the store and ingester mocked.

        Args:
            *extra_args: Additional command-line arguments appended after ``src``.
            stats: Dict returned by the mocked ``ingest()`` call; defaults to
                all-zero counters.

        Returns:
            A ``(result, mock_ingester_cls)`` tuple: the Click invocation result
            and the mock that replaced ``navegador.ingestion.RepoIngester``.
        """
        # Imported lazily, as in the original tests.
        from navegador.cli.commands import main

        if stats is None:
            stats = {"files": 0, "functions": 0, "classes": 0, "edges": 0, "skipped": 0}

        runner = CliRunner()
        with runner.isolated_filesystem():
            # The command is pointed at a real (empty) directory named "src".
            Path("src").mkdir()
            with patch("navegador.cli.commands._get_store", return_value=MagicMock()):
                with patch("navegador.ingestion.RepoIngester") as mock_ingester_cls:
                    mock_ingester_cls.return_value.ingest.return_value = stats
                    result = runner.invoke(main, ["ingest", "src", *extra_args])
        return result, mock_ingester_cls

    def test_redact_flag_accepted(self):
        """--redact flag should be accepted by the ingest command without error."""
        result, _ = self._invoke_ingest(
            "--redact",
            stats={"files": 1, "functions": 2, "classes": 0, "edges": 3, "skipped": 0},
        )
        assert result.exit_code == 0, result.output

    def test_redact_flag_passes_to_ingester(self):
        """RepoIngester must be constructed with redact=True when --redact is given."""
        result, mock_ingester_cls = self._invoke_ingest("--redact")
        # Surface CLI failures directly instead of as "called 0 times".
        assert result.exit_code == 0, result.output
        mock_ingester_cls.assert_called_once()
        _, kwargs = mock_ingester_cls.call_args
        assert kwargs.get("redact") is True

    def test_no_redact_flag_defaults_false(self):
        """Without --redact, RepoIngester should be constructed with redact=False (default)."""
        result, mock_ingester_cls = self._invoke_ingest()
        # Surface CLI failures directly instead of as "called 0 times".
        assert result.exit_code == 0, result.output
        mock_ingester_cls.assert_called_once()
        _, kwargs = mock_ingester_cls.call_args
        # A missing keyword counts as the default (False).
        assert kwargs.get("redact", False) is False

Navegador

Keyboard Shortcuts