Navegador

navegador / tests / test_security.py
Source Blame History 361 lines
95549e5… lmata 1 """Tests for navegador.security — sensitive content detection and redaction."""
95549e5… lmata 2
95549e5… lmata 3 import json
95549e5… lmata 4 from pathlib import Path
95549e5… lmata 5 from unittest.mock import MagicMock, patch
95549e5… lmata 6
95549e5… lmata 7 import pytest
95549e5… lmata 8 from click.testing import CliRunner
95549e5… lmata 9
95549e5… lmata 10 from navegador.security import REDACTED, SensitiveContentDetector, SensitiveMatch
95549e5… lmata 11
95549e5… lmata 12
95549e5… lmata 13 # ---------------------------------------------------------------------------
95549e5… lmata 14 # Fixtures
95549e5… lmata 15 # ---------------------------------------------------------------------------
95549e5… lmata 16
95549e5… lmata 17
@pytest.fixture()
def detector():
    """Provide a newly constructed ``SensitiveContentDetector`` to a test."""
    instance = SensitiveContentDetector()
    return instance
95549e5… lmata 21
95549e5… lmata 22
95549e5… lmata 23 # ---------------------------------------------------------------------------
95549e5… lmata 24 # Pattern detection tests
95549e5… lmata 25 # ---------------------------------------------------------------------------
95549e5… lmata 26
95549e5… lmata 27
class TestAPIKeyDetection:
    """API-key style secrets are reported by ``scan_content``."""

    def test_aws_akia_key(self, detector):
        """An ``AKIA``-prefixed key is reported as ``aws_access_key``."""
        found = detector.scan_content("key = AKIAIOSFODNN7EXAMPLE")
        assert "aws_access_key" in {m.pattern_name for m in found}

    def test_aws_asia_key(self, detector):
        """An ``ASIA``-prefixed key is reported as ``aws_access_key``."""
        # ASIA prefix + exactly 16 uppercase alphanumeric chars = 20-char key
        found = detector.scan_content("assume_role_key=ASIAIOSFODNN7EXAMPLE")
        assert "aws_access_key" in {m.pattern_name for m in found}

    def test_github_token_ghp(self, detector):
        """A ``ghp_`` token is reported as ``github_token``."""
        found = detector.scan_content(
            "GITHUB_TOKEN=ghp_aBcDeFgHiJkLmNoPqRsTuVwXyZ123456789012"
        )
        assert "github_token" in {m.pattern_name for m in found}

    def test_openai_sk_key(self, detector):
        """An ``sk-`` key is reported as ``api_key_sk``."""
        found = detector.scan_content(
            'api_key = "sk-abcdefghijklmnopqrstuvwxyz12345678901234567890"'
        )
        assert "api_key_sk" in {m.pattern_name for m in found}

    def test_generic_api_key_assignment(self, detector):
        """A quoted value assigned to ``API_KEY`` is reported as ``api_key_assignment``."""
        found = detector.scan_content('API_KEY = "AbCdEfGhIjKlMnOpQrStUvWxYz123456"')
        assert "api_key_assignment" in {m.pattern_name for m in found}

    def test_severity_is_high_for_aws_key(self, detector):
        """At least one match for a bare AWS key carries ``high`` severity."""
        found = detector.scan_content("AKIAIOSFODNN7EXAMPLE")
        assert any(m.severity == "high" for m in found)

    def test_match_text_is_redacted(self, detector):
        """Every reported match exposes ``REDACTED`` instead of the secret."""
        found = detector.scan_content("AKIAIOSFODNN7EXAMPLE")
        # all() is True for an empty list, so require at least one match first;
        # otherwise this test would pass even if nothing was detected.
        assert found, "expected at least one match for an AWS access key"
        assert all(m.match_text == REDACTED for m in found)

    def test_line_number_is_correct(self, detector):
        """The reported line number is 1-based: the key sits on line 2."""
        found = detector.scan_content("# header\nAKIAIOSFODNN7EXAMPLE\n# footer")
        aws_hits = [m for m in found if m.pattern_name == "aws_access_key"]
        assert len(aws_hits) >= 1
        assert aws_hits[0].line_number == 2
95549e5… lmata 76
95549e5… lmata 77
class TestPasswordDetection:
    """Literal password/secret assignments are reported by ``scan_content``."""

    def test_password_equals_string(self, detector):
        """``password = "<literal>"`` is reported as ``password_assignment``."""
        found = detector.scan_content('password = "super_s3cr3t_pass"')
        assert "password_assignment" in {m.pattern_name for m in found}

    def test_passwd_variant(self, detector):
        """The ``passwd`` spelling with single quotes is also reported."""
        found = detector.scan_content("passwd = 'hunter2hunter2'")
        assert "password_assignment" in {m.pattern_name for m in found}

    def test_secret_key_variant(self, detector):
        """A ``secret = "<literal>"`` assignment is reported under the same name."""
        found = detector.scan_content('secret = "mysecretvalue123"')
        assert "password_assignment" in {m.pattern_name for m in found}

    def test_severity_high(self, detector):
        """Every ``password_assignment`` match carries ``high`` severity."""
        found = detector.scan_content('password = "hunter2hunter2"')
        pw_hits = [m for m in found if m.pattern_name == "password_assignment"]
        # Guard against a vacuous pass: all() over an empty list is True.
        assert pw_hits, "expected a password_assignment match"
        assert all(m.severity == "high" for m in pw_hits)
95549e5… lmata 102
95549e5… lmata 103
class TestPrivateKeyDetection:
    """PEM private-key blocks are reported as ``private_key_pem``."""

    def test_rsa_private_key_header(self, detector):
        """An ``RSA PRIVATE KEY`` PEM block is reported."""
        pem = "-----BEGIN RSA PRIVATE KEY-----\nMIIEowIBAAKCAQEA...\n-----END RSA PRIVATE KEY-----"
        found = detector.scan_content(pem)
        assert "private_key_pem" in {m.pattern_name for m in found}

    def test_generic_private_key_header(self, detector):
        """A ``PRIVATE KEY`` PEM block with no algorithm prefix is reported."""
        pem = "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w...\n-----END PRIVATE KEY-----"
        found = detector.scan_content(pem)
        assert "private_key_pem" in {m.pattern_name for m in found}

    def test_openssh_private_key_header(self, detector):
        """An ``OPENSSH PRIVATE KEY`` PEM block is reported."""
        pem = "-----BEGIN OPENSSH PRIVATE KEY-----\nb3BlbnNzaC1...\n-----END OPENSSH PRIVATE KEY-----"
        found = detector.scan_content(pem)
        assert "private_key_pem" in {m.pattern_name for m in found}

    def test_severity_high(self, detector):
        """Every ``private_key_pem`` match carries ``high`` severity."""
        found = detector.scan_content("-----BEGIN RSA PRIVATE KEY-----")
        pem_hits = [m for m in found if m.pattern_name == "private_key_pem"]
        # Guard against a vacuous pass: all() over an empty list is True.
        assert pem_hits, "expected a private_key_pem match"
        assert all(m.severity == "high" for m in pem_hits)
95549e5… lmata 128
95549e5… lmata 129
class TestConnectionStringDetection:
    """Database URIs that embed ``user:password@host`` are reported.

    All credentials and hosts below are fake; the tests only assert that a
    URI carrying inline credentials is reported as ``connection_string``.
    """

    def test_postgres_with_credentials(self, detector):
        """A ``postgresql://`` URI with inline credentials is reported."""
        text = 'DATABASE_URL = "postgresql://admin:s3cret@db.example.com:5432/mydb"'
        found = detector.scan_content(text)
        assert "connection_string" in {m.pattern_name for m in found}

    def test_mysql_with_credentials(self, detector):
        """A ``mysql://`` URI with inline credentials is reported."""
        text = "conn = mysql://user:passw0rd@localhost/schema"
        found = detector.scan_content(text)
        assert "connection_string" in {m.pattern_name for m in found}

    def test_mongodb_with_credentials(self, detector):
        """A ``mongodb://`` URI with inline credentials is reported."""
        text = 'uri = "mongodb://root:hunter2@mongo.example.com:27017/db"'
        found = detector.scan_content(text)
        assert "connection_string" in {m.pattern_name for m in found}

    def test_mongodb_srv_with_credentials(self, detector):
        """A ``mongodb+srv://`` URI with inline credentials is reported."""
        text = 'uri = "mongodb+srv://admin:s3cret@cluster0.example.mongodb.net/mydb"'
        found = detector.scan_content(text)
        assert "connection_string" in {m.pattern_name for m in found}

    def test_severity_high(self, detector):
        """Every ``connection_string`` match carries ``high`` severity."""
        found = detector.scan_content("postgresql://admin:s3cret@db.example.com/mydb")
        conn_hits = [m for m in found if m.pattern_name == "connection_string"]
        # Guard against a vacuous pass: all() over an empty list is True.
        assert conn_hits, "expected a connection_string match"
        assert all(m.severity == "high" for m in conn_hits)
95549e5… lmata 160
95549e5… lmata 161
class TestJWTDetection:
    """Three-segment JSON Web Tokens are reported as ``jwt_token``."""

    def test_valid_jwt(self, detector):
        """A JWT inside an ``Authorization: Bearer`` header is reported."""
        # A real-looking but fake JWT
        segments = (
            "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9",
            "eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ",
            "SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c",
        )
        token = ".".join(segments)
        found = detector.scan_content(f"Authorization: Bearer {token}")
        assert "jwt_token" in {m.pattern_name for m in found}

    def test_severity_medium(self, detector):
        """Every ``jwt_token`` match carries ``medium`` severity."""
        segments = (
            "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9",
            "eyJzdWIiOiIxMjM0NTY3ODkwIn0",
            "SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c",
        )
        found = detector.scan_content(".".join(segments))
        jwt_hits = [m for m in found if m.pattern_name == "jwt_token"]
        # Guard against a vacuous pass: all() over an empty list is True.
        assert jwt_hits, "expected a jwt_token match"
        assert all(m.severity == "medium" for m in jwt_hits)
95549e5… lmata 182
95549e5… lmata 183
95549e5… lmata 184 # ---------------------------------------------------------------------------
95549e5… lmata 185 # Redaction tests
95549e5… lmata 186 # ---------------------------------------------------------------------------
95549e5… lmata 187
95549e5… lmata 188
class TestRedaction:
    """``redact`` replaces detected secrets with ``REDACTED``; clean text is unchanged."""

    def test_redact_aws_key(self, detector):
        """An AWS access key is removed from the output."""
        cleaned = detector.redact("key = AKIAIOSFODNN7EXAMPLE")
        assert "AKIAIOSFODNN7EXAMPLE" not in cleaned
        assert REDACTED in cleaned

    def test_redact_password(self, detector):
        """A literal password value is removed from the output."""
        cleaned = detector.redact('password = "hunter2hunter2"')
        assert "hunter2hunter2" not in cleaned
        assert REDACTED in cleaned

    def test_redact_pem_header(self, detector):
        """The PEM ``BEGIN`` header line does not survive redaction."""
        pem = "-----BEGIN RSA PRIVATE KEY-----\nMIIEowIBAAKCAQEA\n-----END RSA PRIVATE KEY-----"
        cleaned = detector.redact(pem)
        assert "-----BEGIN RSA PRIVATE KEY-----" not in cleaned
        assert REDACTED in cleaned

    def test_redact_connection_string(self, detector):
        """The password embedded in a database URI is removed from the output."""
        # The input must actually contain "s3cret"; otherwise the first
        # assertion below would hold trivially. Credentials and host are fake.
        cleaned = detector.redact("postgresql://admin:s3cret@db.example.com/mydb")
        assert "s3cret" not in cleaned
        assert REDACTED in cleaned

    def test_redact_jwt(self, detector):
        """A complete JWT no longer appears verbatim after redaction."""
        token = ".".join(
            (
                "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9",
                "eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ",
                "SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c",
            )
        )
        cleaned = detector.redact(token)
        assert token not in cleaned
        assert REDACTED in cleaned

    def test_redact_returns_unchanged_clean_text(self, detector):
        """Text with no secrets is returned exactly as given."""
        source = "def hello():\n return 'world'\n"
        assert detector.redact(source) == source

    def test_redact_multiple_secrets_in_one_string(self, detector):
        """Several secrets in one input are all removed."""
        source = "AKIAIOSFODNN7EXAMPLE\n" + 'password = "mysecretvalue"\n'
        cleaned = detector.redact(source)
        assert "AKIAIOSFODNN7EXAMPLE" not in cleaned
        assert "mysecretvalue" not in cleaned
95549e5… lmata 236
95549e5… lmata 237
95549e5… lmata 238 # ---------------------------------------------------------------------------
95549e5… lmata 239 # scan_file tests
95549e5… lmata 240 # ---------------------------------------------------------------------------
95549e5… lmata 241
95549e5… lmata 242
class TestScanFile:
    """``scan_file`` on files with secrets, without secrets, and missing files."""

    def test_scan_file_detects_secrets(self, detector, tmp_path):
        """A file containing an AWS key yields an ``aws_access_key`` match."""
        target = tmp_path / "config.py"
        target.write_text('AWS_KEY = "AKIAIOSFODNN7EXAMPLE"\n', encoding="utf-8")
        found = detector.scan_file(target)
        assert len(found) >= 1
        assert "aws_access_key" in [m.pattern_name for m in found]

    def test_scan_file_clean_file(self, detector, tmp_path):
        """A file with no secrets yields an empty list."""
        target = tmp_path / "utils.py"
        target.write_text("def add(a, b):\n return a + b\n", encoding="utf-8")
        assert detector.scan_file(target) == []

    def test_scan_file_missing_file_returns_empty(self, detector, tmp_path):
        """A path that does not exist yields an empty list."""
        absent = tmp_path / "does_not_exist.py"
        assert detector.scan_file(absent) == []
95549e5… lmata 261
95549e5… lmata 262
95549e5… lmata 263 # ---------------------------------------------------------------------------
95549e5… lmata 264 # No false positives on clean code
95549e5… lmata 265 # ---------------------------------------------------------------------------
95549e5… lmata 266
95549e5… lmata 267
class TestNoFalsePositives:
    """Harmless snippets that resemble secrets must produce no matches."""

    CLEAN_SNIPPETS = [
        # Normal variable names
        "password_length = 12\npassword_complexity = True\n",
        # Password prompt (no literal value)
        "password = input('Enter password: ')\n",
        # Short strings (below minimum length threshold)
        "secret = 'abc'\n",
        # Postgres URL without credentials
        "DB_URL = 'postgresql://localhost/mydb'\n",
        # A function named after a key concept
        "def get_api_key_name():\n return 'key_name'\n",
        # Normal assignment that looks vaguely like an env var
        "API_BASE_URL = 'https://api.example.com'\n",
        # JWT-shaped but too short / clearly not a real token
        "token = 'eyJ.x.y'\n",
    ]

    @pytest.mark.parametrize("snippet", CLEAN_SNIPPETS)
    def test_no_false_positive(self, detector, snippet):
        """Scanning *snippet* returns an empty list."""
        found = detector.scan_content(snippet)
        failure_note = f"Unexpected match in: {snippet!r} → {found}"
        assert found == [], failure_note
95549e5… lmata 290
95549e5… lmata 291
95549e5… lmata 292 # ---------------------------------------------------------------------------
95549e5… lmata 293 # SensitiveMatch dataclass
95549e5… lmata 294 # ---------------------------------------------------------------------------
95549e5… lmata 295
95549e5… lmata 296
class TestSensitiveMatch:
    """``SensitiveMatch`` stores the values it is constructed with."""

    def test_fields(self):
        """Each keyword passed to the constructor is readable as an attribute."""
        expected = {
            "pattern_name": "aws_access_key",
            "line_number": 3,
            "match_text": REDACTED,
            "severity": "high",
        }
        record = SensitiveMatch(**expected)
        assert record.pattern_name == expected["pattern_name"]
        assert record.line_number == expected["line_number"]
        assert record.match_text == expected["match_text"]
        assert record.severity == expected["severity"]
95549e5… lmata 309
95549e5… lmata 310
95549e5… lmata 311 # ---------------------------------------------------------------------------
95549e5… lmata 312 # CLI --redact flag
95549e5… lmata 313 # ---------------------------------------------------------------------------
95549e5… lmata 314
95549e5… lmata 315
class TestCLIRedactFlag:
    """The ``ingest`` CLI command and its ``--redact`` flag."""

    @staticmethod
    def _run_ingest(cli_args, stats):
        """Invoke the CLI with the store and ``RepoIngester`` mocked out.

        Runs inside an isolated filesystem containing an empty ``src``
        directory. *stats* is what the mocked ``ingest()`` call returns.
        Returns ``(result, ingester_cls_mock)``.
        """
        from navegador.cli.commands import main

        runner = CliRunner()
        with runner.isolated_filesystem():
            Path("src").mkdir()
            store_patch = patch("navegador.cli.commands._get_store", return_value=MagicMock())
            ingester_patch = patch("navegador.ingestion.RepoIngester")
            with store_patch, ingester_patch as ingester_cls:
                ingester_cls.return_value.ingest.return_value = stats
                result = runner.invoke(main, cli_args)
        return result, ingester_cls

    def test_redact_flag_accepted(self):
        """--redact flag should be accepted by the ingest command without error."""
        stats = {"files": 1, "functions": 2, "classes": 0, "edges": 3, "skipped": 0}
        result, _ = self._run_ingest(["ingest", "src", "--redact"], stats)
        assert result.exit_code == 0

    def test_redact_flag_passes_to_ingester(self):
        """RepoIngester must be constructed with redact=True when --redact is given."""
        stats = {"files": 0, "functions": 0, "classes": 0, "edges": 0, "skipped": 0}
        _, ingester_cls = self._run_ingest(["ingest", "src", "--redact"], stats)
        ingester_cls.assert_called_once()
        ctor_kwargs = ingester_cls.call_args[1]
        assert ctor_kwargs.get("redact") is True

    def test_no_redact_flag_defaults_false(self):
        """Without --redact, RepoIngester should be constructed with redact=False (default)."""
        stats = {"files": 0, "functions": 0, "classes": 0, "edges": 0, "skipped": 0}
        _, ingester_cls = self._run_ingest(["ingest", "src"], stats)
        ingester_cls.assert_called_once()
        ctor_kwargs = ingester_cls.call_args[1]
        assert ctor_kwargs.get("redact", False) is False

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button