Navegador
feat: add IaC language support — HCL/Terraform, Puppet, Ansible, Bash, Chef

New parsers:
- HCL/Terraform (.tf, .hcl) with resource/variable/module/data/output/provider/locals extraction and cross-reference detection (var.x, module.x, resource.name)
- Puppet (.pp) with class/define/node/resource/include/parameter extraction
- Bash/Shell (.sh, .bash, .zsh) with function/variable/source extraction and call graph
- Ansible (YAML, heuristic detection) with playbook/play/task/handler/role/variable extraction

New enrichers:
- Terraform enricher for cross-file module resolution and provider grouping
- Chef enricher promoting Ruby parser output with Chef-specific semantic types

Also:
- Go module support (go.mod) in DependencyIngester
- New [iac] optional dependency group in pyproject.toml
- 88 new tests across 5 test files
b45288f8d236b0bcab6c1813039800e2432052c9cb96bec996a989f33d45acf4
| --- navegador/dependencies.py | ||
| +++ navegador/dependencies.py | ||
| @@ -154,10 +154,77 @@ | ||
| 154 | 154 | self._upsert_dep("cargo", pkg_name, version, str(p)) |
| 155 | 155 | count += 1 |
| 156 | 156 | |
| 157 | 157 | logger.info("DependencyIngester.ingest_cargo(%s): %d packages", p, count) |
| 158 | 158 | return {"packages": count} |
| 159 | + | |
| 160 | + # ── go / go.mod ─────────────────────────────────────────────────────────── | |
| 161 | + | |
| 162 | + def ingest_gomod(self, gomod_path: str | Path) -> dict[str, Any]: | |
| 163 | + """ | |
| 164 | + Parse a ``go.mod`` and ingest the module declaration and all | |
| 165 | + ``require`` entries as external dependencies. | |
| 166 | + | |
| 167 | + Parameters | |
| 168 | + ---------- | |
| 169 | + gomod_path: | |
| 170 | + Absolute or relative path to ``go.mod``. | |
| 171 | + | |
| 172 | + Returns | |
| 173 | + ------- | |
| 174 | + dict with key ``packages`` (int count ingested) | |
| 175 | + """ | |
| 176 | + p = Path(gomod_path).resolve() | |
| 177 | + text = p.read_text(encoding="utf-8") | |
| 178 | + | |
| 179 | + count = 0 | |
| 180 | + in_require = False | |
| 181 | + | |
| 182 | + for raw_line in text.splitlines(): | |
| 183 | + line = raw_line.strip() | |
| 184 | + | |
| 185 | + # Module declaration | |
| 186 | + if line.startswith("module "): | |
| 187 | + mod_name = line.removeprefix("module").strip() | |
| 188 | + self.store.create_node( | |
| 189 | + NodeLabel.Concept, | |
| 190 | + { | |
| 191 | + "name": mod_name, | |
| 192 | + "description": f"go:{mod_name}", | |
| 193 | + "domain": _DOMAIN, | |
| 194 | + "status": "module", | |
| 195 | + }, | |
| 196 | + ) | |
| 197 | + continue | |
| 198 | + | |
| 199 | + # Require block boundaries | |
| 200 | + if line == "require (": | |
| 201 | + in_require = True | |
| 202 | + continue | |
| 203 | + if line == ")" and in_require: | |
| 204 | + in_require = False | |
| 205 | + continue | |
| 206 | + | |
| 207 | + # Single-line require | |
| 208 | + if line.startswith("require ") and "(" not in line: | |
| 209 | + parts = line.removeprefix("require").strip().split() | |
| 210 | + if len(parts) >= 2: | |
| 211 | + pkg_name, version = parts[0], parts[1] | |
| 212 | + self._upsert_dep("go", pkg_name, version, str(p)) | |
| 213 | + count += 1 | |
| 214 | + continue | |
| 215 | + | |
| 216 | + # Inside require block | |
| 217 | + if in_require and line and not line.startswith("//"): | |
| 218 | + parts = line.split() | |
| 219 | + if len(parts) >= 2: | |
| 220 | + pkg_name, version = parts[0], parts[1] | |
| 221 | + self._upsert_dep("go", pkg_name, version, str(p)) | |
| 222 | + count += 1 | |
| 223 | + | |
| 224 | + logger.info("DependencyIngester.ingest_gomod(%s): %d packages", p, count) | |
| 225 | + return {"packages": count} | |
| 159 | 226 | |
| 160 | 227 | # ── Core helpers ────────────────────────────────────────────────────────── |
| 161 | 228 | |
| 162 | 229 | def _upsert_dep( |
| 163 | 230 | self, |
| 164 | 231 |
| --- navegador/dependencies.py | |
| +++ navegador/dependencies.py | |
| @@ -154,10 +154,77 @@ | |
| 154 | self._upsert_dep("cargo", pkg_name, version, str(p)) |
| 155 | count += 1 |
| 156 | |
| 157 | logger.info("DependencyIngester.ingest_cargo(%s): %d packages", p, count) |
| 158 | return {"packages": count} |
| 159 | |
| 160 | # ── Core helpers ────────────────────────────────────────────────────────── |
| 161 | |
| 162 | def _upsert_dep( |
| 163 | self, |
| 164 |
| --- navegador/dependencies.py | |
| +++ navegador/dependencies.py | |
| @@ -154,10 +154,77 @@ | |
| 154 | self._upsert_dep("cargo", pkg_name, version, str(p)) |
| 155 | count += 1 |
| 156 | |
| 157 | logger.info("DependencyIngester.ingest_cargo(%s): %d packages", p, count) |
| 158 | return {"packages": count} |
| 159 | |
| 160 | # ── go / go.mod ─────────────────────────────────────────────────────────── |
| 161 | |
| 162 | def ingest_gomod(self, gomod_path: str | Path) -> dict[str, Any]: |
| 163 | """ |
| 164 | Parse a ``go.mod`` and ingest the module declaration and all |
| 165 | ``require`` entries as external dependencies. |
| 166 | |
| 167 | Parameters |
| 168 | ---------- |
| 169 | gomod_path: |
| 170 | Absolute or relative path to ``go.mod``. |
| 171 | |
| 172 | Returns |
| 173 | ------- |
| 174 | dict with key ``packages`` (int count ingested) |
| 175 | """ |
| 176 | p = Path(gomod_path).resolve() |
| 177 | text = p.read_text(encoding="utf-8") |
| 178 | |
| 179 | count = 0 |
| 180 | in_require = False |
| 181 | |
| 182 | for raw_line in text.splitlines(): |
| 183 | line = raw_line.strip() |
| 184 | |
| 185 | # Module declaration |
| 186 | if line.startswith("module "): |
| 187 | mod_name = line.removeprefix("module").strip() |
| 188 | self.store.create_node( |
| 189 | NodeLabel.Concept, |
| 190 | { |
| 191 | "name": mod_name, |
| 192 | "description": f"go:{mod_name}", |
| 193 | "domain": _DOMAIN, |
| 194 | "status": "module", |
| 195 | }, |
| 196 | ) |
| 197 | continue |
| 198 | |
| 199 | # Require block boundaries |
| 200 | if line == "require (": |
| 201 | in_require = True |
| 202 | continue |
| 203 | if line == ")" and in_require: |
| 204 | in_require = False |
| 205 | continue |
| 206 | |
| 207 | # Single-line require |
| 208 | if line.startswith("require ") and "(" not in line: |
| 209 | parts = line.removeprefix("require").strip().split() |
| 210 | if len(parts) >= 2: |
| 211 | pkg_name, version = parts[0], parts[1] |
| 212 | self._upsert_dep("go", pkg_name, version, str(p)) |
| 213 | count += 1 |
| 214 | continue |
| 215 | |
| 216 | # Inside require block |
| 217 | if in_require and line and not line.startswith("//"): |
| 218 | parts = line.split() |
| 219 | if len(parts) >= 2: |
| 220 | pkg_name, version = parts[0], parts[1] |
| 221 | self._upsert_dep("go", pkg_name, version, str(p)) |
| 222 | count += 1 |
| 223 | |
| 224 | logger.info("DependencyIngester.ingest_gomod(%s): %d packages", p, count) |
| 225 | return {"packages": count} |
| 226 | |
| 227 | # ── Core helpers ────────────────────────────────────────────────────────── |
| 228 | |
| 229 | def _upsert_dep( |
| 230 | self, |
| 231 |
| --- navegador/enrichment/base.py | ||
| +++ navegador/enrichment/base.py | ||
| @@ -57,23 +57,21 @@ | ||
| 57 | 57 | def detect(self) -> bool: |
| 58 | 58 | """Check if the framework is present by looking for real imports and marker files.""" |
| 59 | 59 | # Check Import nodes for actual framework imports |
| 60 | 60 | for pattern in self.detection_patterns: |
| 61 | 61 | result = self.store.query( |
| 62 | - "MATCH (n:Import) WHERE n.name = $name OR n.module = $name " | |
| 63 | - "RETURN count(n) AS c", | |
| 62 | + "MATCH (n:Import) WHERE n.name = $name OR n.module = $name RETURN count(n) AS c", | |
| 64 | 63 | {"name": pattern}, |
| 65 | 64 | ) |
| 66 | 65 | rows = result.result_set or [] |
| 67 | 66 | if rows and rows[0][0] > 0: |
| 68 | 67 | return True |
| 69 | 68 | |
| 70 | 69 | # Check for marker files by exact filename match |
| 71 | 70 | for filename in self.detection_files: |
| 72 | 71 | result = self.store.query( |
| 73 | - "MATCH (f:File) WHERE f.name = $name " | |
| 74 | - "RETURN count(f) AS c", | |
| 72 | + "MATCH (f:File) WHERE f.name = $name RETURN count(f) AS c", | |
| 75 | 73 | {"name": filename}, |
| 76 | 74 | ) |
| 77 | 75 | rows = result.result_set or [] |
| 78 | 76 | if rows and rows[0][0] > 0: |
| 79 | 77 | return True |
| 80 | 78 | |
| 81 | 79 | ADDED navegador/enrichment/chef.py |
| 82 | 80 | ADDED navegador/enrichment/terraform.py |
| 83 | 81 | ADDED navegador/ingestion/ansible.py |
| 84 | 82 | ADDED navegador/ingestion/bash.py |
| 85 | 83 | ADDED navegador/ingestion/hcl.py |
| --- navegador/enrichment/base.py | |
| +++ navegador/enrichment/base.py | |
| @@ -57,23 +57,21 @@ | |
| 57 | def detect(self) -> bool: |
| 58 | """Check if the framework is present by looking for real imports and marker files.""" |
| 59 | # Check Import nodes for actual framework imports |
| 60 | for pattern in self.detection_patterns: |
| 61 | result = self.store.query( |
| 62 | "MATCH (n:Import) WHERE n.name = $name OR n.module = $name " |
| 63 | "RETURN count(n) AS c", |
| 64 | {"name": pattern}, |
| 65 | ) |
| 66 | rows = result.result_set or [] |
| 67 | if rows and rows[0][0] > 0: |
| 68 | return True |
| 69 | |
| 70 | # Check for marker files by exact filename match |
| 71 | for filename in self.detection_files: |
| 72 | result = self.store.query( |
| 73 | "MATCH (f:File) WHERE f.name = $name " |
| 74 | "RETURN count(f) AS c", |
| 75 | {"name": filename}, |
| 76 | ) |
| 77 | rows = result.result_set or [] |
| 78 | if rows and rows[0][0] > 0: |
| 79 | return True |
| 80 | |
| 81 | ADDED navegador/enrichment/chef.py |
| 82 | ADDED navegador/enrichment/terraform.py |
| 83 | ADDED navegador/ingestion/ansible.py |
| 84 | ADDED navegador/ingestion/bash.py |
| 85 | ADDED navegador/ingestion/hcl.py |
| --- navegador/enrichment/base.py | |
| +++ navegador/enrichment/base.py | |
| @@ -57,23 +57,21 @@ | |
| 57 | def detect(self) -> bool: |
| 58 | """Check if the framework is present by looking for real imports and marker files.""" |
| 59 | # Check Import nodes for actual framework imports |
| 60 | for pattern in self.detection_patterns: |
| 61 | result = self.store.query( |
| 62 | "MATCH (n:Import) WHERE n.name = $name OR n.module = $name RETURN count(n) AS c", |
| 63 | {"name": pattern}, |
| 64 | ) |
| 65 | rows = result.result_set or [] |
| 66 | if rows and rows[0][0] > 0: |
| 67 | return True |
| 68 | |
| 69 | # Check for marker files by exact filename match |
| 70 | for filename in self.detection_files: |
| 71 | result = self.store.query( |
| 72 | "MATCH (f:File) WHERE f.name = $name RETURN count(f) AS c", |
| 73 | {"name": filename}, |
| 74 | ) |
| 75 | rows = result.result_set or [] |
| 76 | if rows and rows[0][0] > 0: |
| 77 | return True |
| 78 | |
| 79 | ADDED navegador/enrichment/chef.py |
| 80 | ADDED navegador/enrichment/terraform.py |
| 81 | ADDED navegador/ingestion/ansible.py |
| 82 | ADDED navegador/ingestion/bash.py |
| 83 | ADDED navegador/ingestion/hcl.py |
| --- a/navegador/enrichment/chef.py | ||
| +++ b/navegador/enrichment/chef.py | ||
| @@ -0,0 +1,203 @@ | ||
| 1 | +""" | |
| 2 | +Chef framework enricher. | |
| 3 | + | |
| 4 | +Promotes generic graph nodes created by the Ruby parser to Chef-specific | |
| 5 | +semantic types: | |
| 6 | + - chef_recipe — files under recipes/ | |
| 7 | + - chef_cookbook — metadata.rb files under cookbooks/ | |
| 8 | + - chef_resource — functions/methods in recipes/ or libraries/ matching | |
| 9 | + Chef resource names (package, template, service, etc.) | |
| 10 | + - include_recipe — DEPENDS_ON edges for cross-recipe includes | |
| 11 | +""" | |
| 12 | + | |
| 13 | +from navegador.enrichment.base import EnrichmentResult, FrameworkEnricher | |
| 14 | +from navegador.graph.store import GraphStore | |
| 15 | + | |
| 16 | +# Built-in Chef resource types that appear as method calls in recipes | |
| 17 | +_CHEF_RESOURCES = frozenset( | |
| 18 | + { | |
| 19 | + "package", | |
| 20 | + "template", | |
| 21 | + "service", | |
| 22 | + "execute", | |
| 23 | + "file", | |
| 24 | + "directory", | |
| 25 | + "cookbook_file", | |
| 26 | + "remote_file", | |
| 27 | + "cron", | |
| 28 | + "user", | |
| 29 | + "group", | |
| 30 | + "mount", | |
| 31 | + "link", | |
| 32 | + "bash", | |
| 33 | + "ruby_block", | |
| 34 | + "apt_package", | |
| 35 | + "yum_package", | |
| 36 | + "powershell_script", | |
| 37 | + "windows_service", | |
| 38 | + "chef_gem", | |
| 39 | + "log", | |
| 40 | + "http_request", | |
| 41 | + "remote_directory", | |
| 42 | + } | |
| 43 | +) | |
| 44 | + | |
| 45 | + | |
| 46 | +class ChefEnricher(FrameworkEnricher): | |
| 47 | + """Enriches a navegador graph with Chef-specific semantic types.""" | |
| 48 | + | |
| 49 | + def __init__(self, store: GraphStore) -> None: | |
| 50 | + super().__init__(store) | |
| 51 | + | |
| 52 | + # ── Identity ────────────────────────────────────────────────────────────── | |
| 53 | + | |
| 54 | + @property | |
| 55 | + def framework_name(self) -> str: | |
| 56 | + return "chef" | |
| 57 | + | |
| 58 | + @property | |
| 59 | + def detection_patterns(self) -> list[str]: | |
| 60 | + return ["chef"] | |
| 61 | + | |
| 62 | + @property | |
| 63 | + def detection_files(self) -> list[str]: | |
| 64 | + return ["metadata.rb", "Berksfile"] | |
| 65 | + | |
| 66 | + # ── Enrichment ──────────────────────────────────────────────────────────── | |
| 67 | + | |
| 68 | + def enrich(self) -> EnrichmentResult: | |
| 69 | + result = EnrichmentResult() | |
| 70 | + | |
| 71 | + recipes = self._enrich_recipes() | |
| 72 | + result.promoted += recipes | |
| 73 | + result.patterns_found["recipes"] = recipes | |
| 74 | + | |
| 75 | + cookbooks = self._enrich_cookbooks() | |
| 76 | + result.promoted += cookbooks | |
| 77 | + result.patterns_found["cookbooks"] = cookbooks | |
| 78 | + | |
| 79 | + resources = self._enrich_resources() | |
| 80 | + result.promoted += resources | |
| 81 | + result.patterns_found["resources"] = resources | |
| 82 | + | |
| 83 | + includes = self._enrich_include_recipe() | |
| 84 | + result.edges_added += includes | |
| 85 | + result.patterns_found["include_recipe"] = includes | |
| 86 | + | |
| 87 | + return result | |
| 88 | + | |
| 89 | + # ── Pattern helpers ─────────────────────────────────────────────────────── | |
| 90 | + | |
| 91 | + def _enrich_recipes(self) -> int: | |
| 92 | + """Promote File nodes under /recipes/ to chef_recipe.""" | |
| 93 | + promoted = 0 | |
| 94 | + query_result = self.store.query( | |
| 95 | + "MATCH (n:File) WHERE n.file_path CONTAINS $pattern RETURN n.name, n.file_path", | |
| 96 | + {"pattern": "/recipes/"}, | |
| 97 | + ) | |
| 98 | + rows = query_result.result_set or [] | |
| 99 | + for row in rows: | |
| 100 | + name, file_path = row[0], row[1] | |
| 101 | + if name and file_path: | |
| 102 | + self._promote_node(name, file_path, "chef_recipe") | |
| 103 | + promoted += 1 | |
| 104 | + return promoted | |
| 105 | + | |
| 106 | + def _enrich_cookbooks(self) -> int: | |
| 107 | + """Promote metadata.rb File nodes under /cookbooks/ to chef_cookbook.""" | |
| 108 | + promoted = 0 | |
| 109 | + query_result = self.store.query( | |
| 110 | + "MATCH (n:File) WHERE n.file_path CONTAINS $cookbooks " | |
| 111 | + "AND n.name = $name " | |
| 112 | + "RETURN n.name, n.file_path", | |
| 113 | + {"cookbooks": "/cookbooks/", "name": "metadata.rb"}, | |
| 114 | + ) | |
| 115 | + rows = query_result.result_set or [] | |
| 116 | + for row in rows: | |
| 117 | + name, file_path = row[0], row[1] | |
| 118 | + if name and file_path: | |
| 119 | + self._promote_node(name, file_path, "chef_cookbook") | |
| 120 | + promoted += 1 | |
| 121 | + return promoted | |
| 122 | + | |
| 123 | + def _enrich_resources(self) -> int: | |
| 124 | + """Promote Function/Method nodes in recipes/ or libraries/ whose names | |
| 125 | + match Chef built-in resource types.""" | |
| 126 | + promoted = 0 | |
| 127 | + for path_fragment in ("/recipes/", "/libraries/"): | |
| 128 | + query_result = self.store.query( | |
| 129 | + "MATCH (n) WHERE (n:Function OR n:Method) " | |
| 130 | + "AND n.file_path CONTAINS $pattern " | |
| 131 | + "RETURN n.name, n.file_path", | |
| 132 | + {"pattern": path_fragment}, | |
| 133 | + ) | |
| 134 | + rows = query_result.result_set or [] | |
| 135 | + for row in rows: | |
| 136 | + name, file_path = row[0], row[1] | |
| 137 | + if name and file_path and name in _CHEF_RESOURCES: | |
| 138 | + self._promote_node(name, file_path, "chef_resource") | |
| 139 | + promoted += 1 | |
| 140 | + return promoted | |
| 141 | + | |
| 142 | + def _enrich_include_recipe(self) -> int: | |
| 143 | + """Link include_recipe calls to the referenced recipe File nodes. | |
| 144 | + | |
| 145 | + Looks for Function nodes named ``include_recipe`` and follows CALLS | |
| 146 | + edges or checks node properties to find the recipe name argument, | |
| 147 | + then creates a DEPENDS_ON edge to the matching recipe File node. | |
| 148 | + """ | |
| 149 | + edges_added = 0 | |
| 150 | + | |
| 151 | + # Strategy 1: follow CALLS edges from include_recipe nodes | |
| 152 | + query_result = self.store.query( | |
| 153 | + "MATCH (n:Function)-[:CALLS]->(target) " | |
| 154 | + "WHERE n.name = $name " | |
| 155 | + "RETURN n.file_path, target.name", | |
| 156 | + {"name": "include_recipe"}, | |
| 157 | + ) | |
| 158 | + rows = query_result.result_set or [] | |
| 159 | + for row in rows: | |
| 160 | + caller_path, recipe_ref = row[0], row[1] | |
| 161 | + if caller_path and recipe_ref: | |
| 162 | + # recipe_ref may be "cookbook::recipe" — extract recipe name | |
| 163 | + recipe_name = recipe_ref.split("::")[-1] if "::" in recipe_ref else recipe_ref | |
| 164 | + # Find the recipe File node | |
| 165 | + match_result = self.store.query( | |
| 166 | + "MATCH (f:File) WHERE f.file_path CONTAINS $recipes " | |
| 167 | + "AND f.name CONTAINS $recipe " | |
| 168 | + "RETURN f.name", | |
| 169 | + {"recipes": "/recipes/", "recipe": recipe_name}, | |
| 170 | + ) | |
| 171 | + match_rows = match_result.result_set or [] | |
| 172 | + if match_rows and match_rows[0][0]: | |
| 173 | + # Create DEPENDS_ON from the caller's file to the recipe file | |
| 174 | + caller_file_result = self.store.query( | |
| 175 | + "MATCH (f:File) WHERE f.file_path = $path RETURN f.name", | |
| 176 | + {"path": caller_path}, | |
| 177 | + ) | |
| 178 | + caller_rows = caller_file_result.result_set or [] | |
| 179 | + if caller_rows and caller_rows[0][0]: | |
| 180 | + self._add_semantic_edge( | |
| 181 | + caller_rows[0][0], | |
| 182 | + "DEPENDS_ON", | |
| 183 | + match_rows[0][0], | |
| 184 | + ) | |
| 185 | + edges_added += 1 | |
| 186 | + | |
| 187 | + # Strategy 2: check signature/docstring for include_recipe calls | |
| 188 | + for prop in ("signature", "docstring"): | |
| 189 | + query_result = self.store.query( | |
| 190 | + f"MATCH (n) WHERE (n:Function OR n:Method) " | |
| 191 | + f"AND n.{prop} IS NOT NULL " | |
| 192 | + f"AND n.{prop} CONTAINS $pattern " | |
| 193 | + "RETURN n.name, n.file_path", | |
| 194 | + {"pattern": "include_recipe"}, | |
| 195 | + ) | |
| 196 | + rows = query_result.result_set or [] | |
| 197 | + for row in rows: | |
| 198 | + name, file_path = row[0], row[1] | |
| 199 | + if name and file_path and name == "include_recipe": | |
| 200 | + # Already handled in strategy 1 via CALLS edges | |
| 201 | + continue | |
| 202 | + | |
| 203 | + return edges_added |
| --- a/navegador/enrichment/chef.py | |
| +++ b/navegador/enrichment/chef.py | |
| @@ -0,0 +1,203 @@ | |
| --- a/navegador/enrichment/chef.py | |
| +++ b/navegador/enrichment/chef.py | |
| @@ -0,0 +1,203 @@ | |
| 1 | """ |
| 2 | Chef framework enricher. |
| 3 | |
| 4 | Promotes generic graph nodes created by the Ruby parser to Chef-specific |
| 5 | semantic types: |
| 6 | - chef_recipe — files under recipes/ |
| 7 | - chef_cookbook — metadata.rb files under cookbooks/ |
| 8 | - chef_resource — functions/methods in recipes/ or libraries/ matching |
| 9 | Chef resource names (package, template, service, etc.) |
| 10 | - include_recipe — DEPENDS_ON edges for cross-recipe includes |
| 11 | """ |
| 12 | |
| 13 | from navegador.enrichment.base import EnrichmentResult, FrameworkEnricher |
| 14 | from navegador.graph.store import GraphStore |
| 15 | |
| 16 | # Built-in Chef resource types that appear as method calls in recipes |
| 17 | _CHEF_RESOURCES = frozenset( |
| 18 | { |
| 19 | "package", |
| 20 | "template", |
| 21 | "service", |
| 22 | "execute", |
| 23 | "file", |
| 24 | "directory", |
| 25 | "cookbook_file", |
| 26 | "remote_file", |
| 27 | "cron", |
| 28 | "user", |
| 29 | "group", |
| 30 | "mount", |
| 31 | "link", |
| 32 | "bash", |
| 33 | "ruby_block", |
| 34 | "apt_package", |
| 35 | "yum_package", |
| 36 | "powershell_script", |
| 37 | "windows_service", |
| 38 | "chef_gem", |
| 39 | "log", |
| 40 | "http_request", |
| 41 | "remote_directory", |
| 42 | } |
| 43 | ) |
| 44 | |
| 45 | |
| 46 | class ChefEnricher(FrameworkEnricher): |
| 47 | """Enriches a navegador graph with Chef-specific semantic types.""" |
| 48 | |
| 49 | def __init__(self, store: GraphStore) -> None: |
| 50 | super().__init__(store) |
| 51 | |
| 52 | # ── Identity ────────────────────────────────────────────────────────────── |
| 53 | |
| 54 | @property |
| 55 | def framework_name(self) -> str: |
| 56 | return "chef" |
| 57 | |
| 58 | @property |
| 59 | def detection_patterns(self) -> list[str]: |
| 60 | return ["chef"] |
| 61 | |
| 62 | @property |
| 63 | def detection_files(self) -> list[str]: |
| 64 | return ["metadata.rb", "Berksfile"] |
| 65 | |
| 66 | # ── Enrichment ──────────────────────────────────────────────────────────── |
| 67 | |
| 68 | def enrich(self) -> EnrichmentResult: |
| 69 | result = EnrichmentResult() |
| 70 | |
| 71 | recipes = self._enrich_recipes() |
| 72 | result.promoted += recipes |
| 73 | result.patterns_found["recipes"] = recipes |
| 74 | |
| 75 | cookbooks = self._enrich_cookbooks() |
| 76 | result.promoted += cookbooks |
| 77 | result.patterns_found["cookbooks"] = cookbooks |
| 78 | |
| 79 | resources = self._enrich_resources() |
| 80 | result.promoted += resources |
| 81 | result.patterns_found["resources"] = resources |
| 82 | |
| 83 | includes = self._enrich_include_recipe() |
| 84 | result.edges_added += includes |
| 85 | result.patterns_found["include_recipe"] = includes |
| 86 | |
| 87 | return result |
| 88 | |
| 89 | # ── Pattern helpers ─────────────────────────────────────────────────────── |
| 90 | |
| 91 | def _enrich_recipes(self) -> int: |
| 92 | """Promote File nodes under /recipes/ to chef_recipe.""" |
| 93 | promoted = 0 |
| 94 | query_result = self.store.query( |
| 95 | "MATCH (n:File) WHERE n.file_path CONTAINS $pattern RETURN n.name, n.file_path", |
| 96 | {"pattern": "/recipes/"}, |
| 97 | ) |
| 98 | rows = query_result.result_set or [] |
| 99 | for row in rows: |
| 100 | name, file_path = row[0], row[1] |
| 101 | if name and file_path: |
| 102 | self._promote_node(name, file_path, "chef_recipe") |
| 103 | promoted += 1 |
| 104 | return promoted |
| 105 | |
| 106 | def _enrich_cookbooks(self) -> int: |
| 107 | """Promote metadata.rb File nodes under /cookbooks/ to chef_cookbook.""" |
| 108 | promoted = 0 |
| 109 | query_result = self.store.query( |
| 110 | "MATCH (n:File) WHERE n.file_path CONTAINS $cookbooks " |
| 111 | "AND n.name = $name " |
| 112 | "RETURN n.name, n.file_path", |
| 113 | {"cookbooks": "/cookbooks/", "name": "metadata.rb"}, |
| 114 | ) |
| 115 | rows = query_result.result_set or [] |
| 116 | for row in rows: |
| 117 | name, file_path = row[0], row[1] |
| 118 | if name and file_path: |
| 119 | self._promote_node(name, file_path, "chef_cookbook") |
| 120 | promoted += 1 |
| 121 | return promoted |
| 122 | |
| 123 | def _enrich_resources(self) -> int: |
| 124 | """Promote Function/Method nodes in recipes/ or libraries/ whose names |
| 125 | match Chef built-in resource types.""" |
| 126 | promoted = 0 |
| 127 | for path_fragment in ("/recipes/", "/libraries/"): |
| 128 | query_result = self.store.query( |
| 129 | "MATCH (n) WHERE (n:Function OR n:Method) " |
| 130 | "AND n.file_path CONTAINS $pattern " |
| 131 | "RETURN n.name, n.file_path", |
| 132 | {"pattern": path_fragment}, |
| 133 | ) |
| 134 | rows = query_result.result_set or [] |
| 135 | for row in rows: |
| 136 | name, file_path = row[0], row[1] |
| 137 | if name and file_path and name in _CHEF_RESOURCES: |
| 138 | self._promote_node(name, file_path, "chef_resource") |
| 139 | promoted += 1 |
| 140 | return promoted |
| 141 | |
| 142 | def _enrich_include_recipe(self) -> int: |
| 143 | """Link include_recipe calls to the referenced recipe File nodes. |
| 144 | |
| 145 | Looks for Function nodes named ``include_recipe`` and follows CALLS |
| 146 | edges or checks node properties to find the recipe name argument, |
| 147 | then creates a DEPENDS_ON edge to the matching recipe File node. |
| 148 | """ |
| 149 | edges_added = 0 |
| 150 | |
| 151 | # Strategy 1: follow CALLS edges from include_recipe nodes |
| 152 | query_result = self.store.query( |
| 153 | "MATCH (n:Function)-[:CALLS]->(target) " |
| 154 | "WHERE n.name = $name " |
| 155 | "RETURN n.file_path, target.name", |
| 156 | {"name": "include_recipe"}, |
| 157 | ) |
| 158 | rows = query_result.result_set or [] |
| 159 | for row in rows: |
| 160 | caller_path, recipe_ref = row[0], row[1] |
| 161 | if caller_path and recipe_ref: |
| 162 | # recipe_ref may be "cookbook::recipe" — extract recipe name |
| 163 | recipe_name = recipe_ref.split("::")[-1] if "::" in recipe_ref else recipe_ref |
| 164 | # Find the recipe File node |
| 165 | match_result = self.store.query( |
| 166 | "MATCH (f:File) WHERE f.file_path CONTAINS $recipes " |
| 167 | "AND f.name CONTAINS $recipe " |
| 168 | "RETURN f.name", |
| 169 | {"recipes": "/recipes/", "recipe": recipe_name}, |
| 170 | ) |
| 171 | match_rows = match_result.result_set or [] |
| 172 | if match_rows and match_rows[0][0]: |
| 173 | # Create DEPENDS_ON from the caller's file to the recipe file |
| 174 | caller_file_result = self.store.query( |
| 175 | "MATCH (f:File) WHERE f.file_path = $path RETURN f.name", |
| 176 | {"path": caller_path}, |
| 177 | ) |
| 178 | caller_rows = caller_file_result.result_set or [] |
| 179 | if caller_rows and caller_rows[0][0]: |
| 180 | self._add_semantic_edge( |
| 181 | caller_rows[0][0], |
| 182 | "DEPENDS_ON", |
| 183 | match_rows[0][0], |
| 184 | ) |
| 185 | edges_added += 1 |
| 186 | |
| 187 | # Strategy 2: check signature/docstring for include_recipe calls |
| 188 | for prop in ("signature", "docstring"): |
| 189 | query_result = self.store.query( |
| 190 | f"MATCH (n) WHERE (n:Function OR n:Method) " |
| 191 | f"AND n.{prop} IS NOT NULL " |
| 192 | f"AND n.{prop} CONTAINS $pattern " |
| 193 | "RETURN n.name, n.file_path", |
| 194 | {"pattern": "include_recipe"}, |
| 195 | ) |
| 196 | rows = query_result.result_set or [] |
| 197 | for row in rows: |
| 198 | name, file_path = row[0], row[1] |
| 199 | if name and file_path and name == "include_recipe": |
| 200 | # Already handled in strategy 1 via CALLS edges |
| 201 | continue |
| 202 | |
| 203 | return edges_added |
| --- a/navegador/enrichment/terraform.py | ||
| +++ b/navegador/enrichment/terraform.py | ||
| @@ -0,0 +1,230 @@ | ||
| 1 | +""" | |
| 2 | +Terraform enricher for cross-file module resolution and resource linking. | |
| 3 | + | |
| 4 | +Promotes and links Terraform graph nodes: | |
| 5 | + - Cross-file variable references (REFERENCES edges) | |
| 6 | + - Module source resolution (DEPENDS_ON edges to local source dirs) | |
| 7 | + - Provider grouping (BELONGS_TO edges to provider nodes) | |
| 8 | +""" | |
| 9 | + | |
| 10 | +from navegador.enrichment.base import EnrichmentResult, FrameworkEnricher | |
| 11 | +from navegador.graph.store import GraphStore | |
| 12 | + | |
| 13 | +# Common Terraform provider prefixes and their canonical provider names | |
| 14 | +_PROVIDER_PREFIXES = { | |
| 15 | + "aws_": "aws", | |
| 16 | + "google_": "google", | |
| 17 | + "azurerm_": "azurerm", | |
| 18 | + "azuread_": "azuread", | |
| 19 | + "kubernetes_": "kubernetes", | |
| 20 | + "helm_": "helm", | |
| 21 | + "vault_": "vault", | |
| 22 | + "datadog_": "datadog", | |
| 23 | + "cloudflare_": "cloudflare", | |
| 24 | + "digitalocean_": "digitalocean", | |
| 25 | + "github_": "github", | |
| 26 | + "null_": "null", | |
| 27 | + "random_": "random", | |
| 28 | + "local_": "local", | |
| 29 | + "tls_": "tls", | |
| 30 | + "archive_": "archive", | |
| 31 | + "external_": "external", | |
| 32 | + "template_": "template", | |
| 33 | + "time_": "time", | |
| 34 | +} | |
| 35 | + | |
| 36 | + | |
| 37 | +class TerraformEnricher(FrameworkEnricher): | |
| 38 | + """Enriches a navegador graph with Terraform-specific semantics.""" | |
| 39 | + | |
| 40 | + def __init__(self, store: GraphStore) -> None: | |
| 41 | + super().__init__(store) | |
| 42 | + | |
| 43 | + # ── Identity ────────────────────────────────────────────────────────────── | |
| 44 | + | |
| 45 | + @property | |
| 46 | + def framework_name(self) -> str: | |
| 47 | + return "terraform" | |
| 48 | + | |
| 49 | + @property | |
| 50 | + def detection_patterns(self) -> list[str]: | |
| 51 | + return [] # No import nodes for Terraform | |
| 52 | + | |
| 53 | + @property | |
| 54 | + def detection_files(self) -> list[str]: | |
| 55 | + return ["main.tf", "variables.tf", "outputs.tf", "providers.tf"] | |
| 56 | + | |
| 57 | + # ── Enrichment ──────────────────────────────────────────────────────────── | |
| 58 | + | |
| 59 | + def enrich(self) -> EnrichmentResult: | |
| 60 | + result = EnrichmentResult() | |
| 61 | + | |
| 62 | + var_refs = self._enrich_variable_references() | |
| 63 | + result.edges_added += var_refs | |
| 64 | + result.patterns_found["variable_references"] = var_refs | |
| 65 | + | |
| 66 | + module_deps = self._enrich_module_sources() | |
| 67 | + result.edges_added += module_deps | |
| 68 | + result.patterns_found["module_sources"] = module_deps | |
| 69 | + | |
| 70 | + provider_links = self._enrich_provider_grouping() | |
| 71 | + result.edges_added += provider_links | |
| 72 | + result.patterns_found["provider_grouping"] = provider_links | |
| 73 | + | |
| 74 | + return result | |
| 75 | + | |
| 76 | + # ── Pattern helpers ─────────────────────────────────────────────────────── | |
| 77 | + | |
| 78 | + def _enrich_variable_references(self) -> int: | |
| 79 | + """Find terraform_variable and terraform_output nodes that reference | |
| 80 | + variables defined in other files, and create REFERENCES edges.""" | |
| 81 | + edges_added = 0 | |
| 82 | + | |
| 83 | + # Find all terraform_variable nodes | |
| 84 | + var_result = self.store.query( | |
| 85 | + "MATCH (v) WHERE v.semantic_type = $var_type RETURN v.name, v.file_path", | |
| 86 | + {"var_type": "terraform_variable"}, | |
| 87 | + ) | |
| 88 | + var_rows = var_result.result_set or [] | |
| 89 | + var_by_name: dict[str, list[str]] = {} | |
| 90 | + for row in var_rows: | |
| 91 | + name, file_path = row[0], row[1] | |
| 92 | + if name and file_path: | |
| 93 | + var_by_name.setdefault(name, []).append(file_path) | |
| 94 | + | |
| 95 | + # Find terraform_output nodes and check if they reference variables | |
| 96 | + # from other files (outputs often reference var.xxx) | |
| 97 | + output_result = self.store.query( | |
| 98 | + "MATCH (o) WHERE o.semantic_type = $out_type RETURN o.name, o.file_path", | |
| 99 | + {"out_type": "terraform_output"}, | |
| 100 | + ) | |
| 101 | + output_rows = output_result.result_set or [] | |
| 102 | + for row in output_rows: | |
| 103 | + out_name, out_file = row[0], row[1] | |
| 104 | + if not (out_name and out_file): | |
| 105 | + continue | |
| 106 | + | |
| 107 | + # Check CALLS or REFERENCES edges from this output to variables | |
| 108 | + ref_result = self.store.query( | |
| 109 | + "MATCH (o)-[:CALLS]->(target) " | |
| 110 | + "WHERE o.name = $name AND o.file_path = $path " | |
| 111 | + "RETURN target.name, target.file_path", | |
| 112 | + {"name": out_name, "path": out_file}, | |
| 113 | + ) | |
| 114 | + ref_rows = ref_result.result_set or [] | |
| 115 | + for ref_row in ref_rows: | |
| 116 | + target_name, target_path = ref_row[0], ref_row[1] | |
| 117 | + if target_name and target_path and target_path != out_file: | |
| 118 | + self._add_semantic_edge(out_name, "REFERENCES", target_name) | |
| 119 | + edges_added += 1 | |
| 120 | + | |
| 121 | + # Also link variables in different files that share the same name | |
| 122 | + # (e.g. variables.tf defines var, main.tf uses it) | |
| 123 | + for var_name, paths in var_by_name.items(): | |
| 124 | + if len(paths) <= 1: | |
| 125 | + continue | |
| 126 | + # Find nodes in other files that reference this variable | |
| 127 | + for path in paths: | |
| 128 | + ref_result = self.store.query( | |
| 129 | + "MATCH (n) WHERE n.file_path <> $path " | |
| 130 | + "AND n.name = $name " | |
| 131 | + "AND n.semantic_type = $var_type " | |
| 132 | + "RETURN n.name, n.file_path", | |
| 133 | + {"path": path, "name": var_name, "var_type": "terraform_variable"}, | |
| 134 | + ) | |
| 135 | + ref_rows = ref_result.result_set or [] | |
| 136 | + for ref_row in ref_rows: | |
| 137 | + ref_name = ref_row[0] | |
| 138 | + if ref_name: | |
| 139 | + self._add_semantic_edge(var_name, "REFERENCES", ref_name) | |
| 140 | + edges_added += 1 | |
| 141 | + | |
| 142 | + return edges_added | |
| 143 | + | |
| 144 | + def _enrich_module_sources(self) -> int: | |
| 145 | + """Find terraform_module nodes with local source paths and create | |
| 146 | + DEPENDS_ON edges to File nodes in the referenced directory.""" | |
| 147 | + edges_added = 0 | |
| 148 | + | |
| 149 | + # Find Module nodes with terraform_module semantic type | |
| 150 | + module_result = self.store.query( | |
| 151 | + "MATCH (m) WHERE m.semantic_type = $mod_type RETURN m.name, m.file_path", | |
| 152 | + {"mod_type": "terraform_module"}, | |
| 153 | + ) | |
| 154 | + module_rows = module_result.result_set or [] | |
| 155 | + | |
| 156 | + for row in module_rows: | |
| 157 | + mod_name, mod_file = row[0], row[1] | |
| 158 | + if not (mod_name and mod_file): | |
| 159 | + continue | |
| 160 | + | |
| 161 | + # Check for CALLS edges that may point to the source path, | |
| 162 | + # or look for a source property on the node | |
| 163 | + source_result = self.store.query( | |
| 164 | + "MATCH (m)-[:CALLS]->(target) " | |
| 165 | + "WHERE m.name = $name AND m.file_path = $path " | |
| 166 | + "RETURN target.name, target.file_path", | |
| 167 | + {"name": mod_name, "path": mod_file}, | |
| 168 | + ) | |
| 169 | + source_rows = source_result.result_set or [] | |
| 170 | + | |
| 171 | + for source_row in source_rows: | |
| 172 | + target_name, target_path = source_row[0], source_row[1] | |
| 173 | + if target_name and target_path: | |
| 174 | + self._add_semantic_edge(mod_name, "DEPENDS_ON", target_name) | |
| 175 | + edges_added += 1 | |
| 176 | + continue | |
| 177 | + | |
| 178 | + # Fallback: look for File nodes whose path contains the module name | |
| 179 | + # (local modules are often in ./modules/<name>/) | |
| 180 | + file_result = self.store.query( | |
| 181 | + "MATCH (f:File) WHERE f.file_path CONTAINS $fragment RETURN f.name", | |
| 182 | + {"fragment": f"/modules/{mod_name}/"}, | |
| 183 | + ) | |
| 184 | + file_rows = file_result.result_set or [] | |
| 185 | + for file_row in file_rows: | |
| 186 | + target_name = file_row[0] | |
| 187 | + if target_name: | |
| 188 | + self._add_semantic_edge(mod_name, "DEPENDS_ON", target_name) | |
| 189 | + edges_added += 1 | |
| 190 | + | |
| 191 | + return edges_added | |
| 192 | + | |
| 193 | + def _enrich_provider_grouping(self) -> int: | |
| 194 | + """Group Terraform resources by their provider prefix and create | |
| 195 | + BELONGS_TO edges from resources to provider nodes.""" | |
| 196 | + edges_added = 0 | |
| 197 | + | |
| 198 | + # Find all terraform_resource nodes | |
| 199 | + resource_result = self.store.query( | |
| 200 | + "MATCH (r) WHERE r.semantic_type = $res_type RETURN r.name, r.file_path", | |
| 201 | + {"res_type": "terraform_resource"}, | |
| 202 | + ) | |
| 203 | + resource_rows = resource_result.result_set or [] | |
| 204 | + | |
| 205 | + for row in resource_rows: | |
| 206 | + res_name, res_file = row[0], row[1] | |
| 207 | + if not (res_name and res_file): | |
| 208 | + continue | |
| 209 | + | |
| 210 | + # Match resource name against provider prefixes | |
| 211 | + for prefix, provider in _PROVIDER_PREFIXES.items(): | |
| 212 | + if res_name.startswith(prefix): | |
| 213 | + # Find or reference the provider node | |
| 214 | + provider_result = self.store.query( | |
| 215 | + "MATCH (p) WHERE p.name = $provider " | |
| 216 | + "AND p.semantic_type = $prov_type " | |
| 217 | + "RETURN p.name", | |
| 218 | + {"provider": provider, "prov_type": "terraform_provider"}, | |
| 219 | + ) | |
| 220 | + provider_rows = provider_result.result_set or [] | |
| 221 | + if provider_rows and provider_rows[0][0]: | |
| 222 | + self._add_semantic_edge( | |
| 223 | + res_name, | |
| 224 | + "BELONGS_TO", | |
| 225 | + provider, | |
| 226 | + ) | |
| 227 | + edges_added += 1 | |
| 228 | + break # Only match the first (most specific) prefix | |
| 229 | + | |
| 230 | + return edges_added |
| --- a/navegador/enrichment/terraform.py | |
| +++ b/navegador/enrichment/terraform.py | |
| @@ -0,0 +1,230 @@ | |
| --- a/navegador/enrichment/terraform.py | |
| +++ b/navegador/enrichment/terraform.py | |
| @@ -0,0 +1,230 @@ | |
| 1 | """ |
| 2 | Terraform enricher for cross-file module resolution and resource linking. |
| 3 | |
| 4 | Promotes and links Terraform graph nodes: |
| 5 | - Cross-file variable references (REFERENCES edges) |
| 6 | - Module source resolution (DEPENDS_ON edges to local source dirs) |
| 7 | - Provider grouping (BELONGS_TO edges to provider nodes) |
| 8 | """ |
| 9 | |
| 10 | from navegador.enrichment.base import EnrichmentResult, FrameworkEnricher |
| 11 | from navegador.graph.store import GraphStore |
| 12 | |
| 13 | # Common Terraform provider prefixes and their canonical provider names |
| 14 | _PROVIDER_PREFIXES = { |
| 15 | "aws_": "aws", |
| 16 | "google_": "google", |
| 17 | "azurerm_": "azurerm", |
| 18 | "azuread_": "azuread", |
| 19 | "kubernetes_": "kubernetes", |
| 20 | "helm_": "helm", |
| 21 | "vault_": "vault", |
| 22 | "datadog_": "datadog", |
| 23 | "cloudflare_": "cloudflare", |
| 24 | "digitalocean_": "digitalocean", |
| 25 | "github_": "github", |
| 26 | "null_": "null", |
| 27 | "random_": "random", |
| 28 | "local_": "local", |
| 29 | "tls_": "tls", |
| 30 | "archive_": "archive", |
| 31 | "external_": "external", |
| 32 | "template_": "template", |
| 33 | "time_": "time", |
| 34 | } |
| 35 | |
| 36 | |
class TerraformEnricher(FrameworkEnricher):
    """Enriches a navegador graph with Terraform-specific semantics.

    Three passes, each counted separately in the returned
    ``EnrichmentResult.patterns_found``:

    - ``variable_references``: REFERENCES edges for cross-file variable use
    - ``module_sources``: DEPENDS_ON edges from module blocks to sources
    - ``provider_grouping``: BELONGS_TO edges from resources to providers
    """

    def __init__(self, store: GraphStore) -> None:
        super().__init__(store)

    # ── Identity ──────────────────────────────────────────────────────────────

    @property
    def framework_name(self) -> str:
        return "terraform"

    @property
    def detection_patterns(self) -> list[str]:
        # Terraform code produces no Import nodes, so import-pattern
        # detection does not apply to this enricher.
        return []

    @property
    def detection_files(self) -> list[str]:
        # Conventional file names that signal a Terraform root module.
        return ["main.tf", "variables.tf", "outputs.tf", "providers.tf"]

    # ── Enrichment ────────────────────────────────────────────────────────────

    def enrich(self) -> EnrichmentResult:
        """Run all three enrichment passes and aggregate their edge counts."""
        result = EnrichmentResult()

        var_refs = self._enrich_variable_references()
        result.edges_added += var_refs
        result.patterns_found["variable_references"] = var_refs

        module_deps = self._enrich_module_sources()
        result.edges_added += module_deps
        result.patterns_found["module_sources"] = module_deps

        provider_links = self._enrich_provider_grouping()
        result.edges_added += provider_links
        result.patterns_found["provider_grouping"] = provider_links

        return result

    # ── Pattern helpers ───────────────────────────────────────────────────────

    def _enrich_variable_references(self) -> int:
        """Create REFERENCES edges for cross-file variable usage.

        Pass 1 links terraform_output nodes to their CALLS targets in other
        files; pass 2 links same-named terraform_variable nodes declared in
        more than one file.  Returns the number of edges added.
        """
        edges_added = 0

        # Index every terraform_variable: name -> declaring file paths.
        var_result = self.store.query(
            "MATCH (v) WHERE v.semantic_type = $var_type RETURN v.name, v.file_path",
            {"var_type": "terraform_variable"},
        )
        var_rows = var_result.result_set or []
        var_by_name: dict[str, list[str]] = {}
        for row in var_rows:
            name, file_path = row[0], row[1]
            if name and file_path:
                var_by_name.setdefault(name, []).append(file_path)

        # Pass 1: outputs referencing nodes defined in other files.
        output_result = self.store.query(
            "MATCH (o) WHERE o.semantic_type = $out_type RETURN o.name, o.file_path",
            {"out_type": "terraform_output"},
        )
        output_rows = output_result.result_set or []
        for row in output_rows:
            out_name, out_file = row[0], row[1]
            if not (out_name and out_file):
                continue

            ref_result = self.store.query(
                "MATCH (o)-[:CALLS]->(target) "
                "WHERE o.name = $name AND o.file_path = $path "
                "RETURN target.name, target.file_path",
                {"name": out_name, "path": out_file},
            )
            ref_rows = ref_result.result_set or []
            for ref_row in ref_rows:
                target_name, target_path = ref_row[0], ref_row[1]
                if target_name and target_path and target_path != out_file:
                    self._add_semantic_edge(out_name, "REFERENCES", target_name)
                    edges_added += 1

        # Pass 2: link same-named variables declared in multiple files
        # (e.g. variables.tf declares it, main.tf re-declares/uses it).
        for var_name, paths in var_by_name.items():
            if len(paths) <= 1:
                continue
            for path in paths:
                ref_result = self.store.query(
                    "MATCH (n) WHERE n.file_path <> $path "
                    "AND n.name = $name "
                    "AND n.semantic_type = $var_type "
                    "RETURN n.name, n.file_path",
                    {"path": path, "name": var_name, "var_type": "terraform_variable"},
                )
                ref_rows = ref_result.result_set or []
                for ref_row in ref_rows:
                    ref_name = ref_row[0]
                    if ref_name:
                        self._add_semantic_edge(var_name, "REFERENCES", ref_name)
                        edges_added += 1

        return edges_added

    def _enrich_module_sources(self) -> int:
        """Create DEPENDS_ON edges from terraform_module nodes to sources.

        Tries existing CALLS edges first; only when those resolve nothing
        does it fall back to File nodes under ``/modules/<name>/``.
        Returns the number of edges added.
        """
        edges_added = 0

        module_result = self.store.query(
            "MATCH (m) WHERE m.semantic_type = $mod_type RETURN m.name, m.file_path",
            {"mod_type": "terraform_module"},
        )
        module_rows = module_result.result_set or []

        for row in module_rows:
            mod_name, mod_file = row[0], row[1]
            if not (mod_name and mod_file):
                continue

            source_result = self.store.query(
                "MATCH (m)-[:CALLS]->(target) "
                "WHERE m.name = $name AND m.file_path = $path "
                "RETURN target.name, target.file_path",
                {"name": mod_name, "path": mod_file},
            )
            source_rows = source_result.result_set or []

            resolved = False
            for source_row in source_rows:
                target_name, target_path = source_row[0], source_row[1]
                if target_name and target_path:
                    self._add_semantic_edge(mod_name, "DEPENDS_ON", target_name)
                    edges_added += 1
                    resolved = True

            # BUG FIX: the original ended the loop above with a no-op
            # ``continue``, so the path-based fallback always ran and could
            # double-link modules already resolved via CALLS edges.
            if resolved:
                continue

            # Fallback: local modules are often in ./modules/<name>/.
            file_result = self.store.query(
                "MATCH (f:File) WHERE f.file_path CONTAINS $fragment RETURN f.name",
                {"fragment": f"/modules/{mod_name}/"},
            )
            file_rows = file_result.result_set or []
            for file_row in file_rows:
                target_name = file_row[0]
                if target_name:
                    self._add_semantic_edge(mod_name, "DEPENDS_ON", target_name)
                    edges_added += 1

        return edges_added

    def _enrich_provider_grouping(self) -> int:
        """Create BELONGS_TO edges from resources to their provider nodes.

        The provider is inferred from the resource-type prefix; an edge is
        only added when a matching terraform_provider node exists.
        Returns the number of edges added.
        """
        edges_added = 0

        resource_result = self.store.query(
            "MATCH (r) WHERE r.semantic_type = $res_type RETURN r.name, r.file_path",
            {"res_type": "terraform_resource"},
        )
        resource_rows = resource_result.result_set or []

        for row in resource_rows:
            res_name, res_file = row[0], row[1]
            if not (res_name and res_file):
                continue

            for prefix, provider in _PROVIDER_PREFIXES.items():
                if res_name.startswith(prefix):
                    provider_result = self.store.query(
                        "MATCH (p) WHERE p.name = $provider "
                        "AND p.semantic_type = $prov_type "
                        "RETURN p.name",
                        {"provider": provider, "prov_type": "terraform_provider"},
                    )
                    provider_rows = provider_result.result_set or []
                    if provider_rows and provider_rows[0][0]:
                        self._add_semantic_edge(res_name, "BELONGS_TO", provider)
                        edges_added += 1
                    break  # Only match the first prefix

        return edges_added
| --- a/navegador/ingestion/ansible.py | ||
| +++ b/navegador/ingestion/ansible.py | ||
| @@ -0,0 +1,616 @@ | ||
| 1 | +""" | |
| 2 | +Ansible playbook/task parser — extracts plays, tasks, handlers, roles, | |
| 3 | +and variables from Ansible YAML files into the navegador graph. | |
| 4 | + | |
| 5 | +Unlike other parsers this does NOT use tree-sitter. Ansible semantics | |
| 6 | +are encoded in YAML structure (dicts with well-known keys like ``hosts``, | |
| 7 | +``tasks``, ``handlers``), so we parse with ``yaml.safe_load()`` and walk | |
| 8 | +the resulting Python data structures directly. | |
| 9 | + | |
| 10 | +Invoked via a hook in RepoIngester rather than through LANGUAGE_MAP. | |
| 11 | +""" | |
| 12 | + | |
| 13 | +import logging | |
| 14 | +import re | |
| 15 | +from pathlib import Path | |
| 16 | + | |
| 17 | +import yaml | |
| 18 | + | |
| 19 | +from navegador.graph.sath | |
| 20 | + | |
| 21 | +from navegador.graph.schema import EdgeType, NodeLabel | |
| 22 | +from navegador.graph.store import GraphStore | |
| 23 | +from navegador.ingestion.parser import LanguageParser | |
| 24 | + | |
| 25 | +logger = logging.getLogger(__name__) | |
| 26 | + | |
| 27 | +# Well-known Ansible module names — used to identify task dicts that lack | |
| 28 | +# an explicit ``name`` key and to extract the module used by a task. | |
| 29 | +_ANSIBLE_MODULES = { | |
| 30 | + "apt", | |
| 31 | + "yum", | |
| 32 | + "dnf", | |
| 33 | + "pip", | |
| 34 | + "gem", | |
| 35 | + "npm", | |
| 36 | + "copy", | |
| 37 | + "template", | |
| 38 | + "file", | |
| 39 | + "lineinfile", | |
| 40 | + "blockinfile", | |
| 41 | + "service", | |
| 42 | + "systemd", | |
| 43 | + "command", | |
| 44 | + "shell", | |
| 45 | + "raw", | |
| 46 | + "script", | |
| 47 | + "git", | |
| 48 | + "get_url", | |
| 49 | + "uri", | |
| 50 | + "unarchive", | |
| 51 | + "user", | |
| 52 | + "group", | |
| 53 | + "cron", | |
| 54 | + "mount", | |
| 55 | + "docker_container", | |
| 56 | + "docker_image", | |
| 57 | + "k8s", | |
| 58 | + "helm", | |
| 59 | + "debug", | |
| 60 | + "assert", | |
| 61 | + "fail", | |
| 62 | + "set_fact", | |
| 63 | + "include_tasks", | |
| 64 | + "import_tasks", | |
| 65 | + "include_role", | |
| 66 | + "import_role", | |
| 67 | + "block", | |
| 68 | + "rescue", | |
| 69 | + "always", | |
| 70 | + "wait_for", | |
| 71 | + "pause", | |
| 72 | + "stat", | |
| 73 | + "find", | |
| 74 | + "replace", | |
| 75 | + "package", | |
| 76 | + "hostname", | |
| 77 | + "timezone", | |
| 78 | + "sysctl", | |
| 79 | + "authorized_key", | |
| 80 | + "firewalld", | |
| 81 | + "iptables", | |
| 82 | + "aws_s3", | |
| 83 | + "ec2", | |
| 84 | + "ec2_instance", | |
| 85 | + "s3_bucket", | |
| 86 | + "ansible.builtin.copy", | |
| 87 | + "ansible.builtin.template", | |
| 88 | + "ansible.builtin.file", | |
| 89 | + "ansible.builtin.command", | |
| 90 | + "ansible.builtin.shell", | |
| 91 | + "ansible.builtin.service", | |
| 92 | + "ansible.builtin.debug", | |
| 93 | + "ansible.builtin.set_fact", | |
| 94 | + "ansible.builtin.include_tasks", | |
| 95 | + "ansible.builtin.import_tasks", | |
| 96 | + "ansible.builtin.include_role", | |
| 97 | + "ansible.builtin.import_role", | |
| 98 | + "ansible.builtin.apt", | |
| 99 | + "ansible.builtin.yum", | |
| 100 | + "ansible.builtin.pip", | |
| 101 | + "ansible.builtin.git", | |
| 102 | + "ansible.builtin.user", | |
| 103 | + "ansible.builtin.group", | |
| 104 | + "ansible.builtin.uri", | |
| 105 | + "ansible.builtin.get_url", | |
| 106 | + "ansible.builtin.lineinfile", | |
| 107 | + "ansible.builtin.blockinfile", | |
| 108 | + "ansible.builtin.systemd", | |
| 109 | + "ansible.builtin.raw", | |
| 110 | + "ansible.builtin.script", | |
| 111 | + "ansible.builtin.unarchive", | |
| 112 | + "ansible.builtin.assert", | |
| 113 | + "ansible.builtin.fail", | |
| 114 | + "ansible.builtin.wait_for", | |
| 115 | + "ansible.builtin.pause", | |
| 116 | + "ansible.builtin.stat", | |
| 117 | + "ansible.builtin.find", | |
| 118 | + "ansible.builtin.replace", | |
| 119 | + "ansible.builtin.package", | |
| 120 | +} | |
| 121 | + | |
| 122 | +# Patterns in file paths that strongly suggest Ansible content | |
| 123 | +_ROLE_TASKS_RE = re.compile(r"roles/[^/]+/tasks/") | |
| 124 | +_ROLE_HANDLERS_RE = re.compile(r"roles/[^/]+/handlers/") | |
| 125 | +_ROLE_DEFAULTS_RE = re.compile(r"roles/[^/]+/defaults/") | |
| 126 | +_ROLE_VARS_RE = re.compile(r"roles/[^/]+/vars/") | |
| 127 | +_PLAYBOOKS_DIR_RE = re.compile(r"(^|/)playbooks/") | |
| 128 | +_COMMON_PLAYBOOK_RE = re.compile( | |
| 129 | + r"(^|/)(playbook[^/]*|site|main|common|deploy|provision|setup|configure)\.(yml|yaml)$" | |
| 130 | +) | |
| 131 | +_GROUP_VARS_RE = re.compile(r"(^|/)group_vars/") | |
| 132 | +_HOST_VARS_RE = re.compile(r"(^|/)host_vars/") | |
| 133 | + | |
| 134 | + | |
| 135 | +class AnsibleParser(LanguageParser): | |
| 136 | + """Parses Ansible YAML files into the navegador graph.""" | |
| 137 | + | |
| 138 | + def __init__(self) -> None: | |
| 139 | + pass # no tree-sitter parser needed | |
| 140 | + | |
| 141 | + @staticmethod | |
| 142 | + def is_ansible_file(path: Path, repo_root: Path | None = None) -> bool: | |
| 143 | + """Return True if *path* looks like an Ansible YAML file.""" | |
| 144 | + if path.suffix not in (".yml", ".yaml"): | |
| 145 | + return False | |
| 146 | + | |
| 147 | + rel = str(path) | |
| 148 | + if repo_root is not None: | |
| 149 | + try: | |
| 150 | + rel = str(path.relative_to(repo_root)) | |
| 151 | + except ValueError: | |
| 152 | + pass | |
| 153 | + | |
| 154 | + # Structural heuristics based on path | |
| 155 | + if _ROLE_TASKS_RE.search(rel): | |
| 156 | + return True | |
| 157 | + if _ROLE_HANDLERS_RE.search(rel): | |
| 158 | + return True | |
| 159 | + if _ROLE_DEFAULTS_RE.search(rel): | |
| 160 | + return True | |
| 161 | + if _ROLE_VARS_RE.search(rel): | |
| 162 | + return True | |
| 163 | + if _PLAYBOOKS_DIR_RE.search(rel): | |
| 164 | + return True | |
| 165 | + if _GROUP_VARS_RE.search(rel): | |
| 166 | + return True | |
| 167 | + if _HOST_VARS_RE.search(rel): | |
| 168 | + return True | |
| 169 | + | |
| 170 | + # ansible.cfg sibling in repo root | |
| 171 | + if repo_root is not None and (repo_root / "ansible.cfg").exists(): | |
| 172 | + if _COMMON_PLAYBOOK_RE.search(rel): | |
| 173 | + return True | |
| 174 | + | |
| 175 | + # Content-based: top-level list whose items contain "hosts:" key | |
| 176 | + try: | |
| 177 | + text = path.read_text(encoding="utf-8", errors="replace") | |
| 178 | + except OSError: | |
| 179 | + return False | |
| 180 | + | |
| 181 | + if not text.lstrip().startswith("---"ors="replace") | |
| 182 | + yaml.YAMLyum", | |
| 183 | + "ansible.xcept OSError: | |
| 184 | + ]: | |
| 185 | + rel_pand variables from Ansible YAML files into the navegador graph. | |
| 186 | + | |
| 187 | +Unlike other parsers this does NOT use tree-sitter. Ansible semantics | |
| 188 | +are encoded in YAML structure (dicts with well-known keys like ``hosts``, | |
| 189 | +``tasks``, ``handlers``), so we parse with ``yaml.safe_load()`` and walk | |
| 190 | +the resulting Python data structures directly. | |
| 191 | + | |
| 192 | +Invoked via a hook in RepoIngester rather than through LANGUAGE_MAP. | |
| 193 | +""" | |
| 194 | + | |
| 195 | +import logging | |
| 196 | +import re | |
| 197 | +from pathlib import Path | |
| 198 | + | |
| 199 | +from navegador.graph.schema import EdgeType, NodeLabel | |
| 200 | +from navegador.graph.store impo""" | |
| 201 | +Ansible playbook/task parser — extracts plays, tasks, handle errors="replace") | |
| 202 | + (OSError, yaml.YAMLError)t) | |
| 203 | + except Exception as exc: | |
| 204 | + logger.warning("Could not parse Ansible file %s: %s", rel_path, exc) | |
| 205 | + return stats | |
| 206 | + | |
| 207 | + if data is None: | |
| 208 | + return stats | |
| 209 | + | |
| 210 | + # File node | |
| 211 | + store.create_node( | |
| 212 | + NodeLabel.File, | |
| 213 | + { | |
| 214 | + "name": path.name, | |
| 215 | + "path": rel_path, | |
| 216 | + "language": "ansible", | |
| 217 | + "line_count": text.count("\n"), | |
| 218 | + }, | |
| 219 | + ) | |
| 220 | + | |
| 221 | + rel_str = rel_path.replace("\\", "/") | |
| 222 | + | |
| 223 | + # Dispatch based on file type | |
| 224 | + if _ROLE_DEFAULTS_RE.search(rel_str) or _ROLE_VARS_RE.search(rel_str): | |
| 225 | + self._parse_variable_file(data, rel_path, store, stats) | |
| 226 | + elif _GROUP_VARS_RE.search(rel_str) or _HOST_VARS_RE.search(rel_str): | |
| 227 | + self._parse_variable_file(data, rel_path, store, stats) | |
| 228 | + elif _ROLE_HANDLERS_RE.search(rel_str): | |
| 229 | + self._parse_handler_file(data, rel_path, store, stats) | |
| 230 | + elif _ROLE_TASKS_RE.search(rel_str): | |
| 231 | + self._parse_task_file(data, rel_path, store, stats) | |
| 232 | + elif ( | |
| 233 | + isinstance(data, list) | |
| 234 | + and data | |
| 235 | + and any(isinstance(item, dict) and "hosts" in item for item in data) | |
| 236 | + ): | |
| 237 | + self._parse_playbook(data, rel_path, store, stats) | |
| 238 | + elif isinstance(data, list): | |
| 239 | + # Might be a task list (e.g. included task file) | |
| 240 | + self._parse_task_file(data, rel_path, store, stats) | |
| 241 | + elif isinstance(data, dict): | |
| 242 | + # Standalone variable file | |
| 243 | + self._parse_variable_file(data, rel_path, store, stats) | |
| 244 | + | |
| 245 | + return stats | |
| 246 | + | |
| 247 | + # ── Playbook parsing ───────────────────────────────────────────────────── | |
| 248 | + | |
| 249 | + def _parse_playbook( | |
| 250 | + self, | |
| 251 | + data: list, | |
| 252 | + file_path: str, | |
| 253 | + store: GraphStore, | |
| 254 | + stats: dict, | |
| 255 | + ) -> None: | |
| 256 | + """Parse a full playbook (list of plays).""" | |
| 257 | + playbook_name = Path(file_path).stem | |
| 258 | + | |
| 259 | + # Module node for the playbook file | |
| 260 | + store.create_node( | |
| 261 | + NodeLabel.Module, | |
| 262 | + { | |
| 263 | + "name": playbook_name, | |
| 264 | + "file_path": file_path, | |
| 265 | + "docstring": "", | |
| 266 | + "semantic_type": "ansible_playbook", | |
| 267 | + }, | |
| 268 | + ) | |
| 269 | + store.create_edge( | |
| 270 | + NodeLabel.File, | |
| 271 | + {"path": file_path}, | |
| 272 | + EdgeType.CONTAINS, | |
| 273 | + NodeLabel.Module, | |
| 274 | + {"name": playbook_name, "file_path": file_path}, | |
| 275 | + ) | |
| 276 | + stats["edges"] += 1 | |
| 277 | + | |
| 278 | + for play in data: | |
| 279 | + if not isinstance(play, dict): | |
| 280 | + continue | |
| 281 | + if "hosts" not in play: | |
| 282 | + continue | |
| 283 | + self._parse_play(play, file_path, playbook_name, store, stats) | |
| 284 | + | |
| 285 | + def _parse_play( | |
| 286 | + self, | |
| 287 | + play: dict, | |
| 288 | + file_path: str, | |
| 289 | + playbook_name: str, | |
| 290 | + store: GraphStore, | |
| 291 | + stats: dict, | |
| 292 | + ) -> None: | |
| 293 | + """Parse a single play dict.""" | |
| 294 | + play_name = play.get("name", f"play:{play.get('hosts', 'unknown')}") | |
| 295 | + | |
| 296 | + store.create_node( | |
| 297 | + NodeLabel.Class, | |
| 298 | + { | |
| 299 | + "name": play_name, | |
| 300 | + "file_path": file_path, | |
| 301 | + "line_start": 0, | |
| 302 | + "line_end": 0, | |
| 303 | + "docstring": f"hosts: {play.get('hosts', '')}", | |
| 304 | + "semantic_type": "ansible_play", | |
| 305 | + }, | |
| 306 | + ) | |
| 307 | + store.create_edge( | |
| 308 | + NodeLabel.Module, | |
| 309 | + {"name": playbook_name, "file_path": file_path}, | |
| 310 | + EdgeType.CONTAINS, | |
| 311 | + NodeLabel.Class, | |
| 312 | + {"name": play_name, "file_path": file_path}, | |
| 313 | + ) | |
| 314 | + stats["classes"] += 1 | |
| 315 | + stats["edges"] += 1 | |
| 316 | + | |
| 317 | + # Tasks | |
| 318 | + for task_dict in play.get("tasks", []) or []: | |
| 319 | + if isinstance(task_dict, dict): | |
| 320 | + self._parse_task(task_dict, file_path, play_name, store, stats) | |
| 321 | + | |
| 322 | + # Pre-tasks | |
| 323 | + for task_dict in play.get("pre_tasks", []) or []: | |
| 324 | + if isinstance(task_dict, dict): | |
| 325 | + self._parse_task(task_dict, file_path, play_name, store, stats) | |
| 326 | + | |
| 327 | + # Post-tasks | |
| 328 | + for task_dict in play.get("post_tasks", []) or []: | |
| 329 | + if isinstance(task_dict, dict): | |
| 330 | + self._parse_task(task_dict, file_path, play_name, store, stats) | |
| 331 | + | |
| 332 | + # Handlers | |
| 333 | + for handler_dict in play.get("handlers", []) or []: | |
| 334 | + if isinstance(handler_dict, dict): | |
| 335 | + self._parse_handler(handler_dict, file_path, play_name, store, stats) | |
| 336 | + | |
| 337 | + # Roles | |
| 338 | + for role in play.get("roles", []) or []: | |
| 339 | + self._parse_role_reference(role, file_path, play_name, store, stats) | |
| 340 | + | |
| 341 | + # Variables | |
| 342 | + self._parse_vars_block(play.get("vars"), file_path, play_name, store, stats) | |
| 343 | + | |
| 344 | + # ── Task parsing ───────────────────────────────────────────────────────── | |
| 345 | + | |
| 346 | + def _task_name(self, task: dict) -> str: | |
| 347 | + """Derive a task name from the dict.""" | |
| 348 | + if "name" in task and task["name"]: | |
| 349 | + return str(task["name"]) | |
| 350 | + # Fall back to module name | |
| 351 | + for key in task: | |
| 352 | + if key in _ANSIBLE_MODULES: | |
| 353 | + return key | |
| 354 | + # Last resort: first non-meta key | |
| 355 | + _meta_keys = { | |
| 356 | + "name", | |
| 357 | + "register", | |
| 358 | + "when", | |
| 359 | + "notify", | |
| 360 | + "tags", | |
| 361 | + "become", | |
| 362 | + "become_user", | |
| 363 | + "ignore_errors", | |
| 364 | + "changed_when", | |
| 365 | + "failed_when", | |
| 366 | + "loop", | |
| 367 | + "with_items", | |
| 368 | + "with_dict", | |
| 369 | + "with_fileglob", | |
| 370 | + "until", | |
| 371 | + "retries", | |
| 372 | + "delay", | |
| 373 | + "no_log", | |
| 374 | + "environment", | |
| 375 | + "vars", | |
| 376 | + "listen", | |
| 377 | + "delegate_to", | |
| 378 | + "run_once", | |
| 379 | + "timeout", | |
| 380 | + } | |
| 381 | + for key in task: | |
| 382 | + if key not in _meta_keys: | |
| 383 | + return key | |
| 384 | + return "unnamed_task" | |
| 385 | + | |
| 386 | + def _parse_task( | |
| 387 | + self, | |
| 388 | + task: dict, | |
| 389 | + file_path: str, | |
| 390 | + parent_name: str, | |
| 391 | + store: GraphStore, | |
| 392 | + stats: dict, | |
| 393 | + ) -> None: | |
| 394 | + """Parse a single task dict into a Function node.""" | |
| 395 | + task_name = self._task_name(task) | |
| 396 | + | |
| 397 | + store.create_node( | |
| 398 | + NodeLabel.Function, | |
| 399 | + { | |
| 400 | + "name": task_name, | |
| 401 | + "file_path": file_path, | |
| 402 | + "line_start": 0, | |
| 403 | + "line_end": 0, | |
| 404 | + "docstring": "", | |
| 405 | + "semantic_type": "ansible_task", | |
| 406 | + }, | |
| 407 | + ) | |
| 408 | + store.create_edge( | |
| 409 | + NodeLabel.Class, | |
| 410 | + {"name": parent_name, "file_path": file_path}, | |
| 411 | + EdgeType.CONTAINS, | |
| 412 | + NodeLabel.Function, | |
| 413 | + {"name": task_name, "file_path": file_path}, | |
| 414 | + ) | |
| 415 | + stats["functions"] += 1 | |
| 416 | + stats["edges"] += 1 | |
| 417 | + | |
| 418 | + # notify: -> CALLS edge to handler | |
| 419 | + notify = task.get("notify") | |
| 420 | + if notify: | |
| 421 | + if isinstance(notify, str): | |
| 422 | + notify = [notify] | |
| 423 | + for handler_name in notify: | |
| 424 | + store.create_edge( | |
| 425 | + NodeLabel.Function, | |
| 426 | + {"name": task_name, "file_path": file_path}, | |
| 427 | + EdgeType.CALLS, | |
| 428 | + NodeLabel.Function, | |
| 429 | + {"name": str(handler_name), "file_path": file_path}, | |
| 430 | + ) | |
| 431 | + stats["edges"] += 1 | |
| 432 | + | |
| 433 | + # Handle block/rescue/always | |
| 434 | + for block_key in ("block", "rescue", "always"): | |
| 435 | + block_tasks = task.get(block_key) | |
| 436 | + if isinstance(block_tasks, list): | |
| 437 | + for sub_task in block_tasks: | |
| 438 | + if isinstance(sub_task, dict): | |
| 439 | + self._parse_task(sub_task, file_path, parent_name, store, stats) | |
| 440 | + | |
| 441 | + # ── Handler parsing ────────────────────────────────────────────────────── | |
| 442 | + | |
| 443 | + def _parse_handler( | |
| 444 | + self, | |
| 445 | + handler: dict, | |
| 446 | + file_path: str, | |
| 447 | + parent_name: str, | |
| 448 | + store: GraphStore, | |
| 449 | + stats: dict, | |
| 450 | + ) -> None: | |
| 451 | + """Parse a handler dict into a Function node.""" | |
| 452 | + handler_name = handler.get("name", self._task_name(handler)) | |
| 453 | + | |
| 454 | + store.create_node( | |
| 455 | + NodeLabel.Function, | |
| 456 | + { | |
| 457 | + "name": handler_name, | |
| 458 | + "file_path": file_path, | |
| 459 | + "line_start": 0, | |
| 460 | + "line_end": 0, | |
| 461 | + "docstring": "", | |
| 462 | + "semantic_type": "ansible_handler", | |
| 463 | + }, | |
| 464 | + ) | |
| 465 | + store.create_edge( | |
| 466 | + NodeLabel.Class, | |
| 467 | + {"name": parent_name, "file_path": file_path}, | |
| 468 | + EdgeType.CONTAINS, | |
| 469 | + NodeLabel.Function, | |
| 470 | + {"name": handler_name, "file_path": file_path}, | |
| 471 | + ) | |
| 472 | + stats["functions"] += 1 | |
| 473 | + stats["edges"] += 1 | |
| 474 | + | |
| 475 | + # ── Role reference parsing ─────────────────────────────────────────────── | |
| 476 | + | |
| 477 | + def _parse_role_reference( | |
| 478 | + self, | |
| 479 | + role, | |
| 480 | + file_path: str, | |
| 481 | + play_name: str, | |
| 482 | + store: GraphStore, | |
| 483 | + stats: dict, | |
| 484 | + ) -> None: | |
| 485 | + """Parse a role reference (string or dict with 'role' key).""" | |
| 486 | + if isinstance(role, str): | |
| 487 | + role_name = role | |
| 488 | + elif isinstance(role, dict): | |
| 489 | + role_name = role.get("role") or role.get("name", "") | |
| 490 | + else: | |
| 491 | + return | |
| 492 | + | |
| 493 | + if not role_name: | |
| 494 | + return | |
| 495 | + | |
| 496 | + store.create_node( | |
| 497 | + NodeLabel.Import, | |
| 498 | + { | |
| 499 | + "name": role_name, | |
| 500 | + "file_path": file_path, | |
| 501 | + "line_start": 0, | |
| 502 | + "module": role_name, | |
| 503 | + "semantic_type": "ansible_role", | |
| 504 | + }, | |
| 505 | + ) | |
| 506 | + store.create_edge( | |
| 507 | + NodeLabel.Class, | |
| 508 | + {"name": play_name, "file_path": file_path}, | |
| 509 | + EdgeType.IMPORTS, | |
| 510 | + NodeLabel.Import, | |
| 511 | + {"name": role_name, "file_path": file_path}, | |
| 512 | + ) | |
| 513 | + stats["edges"] += 1 | |
| 514 | + | |
| 515 | + # ── Variable parsing ───────────────────────────────────────────────────── | |
| 516 | + | |
| 517 | + def _parse_vars_block( | |
| 518 | + self, | |
| 519 | + vars_data, | |
| 520 | + file_path: str, | |
| 521 | + parent_name: str, | |
| 522 | + store: GraphStore, | |
| 523 | + stats: dict, | |
| 524 | + ) -> None: | |
| 525 | + """Parse a vars: block (dict) into Variable nodes.""" | |
| 526 | + if not isinstance(vars_data, dict): | |
| 527 | + return | |
| 528 | + | |
| 529 | + for var_name, var_value in vars_data.items(): | |
| 530 | + store.create_node( | |
| 531 | + NodeLabel.Variable, | |
| 532 | + { | |
| 533 | + "name": str(var_name), | |
| 534 | + "file_path": file_path, | |
| 535 | + "line_start": 0, | |
| 536 | + "semantic_type": "ansible_variable", | |
| 537 | + }, | |
| 538 | + ) | |
| 539 | + store.create_edge( | |
| 540 | + NodeLabel.Class, | |
| 541 | + {"name": parent_name, "file_path": file_path}, | |
| 542 | + EdgeType.CONTAINS, | |
| 543 | + NodeLabel.Variable, | |
| 544 | + {"name": str(var_name), "file_path": file_path}, | |
| 545 | + ) | |
| 546 | + stats["edges"] += 1 | |
| 547 | + | |
| 548 | + # ── Standalone file parsers ────────────────────────────────────────────── | |
| 549 | + | |
| 550 | + def _parse_task_file( | |
| 551 | + self, | |
| 552 | + data, | |
| 553 | + file_path: str, | |
| 554 | + store: GraphStore, | |
| 555 | + stats: dict, | |
| 556 | + ) -> None: | |
| 557 | + """Parse a standalone task file (roles/*/tasks/main.yml or included file).""" | |
| 558 | + if not isinstance(data, list): | |
| 559 | + return | |
| 560 | + | |
| 561 | + # Use file stem as a synthetic parent class | |
| 562 | + parent_name = Path(file_path).stem | |
| 563 | + store.create_node( | |
| 564 | + NodeLabel.Class, | |
| 565 | + { | |
| 566 | + "name": parent_name, | |
| 567 | + "file_path": file_path, | |
| 568 | + "line_start": 0, | |
| 569 | + "line_end": 0, | |
| 570 | + "docstring": "", | |
| 571 | + "semantic_type": "ansible_play", | |
| 572 | + }, | |
| 573 | + ) | |
| 574 | + store.create_edge( | |
| 575 | + NodeLabel.File, | |
| 576 | + {"path": file_path}, | |
| 577 | + EdgeType.CONTAINS, | |
| 578 | + NodeLabel.Class, | |
| 579 | + {"name": parent_name, "file_path": file_path}, | |
| 580 | + ) | |
| 581 | + stats["classes"] += 1 | |
| 582 | + stats["edges"] += 1 | |
| 583 | + | |
| 584 | + for task_dict in data: | |
| 585 | + if isinstance(task_dict, dict): | |
| 586 | + self._parse_task(task_dict, file_path, parent_name, store, stats) | |
| 587 | + | |
| 588 | + def _parse_handler_file( | |
| 589 | + self, | |
| 590 | + data, | |
| 591 | + file_path: str, | |
| 592 | + store: GraphStore, | |
| 593 | + stats: dict, | |
| 594 | + ) -> None: | |
| 595 | + """Parse a standalone handler file (roles/*/handlers/main.yml).""" | |
| 596 | + if not isinstance(data, list): | |
| 597 | + return | |
| 598 | + | |
| 599 | + parent_name = Path(file_path).stem | |
| 600 | + store.create_node( | |
| 601 | + NodeLabel.Class, | |
| 602 | + { | |
| 603 | + "name": parent_name, | |
| 604 | + "file_path": file_path, | |
| 605 | + "line_start": 0, | |
| 606 | + "line_end": 0, | |
| 607 | + "docstring": "", | |
| 608 | + "semantic_type": "ansible_play", | |
| 609 | + }, | |
| 610 | + ) | |
| 611 | + store.create_edge( | |
| 612 | + NodeLabel.File, | |
| 613 | + {"path": file_path}, | |
| 614 | + EdgeType.CONTAINS, | |
| 615 | + NodeLabel.Class, | |
| 616 | + {"name": parent_name, "file_path": file |
| --- a/navegador/ingestion/ansible.py | |
| +++ b/navegador/ingestion/ansible.py | |
| @@ -0,0 +1,616 @@ | |
| --- a/navegador/ingestion/ansible.py | |
| +++ b/navegador/ingestion/ansible.py | |
| @@ -0,0 +1,616 @@ | |
| 1 | """ |
| 2 | Ansible playbook/task parser — extracts plays, tasks, handlers, roles, |
| 3 | and variables from Ansible YAML files into the navegador graph. |
| 4 | |
| 5 | Unlike other parsers this does NOT use tree-sitter. Ansible semantics |
| 6 | are encoded in YAML structure (dicts with well-known keys like ``hosts``, |
| 7 | ``tasks``, ``handlers``), so we parse with ``yaml.safe_load()`` and walk |
| 8 | the resulting Python data structures directly. |
| 9 | |
| 10 | Invoked via a hook in RepoIngester rather than through LANGUAGE_MAP. |
| 11 | """ |
| 12 | |
import logging
import re
from pathlib import Path

import yaml

from navegador.graph.schema import EdgeType, NodeLabel
from navegador.graph.store import GraphStore
from navegador.ingestion.parser import LanguageParser

logger = logging.getLogger(__name__)
| 26 | |
| 27 | # Well-known Ansible module names — used to identify task dicts that lack |
| 28 | # an explicit ``name`` key and to extract the module used by a task. |
| 29 | _ANSIBLE_MODULES = { |
| 30 | "apt", |
| 31 | "yum", |
| 32 | "dnf", |
| 33 | "pip", |
| 34 | "gem", |
| 35 | "npm", |
| 36 | "copy", |
| 37 | "template", |
| 38 | "file", |
| 39 | "lineinfile", |
| 40 | "blockinfile", |
| 41 | "service", |
| 42 | "systemd", |
| 43 | "command", |
| 44 | "shell", |
| 45 | "raw", |
| 46 | "script", |
| 47 | "git", |
| 48 | "get_url", |
| 49 | "uri", |
| 50 | "unarchive", |
| 51 | "user", |
| 52 | "group", |
| 53 | "cron", |
| 54 | "mount", |
| 55 | "docker_container", |
| 56 | "docker_image", |
| 57 | "k8s", |
| 58 | "helm", |
| 59 | "debug", |
| 60 | "assert", |
| 61 | "fail", |
| 62 | "set_fact", |
| 63 | "include_tasks", |
| 64 | "import_tasks", |
| 65 | "include_role", |
| 66 | "import_role", |
| 67 | "block", |
| 68 | "rescue", |
| 69 | "always", |
| 70 | "wait_for", |
| 71 | "pause", |
| 72 | "stat", |
| 73 | "find", |
| 74 | "replace", |
| 75 | "package", |
| 76 | "hostname", |
| 77 | "timezone", |
| 78 | "sysctl", |
| 79 | "authorized_key", |
| 80 | "firewalld", |
| 81 | "iptables", |
| 82 | "aws_s3", |
| 83 | "ec2", |
| 84 | "ec2_instance", |
| 85 | "s3_bucket", |
| 86 | "ansible.builtin.copy", |
| 87 | "ansible.builtin.template", |
| 88 | "ansible.builtin.file", |
| 89 | "ansible.builtin.command", |
| 90 | "ansible.builtin.shell", |
| 91 | "ansible.builtin.service", |
| 92 | "ansible.builtin.debug", |
| 93 | "ansible.builtin.set_fact", |
| 94 | "ansible.builtin.include_tasks", |
| 95 | "ansible.builtin.import_tasks", |
| 96 | "ansible.builtin.include_role", |
| 97 | "ansible.builtin.import_role", |
| 98 | "ansible.builtin.apt", |
| 99 | "ansible.builtin.yum", |
| 100 | "ansible.builtin.pip", |
| 101 | "ansible.builtin.git", |
| 102 | "ansible.builtin.user", |
| 103 | "ansible.builtin.group", |
| 104 | "ansible.builtin.uri", |
| 105 | "ansible.builtin.get_url", |
| 106 | "ansible.builtin.lineinfile", |
| 107 | "ansible.builtin.blockinfile", |
| 108 | "ansible.builtin.systemd", |
| 109 | "ansible.builtin.raw", |
| 110 | "ansible.builtin.script", |
| 111 | "ansible.builtin.unarchive", |
| 112 | "ansible.builtin.assert", |
| 113 | "ansible.builtin.fail", |
| 114 | "ansible.builtin.wait_for", |
| 115 | "ansible.builtin.pause", |
| 116 | "ansible.builtin.stat", |
| 117 | "ansible.builtin.find", |
| 118 | "ansible.builtin.replace", |
| 119 | "ansible.builtin.package", |
| 120 | } |
| 121 | |
# Patterns in file paths that strongly suggest Ansible content
_ROLE_TASKS_RE = re.compile(r"roles/[^/]+/tasks/")  # roles/<name>/tasks/*
_ROLE_HANDLERS_RE = re.compile(r"roles/[^/]+/handlers/")  # roles/<name>/handlers/*
_ROLE_DEFAULTS_RE = re.compile(r"roles/[^/]+/defaults/")  # roles/<name>/defaults/*
_ROLE_VARS_RE = re.compile(r"roles/[^/]+/vars/")  # roles/<name>/vars/*
_PLAYBOOKS_DIR_RE = re.compile(r"(^|/)playbooks/")  # anything under a playbooks/ dir
# Conventional playbook file names (site.yml, deploy.yaml, playbook-*.yml, ...)
# at the repo root or any depth; anchored on the .yml/.yaml extension.
_COMMON_PLAYBOOK_RE = re.compile(
    r"(^|/)(playbook[^/]*|site|main|common|deploy|provision|setup|configure)\.(yml|yaml)$"
)
_GROUP_VARS_RE = re.compile(r"(^|/)group_vars/")  # inventory group variables
_HOST_VARS_RE = re.compile(r"(^|/)host_vars/")  # inventory host variables
| 133 | |
| 134 | |
class AnsibleParser(LanguageParser):
    """Parses Ansible YAML files into the navegador graph."""

    def __init__(self) -> None:
        # No tree-sitter grammar needed: Ansible semantics live in the
        # YAML structure, so yaml.safe_load() is sufficient.
        pass

    @staticmethod
    def is_ansible_file(path: Path, repo_root: Path | None = None) -> bool:
        """Return True if *path* looks like an Ansible YAML file.

        Detection is heuristic: the path is matched against the
        conventional Ansible layout (roles/*/tasks, group_vars, ...);
        if that is inconclusive the content is loaded and checked for a
        top-level list of plays (dicts carrying a ``hosts`` key).
        """
        if path.suffix not in (".yml", ".yaml"):
            return False

        rel = str(path)
        if repo_root is not None:
            try:
                rel = str(path.relative_to(repo_root))
            except ValueError:
                pass  # outside repo_root — match against the absolute path

        # Structural heuristics based on path
        path_patterns = (
            _ROLE_TASKS_RE,
            _ROLE_HANDLERS_RE,
            _ROLE_DEFAULTS_RE,
            _ROLE_VARS_RE,
            _PLAYBOOKS_DIR_RE,
            _GROUP_VARS_RE,
            _HOST_VARS_RE,
        )
        if any(pattern.search(rel) for pattern in path_patterns):
            return True

        # ansible.cfg sibling in repo root: common playbook names qualify
        if repo_root is not None and (repo_root / "ansible.cfg").exists():
            if _COMMON_PLAYBOOK_RE.search(rel):
                return True

        # Content-based: top-level list whose items contain a "hosts" key
        try:
            text = path.read_text(encoding="utf-8", errors="replace")
        except OSError:
            return False
        try:
            data = yaml.safe_load(text)
        except yaml.YAMLError:
            return False
        return isinstance(data, list) and any(
            isinstance(item, dict) and "hosts" in item for item in data
        )

    def parse_file(self, path: Path, repo_root: Path, store: GraphStore) -> dict[str, int]:
        """Parse one Ansible YAML file into *store*.

        Returns a stats dict with ``functions``/``classes``/``edges`` counts;
        unparseable files are logged and produce zeroed stats.
        """
        rel_path = str(path.relative_to(repo_root))
        stats = {"functions": 0, "classes": 0, "edges": 0}

        try:
            text = path.read_text(encoding="utf-8", errors="replace")
            data = yaml.safe_load(text)
        except Exception as exc:
            logger.warning("Could not parse Ansible file %s: %s", rel_path, exc)
            return stats

        if data is None:
            return stats

        # File node
        store.create_node(
            NodeLabel.File,
            {
                "name": path.name,
                "path": rel_path,
                "language": "ansible",
                "line_count": text.count("\n"),
            },
        )

        # Normalize Windows separators so the path regexes match.
        rel_str = rel_path.replace("\\", "/")

        # Dispatch based on file type
        if _ROLE_DEFAULTS_RE.search(rel_str) or _ROLE_VARS_RE.search(rel_str):
            self._parse_variable_file(data, rel_path, store, stats)
        elif _GROUP_VARS_RE.search(rel_str) or _HOST_VARS_RE.search(rel_str):
            self._parse_variable_file(data, rel_path, store, stats)
        elif _ROLE_HANDLERS_RE.search(rel_str):
            self._parse_handler_file(data, rel_path, store, stats)
        elif _ROLE_TASKS_RE.search(rel_str):
            self._parse_task_file(data, rel_path, store, stats)
        elif (
            isinstance(data, list)
            and data
            and any(isinstance(item, dict) and "hosts" in item for item in data)
        ):
            self._parse_playbook(data, rel_path, store, stats)
        elif isinstance(data, list):
            # Might be a task list (e.g. included task file)
            self._parse_task_file(data, rel_path, store, stats)
        elif isinstance(data, dict):
            # Standalone variable file
            self._parse_variable_file(data, rel_path, store, stats)

        return stats
| 246 | |
| 247 | # ── Playbook parsing ───────────────────────────────────────────────────── |
| 248 | |
| 249 | def _parse_playbook( |
| 250 | self, |
| 251 | data: list, |
| 252 | file_path: str, |
| 253 | store: GraphStore, |
| 254 | stats: dict, |
| 255 | ) -> None: |
| 256 | """Parse a full playbook (list of plays).""" |
| 257 | playbook_name = Path(file_path).stem |
| 258 | |
| 259 | # Module node for the playbook file |
| 260 | store.create_node( |
| 261 | NodeLabel.Module, |
| 262 | { |
| 263 | "name": playbook_name, |
| 264 | "file_path": file_path, |
| 265 | "docstring": "", |
| 266 | "semantic_type": "ansible_playbook", |
| 267 | }, |
| 268 | ) |
| 269 | store.create_edge( |
| 270 | NodeLabel.File, |
| 271 | {"path": file_path}, |
| 272 | EdgeType.CONTAINS, |
| 273 | NodeLabel.Module, |
| 274 | {"name": playbook_name, "file_path": file_path}, |
| 275 | ) |
| 276 | stats["edges"] += 1 |
| 277 | |
| 278 | for play in data: |
| 279 | if not isinstance(play, dict): |
| 280 | continue |
| 281 | if "hosts" not in play: |
| 282 | continue |
| 283 | self._parse_play(play, file_path, playbook_name, store, stats) |
| 284 | |
| 285 | def _parse_play( |
| 286 | self, |
| 287 | play: dict, |
| 288 | file_path: str, |
| 289 | playbook_name: str, |
| 290 | store: GraphStore, |
| 291 | stats: dict, |
| 292 | ) -> None: |
| 293 | """Parse a single play dict.""" |
| 294 | play_name = play.get("name", f"play:{play.get('hosts', 'unknown')}") |
| 295 | |
| 296 | store.create_node( |
| 297 | NodeLabel.Class, |
| 298 | { |
| 299 | "name": play_name, |
| 300 | "file_path": file_path, |
| 301 | "line_start": 0, |
| 302 | "line_end": 0, |
| 303 | "docstring": f"hosts: {play.get('hosts', '')}", |
| 304 | "semantic_type": "ansible_play", |
| 305 | }, |
| 306 | ) |
| 307 | store.create_edge( |
| 308 | NodeLabel.Module, |
| 309 | {"name": playbook_name, "file_path": file_path}, |
| 310 | EdgeType.CONTAINS, |
| 311 | NodeLabel.Class, |
| 312 | {"name": play_name, "file_path": file_path}, |
| 313 | ) |
| 314 | stats["classes"] += 1 |
| 315 | stats["edges"] += 1 |
| 316 | |
| 317 | # Tasks |
| 318 | for task_dict in play.get("tasks", []) or []: |
| 319 | if isinstance(task_dict, dict): |
| 320 | self._parse_task(task_dict, file_path, play_name, store, stats) |
| 321 | |
| 322 | # Pre-tasks |
| 323 | for task_dict in play.get("pre_tasks", []) or []: |
| 324 | if isinstance(task_dict, dict): |
| 325 | self._parse_task(task_dict, file_path, play_name, store, stats) |
| 326 | |
| 327 | # Post-tasks |
| 328 | for task_dict in play.get("post_tasks", []) or []: |
| 329 | if isinstance(task_dict, dict): |
| 330 | self._parse_task(task_dict, file_path, play_name, store, stats) |
| 331 | |
| 332 | # Handlers |
| 333 | for handler_dict in play.get("handlers", []) or []: |
| 334 | if isinstance(handler_dict, dict): |
| 335 | self._parse_handler(handler_dict, file_path, play_name, store, stats) |
| 336 | |
| 337 | # Roles |
| 338 | for role in play.get("roles", []) or []: |
| 339 | self._parse_role_reference(role, file_path, play_name, store, stats) |
| 340 | |
| 341 | # Variables |
| 342 | self._parse_vars_block(play.get("vars"), file_path, play_name, store, stats) |
| 343 | |
| 344 | # ── Task parsing ───────────────────────────────────────────────────────── |
| 345 | |
| 346 | def _task_name(self, task: dict) -> str: |
| 347 | """Derive a task name from the dict.""" |
| 348 | if "name" in task and task["name"]: |
| 349 | return str(task["name"]) |
| 350 | # Fall back to module name |
| 351 | for key in task: |
| 352 | if key in _ANSIBLE_MODULES: |
| 353 | return key |
| 354 | # Last resort: first non-meta key |
| 355 | _meta_keys = { |
| 356 | "name", |
| 357 | "register", |
| 358 | "when", |
| 359 | "notify", |
| 360 | "tags", |
| 361 | "become", |
| 362 | "become_user", |
| 363 | "ignore_errors", |
| 364 | "changed_when", |
| 365 | "failed_when", |
| 366 | "loop", |
| 367 | "with_items", |
| 368 | "with_dict", |
| 369 | "with_fileglob", |
| 370 | "until", |
| 371 | "retries", |
| 372 | "delay", |
| 373 | "no_log", |
| 374 | "environment", |
| 375 | "vars", |
| 376 | "listen", |
| 377 | "delegate_to", |
| 378 | "run_once", |
| 379 | "timeout", |
| 380 | } |
| 381 | for key in task: |
| 382 | if key not in _meta_keys: |
| 383 | return key |
| 384 | return "unnamed_task" |
| 385 | |
| 386 | def _parse_task( |
| 387 | self, |
| 388 | task: dict, |
| 389 | file_path: str, |
| 390 | parent_name: str, |
| 391 | store: GraphStore, |
| 392 | stats: dict, |
| 393 | ) -> None: |
| 394 | """Parse a single task dict into a Function node.""" |
| 395 | task_name = self._task_name(task) |
| 396 | |
| 397 | store.create_node( |
| 398 | NodeLabel.Function, |
| 399 | { |
| 400 | "name": task_name, |
| 401 | "file_path": file_path, |
| 402 | "line_start": 0, |
| 403 | "line_end": 0, |
| 404 | "docstring": "", |
| 405 | "semantic_type": "ansible_task", |
| 406 | }, |
| 407 | ) |
| 408 | store.create_edge( |
| 409 | NodeLabel.Class, |
| 410 | {"name": parent_name, "file_path": file_path}, |
| 411 | EdgeType.CONTAINS, |
| 412 | NodeLabel.Function, |
| 413 | {"name": task_name, "file_path": file_path}, |
| 414 | ) |
| 415 | stats["functions"] += 1 |
| 416 | stats["edges"] += 1 |
| 417 | |
| 418 | # notify: -> CALLS edge to handler |
| 419 | notify = task.get("notify") |
| 420 | if notify: |
| 421 | if isinstance(notify, str): |
| 422 | notify = [notify] |
| 423 | for handler_name in notify: |
| 424 | store.create_edge( |
| 425 | NodeLabel.Function, |
| 426 | {"name": task_name, "file_path": file_path}, |
| 427 | EdgeType.CALLS, |
| 428 | NodeLabel.Function, |
| 429 | {"name": str(handler_name), "file_path": file_path}, |
| 430 | ) |
| 431 | stats["edges"] += 1 |
| 432 | |
| 433 | # Handle block/rescue/always |
| 434 | for block_key in ("block", "rescue", "always"): |
| 435 | block_tasks = task.get(block_key) |
| 436 | if isinstance(block_tasks, list): |
| 437 | for sub_task in block_tasks: |
| 438 | if isinstance(sub_task, dict): |
| 439 | self._parse_task(sub_task, file_path, parent_name, store, stats) |
| 440 | |
| 441 | # ── Handler parsing ────────────────────────────────────────────────────── |
| 442 | |
| 443 | def _parse_handler( |
| 444 | self, |
| 445 | handler: dict, |
| 446 | file_path: str, |
| 447 | parent_name: str, |
| 448 | store: GraphStore, |
| 449 | stats: dict, |
| 450 | ) -> None: |
| 451 | """Parse a handler dict into a Function node.""" |
| 452 | handler_name = handler.get("name", self._task_name(handler)) |
| 453 | |
| 454 | store.create_node( |
| 455 | NodeLabel.Function, |
| 456 | { |
| 457 | "name": handler_name, |
| 458 | "file_path": file_path, |
| 459 | "line_start": 0, |
| 460 | "line_end": 0, |
| 461 | "docstring": "", |
| 462 | "semantic_type": "ansible_handler", |
| 463 | }, |
| 464 | ) |
| 465 | store.create_edge( |
| 466 | NodeLabel.Class, |
| 467 | {"name": parent_name, "file_path": file_path}, |
| 468 | EdgeType.CONTAINS, |
| 469 | NodeLabel.Function, |
| 470 | {"name": handler_name, "file_path": file_path}, |
| 471 | ) |
| 472 | stats["functions"] += 1 |
| 473 | stats["edges"] += 1 |
| 474 | |
| 475 | # ── Role reference parsing ─────────────────────────────────────────────── |
| 476 | |
| 477 | def _parse_role_reference( |
| 478 | self, |
| 479 | role, |
| 480 | file_path: str, |
| 481 | play_name: str, |
| 482 | store: GraphStore, |
| 483 | stats: dict, |
| 484 | ) -> None: |
| 485 | """Parse a role reference (string or dict with 'role' key).""" |
| 486 | if isinstance(role, str): |
| 487 | role_name = role |
| 488 | elif isinstance(role, dict): |
| 489 | role_name = role.get("role") or role.get("name", "") |
| 490 | else: |
| 491 | return |
| 492 | |
| 493 | if not role_name: |
| 494 | return |
| 495 | |
| 496 | store.create_node( |
| 497 | NodeLabel.Import, |
| 498 | { |
| 499 | "name": role_name, |
| 500 | "file_path": file_path, |
| 501 | "line_start": 0, |
| 502 | "module": role_name, |
| 503 | "semantic_type": "ansible_role", |
| 504 | }, |
| 505 | ) |
| 506 | store.create_edge( |
| 507 | NodeLabel.Class, |
| 508 | {"name": play_name, "file_path": file_path}, |
| 509 | EdgeType.IMPORTS, |
| 510 | NodeLabel.Import, |
| 511 | {"name": role_name, "file_path": file_path}, |
| 512 | ) |
| 513 | stats["edges"] += 1 |
| 514 | |
| 515 | # ── Variable parsing ───────────────────────────────────────────────────── |
| 516 | |
| 517 | def _parse_vars_block( |
| 518 | self, |
| 519 | vars_data, |
| 520 | file_path: str, |
| 521 | parent_name: str, |
| 522 | store: GraphStore, |
| 523 | stats: dict, |
| 524 | ) -> None: |
| 525 | """Parse a vars: block (dict) into Variable nodes.""" |
| 526 | if not isinstance(vars_data, dict): |
| 527 | return |
| 528 | |
| 529 | for var_name, var_value in vars_data.items(): |
| 530 | store.create_node( |
| 531 | NodeLabel.Variable, |
| 532 | { |
| 533 | "name": str(var_name), |
| 534 | "file_path": file_path, |
| 535 | "line_start": 0, |
| 536 | "semantic_type": "ansible_variable", |
| 537 | }, |
| 538 | ) |
| 539 | store.create_edge( |
| 540 | NodeLabel.Class, |
| 541 | {"name": parent_name, "file_path": file_path}, |
| 542 | EdgeType.CONTAINS, |
| 543 | NodeLabel.Variable, |
| 544 | {"name": str(var_name), "file_path": file_path}, |
| 545 | ) |
| 546 | stats["edges"] += 1 |
| 547 | |
| 548 | # ── Standalone file parsers ────────────────────────────────────────────── |
| 549 | |
| 550 | def _parse_task_file( |
| 551 | self, |
| 552 | data, |
| 553 | file_path: str, |
| 554 | store: GraphStore, |
| 555 | stats: dict, |
| 556 | ) -> None: |
| 557 | """Parse a standalone task file (roles/*/tasks/main.yml or included file).""" |
| 558 | if not isinstance(data, list): |
| 559 | return |
| 560 | |
| 561 | # Use file stem as a synthetic parent class |
| 562 | parent_name = Path(file_path).stem |
| 563 | store.create_node( |
| 564 | NodeLabel.Class, |
| 565 | { |
| 566 | "name": parent_name, |
| 567 | "file_path": file_path, |
| 568 | "line_start": 0, |
| 569 | "line_end": 0, |
| 570 | "docstring": "", |
| 571 | "semantic_type": "ansible_play", |
| 572 | }, |
| 573 | ) |
| 574 | store.create_edge( |
| 575 | NodeLabel.File, |
| 576 | {"path": file_path}, |
| 577 | EdgeType.CONTAINS, |
| 578 | NodeLabel.Class, |
| 579 | {"name": parent_name, "file_path": file_path}, |
| 580 | ) |
| 581 | stats["classes"] += 1 |
| 582 | stats["edges"] += 1 |
| 583 | |
| 584 | for task_dict in data: |
| 585 | if isinstance(task_dict, dict): |
| 586 | self._parse_task(task_dict, file_path, parent_name, store, stats) |
| 587 | |
| 588 | def _parse_handler_file( |
| 589 | self, |
| 590 | data, |
| 591 | file_path: str, |
| 592 | store: GraphStore, |
| 593 | stats: dict, |
| 594 | ) -> None: |
| 595 | """Parse a standalone handler file (roles/*/handlers/main.yml).""" |
| 596 | if not isinstance(data, list): |
| 597 | return |
| 598 | |
| 599 | parent_name = Path(file_path).stem |
| 600 | store.create_node( |
| 601 | NodeLabel.Class, |
| 602 | { |
| 603 | "name": parent_name, |
| 604 | "file_path": file_path, |
| 605 | "line_start": 0, |
| 606 | "line_end": 0, |
| 607 | "docstring": "", |
| 608 | "semantic_type": "ansible_play", |
| 609 | }, |
| 610 | ) |
| 611 | store.create_edge( |
| 612 | NodeLabel.File, |
| 613 | {"path": file_path}, |
| 614 | EdgeType.CONTAINS, |
| 615 | NodeLabel.Class, |
| 616 | {"name": parent_name, "file_path": file |
| --- a/navegador/ingestion/bash.py | ||
| +++ b/navegador/ingestion/bash.py | ||
| @@ -0,0 +1,263 @@ | ||
| 1 | +""" | |
| 2 | +Bash/Shell script parser — extracts functions, top-level variables, | |
| 3 | +source/. imports, and call edges from .sh/.bash files using tree-sitter. | |
| 4 | +""" | |
| 5 | + | |
| 6 | +import logging | |
| 7 | +from pathlib import Path | |
| 8 | + | |
| 9 | +from navegador.graph.schema import EdgeType, NodeLabel | |
| 10 | +from navegador.graph.store import GraphStore | |
| 11 | +from navegador.ingestion.parser import LanguageParser | |
| 12 | + | |
| 13 | +logger = logging.getLogger(__name__) | |
| 14 | + | |
| 15 | + | |
def _get_bash_language():
    """Load and wrap the tree-sitter Bash grammar.

    Raises
    ------
    ImportError
        If the optional ``tree-sitter-bash`` package is not installed.
    """
    try:
        from tree_sitter import Language
        import tree_sitter_bash as tsbash  # type: ignore[import]

        return Language(tsbash.language())
    except ImportError as e:
        raise ImportError("Install tree-sitter-bash: pip install tree-sitter-bash") from e
| 24 | + | |
| 25 | + | |
| 26 | +def _node_text(node, source: bytes) -> str: | |
| 27 | + return source[node.start_byte : node.end_byte].decode("utf-8", errors="replace") | |
| 28 | + | |
| 29 | + | |
| 30 | +class BashParser(LanguageParser): | |
| 31 | + """Parses Bash/Shell script files into the navegador graph.""" | |
| 32 | + | |
| 33 | + def __init__(self) -> None: | |
| 34 | + from tree_sitter import Parser # type: ignore[import] | |
| 35 | + | |
| 36 | + self._parser = Parser(_get_bash_language()) | |
| 37 | + | |
| 38 | + def parse_file(self, path: Path, repo_root: Path, store: GraphStore) -> dict[str, int]: | |
| 39 | + source = path.read_bytes() | |
| 40 | + tree = self._parser.parse(source) | |
| 41 | + rel_path = str(path.relative_to(repo_root)) | |
| 42 | + | |
| 43 | + store.create_node( | |
| 44 | + NodeLabel.File, | |
| 45 | + { | |
| 46 | + "name": path.name, | |
| 47 | + "path": rel_path, | |
| 48 | + "language": "bash", | |
| 49 | + "line_count": source.count(b"\n"), | |
| 50 | + }, | |
| 51 | + ) | |
| 52 | + | |
| 53 | + stats = {"functions": 0, "classes": 0, "edges": 0} | |
| 54 | + self._walk(tree.root_node, source, rel_path, store, stats) | |
| 55 | + return stats | |
| 56 | + | |
| 57 | + # ── AST walker ──────────────────────────────────────────────────────────── | |
| 58 | + | |
| 59 | + def _walk(self, node, source: bytes, file_path: str, store: GraphStore, stats: dict) -> None: | |
| 60 | + if node.type == "function_definition": | |
| 61 | + self._handle_function(node, source, file_path, store, stats) | |
| 62 | + return | |
| 63 | + if node.type == "variable_assignment": | |
| 64 | + self._handle_variable(node, source, file_path, store, stats) | |
| 65 | + return | |
| 66 | + if node.type == "command": | |
| 67 | + self._handle_command(node, source, file_path, store, stats) | |
| 68 | + return | |
| 69 | + for child in node.children: | |
| 70 | + self._walk(child, source, file_path, store, stats) | |
| 71 | + | |
| 72 | + # ── Handlers ────────────────────────────────────────────────────────────── | |
| 73 | + | |
| 74 | + def _handle_function( | |
| 75 | + self, | |
| 76 | + node, | |
| 77 | + source: bytes, | |
| 78 | + file_path: str, | |
| 79 | + store: GraphStore, | |
| 80 | + stats: dict, | |
| 81 | + ) -> None: | |
| 82 | + name_node = node.child_by_field_name("name") | |
| 83 | + if not name_node: | |
| 84 | + return | |
| 85 | + name = _node_text(name_node, source) | |
| 86 | + | |
| 87 | + store.create_node( | |
| 88 | + NodeLabel.Function, | |
| 89 | + { | |
| 90 | + "name": name, | |
| 91 | + "file_path": file_path, | |
| 92 | + "line_start": node.start_point[0] + 1, | |
| 93 | + "line_end": node.end_point[0] + 1, | |
| 94 | + "docstring": "", | |
| 95 | + "semantic_type": "shell_function", | |
| 96 | + }, | |
| 97 | + ) | |
| 98 | + | |
| 99 | + store.create_edge( | |
| 100 | + NodeLabel.File, | |
| 101 | + {"path": file_path}, | |
| 102 | + EdgeType.CONTAINS, | |
| 103 | + NodeLabel.Function, | |
| 104 | + {"name": name, "file_path": file_path}, | |
| 105 | + ) | |
| 106 | + stats["functions"] += 1 | |
| 107 | + stats["edges"] += 1 | |
| 108 | + | |
| 109 | + self._extract_calls(node, source, file_path, name, store, stats) | |
| 110 | + | |
| 111 | + def _handle_variable( | |
| 112 | + self, | |
| 113 | + node, | |
| 114 | + source: bytes, | |
| 115 | + file_path: str, | |
| 116 | + store: GraphStore, | |
| 117 | + stats: dict, | |
| 118 | + ) -> None: | |
| 119 | + # Only track top-level variable assignments (parent is program) | |
| 120 | + if node.parent is None or node.parent.type not in ("program", "source_file"): | |
| 121 | + return | |
| 122 | + | |
| 123 | + name_node = node.child_by_field_name("name") | |
| 124 | + if not name_node: | |
| 125 | + return | |
| 126 | + name = _node_text(name_node, source) | |
| 127 | + | |
| 128 | + value_node = node.child_by_field_name("value") | |
| 129 | + value = _node_text(value_node, source) if value_node else "" | |
| 130 | + | |
| 131 | + store.create_node( | |
| 132 | + NodeLabel.Variable, | |
| 133 | + { | |
| 134 | + "name": name, | |
| 135 | + "file_path": file_path, | |
| 136 | + "line_start": node.start_point[0] + 1, | |
| 137 | + "line_end": node.end_point[0] + 1, | |
| 138 | + "semantic_type": "shell_variable", | |
| 139 | + "value": value, | |
| 140 | + }, | |
| 141 | + ) | |
| 142 | + | |
| 143 | + store.create_edge( | |
| 144 | + NodeLabel.File, | |
| 145 | + {"path": file_path}, | |
| 146 | + EdgeType.CONTAINS, | |
| 147 | + NodeLabel.Variable, | |
| 148 | + {"name": name, "file_path": file_path}, | |
| 149 | + ) | |
| 150 | + stats["edges"] += 1 | |
| 151 | + | |
| 152 | + def _handle_command( | |
| 153 | + self, | |
| 154 | + node, | |
| 155 | + source: bytes, | |
| 156 | + file_path: str, | |
| 157 | + store: GraphStore, | |
| 158 | + stats: dict, | |
| 159 | + ) -> None: | |
| 160 | + """Handle source/. commands as imports.""" | |
| 161 | + name_node = node.child_by_field_name("name") | |
| 162 | + if not name_node: | |
| 163 | + return | |
| 164 | + cmd_name = _node_text(name_node, source) | |
| 165 | + | |
| 166 | + # Only handle source and . (dot-source) commands | |
| 167 | + if cmd_name not in ("source", "."): | |
| 168 | + return | |
| 169 | + | |
| 170 | + # The sourced file path is the first argument | |
| 171 | + arg_types = ("word", "string", "raw_string", "concatenation") | |
| 172 | + args = [child for child in node.children if child != name_node and child.type in arg_types] | |
| 173 | + if not args: | |
| 174 | + return | |
| 175 | + sourced_path = _node_text(args[0], source).strip("'\"") | |
| 176 | + | |
| 177 | + store.create_node( | |
| 178 | + NodeLabel.Import, | |
| 179 | + { | |
| 180 | + "name": sourced_path, | |
| 181 | + "file_path": file_path, | |
| 182 | + "line_start": node.start_point[0] + 1, | |
| 183 | + "module": sourced_path, | |
| 184 | + "semantic_type": "shell_source", | |
| 185 | + }, | |
| 186 | + ) | |
| 187 | + | |
| 188 | + store.create_edge( | |
| 189 | + NodeLabel.File, | |
| 190 | + {"path": file_path}, | |
| 191 | + EdgeType.IMPORTS, | |
| 192 | + NodeLabel.Import, | |
| 193 | + {"name": sourced_path, "file_path": file_path}, | |
| 194 | + ) | |
| 195 | + stats["edges"] += 1 | |
| 196 | + | |
| 197 | + # ── Call extraction ─────────────────────────────────────────────────────── | |
| 198 | + | |
| 199 | + def _extract_calls( | |
| 200 | + self, | |
| 201 | + fn_node, | |
| 202 | + source: bytes, | |
| 203 | + file_path: str, | |
| 204 | + fn_name: str, | |
| 205 | + store: GraphStore, | |
| 206 | + stats: dict, | |
| 207 | + ) -> None: | |
| 208 | + def walk(node): | |
| 209 | + if node.type == "command": | |
| 210 | + name_node = node.child_by_field_name("name") | |
| 211 | + if name_node: | |
| 212 | + callee = _node_text(name_node, source) | |
| 213 | + # Skip builtins and source commands — only track function calls | |
| 214 | + if callee not in ( | |
| 215 | + "source", | |
| 216 | + ".", | |
| 217 | + "echo", | |
| 218 | + "printf", | |
| 219 | + "cd", | |
| 220 | + "exit", | |
| 221 | + "return", | |
| 222 | + "export", | |
| 223 | + "local", | |
| 224 | + "readonly", | |
| 225 | + "declare", | |
| 226 | + "typeset", | |
| 227 | + "unset", | |
| 228 | + "shift", | |
| 229 | + "set", | |
| 230 | + "eval", | |
| 231 | + "exec", | |
| 232 | + "test", | |
| 233 | + "[", | |
| 234 | + "[[", | |
| 235 | + "true", | |
| 236 | + "false", | |
| 237 | + ":", | |
| 238 | + "read", | |
| 239 | + "if", | |
| 240 | + "then", | |
| 241 | + "else", | |
| 242 | + "fi", | |
| 243 | + "for", | |
| 244 | + "while", | |
| 245 | + "do", | |
| 246 | + "done", | |
| 247 | + "case", | |
| 248 | + "esac", | |
| 249 | + ): | |
| 250 | + store.create_edge( | |
| 251 | + NodeLabel.Function, | |
| 252 | + {"name": fn_name, "file_path": file_path}, | |
| 253 | + EdgeType.CALLS, | |
| 254 | + NodeLabel.Function, | |
| 255 | + {"name": callee, "file_path": file_path}, | |
| 256 | + ) | |
| 257 | + stats["edges"] += 1 | |
| 258 | + for child in node.children: | |
| 259 | + walk(child) | |
| 260 | + | |
| 261 | + body = fn_node.child_by_field_name("body") | |
| 262 | + if body: | |
| 263 | + walk(body) |
| --- a/navegador/ingestion/bash.py | |
| +++ b/navegador/ingestion/bash.py | |
| @@ -0,0 +1,263 @@ | |
| --- a/navegador/ingestion/bash.py | |
| +++ b/navegador/ingestion/bash.py | |
| @@ -0,0 +1,263 @@ | |
| 1 | """ |
| 2 | Bash/Shell script parser — extracts functions, top-level variables, |
| 3 | source/. imports, and call edges from .sh/.bash files using tree-sitter. |
| 4 | """ |
| 5 | |
| 6 | import logging |
| 7 | from pathlib import Path |
| 8 | |
| 9 | from navegador.graph.schema import EdgeType, NodeLabel |
| 10 | from navegador.graph.store import GraphStore |
| 11 | from navegador.ingestion.parser import LanguageParser |
| 12 | |
| 13 | logger = logging.getLogger(__name__) |
| 14 | |
| 15 | |
def _get_bash_language():
    """Load the tree-sitter Bash grammar, raising an actionable error if the
    optional dependency is not installed."""
    try:
        from tree_sitter import Language
        import tree_sitter_bash as bash_grammar  # type: ignore[import]

        return Language(bash_grammar.language())
    except ImportError as e:
        raise ImportError("Install tree-sitter-bash: pip install tree-sitter-bash") from e
| 24 | |
| 25 | |
def _node_text(node, source: bytes) -> str:
    """UTF-8 decode the byte span covered by *node* (bad bytes are replaced)."""
    span = source[node.start_byte : node.end_byte]
    return span.decode("utf-8", errors="replace")
| 28 | |
| 29 | |
class BashParser(LanguageParser):
    """Ingests Bash/Shell scripts into the navegador graph: functions,
    top-level variables, ``source``/``.`` imports, and a best-effort
    intra-file call graph."""

    # Shell builtins and keywords that should never become CALLS edges.
    _SKIP_COMMANDS = frozenset({
        "source", ".", "echo", "printf", "cd", "exit", "return", "export",
        "local", "readonly", "declare", "typeset", "unset", "shift", "set",
        "eval", "exec", "test", "[", "[[", "true", "false", ":", "read",
        "if", "then", "else", "fi", "for", "while", "do", "done", "case",
        "esac",
    })

    def __init__(self) -> None:
        from tree_sitter import Parser  # type: ignore[import]

        self._parser = Parser(_get_bash_language())

    def parse_file(self, path: Path, repo_root: Path, store: GraphStore) -> dict[str, int]:
        """Parse *path*, record its nodes/edges in *store*, and return counters."""
        raw = path.read_bytes()
        syntax_tree = self._parser.parse(raw)
        rel_path = str(path.relative_to(repo_root))

        store.create_node(
            NodeLabel.File,
            {
                "name": path.name,
                "path": rel_path,
                "language": "bash",
                "line_count": raw.count(b"\n"),
            },
        )

        counters = {"functions": 0, "classes": 0, "edges": 0}
        self._walk(syntax_tree.root_node, raw, rel_path, store, counters)
        return counters

    # ── AST walker ────────────────────────────────────────────────────────────

    def _walk(self, node, source: bytes, file_path: str, store: GraphStore, stats: dict) -> None:
        """Dispatch interesting node types; recurse through everything else."""
        handler = {
            "function_definition": self._handle_function,
            "variable_assignment": self._handle_variable,
            "command": self._handle_command,
        }.get(node.type)
        if handler is not None:
            handler(node, source, file_path, store, stats)
            return
        for child in node.children:
            self._walk(child, source, file_path, store, stats)

    # ── Handlers ──────────────────────────────────────────────────────────────

    def _handle_function(
        self,
        node,
        source: bytes,
        file_path: str,
        store: GraphStore,
        stats: dict,
    ) -> None:
        """Record a shell function, its CONTAINS edge, and its call edges."""
        ident = node.child_by_field_name("name")
        if not ident:
            return
        fn_name = _node_text(ident, source)

        store.create_node(
            NodeLabel.Function,
            {
                "name": fn_name,
                "file_path": file_path,
                "line_start": node.start_point[0] + 1,
                "line_end": node.end_point[0] + 1,
                "docstring": "",
                "semantic_type": "shell_function",
            },
        )

        store.create_edge(
            NodeLabel.File,
            {"path": file_path},
            EdgeType.CONTAINS,
            NodeLabel.Function,
            {"name": fn_name, "file_path": file_path},
        )
        stats["functions"] += 1
        stats["edges"] += 1

        self._extract_calls(node, source, file_path, fn_name, store, stats)

    def _handle_variable(
        self,
        node,
        source: bytes,
        file_path: str,
        store: GraphStore,
        stats: dict,
    ) -> None:
        """Record a top-level variable assignment (script-scope only)."""
        parent = node.parent
        if parent is None or parent.type not in ("program", "source_file"):
            return

        ident = node.child_by_field_name("name")
        if not ident:
            return
        var_name = _node_text(ident, source)

        value_node = node.child_by_field_name("value")
        assigned = _node_text(value_node, source) if value_node else ""

        store.create_node(
            NodeLabel.Variable,
            {
                "name": var_name,
                "file_path": file_path,
                "line_start": node.start_point[0] + 1,
                "line_end": node.end_point[0] + 1,
                "semantic_type": "shell_variable",
                "value": assigned,
            },
        )

        store.create_edge(
            NodeLabel.File,
            {"path": file_path},
            EdgeType.CONTAINS,
            NodeLabel.Variable,
            {"name": var_name, "file_path": file_path},
        )
        stats["edges"] += 1

    def _handle_command(
        self,
        node,
        source: bytes,
        file_path: str,
        store: GraphStore,
        stats: dict,
    ) -> None:
        """Record ``source``/``.`` commands as Import nodes with IMPORTS edges."""
        name_node = node.child_by_field_name("name")
        if not name_node:
            return
        if _node_text(name_node, source) not in ("source", "."):
            return

        # The sourced file path is the first argument-like child after the name.
        accepted = ("word", "string", "raw_string", "concatenation")
        target = next(
            (c for c in node.children if c != name_node and c.type in accepted),
            None,
        )
        if target is None:
            return
        sourced = _node_text(target, source).strip("'\"")

        store.create_node(
            NodeLabel.Import,
            {
                "name": sourced,
                "file_path": file_path,
                "line_start": node.start_point[0] + 1,
                "module": sourced,
                "semantic_type": "shell_source",
            },
        )

        store.create_edge(
            NodeLabel.File,
            {"path": file_path},
            EdgeType.IMPORTS,
            NodeLabel.Import,
            {"name": sourced, "file_path": file_path},
        )
        stats["edges"] += 1

    # ── Call extraction ───────────────────────────────────────────────────────

    def _extract_calls(
        self,
        fn_node,
        source: bytes,
        file_path: str,
        fn_name: str,
        store: GraphStore,
        stats: dict,
    ) -> None:
        """Emit CALLS edges from *fn_name* to every non-builtin command found
        inside the function body."""

        def visit(current) -> None:
            if current.type == "command":
                ident = current.child_by_field_name("name")
                if ident:
                    callee = _node_text(ident, source)
                    if callee not in self._SKIP_COMMANDS:
                        store.create_edge(
                            NodeLabel.Function,
                            {"name": fn_name, "file_path": file_path},
                            EdgeType.CALLS,
                            NodeLabel.Function,
                            {"name": callee, "file_path": file_path},
                        )
                        stats["edges"] += 1
            for child in current.children:
                visit(child)

        body = fn_node.child_by_field_name("body")
        if body:
            visit(body)
| --- a/navegador/ingestion/hcl.py | ||
| +++ b/navegador/ingestion/hcl.py | ||
| @@ -0,0 +1,490 @@ | ||
| 1 | +""" | |
| 2 | +HCL/Terraform parser — extracts resources, data sources, providers, | |
| 3 | +variables, outputs, modules, and locals from .tf files using tree-sitter. | |
| 4 | +""" | |
| 5 | + | |
| 6 | +import logging | |
| 7 | +import re | |
| 8 | +from pathlib import Path | |
| 9 | + | |
| 10 | +from navegador.graph.schema import EdgeType, NodeLabel | |
| 11 | +from navegador.graph.store import GraphStore | |
| 12 | +from navegador.ingestion.parser import LanguageParser | |
| 13 | + | |
| 14 | +logger = logging.getLogger(__name__) | |
| 15 | + | |
# Patterns for reference extraction from expression text
_VAR_REF = re.compile(r"\bvar\.(\w+)")
_LOCAL_REF = re.compile(r"\blocal\.(\w+)")
_MODULE_REF = re.compile(r"\bmodule\.(\w+)")
_DATA_REF = re.compile(r"\bdata\.(\w+)\.(\w+)")
# Bare resource references such as ``aws_instance.web``.  The negative
# lookbehinds keep qualified forms out of this pattern: ``data.aws_ami.x``
# is a data-source reference (handled by _DATA_REF), while
# ``module.aws_vpc.id`` / ``var.aws_tags.Name`` / ``local.aws_cfg.id``
# address a module output, a variable attribute, or a local value whose
# *name* merely starts with a provider prefix — none of those are resource
# dependencies.  (Previously only ``data.`` was excluded, producing spurious
# DEPENDS_ON edges for module/var/local names with provider-like prefixes.)
_RESOURCE_REF = re.compile(
    r"(?<!\bdata\.)(?<!\bvar\.)(?<!\blocal\.)(?<!\bmodule\.)"
    r"\b(aws_\w+|google_\w+|azurerm_\w+|azuread_\w+|oci_\w+|digitalocean_\w+"
    r"|cloudflare_\w+|helm_\w+|kubernetes_\w+|null_\w+|random_\w+"
    r"|local_\w+|tls_\w+|template_\w+|archive_\w+|external_\w+)\.(\w+)"
)
| 27 | + | |
| 28 | + | |
def _get_hcl_language():
    """Load the tree-sitter HCL grammar, raising an actionable error if the
    optional dependency is not installed."""
    try:
        from tree_sitter import Language
        import tree_sitter_hcl as hcl_grammar  # type: ignore[import]

        return Language(hcl_grammar.language())
    except ImportError as e:
        raise ImportError("Install tree-sitter-hcl: pip install tree-sitter-hcl") from e
| 37 | + | |
| 38 | + | |
def _node_text(node, source: bytes) -> str:
    """UTF-8 decode the byte span covered by *node* (bad bytes are replaced)."""
    span = source[node.start_byte : node.end_byte]
    return span.decode("utf-8", errors="replace")
| 41 | + | |
| 42 | + | |
def _string_lit_text(node, source: bytes) -> str:
    """Return the inner text of a ``string_lit`` node, without the quotes."""
    inner = next((c for c in node.children if c.type == "template_literal"), None)
    if inner is not None:
        return _node_text(inner, source)
    # No template_literal child: fall back to trimming quote characters
    # from the raw node text.
    return _node_text(node, source).strip('"').strip("'")
| 51 | + | |
| 52 | + | |
class HCLParser(LanguageParser):
    """Parses HCL/Terraform files into the navegador graph.

    Block kinds are mapped onto the generic graph schema:

    * ``resource`` / ``data`` / ``provider`` blocks -> ``Class`` nodes
      (distinguished by ``semantic_type``),
    * ``variable`` / ``output`` / ``locals`` entries -> ``Variable`` nodes,
    * ``module`` blocks -> ``Module`` nodes.

    Cross-references (``var.x``, ``local.x``, ``module.x``, ``data.t.n``,
    ``<resource_type>.<name>``) are extracted textually by regex from block
    bodies and recorded as REFERENCES / DEPENDS_ON edges.
    """

    def __init__(self) -> None:
        # Imported lazily so this module can be imported without the optional
        # tree-sitter dependency installed.
        from tree_sitter import Parser  # type: ignore[import]

        self._parser = Parser(_get_hcl_language())

    def parse_file(self, path: Path, repo_root: Path, store: GraphStore) -> dict[str, int]:
        """Parse one ``.tf``/``.hcl`` file and ingest it into *store*.

        Returns a stats dict with ``functions``/``classes``/``edges`` counts
        (variables, outputs, and locals are tallied under ``functions``).
        """
        source = path.read_bytes()
        tree = self._parser.parse(source)
        rel_path = str(path.relative_to(repo_root))

        store.create_node(
            NodeLabel.File,
            {
                "name": path.name,
                "path": rel_path,
                "language": "hcl",
                # NOTE(review): counts newline bytes, so a file without a
                # trailing newline reports one less than its visual line count.
                "line_count": source.count(b"\n"),
            },
        )

        stats = {"functions": 0, "classes": 0, "edges": 0}
        self._walk(tree.root_node, source, rel_path, store, stats)
        return stats

    # ── AST walker ──────────────────────────────────────────────────────────

    def _walk(self, node, source: bytes, file_path: str, store: GraphStore, stats: dict) -> None:
        """Walk the top-level body looking for block nodes."""
        # Handles both grammar shapes: a root with a ``body`` wrapper and a
        # root whose blocks are direct children.
        for child in node.children:
            if child.type == "body":
                for body_child in child.children:
                    if body_child.type == "block":
                        self._handle_block(body_child, source, file_path, store, stats)
            elif child.type == "block":
                self._handle_block(child, source, file_path, store, stats)

    def _handle_block(
        self, node, source: bytes, file_path: str, store: GraphStore, stats: dict
    ) -> None:
        """Dispatch a block based on its block-type identifier."""
        block_type = None
        labels: list[str] = []
        body_node = None

        # A block looks like:  <identifier> <string_lit>* { <body> }
        for child in node.children:
            if child.type == "identifier" and block_type is None:
                block_type = _node_text(child, source)
            elif child.type == "string_lit":
                labels.append(_string_lit_text(child, source))
            elif child.type == "body":
                body_node = child

        if not block_type:
            return

        # Label-count guards mirror Terraform syntax: resource/data carry
        # two labels (type, name); the others carry one.
        if block_type == "resource" and len(labels) >= 2:
            self._handle_resource(node, source, file_path, store, stats, labels, body_node)
        elif block_type == "data" and len(labels) >= 2:
            self._handle_data(node, source, file_path, store, stats, labels, body_node)
        elif block_type == "provider" and len(labels) >= 1:
            self._handle_provider(node, source, file_path, store, stats, labels, body_node)
        elif block_type == "variable" and len(labels) >= 1:
            self._handle_variable(node, source, file_path, store, stats, labels, body_node)
        elif block_type == "output" and len(labels) >= 1:
            self._handle_output(node, source, file_path, store, stats, labels, body_node)
        elif block_type == "module" and len(labels) >= 1:
            self._handle_module(node, source, file_path, store, stats, labels, body_node)
        elif block_type == "locals":
            self._handle_locals(node, source, file_path, store, stats, body_node)
        elif block_type == "terraform":
            pass  # Configuration block, skip
        else:
            logger.debug("Skipping unknown HCL block type: %s", block_type)

    # ── Handlers ────────────────────────────────────────────────────────────

    def _handle_resource(
        self,
        node,
        source: bytes,
        file_path: str,
        store: GraphStore,
        stats: dict,
        labels: list[str],
        body_node,
    ) -> None:
        """Ingest a ``resource "<type>" "<name>"`` block as a Class node
        named ``<type>.<name>`` and scan its body for references."""
        name = f"{labels[0]}.{labels[1]}"
        store.create_node(
            NodeLabel.Class,
            {
                "name": name,
                "file_path": file_path,
                "line_start": node.start_point[0] + 1,
                "line_end": node.end_point[0] + 1,
                "docstring": "",
                "semantic_type": "terraform_resource",
            },
        )
        store.create_edge(
            NodeLabel.File,
            {"path": file_path},
            EdgeType.CONTAINS,
            NodeLabel.Class,
            {"name": name, "file_path": file_path},
        )
        stats["classes"] += 1
        stats["edges"] += 1

        if body_node:
            self._extract_references(
                body_node, source, file_path, name, NodeLabel.Class, store, stats
            )

    def _handle_data(
        self,
        node,
        source: bytes,
        file_path: str,
        store: GraphStore,
        stats: dict,
        labels: list[str],
        body_node,
    ) -> None:
        """Ingest a ``data "<type>" "<name>"`` block as a Class node named
        ``<type>.<name>`` with semantic_type ``terraform_data``."""
        name = f"{labels[0]}.{labels[1]}"
        store.create_node(
            NodeLabel.Class,
            {
                "name": name,
                "file_path": file_path,
                "line_start": node.start_point[0] + 1,
                "line_end": node.end_point[0] + 1,
                "docstring": "",
                "semantic_type": "terraform_data",
            },
        )
        store.create_edge(
            NodeLabel.File,
            {"path": file_path},
            EdgeType.CONTAINS,
            NodeLabel.Class,
            {"name": name, "file_path": file_path},
        )
        stats["classes"] += 1
        stats["edges"] += 1

        if body_node:
            self._extract_references(
                body_node, source, file_path, name, NodeLabel.Class, store, stats
            )

    def _handle_provider(
        self,
        node,
        source: bytes,
        file_path: str,
        store: GraphStore,
        stats: dict,
        labels: list[str],
        body_node,
    ) -> None:
        """Ingest a ``provider "<name>"`` block as a Class node.

        NOTE(review): provider names carry no dot, so they cannot collide
        with resource/data names, which are always ``type.name``.
        """
        name = labels[0]
        store.create_node(
            NodeLabel.Class,
            {
                "name": name,
                "file_path": file_path,
                "line_start": node.start_point[0] + 1,
                "line_end": node.end_point[0] + 1,
                "docstring": "",
                "semantic_type": "terraform_provider",
            },
        )
        store.create_edge(
            NodeLabel.File,
            {"path": file_path},
            EdgeType.CONTAINS,
            NodeLabel.Class,
            {"name": name, "file_path": file_path},
        )
        stats["classes"] += 1
        stats["edges"] += 1

        if body_node:
            self._extract_references(
                body_node, source, file_path, name, NodeLabel.Class, store, stats
            )

    def _handle_variable(
        self,
        node,
        source: bytes,
        file_path: str,
        store: GraphStore,
        stats: dict,
        labels: list[str],
        body_node,
    ) -> None:
        """Ingest a ``variable "<name>"`` block as a Variable node.

        The body (default/validation expressions) is not scanned for
        references.
        """
        name = labels[0]
        store.create_node(
            NodeLabel.Variable,
            {
                "name": name,
                "file_path": file_path,
                "line_start": node.start_point[0] + 1,
                "line_end": node.end_point[0] + 1,
                "semantic_type": "terraform_variable",
            },
        )
        store.create_edge(
            NodeLabel.File,
            {"path": file_path},
            EdgeType.CONTAINS,
            NodeLabel.Variable,
            {"name": name, "file_path": file_path},
        )
        # NOTE(review): variables are tallied under "functions" — the stats
        # dict has no dedicated variable bucket.
        stats["functions"] += 1
        stats["edges"] += 1

    def _handle_output(
        self,
        node,
        source: bytes,
        file_path: str,
        store: GraphStore,
        stats: dict,
        labels: list[str],
        body_node,
    ) -> None:
        """Ingest an ``output "<name>"`` block as a Variable node and scan
        its ``value`` expression for references."""
        name = labels[0]
        store.create_node(
            NodeLabel.Variable,
            {
                "name": name,
                "file_path": file_path,
                "line_start": node.start_point[0] + 1,
                "line_end": node.end_point[0] + 1,
                "semantic_type": "terraform_output",
            },
        )
        store.create_edge(
            NodeLabel.File,
            {"path": file_path},
            EdgeType.CONTAINS,
            NodeLabel.Variable,
            {"name": name, "file_path": file_path},
        )
        # NOTE(review): outputs are tallied under "functions" as well.
        stats["functions"] += 1
        stats["edges"] += 1

        if body_node:
            self._extract_references(
                body_node, source, file_path, name, NodeLabel.Variable, store, stats
            )

    def _handle_module(
        self,
        node,
        source: bytes,
        file_path: str,
        store: GraphStore,
        stats: dict,
        labels: list[str],
        body_node,
    ) -> None:
        """Ingest a ``module "<name>"`` block as a Module node, capturing its
        ``source`` attribute for later cross-file resolution."""
        name = labels[0]
        source_attr = ""
        if body_node:
            source_attr = self._get_attribute_value(body_node, "source", source)

        store.create_node(
            NodeLabel.Module,
            {
                "name": name,
                "file_path": file_path,
                "line_start": node.start_point[0] + 1,
                "line_end": node.end_point[0] + 1,
                "semantic_type": "terraform_module",
                "source": source_attr,
            },
        )
        store.create_edge(
            NodeLabel.File,
            {"path": file_path},
            EdgeType.CONTAINS,
            NodeLabel.Module,
            {"name": name, "file_path": file_path},
        )
        # Modules count toward "classes" in the returned stats.
        stats["classes"] += 1
        stats["edges"] += 1

        if body_node:
            self._extract_references(
                body_node, source, file_path, name, NodeLabel.Module, store, stats
            )

    def _handle_locals(
        self,
        node,
        source: bytes,
        file_path: str,
        store: GraphStore,
        stats: dict,
        body_node,
    ) -> None:
        """Ingest each attribute of a ``locals`` block as its own Variable
        node and scan its expression for references."""
        if not body_node:
            return

        for child in body_node.children:
            if child.type == "attribute":
                # The first identifier child is the local's name.
                attr_name = None
                for attr_child in child.children:
                    if attr_child.type == "identifier":
                        attr_name = _node_text(attr_child, source)
                        break

                if not attr_name:
                    continue

                store.create_node(
                    NodeLabel.Variable,
                    {
                        "name": attr_name,
                        "file_path": file_path,
                        "line_start": child.start_point[0] + 1,
                        "line_end": child.end_point[0] + 1,
                        "semantic_type": "terraform_local",
                    },
                )
                store.create_edge(
                    NodeLabel.File,
                    {"path": file_path},
                    EdgeType.CONTAINS,
                    NodeLabel.Variable,
                    {"name": attr_name, "file_path": file_path},
                )
                # NOTE(review): locals are tallied under "functions" as well.
                stats["functions"] += 1
                stats["edges"] += 1

                # Extract references from the attribute expression
                self._extract_references(
                    child, source, file_path, attr_name, NodeLabel.Variable, store, stats
                )

    # ── Reference extraction ────────────────────────────────────────────────

    def _extract_references(
        self,
        node,
        source: bytes,
        file_path: str,
        from_name: str,
        # NOTE(review): annotated ``str`` but callers pass NodeLabel members —
        # presumably NodeLabel is string-compatible; confirm against schema.
        from_label: str,
        store: GraphStore,
        stats: dict,
    ) -> None:
        """Scan expression text for var.X, local.X, module.X, data.T.N, and resource references."""
        # Purely textual scan of the node's source span; targets are assumed
        # to live in the same file (cross-file resolution is the enricher's job).
        text = _node_text(node, source)

        # var.xxx → REFERENCES edge to terraform_variable
        for match in _VAR_REF.finditer(text):
            var_name = match.group(1)
            store.create_edge(
                from_label,
                {"name": from_name, "file_path": file_path},
                EdgeType.REFERENCES,
                NodeLabel.Variable,
                {"name": var_name, "file_path": file_path},
            )
            stats["edges"] += 1

        # local.xxx → REFERENCES edge to terraform_local
        for match in _LOCAL_REF.finditer(text):
            local_name = match.group(1)
            store.create_edge(
                from_label,
                {"name": from_name, "file_path": file_path},
                EdgeType.REFERENCES,
                NodeLabel.Variable,
                {"name": local_name, "file_path": file_path},
            )
            stats["edges"] += 1

        # module.xxx → REFERENCES edge to terraform_module
        for match in _MODULE_REF.finditer(text):
            mod_name = match.group(1)
            store.create_edge(
                from_label,
                {"name": from_name, "file_path": file_path},
                EdgeType.REFERENCES,
                NodeLabel.Module,
                {"name": mod_name, "file_path": file_path},
            )
            stats["edges"] += 1

        # data.type.name → DEPENDS_ON edge to terraform_data
        for match in _DATA_REF.finditer(text):
            data_name = f"{match.group(1)}.{match.group(2)}"
            store.create_edge(
                from_label,
                {"name": from_name, "file_path": file_path},
                EdgeType.DEPENDS_ON,
                NodeLabel.Class,
                {"name": data_name, "file_path": file_path},
            )
            stats["edges"] += 1

        # resource_type.resource_name → DEPENDS_ON edge to terraform_resource
        for match in _RESOURCE_REF.finditer(text):
            resource_name = f"{match.group(1)}.{match.group(2)}"
            store.create_edge(
                from_label,
                {"name": from_name, "file_path": file_path},
                EdgeType.DEPENDS_ON,
                NodeLabel.Class,
                {"name": resource_name, "file_path": file_path},
            )
            stats["edges"] += 1

    # ── Helpers ─────────────────────────────────────────────────────────────

    def _get_attribute_value(self, body_node, attr_name: str, source: bytes) -> str:
        """Extract the string value of a named attribute from a body node."""
        for child in body_node.children:
            if child.type == "attribute":
                ident = None
                expr = None
                # Keep the last named non-identifier child as the expression
                # (skips anonymous tokens such as "=").
                for attr_child in child.children:
                    if attr_child.type == "identifier":
                        ident = _node_text(attr_child, source)
                    elif attr_child.type == "expression" or attr_child.is_named:
                        expr = attr_child
                if ident == attr_name and expr is not None:
                    text = _node_text(expr, source).strip().strip('"').strip("'")
                    return text
        return ""
| --- a/navegador/ingestion/hcl.py | |
| +++ b/navegador/ingestion/hcl.py | |
| @@ -0,0 +1,490 @@ | |
| --- a/navegador/ingestion/hcl.py | |
| +++ b/navegador/ingestion/hcl.py | |
| @@ -0,0 +1,490 @@ | |
| 1 | """ |
| 2 | HCL/Terraform parser — extracts resources, data sources, providers, |
| 3 | variables, outputs, modules, and locals from .tf files using tree-sitter. |
| 4 | """ |
| 5 | |
| 6 | import logging |
| 7 | import re |
| 8 | from pathlib import Path |
| 9 | |
| 10 | from navegador.graph.schema import EdgeType, NodeLabel |
| 11 | from navegador.graph.store import GraphStore |
| 12 | from navegador.ingestion.parser import LanguageParser |
| 13 | |
| 14 | logger = logging.getLogger(__name__) |
| 15 | |
# Patterns for reference extraction from expression text
_VAR_REF = re.compile(r"\bvar\.(\w+)")
_LOCAL_REF = re.compile(r"\blocal\.(\w+)")
_MODULE_REF = re.compile(r"\bmodule\.(\w+)")
_DATA_REF = re.compile(r"\bdata\.(\w+)\.(\w+)")
# Bare resource references such as ``aws_instance.web``.  The negative
# lookbehinds keep qualified forms out of this pattern: ``data.aws_ami.x``
# is a data-source reference (handled by _DATA_REF), while
# ``module.aws_vpc.id`` / ``var.aws_tags.Name`` / ``local.aws_cfg.id``
# address a module output, a variable attribute, or a local value whose
# *name* merely starts with a provider prefix — none of those are resource
# dependencies.  (Previously only ``data.`` was excluded, producing spurious
# DEPENDS_ON edges for module/var/local names with provider-like prefixes.)
_RESOURCE_REF = re.compile(
    r"(?<!\bdata\.)(?<!\bvar\.)(?<!\blocal\.)(?<!\bmodule\.)"
    r"\b(aws_\w+|google_\w+|azurerm_\w+|azuread_\w+|oci_\w+|digitalocean_\w+"
    r"|cloudflare_\w+|helm_\w+|kubernetes_\w+|null_\w+|random_\w+"
    r"|local_\w+|tls_\w+|template_\w+|archive_\w+|external_\w+)\.(\w+)"
)
| 27 | |
| 28 | |
def _get_hcl_language():
    """Load the tree-sitter HCL grammar, raising an actionable error if the
    optional dependency is not installed."""
    try:
        from tree_sitter import Language
        import tree_sitter_hcl as hcl_grammar  # type: ignore[import]

        return Language(hcl_grammar.language())
    except ImportError as e:
        raise ImportError("Install tree-sitter-hcl: pip install tree-sitter-hcl") from e
| 37 | |
| 38 | |
def _node_text(node, source: bytes) -> str:
    """UTF-8 decode the byte span covered by *node* (bad bytes are replaced)."""
    span = source[node.start_byte : node.end_byte]
    return span.decode("utf-8", errors="replace")
| 41 | |
| 42 | |
def _string_lit_text(node, source: bytes) -> str:
    """Return the inner text of a ``string_lit`` node, without the quotes."""
    inner = next((c for c in node.children if c.type == "template_literal"), None)
    if inner is not None:
        return _node_text(inner, source)
    # No template_literal child: fall back to trimming quote characters
    # from the raw node text.
    return _node_text(node, source).strip('"').strip("'")
| 51 | |
| 52 | |
class HCLParser(LanguageParser):
    """Parses HCL/Terraform files into the navegador graph.

    Top-level blocks become graph nodes:

    - ``resource`` / ``data`` / ``provider`` → Class nodes
    - ``variable`` / ``output`` / ``locals`` entries → Variable nodes
    - ``module`` → Module nodes

    Expression text inside a block is scanned for ``var.*``, ``local.*``,
    ``module.*``, ``data.TYPE.NAME`` and known-provider resource addresses,
    which become REFERENCES / DEPENDS_ON edges.
    """

    def __init__(self) -> None:
        from tree_sitter import Parser  # type: ignore[import]

        self._parser = Parser(_get_hcl_language())

    def parse_file(self, path: Path, repo_root: Path, store: GraphStore) -> dict[str, int]:
        """Parse one ``.tf``/``.hcl`` file, create its File node, and index its blocks.

        Returns
        -------
        dict with ``functions`` / ``classes`` / ``edges`` counts for this file.
        """
        source = path.read_bytes()
        tree = self._parser.parse(source)
        rel_path = str(path.relative_to(repo_root))

        store.create_node(
            NodeLabel.File,
            {
                "name": path.name,
                "path": rel_path,
                "language": "hcl",
                "line_count": source.count(b"\n"),
            },
        )

        stats = {"functions": 0, "classes": 0, "edges": 0}
        self._walk(tree.root_node, source, rel_path, store, stats)
        return stats

    # ── AST walker ────────────────────────────────────────────────────────────

    def _walk(self, node, source: bytes, file_path: str, store: GraphStore, stats: dict) -> None:
        """Walk the top-level body looking for block nodes."""
        for child in node.children:
            if child.type == "body":
                for body_child in child.children:
                    if body_child.type == "block":
                        self._handle_block(body_child, source, file_path, store, stats)
            elif child.type == "block":
                self._handle_block(child, source, file_path, store, stats)

    def _handle_block(
        self, node, source: bytes, file_path: str, store: GraphStore, stats: dict
    ) -> None:
        """Dispatch a block based on its block-type identifier and label count."""
        block_type = None
        labels: list[str] = []
        body_node = None

        for child in node.children:
            if child.type == "identifier" and block_type is None:
                block_type = _node_text(child, source)
            elif child.type == "string_lit":
                labels.append(_string_lit_text(child, source))
            elif child.type == "body":
                body_node = child

        if not block_type:
            return
        if block_type == "terraform":
            return  # settings/backend block — nothing to index
        if block_type == "locals":
            self._handle_locals(node, source, file_path, store, stats, body_node)
            return

        # handler + minimum number of string labels the block type requires
        dispatch = {
            "resource": (self._handle_resource, 2),
            "data": (self._handle_data, 2),
            "provider": (self._handle_provider, 1),
            "variable": (self._handle_variable, 1),
            "output": (self._handle_output, 1),
            "module": (self._handle_module, 1),
        }
        entry = dispatch.get(block_type)
        if entry is None:
            logger.debug("Skipping unknown HCL block type: %s", block_type)
            return
        handler, min_labels = entry
        if len(labels) < min_labels:
            # Previously logged as "unknown block type", which was misleading.
            logger.debug("Skipping malformed %s block with %d label(s)", block_type, len(labels))
            return
        handler(node, source, file_path, store, stats, labels, body_node)

    # ── Handlers ──────────────────────────────────────────────────────────────

    def _emit_definition(
        self,
        node,
        file_path: str,
        store: GraphStore,
        stats: dict,
        *,
        name: str,
        label,
        semantic_type: str,
        stat_key: str,
        has_docstring: bool = False,
        extra_props: dict | None = None,
    ) -> None:
        """Create a definition node + File-CONTAINS edge (boilerplate shared by all handlers)."""
        props = {
            "name": name,
            "file_path": file_path,
            "line_start": node.start_point[0] + 1,
            "line_end": node.end_point[0] + 1,
        }
        if has_docstring:
            props["docstring"] = ""
        props["semantic_type"] = semantic_type
        if extra_props:
            props.update(extra_props)
        store.create_node(label, props)
        store.create_edge(
            NodeLabel.File,
            {"path": file_path},
            EdgeType.CONTAINS,
            label,
            {"name": name, "file_path": file_path},
        )
        stats[stat_key] += 1
        stats["edges"] += 1

    def _handle_resource(
        self,
        node,
        source: bytes,
        file_path: str,
        store: GraphStore,
        stats: dict,
        labels: list[str],
        body_node,
    ) -> None:
        """``resource "TYPE" "NAME"`` → Class node ``TYPE.NAME`` (terraform_resource)."""
        name = f"{labels[0]}.{labels[1]}"
        self._emit_definition(
            node,
            file_path,
            store,
            stats,
            name=name,
            label=NodeLabel.Class,
            semantic_type="terraform_resource",
            stat_key="classes",
            has_docstring=True,
        )
        if body_node:
            self._extract_references(
                body_node, source, file_path, name, NodeLabel.Class, store, stats
            )

    def _handle_data(
        self,
        node,
        source: bytes,
        file_path: str,
        store: GraphStore,
        stats: dict,
        labels: list[str],
        body_node,
    ) -> None:
        """``data "TYPE" "NAME"`` → Class node ``TYPE.NAME`` (terraform_data)."""
        name = f"{labels[0]}.{labels[1]}"
        self._emit_definition(
            node,
            file_path,
            store,
            stats,
            name=name,
            label=NodeLabel.Class,
            semantic_type="terraform_data",
            stat_key="classes",
            has_docstring=True,
        )
        if body_node:
            self._extract_references(
                body_node, source, file_path, name, NodeLabel.Class, store, stats
            )

    def _handle_provider(
        self,
        node,
        source: bytes,
        file_path: str,
        store: GraphStore,
        stats: dict,
        labels: list[str],
        body_node,
    ) -> None:
        """``provider "NAME"`` → Class node (terraform_provider)."""
        name = labels[0]
        self._emit_definition(
            node,
            file_path,
            store,
            stats,
            name=name,
            label=NodeLabel.Class,
            semantic_type="terraform_provider",
            stat_key="classes",
            has_docstring=True,
        )
        if body_node:
            self._extract_references(
                body_node, source, file_path, name, NodeLabel.Class, store, stats
            )

    def _handle_variable(
        self,
        node,
        source: bytes,
        file_path: str,
        store: GraphStore,
        stats: dict,
        labels: list[str],
        body_node,
    ) -> None:
        """``variable "NAME"`` → Variable node (terraform_variable).

        No reference extraction here: variable blocks typically hold literal
        defaults/validation rather than references to other definitions.
        """
        self._emit_definition(
            node,
            file_path,
            store,
            stats,
            name=labels[0],
            label=NodeLabel.Variable,
            semantic_type="terraform_variable",
            stat_key="functions",
        )

    def _handle_output(
        self,
        node,
        source: bytes,
        file_path: str,
        store: GraphStore,
        stats: dict,
        labels: list[str],
        body_node,
    ) -> None:
        """``output "NAME"`` → Variable node (terraform_output) + value references."""
        name = labels[0]
        self._emit_definition(
            node,
            file_path,
            store,
            stats,
            name=name,
            label=NodeLabel.Variable,
            semantic_type="terraform_output",
            stat_key="functions",
        )
        if body_node:
            self._extract_references(
                body_node, source, file_path, name, NodeLabel.Variable, store, stats
            )

    def _handle_module(
        self,
        node,
        source: bytes,
        file_path: str,
        store: GraphStore,
        stats: dict,
        labels: list[str],
        body_node,
    ) -> None:
        """``module "NAME"`` → Module node; records its ``source`` attribute for cross-file resolution."""
        name = labels[0]
        source_attr = ""
        if body_node:
            source_attr = self._get_attribute_value(body_node, "source", source)

        self._emit_definition(
            node,
            file_path,
            store,
            stats,
            name=name,
            label=NodeLabel.Module,
            semantic_type="terraform_module",
            stat_key="classes",
            extra_props={"source": source_attr},
        )
        if body_node:
            self._extract_references(
                body_node, source, file_path, name, NodeLabel.Module, store, stats
            )

    def _handle_locals(
        self,
        node,
        source: bytes,
        file_path: str,
        store: GraphStore,
        stats: dict,
        body_node,
    ) -> None:
        """``locals { ... }`` → one Variable node (terraform_local) per attribute."""
        if not body_node:
            return

        for child in body_node.children:
            if child.type != "attribute":
                continue
            attr_name = next(
                (
                    _node_text(attr_child, source)
                    for attr_child in child.children
                    if attr_child.type == "identifier"
                ),
                None,
            )
            if not attr_name:
                continue

            self._emit_definition(
                child,
                file_path,
                store,
                stats,
                name=attr_name,
                label=NodeLabel.Variable,
                semantic_type="terraform_local",
                stat_key="functions",
            )
            # References from the local's value expression
            self._extract_references(
                child, source, file_path, attr_name, NodeLabel.Variable, store, stats
            )

    # ── Reference extraction ──────────────────────────────────────────────────

    def _extract_references(
        self,
        node,
        source: bytes,
        file_path: str,
        from_name: str,
        from_label,
        store: GraphStore,
        stats: dict,
    ) -> None:
        """Scan expression text for var.X, local.X, module.X, data.T.N, and resource references.

        Each regex hit becomes one edge from the enclosing definition; the
        match groups joined with ``.`` form the referenced node's name.
        """
        text = _node_text(node, source)

        # (pattern, edge type, target label) — var/local/module are soft
        # REFERENCES; data and resource addresses are hard DEPENDS_ON.
        ref_specs = (
            (_VAR_REF, EdgeType.REFERENCES, NodeLabel.Variable),
            (_LOCAL_REF, EdgeType.REFERENCES, NodeLabel.Variable),
            (_MODULE_REF, EdgeType.REFERENCES, NodeLabel.Module),
            (_DATA_REF, EdgeType.DEPENDS_ON, NodeLabel.Class),
            (_RESOURCE_REF, EdgeType.DEPENDS_ON, NodeLabel.Class),
        )
        for pattern, edge_type, to_label in ref_specs:
            for match in pattern.finditer(text):
                target_name = ".".join(match.groups())
                store.create_edge(
                    from_label,
                    {"name": from_name, "file_path": file_path},
                    edge_type,
                    to_label,
                    {"name": target_name, "file_path": file_path},
                )
                stats["edges"] += 1

    # ── Helpers ───────────────────────────────────────────────────────────────

    def _get_attribute_value(self, body_node, attr_name: str, source: bytes) -> str:
        """Extract the (unquoted) string value of a named attribute from a body node.

        Returns an empty string when the attribute is absent.
        """
        for child in body_node.children:
            if child.type != "attribute":
                continue
            ident = None
            expr = None
            for attr_child in child.children:
                if attr_child.type == "identifier":
                    ident = _node_text(attr_child, source)
                elif attr_child.type == "expression" or attr_child.is_named:
                    expr = attr_child
            if ident == attr_name and expr is not None:
                text = _node_text(expr, source).strip()
                # Strip exactly one matching pair of quotes, preserving quote
                # characters that are part of the value itself.
                if len(text) >= 2 and text[0] == text[-1] and text[0] in "\"'":
                    text = text[1:-1]
                return text
        return ""
| --- navegador/ingestion/parser.py | ||
| +++ navegador/ingestion/parser.py | ||
| @@ -14,10 +14,16 @@ | ||
| 14 | 14 | PHP .php |
| 15 | 15 | Ruby .rb |
| 16 | 16 | Swift .swift |
| 17 | 17 | C .c .h |
| 18 | 18 | C++ .cpp .hpp .cc .cxx |
| 19 | + | |
| 20 | +Infrastructure-as-Code: | |
| 21 | + HCL .tf .hcl (Terraform / OpenTofu) | |
| 22 | + Puppet .pp | |
| 23 | + Bash .sh .bash .zsh | |
| 24 | + Ansible .yml .yaml (detected heuristically, not via extension) | |
| 19 | 25 | """ |
| 20 | 26 | |
| 21 | 27 | import hashlib |
| 22 | 28 | import logging |
| 23 | 29 | import time |
| @@ -49,10 +55,16 @@ | ||
| 49 | 55 | ".h": "c", |
| 50 | 56 | ".cpp": "cpp", |
| 51 | 57 | ".hpp": "cpp", |
| 52 | 58 | ".cc": "cpp", |
| 53 | 59 | ".cxx": "cpp", |
| 60 | + ".tf": "hcl", | |
| 61 | + ".hcl": "hcl", | |
| 62 | + ".pp": "puppet", | |
| 63 | + ".sh": "bash", | |
| 64 | + ".bash": "bash", | |
| 65 | + ".zsh": "bash", | |
| 54 | 66 | } |
| 55 | 67 | |
| 56 | 68 | |
| 57 | 69 | class RepoIngester: |
| 58 | 70 | """ |
| @@ -154,10 +166,13 @@ | ||
| 154 | 166 | # Remove the temporary redacted directory if one was created |
| 155 | 167 | if effective_root is not repo_path: |
| 156 | 168 | import shutil |
| 157 | 169 | |
| 158 | 170 | shutil.rmtree(effective_root, ignore_errors=True) |
| 171 | + | |
| 172 | + # Ansible pass — heuristically detect and parse Ansible YAML files | |
| 173 | + self._ingest_ansible(repo_path, stats, incremental) | |
| 159 | 174 | |
| 160 | 175 | logger.info( |
| 161 | 176 | "Ingested %s: %d files, %d functions, %d classes, %d skipped", |
| 162 | 177 | repo_path.name, |
| 163 | 178 | stats["files"], |
| @@ -266,10 +281,79 @@ | ||
| 266 | 281 | for path in repo_path.rglob("*"): |
| 267 | 282 | if path.is_file() and path.suffix in LANGUAGE_MAP: |
| 268 | 283 | if not any(part in skip_dirs for part in path.parts): |
| 269 | 284 | yield path |
| 270 | 285 | |
| 286 | + def _ingest_ansible(self, repo_path: Path, stats: dict[str, int], incremental: bool) -> None: | |
| 287 | + """Detect and parse Ansible YAML files (playbooks, roles, tasks).""" | |
| 288 | + from navegador.ingestion.ansible import AnsibleParser | |
| 289 | + | |
| 290 | + is_ansible_file = AnsibleParser.is_ansible_file | |
| 291 | + | |
| 292 | + ansible_parser: AnsibleParser | None = None | |
| 293 | + | |
| 294 | + for path in repo_path.rglob("*.yml"): | |
| 295 | + if not path.is_file(): | |
| 296 | + continue | |
| 297 | + if any(part in (".git", ".venv", "venv", "node_modules") for part in path.parts): | |
| 298 | + continue | |
| 299 | + if not is_ansible_file(path, repo_path): | |
| 300 | + continue | |
| 301 | + | |
| 302 | + rel_path = str(path.relative_to(repo_path)) | |
| 303 | + content_hash = _file_hash(path) | |
| 304 | + | |
| 305 | + if incremental and self._file_unchanged(rel_path, content_hash): | |
| 306 | + stats["skipped"] += 1 | |
| 307 | + continue | |
| 308 | + | |
| 309 | + if incremental: | |
| 310 | + self._clear_file_subgraph(rel_path) | |
| 311 | + | |
| 312 | + if ansible_parser is None: | |
| 313 | + ansible_parser = AnsibleParser() | |
| 314 | + try: | |
| 315 | + file_stats = ansible_parser.parse_file(path, repo_path, self.store) | |
| 316 | + stats["files"] += 1 | |
| 317 | + stats["functions"] += file_stats.get("functions", 0) | |
| 318 | + stats["classes"] += file_stats.get("classes", 0) | |
| 319 | + stats["edges"] += file_stats.get("edges", 0) | |
| 320 | + self._store_file_hash(rel_path, content_hash) | |
| 321 | + except Exception: | |
| 322 | + logger.exception("Failed to parse Ansible file %s", path) | |
| 323 | + | |
| 324 | + # Also check .yaml extension | |
| 325 | + for path in repo_path.rglob("*.yaml"): | |
| 326 | + if not path.is_file(): | |
| 327 | + continue | |
| 328 | + if any(part in (".git", ".venv", "venv", "node_modules") for part in path.parts): | |
| 329 | + continue | |
| 330 | + if not is_ansible_file(path, repo_path): | |
| 331 | + continue | |
| 332 | + | |
| 333 | + rel_path = str(path.relative_to(repo_path)) | |
| 334 | + content_hash = _file_hash(path) | |
| 335 | + | |
| 336 | + if incremental and self._file_unchanged(rel_path, content_hash): | |
| 337 | + stats["skipped"] += 1 | |
| 338 | + continue | |
| 339 | + | |
| 340 | + if incremental: | |
| 341 | + self._clear_file_subgraph(rel_path) | |
| 342 | + | |
| 343 | + if ansible_parser is None: | |
| 344 | + ansible_parser = AnsibleParser() | |
| 345 | + try: | |
| 346 | + file_stats = ansible_parser.parse_file(path, repo_path, self.store) | |
| 347 | + stats["files"] += 1 | |
| 348 | + stats["functions"] += file_stats.get("functions", 0) | |
| 349 | + stats["classes"] += file_stats.get("classes", 0) | |
| 350 | + stats["edges"] += file_stats.get("edges", 0) | |
| 351 | + self._store_file_hash(rel_path, content_hash) | |
| 352 | + except Exception: | |
| 353 | + logger.exception("Failed to parse Ansible file %s", path) | |
| 354 | + | |
| 271 | 355 | def _get_parser(self, language: str) -> "LanguageParser": |
| 272 | 356 | if language not in self._parsers: |
| 273 | 357 | if language == "python": |
| 274 | 358 | from navegador.ingestion.python import PythonParser |
| 275 | 359 | |
| @@ -316,10 +400,22 @@ | ||
| 316 | 400 | self._parsers[language] = CParser() |
| 317 | 401 | elif language == "cpp": |
| 318 | 402 | from navegador.ingestion.cpp import CppParser |
| 319 | 403 | |
| 320 | 404 | self._parsers[language] = CppParser() |
| 405 | + elif language == "hcl": | |
| 406 | + from navegador.ingestion.hcl import HCLParser | |
| 407 | + | |
| 408 | + self._parsers[language] = HCLParser() | |
| 409 | + elif language == "puppet": | |
| 410 | + from navegador.ingestion.puppet import PuppetParser | |
| 411 | + | |
| 412 | + self._parsers[language] = PuppetParser() | |
| 413 | + elif language == "bash": | |
| 414 | + from navegador.ingestion.bash import BashParser | |
| 415 | + | |
| 416 | + self._parsers[language] = BashParser() | |
| 321 | 417 | else: |
| 322 | 418 | raise ValueError(f"Unsupported language: {language}") |
| 323 | 419 | return self._parsers[language] |
| 324 | 420 | |
| 325 | 421 | |
| 326 | 422 | |
| 327 | 423 | ADDED navegador/ingestion/puppet.py |
| --- navegador/ingestion/parser.py | |
| +++ navegador/ingestion/parser.py | |
| @@ -14,10 +14,16 @@ | |
| 14 | PHP .php |
| 15 | Ruby .rb |
| 16 | Swift .swift |
| 17 | C .c .h |
| 18 | C++ .cpp .hpp .cc .cxx |
| 19 | """ |
| 20 | |
| 21 | import hashlib |
| 22 | import logging |
| 23 | import time |
| @@ -49,10 +55,16 @@ | |
| 49 | ".h": "c", |
| 50 | ".cpp": "cpp", |
| 51 | ".hpp": "cpp", |
| 52 | ".cc": "cpp", |
| 53 | ".cxx": "cpp", |
| 54 | } |
| 55 | |
| 56 | |
| 57 | class RepoIngester: |
| 58 | """ |
| @@ -154,10 +166,13 @@ | |
| 154 | # Remove the temporary redacted directory if one was created |
| 155 | if effective_root is not repo_path: |
| 156 | import shutil |
| 157 | |
| 158 | shutil.rmtree(effective_root, ignore_errors=True) |
| 159 | |
| 160 | logger.info( |
| 161 | "Ingested %s: %d files, %d functions, %d classes, %d skipped", |
| 162 | repo_path.name, |
| 163 | stats["files"], |
| @@ -266,10 +281,79 @@ | |
| 266 | for path in repo_path.rglob("*"): |
| 267 | if path.is_file() and path.suffix in LANGUAGE_MAP: |
| 268 | if not any(part in skip_dirs for part in path.parts): |
| 269 | yield path |
| 270 | |
| 271 | def _get_parser(self, language: str) -> "LanguageParser": |
| 272 | if language not in self._parsers: |
| 273 | if language == "python": |
| 274 | from navegador.ingestion.python import PythonParser |
| 275 | |
| @@ -316,10 +400,22 @@ | |
| 316 | self._parsers[language] = CParser() |
| 317 | elif language == "cpp": |
| 318 | from navegador.ingestion.cpp import CppParser |
| 319 | |
| 320 | self._parsers[language] = CppParser() |
| 321 | else: |
| 322 | raise ValueError(f"Unsupported language: {language}") |
| 323 | return self._parsers[language] |
| 324 | |
| 325 | |
| 326 | |
| 327 | ADDED navegador/ingestion/puppet.py
| --- navegador/ingestion/parser.py | |
| +++ navegador/ingestion/parser.py | |
| @@ -14,10 +14,16 @@ | |
| 14 | PHP .php |
| 15 | Ruby .rb |
| 16 | Swift .swift |
| 17 | C .c .h |
| 18 | C++ .cpp .hpp .cc .cxx |
| 19 | |
| 20 | Infrastructure-as-Code: |
| 21 | HCL .tf .hcl (Terraform / OpenTofu) |
| 22 | Puppet .pp |
| 23 | Bash .sh .bash .zsh |
| 24 | Ansible .yml .yaml (detected heuristically, not via extension) |
| 25 | """ |
| 26 | |
| 27 | import hashlib |
| 28 | import logging |
| 29 | import time |
| @@ -49,10 +55,16 @@ | |
| 55 | ".h": "c", |
| 56 | ".cpp": "cpp", |
| 57 | ".hpp": "cpp", |
| 58 | ".cc": "cpp", |
| 59 | ".cxx": "cpp", |
| 60 | ".tf": "hcl", |
| 61 | ".hcl": "hcl", |
| 62 | ".pp": "puppet", |
| 63 | ".sh": "bash", |
| 64 | ".bash": "bash", |
| 65 | ".zsh": "bash", |
| 66 | } |
| 67 | |
| 68 | |
| 69 | class RepoIngester: |
| 70 | """ |
| @@ -154,10 +166,13 @@ | |
| 166 | # Remove the temporary redacted directory if one was created |
| 167 | if effective_root is not repo_path: |
| 168 | import shutil |
| 169 | |
| 170 | shutil.rmtree(effective_root, ignore_errors=True) |
| 171 | |
| 172 | # Ansible pass — heuristically detect and parse Ansible YAML files |
| 173 | self._ingest_ansible(repo_path, stats, incremental) |
| 174 | |
| 175 | logger.info( |
| 176 | "Ingested %s: %d files, %d functions, %d classes, %d skipped", |
| 177 | repo_path.name, |
| 178 | stats["files"], |
| @@ -266,10 +281,79 @@ | |
| 281 | for path in repo_path.rglob("*"): |
| 282 | if path.is_file() and path.suffix in LANGUAGE_MAP: |
| 283 | if not any(part in skip_dirs for part in path.parts): |
| 284 | yield path |
| 285 | |
| 286 | def _ingest_ansible(self, repo_path: Path, stats: dict[str, int], incremental: bool) -> None: |
| 287 | """Detect and parse Ansible YAML files (playbooks, roles, tasks).""" |
| 288 | from navegador.ingestion.ansible import AnsibleParser |
| 289 | |
| 290 | is_ansible_file = AnsibleParser.is_ansible_file |
| 291 | |
| 292 | ansible_parser: AnsibleParser | None = None |
| 293 | |
| 294 | for path in repo_path.rglob("*.yml"): |
| 295 | if not path.is_file(): |
| 296 | continue |
| 297 | if any(part in (".git", ".venv", "venv", "node_modules") for part in path.parts): |
| 298 | continue |
| 299 | if not is_ansible_file(path, repo_path): |
| 300 | continue |
| 301 | |
| 302 | rel_path = str(path.relative_to(repo_path)) |
| 303 | content_hash = _file_hash(path) |
| 304 | |
| 305 | if incremental and self._file_unchanged(rel_path, content_hash): |
| 306 | stats["skipped"] += 1 |
| 307 | continue |
| 308 | |
| 309 | if incremental: |
| 310 | self._clear_file_subgraph(rel_path) |
| 311 | |
| 312 | if ansible_parser is None: |
| 313 | ansible_parser = AnsibleParser() |
| 314 | try: |
| 315 | file_stats = ansible_parser.parse_file(path, repo_path, self.store) |
| 316 | stats["files"] += 1 |
| 317 | stats["functions"] += file_stats.get("functions", 0) |
| 318 | stats["classes"] += file_stats.get("classes", 0) |
| 319 | stats["edges"] += file_stats.get("edges", 0) |
| 320 | self._store_file_hash(rel_path, content_hash) |
| 321 | except Exception: |
| 322 | logger.exception("Failed to parse Ansible file %s", path) |
| 323 | |
| 324 | # Also check .yaml extension |
| 325 | for path in repo_path.rglob("*.yaml"): |
| 326 | if not path.is_file(): |
| 327 | continue |
| 328 | if any(part in (".git", ".venv", "venv", "node_modules") for part in path.parts): |
| 329 | continue |
| 330 | if not is_ansible_file(path, repo_path): |
| 331 | continue |
| 332 | |
| 333 | rel_path = str(path.relative_to(repo_path)) |
| 334 | content_hash = _file_hash(path) |
| 335 | |
| 336 | if incremental and self._file_unchanged(rel_path, content_hash): |
| 337 | stats["skipped"] += 1 |
| 338 | continue |
| 339 | |
| 340 | if incremental: |
| 341 | self._clear_file_subgraph(rel_path) |
| 342 | |
| 343 | if ansible_parser is None: |
| 344 | ansible_parser = AnsibleParser() |
| 345 | try: |
| 346 | file_stats = ansible_parser.parse_file(path, repo_path, self.store) |
| 347 | stats["files"] += 1 |
| 348 | stats["functions"] += file_stats.get("functions", 0) |
| 349 | stats["classes"] += file_stats.get("classes", 0) |
| 350 | stats["edges"] += file_stats.get("edges", 0) |
| 351 | self._store_file_hash(rel_path, content_hash) |
| 352 | except Exception: |
| 353 | logger.exception("Failed to parse Ansible file %s", path) |
| 354 | |
| 355 | def _get_parser(self, language: str) -> "LanguageParser": |
| 356 | if language not in self._parsers: |
| 357 | if language == "python": |
| 358 | from navegador.ingestion.python import PythonParser |
| 359 | |
| @@ -316,10 +400,22 @@ | |
| 400 | self._parsers[language] = CParser() |
| 401 | elif language == "cpp": |
| 402 | from navegador.ingestion.cpp import CppParser |
| 403 | |
| 404 | self._parsers[language] = CppParser() |
| 405 | elif language == "hcl": |
| 406 | from navegador.ingestion.hcl import HCLParser |
| 407 | |
| 408 | self._parsers[language] = HCLParser() |
| 409 | elif language == "puppet": |
| 410 | from navegador.ingestion.puppet import PuppetParser |
| 411 | |
| 412 | self._parsers[language] = PuppetParser() |
| 413 | elif language == "bash": |
| 414 | from navegador.ingestion.bash import BashParser |
| 415 | |
| 416 | self._parsers[language] = BashParser() |
| 417 | else: |
| 418 | raise ValueError(f"Unsupported language: {language}") |
| 419 | return self._parsers[language] |
| 420 | |
| 421 | |
| 422 | |
| 423 | ADDED navegador/ingestion/puppet.py
| --- a/navegador/ingestion/puppet.py | ||
| +++ b/navegador/ingestion/puppet.py | ||
| @@ -0,0 +1,339 @@ | ||
| 1 | +""" | |
| 2 | +Puppet manifest parser — extracts classes, defined types, node definitions, | |
| 3 | +resource declarations, includes, and parameters from .pp files using tree-sitter. | |
| 4 | +""" | |
| 5 | + | |
| 6 | +import logging | |
| 7 | +from pathlib import Path | |
| 8 | + | |
| 9 | +from navegador.graph.schema import EdgeType, NodeLabel | |
| 10 | +from navegador.graph.store import GraphStore | |
| 11 | +from navegador.ingestion.parser import LanguageParser | |
| 12 | + | |
| 13 | +logger = logging.getLogger(__name__) | |
| 14 | + | |
| 15 | + | |
| 16 | +def _get_puppet_language(): | |
| 17 | + try: | |
| 18 | + import tree_sitter_puppet as tspuppet # type: ignore[import] | |
| 19 | + from tree_sitter import Language | |
| 20 | + | |
| 21 | + return Language(tspuppet.language()) | |
| 22 | + except ImportError as e: | |
| 23 | + raise ImportError("Install tree-sitter-puppet: pip install tree-sitter-puppet") from e | |
| 24 | + | |
| 25 | + | |
| 26 | +def _node_text(node, source: bytes) -> str: | |
| 27 | + return source[node.start_byte : node.end_byte].decode("utf-8", errors="replace") | |
| 28 | + | |
| 29 | + | |
| 30 | +def _class_identifier_text(node, source: bytes) -> str: | |
| 31 | + """Join identifier children of a class_identifier with '::'.""" | |
| 32 | + parts = [_node_text(child, source) for child in node.children if child.type == "identifier"] | |
| 33 | + return "::".join(parts) if parts else _node_text(node, source) | |
| 34 | + | |
| 35 | + | |
| 36 | +class PuppetParser(LanguageParser): | |
| 37 | + """Parses Puppet manifest files into the navegador graph.""" | |
| 38 | + | |
| 39 | + def __init__(self) -> None: | |
| 40 | + from tree_sitter import Parser # type: ignore[import] | |
| 41 | + | |
| 42 | + self._parser = Parser(_get_puppet_language()) | |
| 43 | + | |
| 44 | + def parse_file(self, path: Path, repo_root: Path, store: GraphStore) -> dict[str, int]: | |
| 45 | + source = path.read_bytes() | |
| 46 | + tree = self._parser.parse(source) | |
| 47 | + rel_path = str(path.relative_to(repo_root)) | |
| 48 | + | |
| 49 | + store.create_node( | |
| 50 | + NodeLabel.File, | |
| 51 | + { | |
| 52 | + "name": path.name, | |
| 53 | + "path": rel_path, | |
| 54 | + "language": "puppet", | |
| 55 | + "line_count": source.count(b"\n"), | |
| 56 | + }, | |
| 57 | + ) | |
| 58 | + | |
| 59 | + stats = {"functions": 0, "classes": 0, "edges": 0} | |
| 60 | + self._walk(tree.root_node, source, rel_path, store, stats) | |
| 61 | + return stats | |
| 62 | + | |
| 63 | + # ── AST walker ──────────────────────────────────────────────────────────── | |
| 64 | + | |
| 65 | + def _walk(self, node, source: bytes, file_path: str, store: GraphStore, stats: dict) -> None: | |
| 66 | + if node.type == "class_definition": | |
| 67 | + self._handle_class(node, source, file_path, store, stats) | |
| 68 | + return | |
| 69 | + if node.type == "defined_resource_type": | |
| 70 | + self._handle_defined_type(node, source, file_path, store, stats) | |
| 71 | + return | |
| 72 | + if node.type == "node_definition": | |
| 73 | + self._handle_node(node, source, file_path, store, stats) | |
| 74 | + return | |
| 75 | + if node.type == "include_statement": | |
| 76 | + self._handle_include(node, source, file_path, store, stats) | |
| 77 | + return | |
| 78 | + for child in node.children: | |
| 79 | + self._walk(child, source, file_path, store, stats) | |
| 80 | + | |
| 81 | + # ── Handlers ────────────────────────────────────────────────────────────── | |
| 82 | + | |
| 83 | + def _handle_class( | |
| 84 | + self, node, source: bytes, file_path: str, store: GraphStore, stats: dict | |
| 85 | + ) -> None: | |
| 86 | + name = self._extract_class_identifier(node, source) | |
| 87 | + if not name: | |
| 88 | + return | |
| 89 | + | |
| 90 | + store.create_node( | |
| 91 | + NodeLabel.Class, | |
| 92 | + { | |
| 93 | + "name": name, | |
| 94 | + "file_path": file_path, | |
| 95 | + "line_start": node.start_point[0] + 1, | |
| 96 | + "line_end": node.end_point[0] + 1, | |
| 97 | + "docstring": "", | |
| 98 | + "semantic_type": "puppet_class", | |
| 99 | + }, | |
| 100 | + ) | |
| 101 | + store.create_edge( | |
| 102 | + NodeLabel.File, | |
| 103 | + {"path": file_path}, | |
| 104 | + EdgeType.CONTAINS, | |
| 105 | + NodeLabel.Class, | |
| 106 | + {"name": name, "file_path": file_path}, | |
| 107 | + ) | |
| 108 | + stats["classes"] += 1 | |
| 109 | + stats["edges"] += 1 | |
| 110 | + | |
| 111 | + self._extract_parameters(node, source, file_path, name, store, stats) | |
| 112 | + self._extract_resources(node, source, file_path, name, store, stats) | |
| 113 | + | |
| 114 | + def _handle_defined_type( | |
| 115 | + self, node, source: bytes, file_path: str, store: GraphStore, stats: dict | |
| 116 | + ) -> None: | |
| 117 | + name = self._extract_class_identifier(node, source) | |
| 118 | + if not name: | |
| 119 | + return | |
| 120 | + | |
| 121 | + store.create_node( | |
| 122 | + NodeLabel.Class, | |
| 123 | + { | |
| 124 | + "name": name, | |
| 125 | + "file_path": file_path, | |
| 126 | + "line_start": node.start_point[0] + 1, | |
| 127 | + "line_end": node.end_point[0] + 1, | |
| 128 | + "docstring": "", | |
| 129 | + "semantic_type": "puppet_defined_type", | |
| 130 | + }, | |
| 131 | + ) | |
| 132 | + store.create_edge( | |
| 133 | + NodeLabel.File, | |
| 134 | + {"path": file_path}, | |
| 135 | + EdgeType.CONTAINS, | |
| 136 | + NodeLabel.Class, | |
| 137 | + {"name": name, "file_path": file_path}, | |
| 138 | + ) | |
| 139 | + stats["classes"] += 1 | |
| 140 | + stats["edges"] += 1 | |
| 141 | + | |
| 142 | + self._extract_parameters(node, source, file_path, name, store, stats) | |
| 143 | + self._extract_resources(node, source, file_path, name, store, stats) | |
| 144 | + | |
| 145 | + def _handle_node( | |
| 146 | + self, node, source: bytes, file_path: str, store: GraphStore, stats: dict | |
| 147 | + ) -> None: | |
| 148 | + name = self._extract_node_name(node, source) | |
| 149 | + if not name: | |
| 150 | + return | |
| 151 | + | |
| 152 | + store.create_node( | |
| 153 | + NodeLabel.Class, | |
| 154 | + { | |
| 155 | + "name": name, | |
| 156 | + "file_path": file_path, | |
| 157 | + "line_start": node.start_point[0] + 1, | |
| 158 | + "line_end": node.end_point[0] + 1, | |
| 159 | + "docstring": "", | |
| 160 | + "semantic_type": "puppet_node", | |
| 161 | + }, | |
| 162 | + ) | |
| 163 | + store.create_edge( | |
| 164 | + NodeLabel.File, | |
| 165 | + {"path": file_path}, | |
| 166 | + EdgeType.CONTAINS, | |
| 167 | + NodeLabel.Class, | |
| 168 | + {"name": name, "file_path": file_path}, | |
| 169 | + ) | |
| 170 | + stats["classes"] += 1 | |
| 171 | + stats["edges"] += 1 | |
| 172 | + | |
| 173 | + self._extract_resources(node, source, file_path, name, store, stats) | |
| 174 | + | |
| 175 | + def _handle_include( | |
| 176 | + self, node, source: bytes, file_path: str, store: GraphStore, stats: dict | |
| 177 | + ) -> None: | |
| 178 | + ident_node = None | |
| 179 | + for child in node.children: | |
| 180 | + if child.type == "class_identifier": | |
| 181 | + ident_node = child | |
| 182 | + break | |
| 183 | + if not ident_node: | |
| 184 | + return | |
| 185 | + | |
| 186 | + module = _class_identifier_text(ident_node, source) | |
| 187 | + store.create_node( | |
| 188 | + NodeLabel.Import, | |
| 189 | + { | |
| 190 | + "name": module, | |
| 191 | + "file_path": file_path, | |
| 192 | + "line_start": node.start_point[0] + 1, | |
| 193 | + "module": module, | |
| 194 | + "semantic_type": "puppet_include", | |
| 195 | + }, | |
| 196 | + ) | |
| 197 | + store.create_edge( | |
| 198 | + NodeLabel.File, | |
| 199 | + {"path": file_path}, | |
| 200 | + EdgeType.IMPORTS, | |
| 201 | + NodeLabel.Import, | |
| 202 | + {"name": module, "file_path": file_path}, | |
| 203 | + ) | |
| 204 | + stats["edges"] += 1 | |
| 205 | + | |
| 206 | + # ── Extractors ──────────────────────────────────────────────────────────── | |
| 207 | + | |
| 208 | + def _extract_class_identifier(self, node, source: bytes) -> str | None: | |
| 209 | + """Find and return the class_identifier text from a class/define node.""" | |
| 210 | + for child in node.children: | |
| 211 | + if child.type == "class_identifier": | |
| 212 | + return _class_identifier_text(child, source) | |
| 213 | + return None | |
| 214 | + | |
| 215 | + def _extract_node_name(self, node, source: bytes) -> str | None: | |
| 216 | + """Extract the node name from a node_definition (string child of node_name).""" | |
| 217 | + for child in node.children: | |
| 218 | + if child.type == "node_name": | |
| 219 | + for grandchild in child.children: | |
| 220 | + if grandchild.type == "string": | |
| 221 | + return _node_text(grandchild, source).strip("'\"") | |
| 222 | + return _node_text(child, source).strip("'\"") | |
| 223 | + return None | |
| 224 | + | |
| 225 | + def _extract_parameters( | |
| 226 | + self, | |
| 227 | + node, | |
| 228 | + source: bytes, | |
| 229 | + file_path: str, | |
| 230 | + class_name: str, | |
| 231 | + store: GraphStore, | |
| 232 | + stats: dict, | |
| 233 | + ) -> None: | |
| 234 | + """Extract parameters from a parameter_list inside a class/define.""" | |
| 235 | + for child in node.children: | |
| 236 | + if child.type != "parameter_list": | |
| 237 | + continue | |
| 238 | + for param in child.children: | |
| 239 | + if param.type != "parameter": | |
| 240 | + continue | |
| 241 | + var_node = None | |
| 242 | + for pc in param.children: | |
| 243 | + if pc.type == "variable": | |
| 244 | + var_node = pc | |
| 245 | + break | |
| 246 | + if not var_node: | |
| 247 | + continue | |
| 248 | + var_name = _node_text(var_node, source).lstrip("$") | |
| 249 | + store.create_node( | |
| 250 | + NodeLabel.Variable, | |
| 251 | + { | |
| 252 | + "name": var_name, | |
| 253 | + "file_path": file_path, | |
| 254 | + "line_start": param.start_point[0] + 1, | |
| 255 | + "semantic_type": "puppet_parameter", | |
| 256 | + }, | |
| 257 | + ) | |
| 258 | + store.create_edge( | |
| 259 | + NodeLabel.Class, | |
| 260 | + {"name": class_name, "file_path": file_path}, | |
| 261 | + EdgeType.CONTAINS, | |
| 262 | + NodeLabel.Variable, | |
| 263 | + {"name": var_name, "file_path": file_path}, | |
| 264 | + ) | |
| 265 | + stats["edges"] += 1 | |
| 266 | + | |
| 267 | + def _extract_resources( | |
| 268 | + self, | |
| 269 | + node, | |
| 270 | + source: bytes, | |
| 271 | + file_path: str, | |
| 272 | + class_name: str, | |
| 273 | + store: GraphStore, | |
| 274 | + stats: dict, | |
| 275 | + ) -> None: | |
| 276 | + """Walk the block of a class/define/node to find resource declarations.""" | |
| 277 | + for child in node.children: | |
| 278 | + if child.type == "block": | |
| 279 | + self._walk_block_for_resources(child, source, file_path, class_name, store, stats) | |
| 280 | + break | |
| 281 | + | |
| 282 | + def _walk_block_for_resources( | |
| 283 | + self, | |
| 284 | + node, | |
| 285 | + source: bytes, | |
| 286 | + file_path: str, | |
| 287 | + class_name: str, | |
| 288 | + store: GraphStore, | |
| 289 | + stats: dict, | |
| 290 | + ) -> None: | |
| 291 | + """Recursively find resource_declaration nodes inside a block.""" | |
| 292 | + if node.type == "resource_declaration": | |
| 293 | + self._handle_resource(node, source, file_path, class_name, store, stats) | |
| 294 | + return | |
| 295 | + for child in node.children: | |
| 296 | + self._walk_block_for_resources(child, source, file_path, class_name, store, stats) | |
| 297 | + | |
| 298 | + def _handle_resource( | |
| 299 | + self, | |
| 300 | + node, | |
| 301 | + source: bytes, | |
| 302 | + file_path: str, | |
| 303 | + class_name: str, | |
| 304 | + store: GraphStore, | |
| 305 | + stats: dict, | |
| 306 | + ) -> None: | |
| 307 | + """Handle a resource_declaration: first identifier = type, first string = title.""" | |
| 308 | + res_type = None | |
| 309 | + res_title = None | |
| 310 | + for child in node.children: | |
| 311 | + if child.type == "identifier" and res_type is None: | |
| 312 | + res_type = _node_text(child, source) | |
| 313 | + if child.type == "string" and res_title is None: | |
| 314 | + res_title = _node_text(child, source).strip("'\"") | |
| 315 | + if not res_type: | |
| 316 | + return | |
| 317 | + | |
| 318 | + name = f"{res_type}[{res_title}]" if res_title else res_type | |
| 319 | + store.create_node( | |
| 320 | + NodeLabel.Function, | |
| 321 | + { | |
| 322 | + "name": name, | |
| 323 | + "file_path": file_path, | |
| 324 | + "line_start": node.start_point[0] + 1, | |
| 325 | + "line_end": node.end_point[0] + 1, | |
| 326 | + "docstring": "", | |
| 327 | + "class_name": class_name, | |
| 328 | + "semantic_type": "puppet_resource", | |
| 329 | + }, | |
| 330 | + ) | |
| 331 | + store.create_edge( | |
| 332 | + NodeLabel.Class, | |
| 333 | + {"name": class_name, "file_path": file_path}, | |
| 334 | + EdgeType.CONTAINS, | |
| 335 | + NodeLabel.Function, | |
| 336 | + {"name": name, "file_path": file_path}, | |
| 337 | + ) | |
| 338 | + stats["functions"] += 1 | |
| 339 | + stats["edges"] += 1 |
| --- a/navegador/ingestion/puppet.py | 
| +++ b/navegador/ingestion/puppet.py | 
| @@ -0,0 +1,339 @@ | 
| 1 | """ |
| 2 | Puppet manifest parser — extracts classes, defined types, node definitions, |
| 3 | resource declarations, includes, and parameters from .pp files using tree-sitter. |
| 4 | """ |
| 5 | |
| 6 | import logging |
| 7 | from pathlib import Path |
| 8 | |
| 9 | from navegador.graph.schema import EdgeType, NodeLabel |
| 10 | from navegador.graph.store import GraphStore |
| 11 | from navegador.ingestion.parser import LanguageParser |
| 12 | |
| 13 | logger = logging.getLogger(__name__) |
| 14 | |
| 15 | |
| 16 | def _get_puppet_language(): |
| 17 | try: |
| 18 | import tree_sitter_puppet as tspuppet # type: ignore[import] |
| 19 | from tree_sitter import Language |
| 20 | |
| 21 | return Language(tspuppet.language()) |
| 22 | except ImportError as e: |
| 23 | raise ImportError("Install tree-sitter-puppet: pip install tree-sitter-puppet") from e |
| 24 | |
| 25 | |
| 26 | def _node_text(node, source: bytes) -> str: |
| 27 | return source[node.start_byte : node.end_byte].decode("utf-8", errors="replace") |
| 28 | |
| 29 | |
| 30 | def _class_identifier_text(node, source: bytes) -> str: |
| 31 | """Join identifier children of a class_identifier with '::'.""" |
| 32 | parts = [_node_text(child, source) for child in node.children if child.type == "identifier"] |
| 33 | return "::".join(parts) if parts else _node_text(node, source) |
| 34 | |
| 35 | |
| 36 | class PuppetParser(LanguageParser): |
| 37 | """Parses Puppet manifest files into the navegador graph.""" |
| 38 | |
| 39 | def __init__(self) -> None: |
| 40 | from tree_sitter import Parser # type: ignore[import] |
| 41 | |
| 42 | self._parser = Parser(_get_puppet_language()) |
| 43 | |
| 44 | def parse_file(self, path: Path, repo_root: Path, store: GraphStore) -> dict[str, int]: |
| 45 | source = path.read_bytes() |
| 46 | tree = self._parser.parse(source) |
| 47 | rel_path = str(path.relative_to(repo_root)) |
| 48 | |
| 49 | store.create_node( |
| 50 | NodeLabel.File, |
| 51 | { |
| 52 | "name": path.name, |
| 53 | "path": rel_path, |
| 54 | "language": "puppet", |
| 55 | "line_count": source.count(b"\n"), |
| 56 | }, |
| 57 | ) |
| 58 | |
| 59 | stats = {"functions": 0, "classes": 0, "edges": 0} |
| 60 | self._walk(tree.root_node, source, rel_path, store, stats) |
| 61 | return stats |
| 62 | |
| 63 | # ── AST walker ──────────────────────────────────────────────────────────── |
| 64 | |
| 65 | def _walk(self, node, source: bytes, file_path: str, store: GraphStore, stats: dict) -> None: |
| 66 | if node.type == "class_definition": |
| 67 | self._handle_class(node, source, file_path, store, stats) |
| 68 | return |
| 69 | if node.type == "defined_resource_type": |
| 70 | self._handle_defined_type(node, source, file_path, store, stats) |
| 71 | return |
| 72 | if node.type == "node_definition": |
| 73 | self._handle_node(node, source, file_path, store, stats) |
| 74 | return |
| 75 | if node.type == "include_statement": |
| 76 | self._handle_include(node, source, file_path, store, stats) |
| 77 | return |
| 78 | for child in node.children: |
| 79 | self._walk(child, source, file_path, store, stats) |
| 80 | |
| 81 | # ── Handlers ────────────────────────────────────────────────────────────── |
| 82 | |
| 83 | def _handle_class( |
| 84 | self, node, source: bytes, file_path: str, store: GraphStore, stats: dict |
| 85 | ) -> None: |
| 86 | name = self._extract_class_identifier(node, source) |
| 87 | if not name: |
| 88 | return |
| 89 | |
| 90 | store.create_node( |
| 91 | NodeLabel.Class, |
| 92 | { |
| 93 | "name": name, |
| 94 | "file_path": file_path, |
| 95 | "line_start": node.start_point[0] + 1, |
| 96 | "line_end": node.end_point[0] + 1, |
| 97 | "docstring": "", |
| 98 | "semantic_type": "puppet_class", |
| 99 | }, |
| 100 | ) |
| 101 | store.create_edge( |
| 102 | NodeLabel.File, |
| 103 | {"path": file_path}, |
| 104 | EdgeType.CONTAINS, |
| 105 | NodeLabel.Class, |
| 106 | {"name": name, "file_path": file_path}, |
| 107 | ) |
| 108 | stats["classes"] += 1 |
| 109 | stats["edges"] += 1 |
| 110 | |
| 111 | self._extract_parameters(node, source, file_path, name, store, stats) |
| 112 | self._extract_resources(node, source, file_path, name, store, stats) |
| 113 | |
| 114 | def _handle_defined_type( |
| 115 | self, node, source: bytes, file_path: str, store: GraphStore, stats: dict |
| 116 | ) -> None: |
| 117 | name = self._extract_class_identifier(node, source) |
| 118 | if not name: |
| 119 | return |
| 120 | |
| 121 | store.create_node( |
| 122 | NodeLabel.Class, |
| 123 | { |
| 124 | "name": name, |
| 125 | "file_path": file_path, |
| 126 | "line_start": node.start_point[0] + 1, |
| 127 | "line_end": node.end_point[0] + 1, |
| 128 | "docstring": "", |
| 129 | "semantic_type": "puppet_defined_type", |
| 130 | }, |
| 131 | ) |
| 132 | store.create_edge( |
| 133 | NodeLabel.File, |
| 134 | {"path": file_path}, |
| 135 | EdgeType.CONTAINS, |
| 136 | NodeLabel.Class, |
| 137 | {"name": name, "file_path": file_path}, |
| 138 | ) |
| 139 | stats["classes"] += 1 |
| 140 | stats["edges"] += 1 |
| 141 | |
| 142 | self._extract_parameters(node, source, file_path, name, store, stats) |
| 143 | self._extract_resources(node, source, file_path, name, store, stats) |
| 144 | |
| 145 | def _handle_node( |
| 146 | self, node, source: bytes, file_path: str, store: GraphStore, stats: dict |
| 147 | ) -> None: |
| 148 | name = self._extract_node_name(node, source) |
| 149 | if not name: |
| 150 | return |
| 151 | |
| 152 | store.create_node( |
| 153 | NodeLabel.Class, |
| 154 | { |
| 155 | "name": name, |
| 156 | "file_path": file_path, |
| 157 | "line_start": node.start_point[0] + 1, |
| 158 | "line_end": node.end_point[0] + 1, |
| 159 | "docstring": "", |
| 160 | "semantic_type": "puppet_node", |
| 161 | }, |
| 162 | ) |
| 163 | store.create_edge( |
| 164 | NodeLabel.File, |
| 165 | {"path": file_path}, |
| 166 | EdgeType.CONTAINS, |
| 167 | NodeLabel.Class, |
| 168 | {"name": name, "file_path": file_path}, |
| 169 | ) |
| 170 | stats["classes"] += 1 |
| 171 | stats["edges"] += 1 |
| 172 | |
| 173 | self._extract_resources(node, source, file_path, name, store, stats) |
| 174 | |
| 175 | def _handle_include( |
| 176 | self, node, source: bytes, file_path: str, store: GraphStore, stats: dict |
| 177 | ) -> None: |
| 178 | ident_node = None |
| 179 | for child in node.children: |
| 180 | if child.type == "class_identifier": |
| 181 | ident_node = child |
| 182 | break |
| 183 | if not ident_node: |
| 184 | return |
| 185 | |
| 186 | module = _class_identifier_text(ident_node, source) |
| 187 | store.create_node( |
| 188 | NodeLabel.Import, |
| 189 | { |
| 190 | "name": module, |
| 191 | "file_path": file_path, |
| 192 | "line_start": node.start_point[0] + 1, |
| 193 | "module": module, |
| 194 | "semantic_type": "puppet_include", |
| 195 | }, |
| 196 | ) |
| 197 | store.create_edge( |
| 198 | NodeLabel.File, |
| 199 | {"path": file_path}, |
| 200 | EdgeType.IMPORTS, |
| 201 | NodeLabel.Import, |
| 202 | {"name": module, "file_path": file_path}, |
| 203 | ) |
| 204 | stats["edges"] += 1 |
| 205 | |
| 206 | # ── Extractors ──────────────────────────────────────────────────────────── |
| 207 | |
| 208 | def _extract_class_identifier(self, node, source: bytes) -> str | None: |
| 209 | """Find and return the class_identifier text from a class/define node.""" |
| 210 | for child in node.children: |
| 211 | if child.type == "class_identifier": |
| 212 | return _class_identifier_text(child, source) |
| 213 | return None |
| 214 | |
| 215 | def _extract_node_name(self, node, source: bytes) -> str | None: |
| 216 | """Extract the node name from a node_definition (string child of node_name).""" |
| 217 | for child in node.children: |
| 218 | if child.type == "node_name": |
| 219 | for grandchild in child.children: |
| 220 | if grandchild.type == "string": |
| 221 | return _node_text(grandchild, source).strip("'\"") |
| 222 | return _node_text(child, source).strip("'\"") |
| 223 | return None |
| 224 | |
| 225 | def _extract_parameters( |
| 226 | self, |
| 227 | node, |
| 228 | source: bytes, |
| 229 | file_path: str, |
| 230 | class_name: str, |
| 231 | store: GraphStore, |
| 232 | stats: dict, |
| 233 | ) -> None: |
| 234 | """Extract parameters from a parameter_list inside a class/define.""" |
| 235 | for child in node.children: |
| 236 | if child.type != "parameter_list": |
| 237 | continue |
| 238 | for param in child.children: |
| 239 | if param.type != "parameter": |
| 240 | continue |
| 241 | var_node = None |
| 242 | for pc in param.children: |
| 243 | if pc.type == "variable": |
| 244 | var_node = pc |
| 245 | break |
| 246 | if not var_node: |
| 247 | continue |
| 248 | var_name = _node_text(var_node, source).lstrip("$") |
| 249 | store.create_node( |
| 250 | NodeLabel.Variable, |
| 251 | { |
| 252 | "name": var_name, |
| 253 | "file_path": file_path, |
| 254 | "line_start": param.start_point[0] + 1, |
| 255 | "semantic_type": "puppet_parameter", |
| 256 | }, |
| 257 | ) |
| 258 | store.create_edge( |
| 259 | NodeLabel.Class, |
| 260 | {"name": class_name, "file_path": file_path}, |
| 261 | EdgeType.CONTAINS, |
| 262 | NodeLabel.Variable, |
| 263 | {"name": var_name, "file_path": file_path}, |
| 264 | ) |
| 265 | stats["edges"] += 1 |
| 266 | |
| 267 | def _extract_resources( |
| 268 | self, |
| 269 | node, |
| 270 | source: bytes, |
| 271 | file_path: str, |
| 272 | class_name: str, |
| 273 | store: GraphStore, |
| 274 | stats: dict, |
| 275 | ) -> None: |
| 276 | """Walk the block of a class/define/node to find resource declarations.""" |
| 277 | for child in node.children: |
| 278 | if child.type == "block": |
| 279 | self._walk_block_for_resources(child, source, file_path, class_name, store, stats) |
| 280 | break |
| 281 | |
| 282 | def _walk_block_for_resources( |
| 283 | self, |
| 284 | node, |
| 285 | source: bytes, |
| 286 | file_path: str, |
| 287 | class_name: str, |
| 288 | store: GraphStore, |
| 289 | stats: dict, |
| 290 | ) -> None: |
| 291 | """Recursively find resource_declaration nodes inside a block.""" |
| 292 | if node.type == "resource_declaration": |
| 293 | self._handle_resource(node, source, file_path, class_name, store, stats) |
| 294 | return |
| 295 | for child in node.children: |
| 296 | self._walk_block_for_resources(child, source, file_path, class_name, store, stats) |
| 297 | |
| 298 | def _handle_resource( |
| 299 | self, |
| 300 | node, |
| 301 | source: bytes, |
| 302 | file_path: str, |
| 303 | class_name: str, |
| 304 | store: GraphStore, |
| 305 | stats: dict, |
| 306 | ) -> None: |
| 307 | """Handle a resource_declaration: first identifier = type, first string = title.""" |
| 308 | res_type = None |
| 309 | res_title = None |
| 310 | for child in node.children: |
| 311 | if child.type == "identifier" and res_type is None: |
| 312 | res_type = _node_text(child, source) |
| 313 | if child.type == "string" and res_title is None: |
| 314 | res_title = _node_text(child, source).strip("'\"") |
| 315 | if not res_type: |
| 316 | return |
| 317 | |
| 318 | name = f"{res_type}[{res_title}]" if res_title else res_type |
| 319 | store.create_node( |
| 320 | NodeLabel.Function, |
| 321 | { |
| 322 | "name": name, |
| 323 | "file_path": file_path, |
| 324 | "line_start": node.start_point[0] + 1, |
| 325 | "line_end": node.end_point[0] + 1, |
| 326 | "docstring": "", |
| 327 | "class_name": class_name, |
| 328 | "semantic_type": "puppet_resource", |
| 329 | }, |
| 330 | ) |
| 331 | store.create_edge( |
| 332 | NodeLabel.Class, |
| 333 | {"name": class_name, "file_path": file_path}, |
| 334 | EdgeType.CONTAINS, |
| 335 | NodeLabel.Function, |
| 336 | {"name": name, "file_path": file_path}, |
| 337 | ) |
| 338 | stats["functions"] += 1 |
| 339 | stats["edges"] += 1 |
| --- pyproject.toml | ||
| +++ pyproject.toml | ||
| @@ -2,19 +2,19 @@ | ||
| 2 | 2 | requires = ["setuptools>=69.0", "wheel"] |
| 3 | 3 | build-backend = "setuptools.build_meta" |
| 4 | 4 | |
| 5 | 5 | [project] |
| 6 | 6 | name = "navegador" |
| 7 | -version = "0.7.4" | |
| 7 | +version = "0.8.0" | |
| 8 | 8 | description = "AST + knowledge graph context engine for AI coding agents" |
| 9 | 9 | readme = "README.md" |
| 10 | 10 | license = "MIT" |
| 11 | 11 | requires-python = ">=3.12" |
| 12 | 12 | authors = [ |
| 13 | 13 | { name = "CONFLICT LLC" }, |
| 14 | 14 | ] |
| 15 | -keywords = ["ast", "knowledge-graph", "code-analysis", "ai-agents", "mcp", "context-management", "falkordb", "go", "rust", "java", "typescript", "kotlin", "csharp", "php", "ruby", "swift", "c", "cpp"] | |
| 15 | +keywords = ["ast", "knowledge-graph", "code-analysis", "ai-agents", "mcp", "context-management", "falkordb", "go", "rust", "java", "typescript", "kotlin", "csharp", "php", "ruby", "swift", "c", "cpp", "terraform", "hcl", "puppet", "ansible", "chef", "bash", "iac"] | |
| 16 | 16 | classifiers = [ |
| 17 | 17 | "Development Status :: 3 - Alpha", |
| 18 | 18 | "Intended Audience :: Developers", |
| 19 | 19 | "Operating System :: OS Independent", |
| 20 | 20 | "Programming Language :: Python :: 3", |
| @@ -61,10 +61,16 @@ | ||
| 61 | 61 | "tree-sitter-ruby>=0.23.0", |
| 62 | 62 | "tree-sitter-swift>=0.23.0", |
| 63 | 63 | "tree-sitter-c>=0.23.0", |
| 64 | 64 | "tree-sitter-cpp>=0.23.0", |
| 65 | 65 | ] |
| 66 | +iac = [ | |
| 67 | + # Infrastructure-as-Code tree-sitter grammars | |
| 68 | + "tree-sitter-hcl>=1.2.0", | |
| 69 | + "tree-sitter-puppet>=1.3.0", | |
| 70 | + "tree-sitter-bash>=0.25.0", | |
| 71 | +] | |
| 66 | 72 | llm = [ |
| 67 | 73 | # LLM provider SDKs (install the ones you use) |
| 68 | 74 | "anthropic>=0.39.0", |
| 69 | 75 | "openai>=1.0.0", |
| 70 | 76 | ] |
| @@ -81,10 +87,11 @@ | ||
| 81 | 87 | "pymdown-extensions>=10.0", |
| 82 | 88 | ] |
| 83 | 89 | all = [ |
| 84 | 90 | "navegador[redis]", |
| 85 | 91 | "navegador[languages]", |
| 92 | + "navegador[iac]", | |
| 86 | 93 | "navegador[llm]", |
| 87 | 94 | "navegador[dev]", |
| 88 | 95 | "navegador[docs]", |
| 89 | 96 | ] |
| 90 | 97 | |
| 91 | 98 | |
| 92 | 99 | ADDED tests/test_ansible_parser.py |
| 93 | 100 | ADDED tests/test_bash_parser.py |
| 94 | 101 | ADDED tests/test_chef_enricher.py |
| 95 | 102 | ADDED tests/test_hcl_parser.py |
| 96 | 103 | ADDED tests/test_puppet_parser.py |
| --- pyproject.toml | |
| +++ pyproject.toml | |
| @@ -2,19 +2,19 @@ | |
| 2 | requires = ["setuptools>=69.0", "wheel"] |
| 3 | build-backend = "setuptools.build_meta" |
| 4 | |
| 5 | [project] |
| 6 | name = "navegador" |
| 7 | version = "0.7.4" |
| 8 | description = "AST + knowledge graph context engine for AI coding agents" |
| 9 | readme = "README.md" |
| 10 | license = "MIT" |
| 11 | requires-python = ">=3.12" |
| 12 | authors = [ |
| 13 | { name = "CONFLICT LLC" }, |
| 14 | ] |
| 15 | keywords = ["ast", "knowledge-graph", "code-analysis", "ai-agents", "mcp", "context-management", "falkordb", "go", "rust", "java", "typescript", "kotlin", "csharp", "php", "ruby", "swift", "c", "cpp"] |
| 16 | classifiers = [ |
| 17 | "Development Status :: 3 - Alpha", |
| 18 | "Intended Audience :: Developers", |
| 19 | "Operating System :: OS Independent", |
| 20 | "Programming Language :: Python :: 3", |
| @@ -61,10 +61,16 @@ | |
| 61 | "tree-sitter-ruby>=0.23.0", |
| 62 | "tree-sitter-swift>=0.23.0", |
| 63 | "tree-sitter-c>=0.23.0", |
| 64 | "tree-sitter-cpp>=0.23.0", |
| 65 | ] |
| 66 | llm = [ |
| 67 | # LLM provider SDKs (install the ones you use) |
| 68 | "anthropic>=0.39.0", |
| 69 | "openai>=1.0.0", |
| 70 | ] |
| @@ -81,10 +87,11 @@ | |
| 81 | "pymdown-extensions>=10.0", |
| 82 | ] |
| 83 | all = [ |
| 84 | "navegador[redis]", |
| 85 | "navegador[languages]", |
| 86 | "navegador[llm]", |
| 87 | "navegador[dev]", |
| 88 | "navegador[docs]", |
| 89 | ] |
| 90 | |
| 91 | |
| 92 | ADDED tests/test_ansible_parser.py |
| 93 | ADDED tests/test_bash_parser.py |
| 94 | ADDED tests/test_chef_enricher.py |
| 95 | ADDED tests/test_hcl_parser.py |
| 96 | ADDED tests/test_puppet_parser.py |
| --- pyproject.toml | |
| +++ pyproject.toml | |
| @@ -2,19 +2,19 @@ | |
| 2 | requires = ["setuptools>=69.0", "wheel"] |
| 3 | build-backend = "setuptools.build_meta" |
| 4 | |
| 5 | [project] |
| 6 | name = "navegador" |
| 7 | version = "0.8.0" |
| 8 | description = "AST + knowledge graph context engine for AI coding agents" |
| 9 | readme = "README.md" |
| 10 | license = "MIT" |
| 11 | requires-python = ">=3.12" |
| 12 | authors = [ |
| 13 | { name = "CONFLICT LLC" }, |
| 14 | ] |
| 15 | keywords = ["ast", "knowledge-graph", "code-analysis", "ai-agents", "mcp", "context-management", "falkordb", "go", "rust", "java", "typescript", "kotlin", "csharp", "php", "ruby", "swift", "c", "cpp", "terraform", "hcl", "puppet", "ansible", "chef", "bash", "iac"] |
| 16 | classifiers = [ |
| 17 | "Development Status :: 3 - Alpha", |
| 18 | "Intended Audience :: Developers", |
| 19 | "Operating System :: OS Independent", |
| 20 | "Programming Language :: Python :: 3", |
| @@ -61,10 +61,16 @@ | |
| 61 | "tree-sitter-ruby>=0.23.0", |
| 62 | "tree-sitter-swift>=0.23.0", |
| 63 | "tree-sitter-c>=0.23.0", |
| 64 | "tree-sitter-cpp>=0.23.0", |
| 65 | ] |
| 66 | iac = [ |
| 67 | # Infrastructure-as-Code tree-sitter grammars |
| 68 | "tree-sitter-hcl>=1.2.0", |
| 69 | "tree-sitter-puppet>=1.3.0", |
| 70 | "tree-sitter-bash>=0.25.0", |
| 71 | ] |
| 72 | llm = [ |
| 73 | # LLM provider SDKs (install the ones you use) |
| 74 | "anthropic>=0.39.0", |
| 75 | "openai>=1.0.0", |
| 76 | ] |
| @@ -81,10 +87,11 @@ | |
| 87 | "pymdown-extensions>=10.0", |
| 88 | ] |
| 89 | all = [ |
| 90 | "navegador[redis]", |
| 91 | "navegador[languages]", |
| 92 | "navegador[iac]", |
| 93 | "navegador[llm]", |
| 94 | "navegador[dev]", |
| 95 | "navegador[docs]", |
| 96 | ] |
| 97 | |
| 98 | |
| 99 | ADDED tests/test_ansible_parser.py |
| 100 | ADDED tests/test_bash_parser.py |
| 101 | ADDED tests/test_chef_enricher.py |
| 102 | ADDED tests/test_hcl_parser.py |
| 103 | ADDED tests/test_puppet_parser.py |
| --- a/tests/test_ansible_parser.py | ||
| +++ b/tests/test_ansible_parser.py | ||
| @@ -0,0 +1,234 @@ | ||
| 1 | +"""Tests for navegador.ingestion.ansible — AnsibleParser.""" | |
| 2 | + | |
| 3 | +import tempfile | |
| 4 | +from pathlib import Path | |
| 5 | +from unittest.mock import MagicMock | |
| 6 | + | |
| 7 | +from navegador.graph.schema import NodeLabel | |
| 8 | +from navegador.ingestion.ansible import AnsibleParser  # noqa: E402 | |
| 9 | + | |
| 10 | + | |
| 11 | +def _make_store(): | |
| 12 | + store = MagicMock() | |
| 13 | + store.query.return_value = MagicMock(result_set=[]) | |
| 14 | + return store | |
| 15 | + | |
| 16 | + | |
| 17 | +class TestIsAnsibleFile: | |
| 18 | + """Tests for AnsibleParser.is_ansible_file() path detection.""" | |
| 19 | + | |
| 20 | + def test_role_tasks_detected(self): | |
| 21 | + with tempfile.TemporaryDirectory() as tmp: | |
| 22 | + p = Path(tmp) / "roles" / "webserver" / "tasks" / "main.yml" | |
| 23 | + p.parent.mkdir(parents=True) | |
| 24 | + p.write_text("---\n- name: test\n debug:\n") | |
| 25 | + assert AnsibleParser.is_ansible_file(p, Path(tmp)) is True | |
| 26 | + | |
| 27 | + def test_role_handlers_detected(self): | |
| 28 | + with tempfile.TemporaryDirectory() as tmp: | |
| 29 | + p = Path(tmp) / "roles" / "webserver" / "handlers" / "main.yml" | |
| 30 | + p.parent.mkdir(parents=True) | |
| 31 | + p.write_text("---\n- name: restart nginx\n service:\n") | |
| 32 | + assert AnsibleParser.is_ansible_file(p, Path(tmp)) is True | |
| 33 | + | |
| 34 | + def test_playbooks_dir_detected(self): | |
| 35 | + with tempfile.TemporaryDirectory() as tmp: | |
| 36 | + p = Path(tmp) / "playbooks" / "deploy.yml" | |
| 37 | + p.parent.mkdir(parents=True) | |
| 38 | + p.write_text("---\n- hosts: all\n tasks: []\n") | |
| 39 | + assert AnsibleParser.is_ansible_file(p, Path(tmp)) is True | |
| 40 | + | |
| 41 | + def test_group_vars_detected(self): | |
| 42 | + with tempfile.TemporaryDirectory() as tmp: | |
| 43 | + p = Path(tmp) / "group_vars" / "all.yml" | |
| 44 | + p.parent.mkdir(parents=True) | |
| 45 | + p.write_text("---\nhttp_port: 80\n") | |
| 46 | + assert AnsibleParser.is_ansible_file(p, Path(tmp)) is True | |
| 47 | + | |
| 48 | + def test_random_yaml_not_detected(self): | |
| 49 | + with tempfile.TemporaryDirectory() as tmp: | |
| 50 | + p = Path(tmp) / "random" / "config.yml" | |
| 51 | + p.parent.mkdir(parents=True) | |
| 52 | + p.write_text("---\nkey: value\n") | |
| 53 | + assert AnsibleParser.is_ansible_file(p, Path(tmp)) is False | |
| 54 | + | |
| 55 | + def test_non_yaml_not_detected(self): | |
| 56 | + with tempfile.TemporaryDirectory() as tmp: | |
| 57 | + p = Path(tmp) / "some_file.py" | |
| 58 | + p.write_text("print('hello')\n") | |
| 59 | + assert AnsibleParser.is_ansible_file(p, Path(tmp)) is False | |
| 60 | + | |
| 61 | + | |
| 62 | +class TestParsePlaybook: | |
| 63 | + """Tests for parse_file() with a full playbook (list with hosts).""" | |
| 64 | + | |
| 65 | + def test_creates_module_class_and_function_nodes(self): | |
| 66 | + store = _make_store() | |
| 67 | + parser = AnsibleParser() | |
| 68 | + with tempfile.TemporaryDirectory() as tmp: | |
| 69 | + tmp_path = Path(tmp) | |
| 70 | + playbook = tmp_path / "playbooks" / "deploy.yml" | |
| 71 | + playbook.parent.mkdir(parents=True) | |
| 72 | + playbook.write_text( | |
| 73 | + "---\n" | |
| 74 | + "- name: Deploy web app\n" | |
| 75 | + " hosts: webservers\n" | |
| 76 | + " tasks:\n" | |
| 77 | + " - name: Install nginx\n" | |
| 78 | + " apt:\n" | |
| 79 | + " name: nginx\n" | |
| 80 | + " state: present\n" | |
| 81 | + " - name: Start nginx\n" | |
| 82 | + " service:\n" | |
| 83 | + " name: nginx\n" | |
| 84 | + " state: started\n" | |
| 85 | + ) | |
| 86 | + stats = parser.parse_file(playbook, tmp_path, store) | |
| 87 | + | |
| 88 | + assert stats["functions"] >= 2 | |
| 89 | + assert stats["classes"] >= 1 | |
| 90 | + | |
| 91 | + # Verify Module node created for playbook | |
| 92 | + create_calls = store.create_node.call_args_list | |
| 93 | + labels = [c[0][0] for c in create_calls] | |
| 94 | + assert NodeLabel.Module in labels | |
| 95 | + assert NodeLabel.Class in labels | |
| 96 | + assert NodeLabel.Function in labels | |
| 97 | + | |
| 98 | + def test_edges_created_for_containment(self): | |
| 99 | + store = _make_store() | |
| 100 | + parser = AnsibleParser() | |
| 101 | + with tempfile.TemporaryDirectory() as tmp: | |
| 102 | + tmp_path = Path(tmp) | |
| 103 | + playbook = tmp_path / "playbooks" / "site.yml" | |
| 104 | + playbook.parent.mkdir(parents=True) | |
| 105 | + playbook.write_text( | |
| 106 | + "---\n- name: Main play\n hosts: all\n tasks:\n - name: Ping\n ping:\n" | |
| 107 | + ) | |
| 108 | + stats = parser.parse_file(playbook, tmp_path, store) | |
| 109 | + | |
| 110 | + assert stats["edges"] >= 3 # File->Module, Module->Class, Class->Func | |
| 111 | + | |
| 112 | + | |
| 113 | +class TestParseTaskFile: | |
| 114 | + """Tests for parse_file() with a standalone task file.""" | |
| 115 | + | |
| 116 | + def test_task_file_creates_class_and_functions(self): | |
| 117 | + store = _make_store() | |
| 118 | + parser = AnsibleParser() | |
| 119 | + with tempfile.TemporaryDirectory() as tmp: | |
| 120 | + tmp_path = Path(tmp) | |
| 121 | + task_file = tmp_path / "roles" / "web" / "tasks" / "main.yml" | |
| 122 | + task_file.parent.mkdir(parents=True) | |
| 123 | + task_file.write_text( | |
| 124 | + "---\n" | |
| 125 | + "- name: Install packages\n" | |
| 126 | + " apt:\n" | |
| 127 | + " name: curl\n" | |
| 128 | + "- name: Copy config\n" | |
| 129 | + " copy:\n" | |
| 130 | + " src: app.conf\n" | |
| 131 | + " dest: /etc/app.conf\n" | |
| 132 | + ) | |
| 133 | + stats = parser.parse_file(task_file, tmp_path, store) | |
| 134 | + | |
| 135 | + assert stats["classes"] == 1 # synthetic parent | |
| 136 | + assert stats["functions"] == 2 | |
| 137 | + | |
| 138 | + | |
| 139 | +class TestParseVariableFile: | |
| 140 | + """Tests for parse_file() with a variable file.""" | |
| 141 | + | |
| 142 | + def test_variable_file_creates_variables(self): | |
| 143 | + store = _make_store() | |
| 144 | + parser = AnsibleParser() | |
| 145 | + with tempfile.TemporaryDirectory() as tmp: | |
| 146 | + tmp_path = Path(tmp) | |
| 147 | + var_file = tmp_path / "roles" / "web" / "defaults" / "main.yml" | |
| 148 | + var_file.parent.mkdir(parents=True) | |
| 149 | + var_file.write_text("---\nhttp_port: 80\nmax_clients: 200\napp_env: production\n") | |
| 150 | + stats = parser.parse_file(var_file, tmp_path, store) | |
| 151 | + | |
| 152 | + # Each variable creates a CONTAINS edge | |
| 153 | + assert stats["edges"] >= 3 | |
| 154 | + create_calls = store.create_node.call_args_list | |
| 155 | + labels = [c[0][0] for c in create_calls] | |
| 156 | + assert labels.count(NodeLabel.Variable) == 3 | |
| 157 | + | |
| 158 | + | |
| 159 | +class TestHandlerAndNotify: | |
| 160 | + """Tests for handler detection and CALLS edges from notify.""" | |
| 161 | + | |
| 162 | + def test_notify_creates_calls_edge(self): | |
| 163 | + store = _make_store() | |
| 164 | + parser = AnsibleParser() | |
| 165 | + with tempfile.TemporaryDirectory() as tmp: | |
| 166 | + tmp_path = Path(tmp) | |
| 167 | + playbook = tmp_path / "playbooks" / "handlers.yml" | |
| 168 | + playbook.parent.mkdir(parents=True) | |
| 169 | + playbook.write_text( | |
| 170 | + "---\n" | |
| 171 | + "- name: Handler play\n" | |
| 172 | + " hosts: all\n" | |
| 173 | + " tasks:\n" | |
| 174 | + " - name: Update config\n" | |
| 175 | + " copy:\n" | |
| 176 | + " src: app.conf\n" | |
| 177 | + " dest: /etc/app.conf\n" | |
| 178 | + " notify: Restart app\n" | |
| 179 | + " handlers:\n" | |
| 180 | + " - name: Restart app\n" | |
| 181 | + " service:\n" | |
| 182 | + " name: app\n" | |
| 183 | + " state: restarted\n" | |
| 184 | + ) | |
| 185 | + parser.parse_file(playbook, tmp_path, store) | |
| 186 | + | |
| 187 | + # Should have a CALLS edge from task to handler | |
| 188 | + edge_calls = store.create_edge.call_args_list | |
| 189 | + calls_edges = [c for c in edge_calls if c[0][2] == EdgeType.CALLS] | |
| 190 | + assert len(calls_edges) >= 1 | |
| 191 | + # The CALLS edge target should be the handler name | |
| 192 | + target_props = calls_edges[0][0][4] | |
| 193 | + assert target_props["name"] == "Restart app" | |
| 194 | + | |
| 195 | + def test_handler_file_creates_handler_functions(self): | |
| 196 | + store = _make_store() | |
| 197 | + parser = AnsibleParser() | |
| 198 | + with tempfile.TemporaryDirectory() as tmp: | |
| 199 | + tmp_path = Path(tmp) | |
| 200 | + handler_file = tmp_path / "roles" / "web" / "handlers" / "main.yml" | |
| 201 | + handler_file.parent.mkdir(parents=True) | |
| 202 | + handler_file.write_text( | |
| 203 | + "---\n" | |
| 204 | + "- name: Restart nginx\n" | |
| 205 | + " service:\n" | |
| 206 | + " name: nginx\n" | |
| 207 | + " state: restarted\n" | |
| 208 | + "- name: Reload nginx\n" | |
| 209 | + " service:\n" | |
| 210 | + " name: nginx\n" | |
| 211 | + " state: reloaded\n" | |
| 212 | + ) | |
| 213 | + stats = parser.parse_file(handler_file, tmp_path, store) | |
| 214 | + | |
| 215 | + assert stats["functions"] == 2 | |
| 216 | + assert stats["classes"] == 1 | |
| 217 | + | |
| 218 | + | |
| 219 | +class TestRoleImport: | |
| 220 | + """Tests for role import extraction.""" | |
| 221 | + | |
| 222 | + def test_role_references_create_import_nodes(self): | |
| 223 | + store = _make_store() | |
| 224 | + parser = AnsibleParser() | |
| 225 | + with tempfile.TemporaryDirectory() as tmp: | |
| 226 | + tmp_path = Path(tmp) | |
| 227 | + playbook = tmp_path / "playbooks" / "roles.yml" | |
| 228 | + playbook.parent.mkdir(parents=True) | |
| 229 | + playbook.write_text( | |
| 230 | + "---\n" | |
| 231 | + "- name: Apply roles\n" | |
| 232 | + " hosts: all\n" | |
| 233 | + " roles:\n" | |
| 234 | + " - |
| --- a/tests/test_ansible_parser.py | |
| +++ b/tests/test_ansible_parser.py | |
| @@ -0,0 +1,234 @@ | |
| --- a/tests/test_ansible_parser.py | |
| +++ b/tests/test_ansible_parser.py | |
| @@ -0,0 +1,234 @@ | |
| 1 | """Tests for navegador.ingestion.ansible — AnsibleParser.""" |
| 2 | |
| 3 | import tempfile |
| 4 | from pathlib import Path |
| 5 | from unittest.mock import MagicMock |
| 6 | |
| 7 | from navegador.graph.schema import EdgeType, NodeLabel |
| 8 | from navegador.ingestion.ansible import AnsibleParser  # noqa: E402 |
| 9 | |
| 10 | |
| 11 | def _make_store(): |
| 12 | store = MagicMock() |
| 13 | store.query.return_value = MagicMock(result_set=[]) |
| 14 | return store |
| 15 | |
| 16 | |
| 17 | class TestIsAnsibleFile: |
| 18 | """Tests for AnsibleParser.is_ansible_file() path detection.""" |
| 19 | |
| 20 | def test_role_tasks_detected(self): |
| 21 | with tempfile.TemporaryDirectory() as tmp: |
| 22 | p = Path(tmp) / "roles" / "webserver" / "tasks" / "main.yml" |
| 23 | p.parent.mkdir(parents=True) |
| 24 | p.write_text("---\n- name: test\n debug:\n") |
| 25 | assert AnsibleParser.is_ansible_file(p, Path(tmp)) is True |
| 26 | |
| 27 | def test_role_handlers_detected(self): |
| 28 | with tempfile.TemporaryDirectory() as tmp: |
| 29 | p = Path(tmp) / "roles" / "webserver" / "handlers" / "main.yml" |
| 30 | p.parent.mkdir(parents=True) |
| 31 | p.write_text("---\n- name: restart nginx\n service:\n") |
| 32 | assert AnsibleParser.is_ansible_file(p, Path(tmp)) is True |
| 33 | |
| 34 | def test_playbooks_dir_detected(self): |
| 35 | with tempfile.TemporaryDirectory() as tmp: |
| 36 | p = Path(tmp) / "playbooks" / "deploy.yml" |
| 37 | p.parent.mkdir(parents=True) |
| 38 | p.write_text("---\n- hosts: all\n tasks: []\n") |
| 39 | assert AnsibleParser.is_ansible_file(p, Path(tmp)) is True |
| 40 | |
| 41 | def test_group_vars_detected(self): |
| 42 | with tempfile.TemporaryDirectory() as tmp: |
| 43 | p = Path(tmp) / "group_vars" / "all.yml" |
| 44 | p.parent.mkdir(parents=True) |
| 45 | p.write_text("---\nhttp_port: 80\n") |
| 46 | assert AnsibleParser.is_ansible_file(p, Path(tmp)) is True |
| 47 | |
| 48 | def test_random_yaml_not_detected(self): |
| 49 | with tempfile.TemporaryDirectory() as tmp: |
| 50 | p = Path(tmp) / "random" / "config.yml" |
| 51 | p.parent.mkdir(parents=True) |
| 52 | p.write_text("---\nkey: value\n") |
| 53 | assert AnsibleParser.is_ansible_file(p, Path(tmp)) is False |
| 54 | |
| 55 | def test_non_yaml_not_detected(self): |
| 56 | with tempfile.TemporaryDirectory() as tmp: |
| 57 | p = Path(tmp) / "some_file.py" |
| 58 | p.write_text("print('hello')\n") |
| 59 | assert AnsibleParser.is_ansible_file(p, Path(tmp)) is False |
| 60 | |
| 61 | |
| 62 | class TestParsePlaybook: |
| 63 | """Tests for parse_file() with a full playbook (list with hosts).""" |
| 64 | |
| 65 | def test_creates_module_class_and_function_nodes(self): |
| 66 | store = _make_store() |
| 67 | parser = AnsibleParser() |
| 68 | with tempfile.TemporaryDirectory() as tmp: |
| 69 | tmp_path = Path(tmp) |
| 70 | playbook = tmp_path / "playbooks" / "deploy.yml" |
| 71 | playbook.parent.mkdir(parents=True) |
| 72 | playbook.write_text( |
| 73 | "---\n" |
| 74 | "- name: Deploy web app\n" |
| 75 | " hosts: webservers\n" |
| 76 | " tasks:\n" |
| 77 | " - name: Install nginx\n" |
| 78 | " apt:\n" |
| 79 | " name: nginx\n" |
| 80 | " state: present\n" |
| 81 | " - name: Start nginx\n" |
| 82 | " service:\n" |
| 83 | " name: nginx\n" |
| 84 | " state: started\n" |
| 85 | ) |
| 86 | stats = parser.parse_file(playbook, tmp_path, store) |
| 87 | |
| 88 | assert stats["functions"] >= 2 |
| 89 | assert stats["classes"] >= 1 |
| 90 | |
| 91 | # Verify Module node created for playbook |
| 92 | create_calls = store.create_node.call_args_list |
| 93 | labels = [c[0][0] for c in create_calls] |
| 94 | assert NodeLabel.Module in labels |
| 95 | assert NodeLabel.Class in labels |
| 96 | assert NodeLabel.Function in labels |
| 97 | |
| 98 | def test_edges_created_for_containment(self): |
| 99 | store = _make_store() |
| 100 | parser = AnsibleParser() |
| 101 | with tempfile.TemporaryDirectory() as tmp: |
| 102 | tmp_path = Path(tmp) |
| 103 | playbook = tmp_path / "playbooks" / "site.yml" |
| 104 | playbook.parent.mkdir(parents=True) |
| 105 | playbook.write_text( |
| 106 | "---\n- name: Main play\n hosts: all\n tasks:\n - name: Ping\n ping:\n" |
| 107 | ) |
| 108 | stats = parser.parse_file(playbook, tmp_path, store) |
| 109 | |
| 110 | assert stats["edges"] >= 3 # File->Module, Module->Class, Class->Func |
| 111 | |
| 112 | |
| 113 | class TestParseTaskFile: |
| 114 | """Tests for parse_file() with a standalone task file.""" |
| 115 | |
| 116 | def test_task_file_creates_class_and_functions(self): |
| 117 | store = _make_store() |
| 118 | parser = AnsibleParser() |
| 119 | with tempfile.TemporaryDirectory() as tmp: |
| 120 | tmp_path = Path(tmp) |
| 121 | task_file = tmp_path / "roles" / "web" / "tasks" / "main.yml" |
| 122 | task_file.parent.mkdir(parents=True) |
| 123 | task_file.write_text( |
| 124 | "---\n" |
| 125 | "- name: Install packages\n" |
| 126 | " apt:\n" |
| 127 | " name: curl\n" |
| 128 | "- name: Copy config\n" |
| 129 | " copy:\n" |
| 130 | " src: app.conf\n" |
| 131 | " dest: /etc/app.conf\n" |
| 132 | ) |
| 133 | stats = parser.parse_file(task_file, tmp_path, store) |
| 134 | |
| 135 | assert stats["classes"] == 1 # synthetic parent |
| 136 | assert stats["functions"] == 2 |
| 137 | |
| 138 | |
| 139 | class TestParseVariableFile: |
| 140 | """Tests for parse_file() with a variable file.""" |
| 141 | |
| 142 | def test_variable_file_creates_variables(self): |
| 143 | store = _make_store() |
| 144 | parser = AnsibleParser() |
| 145 | with tempfile.TemporaryDirectory() as tmp: |
| 146 | tmp_path = Path(tmp) |
| 147 | var_file = tmp_path / "roles" / "web" / "defaults" / "main.yml" |
| 148 | var_file.parent.mkdir(parents=True) |
| 149 | var_file.write_text("---\nhttp_port: 80\nmax_clients: 200\napp_env: production\n") |
| 150 | stats = parser.parse_file(var_file, tmp_path, store) |
| 151 | |
| 152 | # Each variable creates a CONTAINS edge |
| 153 | assert stats["edges"] >= 3 |
| 154 | create_calls = store.create_node.call_args_list |
| 155 | labels = [c[0][0] for c in create_calls] |
| 156 | assert labels.count(NodeLabel.Variable) == 3 |
| 157 | |
| 158 | |
| 159 | class TestHandlerAndNotify: |
| 160 | """Tests for handler detection and CALLS edges from notify.""" |
| 161 | |
| 162 | def test_notify_creates_calls_edge(self): |
| 163 | store = _make_store() |
| 164 | parser = AnsibleParser() |
| 165 | with tempfile.TemporaryDirectory() as tmp: |
| 166 | tmp_path = Path(tmp) |
| 167 | playbook = tmp_path / "playbooks" / "handlers.yml" |
| 168 | playbook.parent.mkdir(parents=True) |
| 169 | playbook.write_text( |
| 170 | "---\n" |
| 171 | "- name: Handler play\n" |
| 172 | " hosts: all\n" |
| 173 | " tasks:\n" |
| 174 | " - name: Update config\n" |
| 175 | " copy:\n" |
| 176 | " src: app.conf\n" |
| 177 | " dest: /etc/app.conf\n" |
| 178 | " notify: Restart app\n" |
| 179 | " handlers:\n" |
| 180 | " - name: Restart app\n" |
| 181 | " service:\n" |
| 182 | " name: app\n" |
| 183 | " state: restarted\n" |
| 184 | ) |
| 185 | parser.parse_file(playbook, tmp_path, store) |
| 186 | |
| 187 | # Should have a CALLS edge from task to handler |
| 188 | edge_calls = store.create_edge.call_args_list |
| 189 | calls_edges = [c for c in edge_calls if c[0][2] == EdgeType.CALLS] |
| 190 | assert len(calls_edges) >= 1 |
| 191 | # The CALLS edge target should be the handler name |
| 192 | target_props = calls_edges[0][0][4] |
| 193 | assert target_props["name"] == "Restart app" |
| 194 | |
| 195 | def test_handler_file_creates_handler_functions(self): |
| 196 | store = _make_store() |
| 197 | parser = AnsibleParser() |
| 198 | with tempfile.TemporaryDirectory() as tmp: |
| 199 | tmp_path = Path(tmp) |
| 200 | handler_file = tmp_path / "roles" / "web" / "handlers" / "main.yml" |
| 201 | handler_file.parent.mkdir(parents=True) |
| 202 | handler_file.write_text( |
| 203 | "---\n" |
| 204 | "- name: Restart nginx\n" |
| 205 | " service:\n" |
| 206 | " name: nginx\n" |
| 207 | " state: restarted\n" |
| 208 | "- name: Reload nginx\n" |
| 209 | " service:\n" |
| 210 | " name: nginx\n" |
| 211 | " state: reloaded\n" |
| 212 | ) |
| 213 | stats = parser.parse_file(handler_file, tmp_path, store) |
| 214 | |
| 215 | assert stats["functions"] == 2 |
| 216 | assert stats["classes"] == 1 |
| 217 | |
| 218 | |
| 219 | class TestRoleImport: |
| 220 | """Tests for role import extraction.""" |
| 221 | |
| 222 | def test_role_references_create_import_nodes(self): |
| 223 | store = _make_store() |
| 224 | parser = AnsibleParser() |
| 225 | with tempfile.TemporaryDirectory() as tmp: |
| 226 | tmp_path = Path(tmp) |
| 227 | playbook = tmp_path / "playbooks" / "roles.yml" |
| 228 | playbook.parent.mkdir(parents=True) |
| 229 | playbook.write_text( |
| 230 | "---\n" |
| 231 | "- name: Apply roles\n" |
| 232 | " hosts: all\n" |
| 233 | " roles:\n" |
| 234 | " - |
| --- a/tests/test_bash_parser.py | ||
| +++ b/tests/test_bash_parser.py | ||
| @@ -0,0 +1,534 @@ | ||
| 1 | +"""Tests for navegador.ingestion.bash — BashParser internal methods.""" | |
| 2 | + | |
| 3 | +from unittest.mock import MagicMock, patch | |
| 4 | + | |
| 5 | +import pytest | |
| 6 | + | |
| 7 | +from navegador.graph.schema import EdgeType, NodeLabel | |
| 8 | + | |
| 9 | + | |
| 10 | +class MockNode: | |
| 11 | + _id_counter = 0 | |
| 12 | + | |
| 13 | + def __init__( | |
| 14 | + self, | |
| 15 | + type_: str, | |
| 16 | + text: bytes = b"", | |
| 17 | + children: list = None, | |
| 18 | + start_byte: int = 0, | |
| 19 | + end_byte: int = 0, | |
| 20 | + start_point: tuple = (0, 0), | |
| 21 | + end_point: tuple = (0, 0), | |
| 22 | + parent=None, | |
| 23 | + ): | |
| 24 | + MockNode._id_counter += 1 | |
| 25 | + self.id = MockNode._id_counter | |
| 26 | + self.type = type_ | |
| 27 | + self._text = text | |
| 28 | + self.children = children or [] | |
| 29 | + self.start_byte = start_byte | |
| 30 | + self.end_byte = end_byte | |
| 31 | + self.start_point = start_point | |
| 32 | + self.end_point = end_point | |
| 33 | + self.parent = parent | |
| 34 | + self._fields: dict = {} | |
| 35 | + for child in self.children: | |
| 36 | + child.parent = self | |
| 37 | + | |
| 38 | + def child_by_field_name(self, name: str): | |
| 39 | + return self._fields.get(name) | |
| 40 | + | |
| 41 | + def set_field(self, name: str, node): | |
| 42 | + self._fields[name] = node | |
| 43 | + node.parent = self | |
| 44 | + return self | |
| 45 | + | |
| 46 | + | |
| 47 | +def _text_node(text: bytes, type_: str = "identifier") -> MockNode: | |
| 48 | + return MockNode(type_, text, start_byte=0, end_byte=len(text)) | |
| 49 | + | |
| 50 | + | |
| 51 | +def _make_store(): | |
| 52 | + store = MagicMock() | |
| 53 | + store.query.return_value = MagicMock(result_set=[]) | |
| 54 | + return store | |
| 55 | + | |
| 56 | + | |
| 57 | +def _make_parser(): | |
| 58 | + from navegador.ingestion.bash import BashParser | |
| 59 | + | |
| 60 | + parser = BashParser.__new__(BashParser) | |
| 61 | + parser._parser = MagicMock() | |
| 62 | + return parser | |
| 63 | + | |
| 64 | + | |
| 65 | +class TestBashGetLanguage: | |
| 66 | + def test_raises_when_not_installed(self): | |
| 67 | + from navegador.ingestion.bash import _get_bash_language | |
| 68 | + | |
| 69 | + with patch.dict( | |
| 70 | + "sys.modules", | |
| 71 | + { | |
| 72 | + "tree_sitter_bash": None, | |
| 73 | + "tree_sitter": None, | |
| 74 | + }, | |
| 75 | + ): | |
| 76 | + with pytest.raises(ImportError, match="tree-sitter-bash"): | |
| 77 | + _get_bash_language() | |
| 78 | + | |
| 79 | + def test_returns_language_object(self): | |
| 80 | + from navegador.ingestion.bash import _get_bash_language | |
| 81 | + | |
| 82 | + mock_tsbash = MagicMock() | |
| 83 | + mock_ts = MagicMock() | |
| 84 | + with patch.dict( | |
| 85 | + "sys.modules", | |
| 86 | + { | |
| 87 | + "tree_sitter_bash": mock_tsbash, | |
| 88 | + "tree_sitter": mock_ts, | |
| 89 | + }, | |
| 90 | + ): | |
| 91 | + result = _get_bash_language() | |
| 92 | + assert result is mock_ts.Language.return_value | |
| 93 | + | |
| 94 | + | |
| 95 | +class TestBashNodeText: | |
| 96 | + def test_extracts_bytes(self): | |
| 97 | + from navegador.ingestion.bash import _node_text | |
| 98 | + | |
| 99 | + source = b"#!/bin/bash\nmy_func() {" | |
| 100 | + node = MockNode( | |
| 101 | + "identifier", | |
| 102 | + start_byte=12, | |
| 103 | + end_byte=19, | |
| 104 | + ) | |
| 105 | + assert _node_text(node, source) == "my_func" | |
| 106 | + | |
| 107 | + | |
| 108 | +class TestBashHandleFunction: | |
| 109 | + def test_creates_function_node(self): | |
| 110 | + parser = _make_parser() | |
| 111 | + store = _make_store() | |
| 112 | + source = b"deploy" | |
| 113 | + name_node = MockNode( | |
| 114 | + "word", | |
| 115 | + start_byte=0, | |
| 116 | + end_byte=6, | |
| 117 | + ) | |
| 118 | + node = MockNode( | |
| 119 | + "function_definition", | |
| 120 | + start_point=(0, 0), | |
| 121 | + end_point=(5, 1), | |
| 122 | + ) | |
| 123 | + node.set_field("name", name_node) | |
| 124 | + stats = {"functions": 0, "classes": 0, "edges": 0} | |
| 125 | + parser._handle_function(node, source, "deploy.sh", store, stats) | |
| 126 | + assert stats["functions"] == 1 | |
| 127 | + assert stats["edges"] == 1 | |
| 128 | + label = store.create_node.call_args[0][0] | |
| 129 | + props = store.create_node.call_args[0][1] | |
| 130 | + assert label == NodeLabel.Function | |
| 131 | + assert props["name"] == "deploy" | |
| 132 | + assert props["semantic_type"] == "shell_function" | |
| 133 | + | |
| 134 | + def test_skips_if_no_name_node(self): | |
| 135 | + parser = _make_parser() | |
| 136 | + store = _make_store() | |
| 137 | + node = MockNode( | |
| 138 | + "function_definition", | |
| 139 | + start_point=(0, 0), | |
| 140 | + end_point=(0, 5), | |
| 141 | + ) | |
| 142 | + stats = {"functions": 0, "classes": 0, "edges": 0} | |
| 143 | + parser._handle_function(node, b"", "test.sh", store, stats) | |
| 144 | + assert stats["functions"] == 0 | |
| 145 | + store.create_node.assert_not_called() | |
| 146 | + | |
| 147 | + def test_extracts_calls_from_body(self): | |
| 148 | + parser = _make_parser() | |
| 149 | + store = _make_store() | |
| 150 | + source = b"deploy helper" | |
| 151 | + name_node = MockNode( | |
| 152 | + "word", | |
| 153 | + start_byte=0, | |
| 154 | + end_byte=6, | |
| 155 | + ) | |
| 156 | + callee_name = MockNode( | |
| 157 | + "word", | |
| 158 | + start_byte=7, | |
| 159 | + end_byte=13, | |
| 160 | + ) | |
| 161 | + cmd = MockNode("command") | |
| 162 | + cmd.set_field("name", callee_name) | |
| 163 | + body = MockNode( | |
| 164 | + "compound_statement", | |
| 165 | + children=[cmd], | |
| 166 | + ) | |
| 167 | + node = MockNode( | |
| 168 | + "function_definition", | |
| 169 | + start_point=(0, 0), | |
| 170 | + end_point=(5, 1), | |
| 171 | + ) | |
| 172 | + node.set_field("name", name_node) | |
| 173 | + node.set_field("body", body) | |
| 174 | + stats = {"functions": 0, "classes": 0, "edges": 0} | |
| 175 | + parser._handle_function(node, source, "deploy.sh", store, stats) | |
| 176 | + # 1 CONTAINS edge + 1 CALLS edge | |
| 177 | + assert stats["edges"] == 2 | |
| 178 | + | |
| 179 | + | |
| 180 | +class TestBashHandleVariable: | |
| 181 | + def test_creates_variable_node_for_top_level(self): | |
| 182 | + parser = _make_parser() | |
| 183 | + store = _make_store() | |
| 184 | + source = b'VERSION="1.0"' | |
| 185 | + name_node = MockNode( | |
| 186 | + "variable_name", | |
| 187 | + start_byte=0, | |
| 188 | + end_byte=7, | |
| 189 | + ) | |
| 190 | + value_node = MockNode( | |
| 191 | + "string", | |
| 192 | + start_byte=8, | |
| 193 | + end_byte=13, | |
| 194 | + ) | |
| 195 | + program = MockNode("program") | |
| 196 | + node = MockNode( | |
| 197 | + "variable_assignment", | |
| 198 | + start_point=(0, 0), | |
| 199 | + end_point=(0, 13), | |
| 200 | + parent=program, | |
| 201 | + ) | |
| 202 | + node.set_field("name", name_node) | |
| 203 | + node.set_field("value", value_node) | |
| 204 | + # Re-set parent after construction since constructor | |
| 205 | + # overwrites it | |
| 206 | + node.parent = program | |
| 207 | + stats = {"functions": 0, "classes": 0, "edges": 0} | |
| 208 | + parser._handle_variable(node, source, "env.sh", store, stats) | |
| 209 | + assert stats["edges"] == 1 | |
| 210 | + label = store.create_node.call_args[0][0] | |
| 211 | + props = store.create_node.call_args[0][1] | |
| 212 | + assert label == NodeLabel.Variable | |
| 213 | + assert props["name"] == "VERSION" | |
| 214 | + assert props["semantic_type"] == "shell_variable" | |
| 215 | + | |
| 216 | + def test_skips_non_top_level_variable(self): | |
| 217 | + parser = _make_parser() | |
| 218 | + store = _make_store() | |
| 219 | + source = b"x=1" | |
| 220 | + name_node = MockNode( | |
| 221 | + "variable_name", | |
| 222 | + start_byte=0, | |
| 223 | + end_byte=1, | |
| 224 | + ) | |
| 225 | + func_parent = MockNode("function_definition") | |
| 226 | + node = MockNode( | |
| 227 | + "variable_assignment", | |
| 228 | + start_point=(0, 0), | |
| 229 | + end_point=(0, 3), | |
| 230 | + parent=func_parent, | |
| 231 | + ) | |
| 232 | + node.set_field("name", name_node) | |
| 233 | + node.parent = func_parent | |
| 234 | + stats = {"functions": 0, "classes": 0, "edges": 0} | |
| 235 | + parser._handle_variable(node, source, "test.sh", store, stats) | |
| 236 | + assert stats["edges"] == 0 | |
| 237 | + store.create_node.assert_not_called() | |
| 238 | + | |
| 239 | + def test_skips_variable_without_name(self): | |
| 240 | + parser = _make_parser() | |
| 241 | + store = _make_store() | |
| 242 | + program = MockNode("program") | |
| 243 | + node = MockNode( | |
| 244 | + "variable_assignment", | |
| 245 | + start_point=(0, 0), | |
| 246 | + end_point=(0, 3), | |
| 247 | + parent=program, | |
| 248 | + ) | |
| 249 | + node.parent = program | |
| 250 | + stats = {"functions": 0, "classes": 0, "edges": 0} | |
| 251 | + parser._handle_variable(node, b"", "test.sh", store, stats) | |
| 252 | + store.create_node.assert_not_called() | |
| 253 | + | |
| 254 | + | |
| 255 | +class TestBashHandleSource: | |
| 256 | + def test_creates_import_for_source_command(self): | |
| 257 | + parser = _make_parser() | |
| 258 | + store = _make_store() | |
| 259 | + source = b"source ./lib.sh" | |
| 260 | + name_node = MockNode( | |
| 261 | + "word", | |
| 262 | + start_byte=0, | |
| 263 | + end_byte=6, | |
| 264 | + ) | |
| 265 | + arg_node = MockNode( | |
| 266 | + "word", | |
| 267 | + start_byte=7, | |
| 268 | + end_byte=15, | |
| 269 | + ) | |
| 270 | + node = MockNode( | |
| 271 | + "command", | |
| 272 | + children=[name_node, arg_node], | |
| 273 | + start_point=(0, 0), | |
| 274 | + end_point=(0, 15), | |
| 275 | + ) | |
| 276 | + node.set_field("name", name_node) | |
| 277 | + stats = {"functions": 0, "classes": 0, "edges": 0} | |
| 278 | + parser._handle_command(node, source, "main.sh", store, stats) | |
| 279 | + assert stats["edges"] == 1 | |
| 280 | + label = store.create_node.call_args[0][0] | |
| 281 | + props = store.create_node.call_args[0][1] | |
| 282 | + assert label == NodeLabel.Import | |
| 283 | + assert props["name"] == "./lib.sh" | |
| 284 | + assert props["semantic_type"] == "shell_source" | |
| 285 | + | |
| 286 | + def test_creates_import_for_dot_command(self): | |
| 287 | + parser = _make_parser() | |
| 288 | + store = _make_store() | |
| 289 | + source = b". /etc/profile" | |
| 290 | + name_node = MockNode( | |
| 291 | + "word", | |
| 292 | + start_byte=0, | |
| 293 | + end_byte=1, | |
| 294 | + ) | |
| 295 | + arg_node = MockNode( | |
| 296 | + "word", | |
| 297 | + start_byte=2, | |
| 298 | + end_byte=14, | |
| 299 | + ) | |
| 300 | + node = MockNode( | |
| 301 | + "command", | |
| 302 | + children=[name_node, arg_node], | |
| 303 | + start_point=(0, 0), | |
| 304 | + end_point=(0, 14), | |
| 305 | + ) | |
| 306 | + node.set_field("name", name_node) | |
| 307 | + stats = {"functions": 0, "classes": 0, "edges": 0} | |
| 308 | + parser._handle_command(node, source, "main.sh", store, stats) | |
| 309 | + assert stats["edges"] == 1 | |
| 310 | + props = store.create_node.call_args[0][1] | |
| 311 | + assert props["name"] == "/etc/profile" | |
| 312 | + | |
| 313 | + def test_ignores_non_source_commands(self): | |
| 314 | + parser = _make_parser() | |
| 315 | + store = _make_store() | |
| 316 | + source = b"echo hello" | |
| 317 | + name_node = MockNode( | |
| 318 | + "word", | |
| 319 | + start_byte=0, | |
| 320 | + end_byte=4, | |
| 321 | + ) | |
| 322 | + node = MockNode( | |
| 323 | + "command", | |
| 324 | + children=[name_node], | |
| 325 | + start_point=(0, 0), | |
| 326 | + end_point=(0, 10), | |
| 327 | + ) | |
| 328 | + node.set_field("name", name_node) | |
| 329 | + stats = {"functions": 0, "classes": 0, "edges": 0} | |
| 330 | + parser._handle_command(node, source, "main.sh", store, stats) | |
| 331 | + assert stats["edges"] == 0 | |
| 332 | + store.create_node.assert_not_called() | |
| 333 | + | |
| 334 | + def test_skips_source_without_arguments(self): | |
| 335 | + parser = _make_parser() | |
| 336 | + store = _make_store() | |
| 337 | + source = b"source" | |
| 338 | + name_node = MockNode( | |
| 339 | + "word", | |
| 340 | + start_byte=0, | |
| 341 | + end_byte=6, | |
| 342 | + ) | |
| 343 | + node = MockNode( | |
| 344 | + "command", | |
| 345 | + children=[name_node], | |
| 346 | + start_point=(0, 0), | |
| 347 | + end_point=(0, 6), | |
| 348 | + ) | |
| 349 | + node.set_field("name", name_node) | |
| 350 | + stats = {"functions": 0, "classes": 0, "edges": 0} | |
| 351 | + parser._handle_command(node, source, "main.sh", store, stats) | |
| 352 | + assert stats["edges"] == 0 | |
| 353 | + store.create_node.assert_not_called() | |
| 354 | + | |
| 355 | + | |
| 356 | +class TestBashExtractCalls: | |
| 357 | + def test_finds_command_calls(self): | |
| 358 | + parser = _make_parser() | |
| 359 | + store = _make_store() | |
| 360 | + source = b"build_app" | |
| 361 | + callee = MockNode( | |
| 362 | + "word", | |
| 363 | + start_byte=0, | |
| 364 | + end_byte=9, | |
| 365 | + ) | |
| 366 | + cmd = MockNode("command") | |
| 367 | + cmd.set_field("name", callee) | |
| 368 | + body = MockNode( | |
| 369 | + "compound_statement", | |
| 370 | + children=[cmd], | |
| 371 | + ) | |
| 372 | + fn_node = MockNode("function_definition") | |
| 373 | + fn_node.set_field("body", body) | |
| 374 | + stats = {"functions": 0, "classes": 0, "edges": 0} | |
| 375 | + parser._extract_calls(fn_node, source, "deploy.sh", "deploy", store, stats) | |
| 376 | + assert stats["edges"] == 1 | |
| 377 | + edge_call = store.create_edge.call_args[0] | |
| 378 | + assert edge_call[2] == EdgeType.CALLS | |
| 379 | + assert edge_call[4]["name"] == "build_app" | |
| 380 | + | |
| 381 | + def test_skips_builtins(self): | |
| 382 | + parser = _make_parser() | |
| 383 | + store = _make_store() | |
| 384 | + source = b"echo" | |
| 385 | + callee = MockNode( | |
| 386 | + "word", | |
| 387 | + start_byte=0, | |
| 388 | + end_byte=4, | |
| 389 | + ) | |
| 390 | + cmd = MockNode("command") | |
| 391 | + cmd.set_field("name", callee) | |
| 392 | + body = MockNode( | |
| 393 | + "compound_statement", | |
| 394 | + children=[cmd], | |
| 395 | + ) | |
| 396 | + fn_node = MockNode("function_definition") | |
| 397 | + fn_node.set_field("body", body) | |
| 398 | + stats = {"functions": 0, "classes": 0, "edges": 0} | |
| 399 | + parser._extract_calls(fn_node, source, "test.sh", "myfunc", store, stats) | |
| 400 | + assert stats["edges"] == 0 | |
| 401 | + | |
| 402 | + def test_no_calls_in_empty_body(self): | |
| 403 | + parser = _make_parser() | |
| 404 | + store = _make_store() | |
| 405 | + fn_node = MockNode("function_definition") | |
| 406 | + fn_node.set_field("body", MockNode("compound_statement")) | |
| 407 | + stats = {"functions": 0, "classes": 0, "edges": 0} | |
| 408 | + parser._extract_calls(fn_node, b"", "test.sh", "myfunc", store, stats) | |
| 409 | + assert stats["edges"] == 0 | |
| 410 | + | |
| 411 | + def test_no_body_means_no_calls(self): | |
| 412 | + parser = _make_parser() | |
| 413 | + store = _make_store() | |
| 414 | + fn_node = MockNode("function_definition") | |
| 415 | + stats = {"functions": 0, "classes": 0, "edges": 0} | |
| 416 | + parser._extract_calls(fn_node, b"", "test.sh", "myfunc", store, stats) | |
| 417 | + assert stats["edges"] == 0 | |
| 418 | + | |
| 419 | + | |
| 420 | +class TestBashWalkDispatch: | |
| 421 | + def test_walk_handles_function_definition(self): | |
| 422 | + parser = _make_parser() | |
| 423 | + store = _make_store() | |
| 424 | + source = b"deploy" | |
| 425 | + name_node = MockNode( | |
| 426 | + "word", | |
| 427 | + start_byte=0, | |
| 428 | + end_byte=6, | |
| 429 | + ) | |
| 430 | + fn = MockNode( | |
| 431 | + "function_definition", | |
| 432 | + start_point=(0, 0), | |
| 433 | + end_point=(5, 1), | |
| 434 | + ) | |
| 435 | + fn.set_field("name", name_node) | |
| 436 | + root = MockNode("program", children=[fn]) | |
| 437 | + stats = {"functions": 0, "classes": 0, "edges": 0} | |
| 438 | + parser._walk(root, source, "deploy.sh", store, stats) | |
| 439 | + assert stats["functions"] == 1 | |
| 440 | + | |
| 441 | + def test_walk_handles_variable_assignment(self): | |
| 442 | + parser = _make_parser() | |
| 443 | + store = _make_store() | |
| 444 | + source = b"VERSION" | |
| 445 | + name_node = MockNode( | |
| 446 | + "variable_name", | |
| 447 | + start_byte=0, | |
| 448 | + end_byte=7, | |
| 449 | + ) | |
| 450 | + program = MockNode("program") | |
| 451 | + var = MockNode( | |
| 452 | + "variable_assignment", | |
| 453 | + start_point=(0, 0), | |
| 454 | + end_point=(0, 13), | |
| 455 | + ) | |
| 456 | + var.set_field("name", name_node) | |
| 457 | + program.children = [var] | |
| 458 | + for child in program.children: | |
| 459 | + child.parent = program | |
| 460 | + stats = {"functions": 0, "classes": 0, "edges": 0} | |
| 461 | + parser._walk(program, source, "env.sh", store, stats) | |
| 462 | + assert stats["edges"] == 1 | |
| 463 | + | |
| 464 | + def test_walk_handles_source_command(self): | |
| 465 | + parser = _make_parser() | |
| 466 | + store = _make_store() | |
| 467 | + source = b"source ./lib.sh" | |
| 468 | + name_node = MockNode( | |
| 469 | + "word", | |
| 470 | + start_byte=0, | |
| 471 | + end_byte=6, | |
| 472 | + ) | |
| 473 | + arg_node = MockNode( | |
| 474 | + "word", | |
| 475 | + start_byte=7, | |
| 476 | + end_byte=15, | |
| 477 | + ) | |
| 478 | + cmd = MockNode( | |
| 479 | + "command", | |
| 480 | + children=[name_node, arg_node], | |
| 481 | + start_point=(0, 0), | |
| 482 | + end_point=(0, 15), | |
| 483 | + ) | |
| 484 | + cmd.set_field("name", name_node) | |
| 485 | + root = MockNode("program", children=[cmd]) | |
| 486 | + stats = {"functions": 0, "classes": 0, "edges": 0} | |
| 487 | + parser._walk(root, source, "main.sh", store, stats) | |
| 488 | + assert stats["edges"] == 1 | |
| 489 | + | |
| 490 | + def test_walk_recurses_into_children(self): | |
| 491 | + parser = _make_parser() | |
| 492 | + store = _make_store() | |
| 493 | + source = b"deploy" | |
| 494 | + name_node = MockNode( | |
| 495 | + "word", | |
| 496 | + start_byte=0, | |
| 497 | + end_byte=6, | |
| 498 | + ) | |
| 499 | + fn = MockNode( | |
| 500 | + "function_definition", | |
| 501 | + start_point=(0, 0), | |
| 502 | + end_point=(5, 1), | |
| 503 | + ) | |
| 504 | + fn.set_field("name", name_node) | |
| 505 | + wrapper = MockNode("if_statement", children=[fn]) | |
| 506 | + root = MockNode("program", children=[wrapper]) | |
| 507 | + stats = {"functions": 0, "classes": 0, "edges": 0} | |
| 508 | + parser._walk(root, source, "deploy.sh", store, stats) | |
| 509 | + assert stats["functions"] == 1 | |
| 510 | + | |
| 511 | + | |
class TestBashParseFile:
    def test_creates_file_node(self):
        """parse_file registers a File node tagged with language='bash'."""
        import tempfile
        from pathlib import Path

        bash_parser = _make_parser()
        graph_store = _make_store()
        fake_tree = MagicMock()
        fake_tree.root_node.type = "program"
        fake_tree.root_node.children = []
        bash_parser._parser.parse.return_value = fake_tree
        with tempfile.NamedTemporaryFile(suffix=".sh", delete=False) as tmp:
            tmp.write(b"#!/bin/bash\necho hello\n")
            script_path = Path(tmp.name)
        try:
            bash_parser.parse_file(script_path, script_path.parent, graph_store)
            graph_store.create_node.assert_called_once()
            node_label, node_props = graph_store.create_node.call_args[0][:2]
            assert node_label == NodeLabel.File
            assert node_props["language"] == "bash"
        finally:
            script_path.unlink()
| --- a/tests/test_bash_parser.py | |
| +++ b/tests/test_bash_parser.py | |
| @@ -0,0 +1,534 @@ | |
| --- a/tests/test_bash_parser.py | |
| +++ b/tests/test_bash_parser.py | |
| @@ -0,0 +1,534 @@ | |
| 1 | """Tests for navegador.ingestion.bash — BashParser internal methods.""" |
| 2 | |
| 3 | from unittest.mock import MagicMock, patch |
| 4 | |
| 5 | import pytest |
| 6 | |
| 7 | from navegador.graph.schema import EdgeType, NodeLabel |
| 8 | |
| 9 | |
| 10 | class MockNode: |
| 11 | _id_counter = 0 |
| 12 | |
| 13 | def __init__( |
| 14 | self, |
| 15 | type_: str, |
| 16 | text: bytes = b"", |
| 17 | children: list = None, |
| 18 | start_byte: int = 0, |
| 19 | end_byte: int = 0, |
| 20 | start_point: tuple = (0, 0), |
| 21 | end_point: tuple = (0, 0), |
| 22 | parent=None, |
| 23 | ): |
| 24 | MockNode._id_counter += 1 |
| 25 | self.id = MockNode._id_counter |
| 26 | self.type = type_ |
| 27 | self._text = text |
| 28 | self.children = children or [] |
| 29 | self.start_byte = start_byte |
| 30 | self.end_byte = end_byte |
| 31 | self.start_point = start_point |
| 32 | self.end_point = end_point |
| 33 | self.parent = parent |
| 34 | self._fields: dict = {} |
| 35 | for child in self.children: |
| 36 | child.parent = self |
| 37 | |
| 38 | def child_by_field_name(self, name: str): |
| 39 | return self._fields.get(name) |
| 40 | |
| 41 | def set_field(self, name: str, node): |
| 42 | self._fields[name] = node |
| 43 | node.parent = self |
| 44 | return self |
| 45 | |
| 46 | |
def _text_node(text: bytes, type_: str = "identifier") -> MockNode:
    """Build a MockNode whose byte span exactly covers *text*."""
    node = MockNode(type_, text, start_byte=0, end_byte=len(text))
    return node
| 49 | |
| 50 | |
| 51 | def _make_store(): |
| 52 | store = MagicMock() |
| 53 | store.query.return_value = MagicMock(result_set=[]) |
| 54 | return store |
| 55 | |
| 56 | |
def _make_parser():
    """Instantiate BashParser without running __init__; stub the ts parser."""
    from navegador.ingestion.bash import BashParser

    instance = BashParser.__new__(BashParser)
    instance._parser = MagicMock()
    return instance
| 63 | |
| 64 | |
class TestBashGetLanguage:
    def test_raises_when_not_installed(self):
        """A missing tree-sitter-bash package surfaces as ImportError."""
        from navegador.ingestion.bash import _get_bash_language

        blocked = {"tree_sitter_bash": None, "tree_sitter": None}
        with patch.dict("sys.modules", blocked):
            with pytest.raises(ImportError, match="tree-sitter-bash"):
                _get_bash_language()

    def test_returns_language_object(self):
        """With both packages importable, the tree_sitter.Language is returned."""
        from navegador.ingestion.bash import _get_bash_language

        fake_grammar = MagicMock()
        fake_ts = MagicMock()
        modules = {"tree_sitter_bash": fake_grammar, "tree_sitter": fake_ts}
        with patch.dict("sys.modules", modules):
            assert _get_bash_language() is fake_ts.Language.return_value
| 93 | |
| 94 | |
class TestBashNodeText:
    def test_extracts_bytes(self):
        """_node_text slices the source buffer by the node's byte span."""
        from navegador.ingestion.bash import _node_text

        script = b"#!/bin/bash\nmy_func() {"
        ident = MockNode("identifier", start_byte=12, end_byte=19)
        assert _node_text(ident, script) == "my_func"
| 106 | |
| 107 | |
class TestBashHandleFunction:
    def test_creates_function_node(self):
        """A named function yields a Function node plus one CONTAINS edge."""
        bash_parser = _make_parser()
        graph_store = _make_store()
        src = b"deploy"
        fn_name = MockNode("word", start_byte=0, end_byte=6)
        fn = MockNode(
            "function_definition",
            start_point=(0, 0),
            end_point=(5, 1),
        ).set_field("name", fn_name)
        counters = {"functions": 0, "classes": 0, "edges": 0}
        bash_parser._handle_function(fn, src, "deploy.sh", graph_store, counters)
        assert counters["functions"] == 1
        assert counters["edges"] == 1
        node_label, node_props = graph_store.create_node.call_args[0][:2]
        assert node_label == NodeLabel.Function
        assert node_props["name"] == "deploy"
        assert node_props["semantic_type"] == "shell_function"

    def test_skips_if_no_name_node(self):
        """A function node without a name field is ignored entirely."""
        bash_parser = _make_parser()
        graph_store = _make_store()
        nameless = MockNode(
            "function_definition",
            start_point=(0, 0),
            end_point=(0, 5),
        )
        counters = {"functions": 0, "classes": 0, "edges": 0}
        bash_parser._handle_function(nameless, b"", "test.sh", graph_store, counters)
        assert counters["functions"] == 0
        graph_store.create_node.assert_not_called()

    def test_extracts_calls_from_body(self):
        """Commands inside the body add CALLS edges on top of CONTAINS."""
        bash_parser = _make_parser()
        graph_store = _make_store()
        src = b"deploy helper"
        fn_name = MockNode("word", start_byte=0, end_byte=6)
        helper_name = MockNode("word", start_byte=7, end_byte=13)
        helper_cmd = MockNode("command").set_field("name", helper_name)
        fn_body = MockNode("compound_statement", children=[helper_cmd])
        fn = (
            MockNode(
                "function_definition",
                start_point=(0, 0),
                end_point=(5, 1),
            )
            .set_field("name", fn_name)
            .set_field("body", fn_body)
        )
        counters = {"functions": 0, "classes": 0, "edges": 0}
        bash_parser._handle_function(fn, src, "deploy.sh", graph_store, counters)
        # 1 CONTAINS edge + 1 CALLS edge
        assert counters["edges"] == 2
| 178 | |
| 179 | |
class TestBashHandleVariable:
    def test_creates_variable_node_for_top_level(self):
        """A program-level assignment becomes a Variable node with one edge."""
        bash_parser = _make_parser()
        graph_store = _make_store()
        src = b'VERSION="1.0"'
        var_name = MockNode("variable_name", start_byte=0, end_byte=7)
        var_value = MockNode("string", start_byte=8, end_byte=13)
        root = MockNode("program")
        assignment = (
            MockNode(
                "variable_assignment",
                start_point=(0, 0),
                end_point=(0, 13),
                parent=root,
            )
            .set_field("name", var_name)
            .set_field("value", var_value)
        )
        counters = {"functions": 0, "classes": 0, "edges": 0}
        bash_parser._handle_variable(assignment, src, "env.sh", graph_store, counters)
        assert counters["edges"] == 1
        node_label, node_props = graph_store.create_node.call_args[0][:2]
        assert node_label == NodeLabel.Variable
        assert node_props["name"] == "VERSION"
        assert node_props["semantic_type"] == "shell_variable"

    def test_skips_non_top_level_variable(self):
        """Assignments nested inside a function are not promoted."""
        bash_parser = _make_parser()
        graph_store = _make_store()
        src = b"x=1"
        var_name = MockNode("variable_name", start_byte=0, end_byte=1)
        enclosing_fn = MockNode("function_definition")
        assignment = MockNode(
            "variable_assignment",
            start_point=(0, 0),
            end_point=(0, 3),
            parent=enclosing_fn,
        ).set_field("name", var_name)
        counters = {"functions": 0, "classes": 0, "edges": 0}
        bash_parser._handle_variable(assignment, src, "test.sh", graph_store, counters)
        assert counters["edges"] == 0
        graph_store.create_node.assert_not_called()

    def test_skips_variable_without_name(self):
        """An assignment lacking a name field is ignored."""
        bash_parser = _make_parser()
        graph_store = _make_store()
        root = MockNode("program")
        assignment = MockNode(
            "variable_assignment",
            start_point=(0, 0),
            end_point=(0, 3),
            parent=root,
        )
        counters = {"functions": 0, "classes": 0, "edges": 0}
        bash_parser._handle_variable(assignment, b"", "test.sh", graph_store, counters)
        graph_store.create_node.assert_not_called()
| 253 | |
| 254 | |
class TestBashHandleSource:
    def test_creates_import_for_source_command(self):
        """`source ./lib.sh` produces an Import node plus one edge."""
        bash_parser = _make_parser()
        graph_store = _make_store()
        src = b"source ./lib.sh"
        cmd_word = MockNode("word", start_byte=0, end_byte=6)
        path_word = MockNode("word", start_byte=7, end_byte=15)
        command = MockNode(
            "command",
            children=[cmd_word, path_word],
            start_point=(0, 0),
            end_point=(0, 15),
        ).set_field("name", cmd_word)
        counters = {"functions": 0, "classes": 0, "edges": 0}
        bash_parser._handle_command(command, src, "main.sh", graph_store, counters)
        assert counters["edges"] == 1
        node_label, node_props = graph_store.create_node.call_args[0][:2]
        assert node_label == NodeLabel.Import
        assert node_props["name"] == "./lib.sh"
        assert node_props["semantic_type"] == "shell_source"

    def test_creates_import_for_dot_command(self):
        """The POSIX `.` builtin is treated the same as `source`."""
        bash_parser = _make_parser()
        graph_store = _make_store()
        src = b". /etc/profile"
        cmd_word = MockNode("word", start_byte=0, end_byte=1)
        path_word = MockNode("word", start_byte=2, end_byte=14)
        command = MockNode(
            "command",
            children=[cmd_word, path_word],
            start_point=(0, 0),
            end_point=(0, 14),
        ).set_field("name", cmd_word)
        counters = {"functions": 0, "classes": 0, "edges": 0}
        bash_parser._handle_command(command, src, "main.sh", graph_store, counters)
        assert counters["edges"] == 1
        assert graph_store.create_node.call_args[0][1]["name"] == "/etc/profile"

    def test_ignores_non_source_commands(self):
        """Ordinary commands such as `echo` never become Import nodes."""
        bash_parser = _make_parser()
        graph_store = _make_store()
        src = b"echo hello"
        cmd_word = MockNode("word", start_byte=0, end_byte=4)
        command = MockNode(
            "command",
            children=[cmd_word],
            start_point=(0, 0),
            end_point=(0, 10),
        ).set_field("name", cmd_word)
        counters = {"functions": 0, "classes": 0, "edges": 0}
        bash_parser._handle_command(command, src, "main.sh", graph_store, counters)
        assert counters["edges"] == 0
        graph_store.create_node.assert_not_called()

    def test_skips_source_without_arguments(self):
        """A bare `source` with no target path is ignored."""
        bash_parser = _make_parser()
        graph_store = _make_store()
        src = b"source"
        cmd_word = MockNode("word", start_byte=0, end_byte=6)
        command = MockNode(
            "command",
            children=[cmd_word],
            start_point=(0, 0),
            end_point=(0, 6),
        ).set_field("name", cmd_word)
        counters = {"functions": 0, "classes": 0, "edges": 0}
        bash_parser._handle_command(command, src, "main.sh", graph_store, counters)
        assert counters["edges"] == 0
        graph_store.create_node.assert_not_called()
| 354 | |
| 355 | |
class TestBashExtractCalls:
    def test_finds_command_calls(self):
        """Each command in the body yields a CALLS edge with the callee name."""
        bash_parser = _make_parser()
        graph_store = _make_store()
        src = b"build_app"
        callee_word = MockNode("word", start_byte=0, end_byte=9)
        call_cmd = MockNode("command").set_field("name", callee_word)
        body = MockNode("compound_statement", children=[call_cmd])
        fn = MockNode("function_definition").set_field("body", body)
        counters = {"functions": 0, "classes": 0, "edges": 0}
        bash_parser._extract_calls(fn, src, "deploy.sh", "deploy", graph_store, counters)
        assert counters["edges"] == 1
        edge_args = graph_store.create_edge.call_args[0]
        assert edge_args[2] == EdgeType.CALLS
        assert edge_args[4]["name"] == "build_app"

    def test_skips_builtins(self):
        """Shell builtins such as `echo` do not create call edges."""
        bash_parser = _make_parser()
        graph_store = _make_store()
        src = b"echo"
        callee_word = MockNode("word", start_byte=0, end_byte=4)
        call_cmd = MockNode("command").set_field("name", callee_word)
        body = MockNode("compound_statement", children=[call_cmd])
        fn = MockNode("function_definition").set_field("body", body)
        counters = {"functions": 0, "classes": 0, "edges": 0}
        bash_parser._extract_calls(fn, src, "test.sh", "myfunc", graph_store, counters)
        assert counters["edges"] == 0

    def test_no_calls_in_empty_body(self):
        """An empty compound statement produces no edges."""
        bash_parser = _make_parser()
        graph_store = _make_store()
        fn = MockNode("function_definition").set_field(
            "body", MockNode("compound_statement")
        )
        counters = {"functions": 0, "classes": 0, "edges": 0}
        bash_parser._extract_calls(fn, b"", "test.sh", "myfunc", graph_store, counters)
        assert counters["edges"] == 0

    def test_no_body_means_no_calls(self):
        """A function node lacking a body field is handled gracefully."""
        bash_parser = _make_parser()
        graph_store = _make_store()
        fn = MockNode("function_definition")
        counters = {"functions": 0, "classes": 0, "edges": 0}
        bash_parser._extract_calls(fn, b"", "test.sh", "myfunc", graph_store, counters)
        assert counters["edges"] == 0
| 418 | |
| 419 | |
class TestBashWalkDispatch:
    def test_walk_handles_function_definition(self):
        """_walk dispatches function_definition nodes to the function handler."""
        bash_parser = _make_parser()
        graph_store = _make_store()
        src = b"deploy"
        fn_name = MockNode("word", start_byte=0, end_byte=6)
        fn = MockNode(
            "function_definition",
            start_point=(0, 0),
            end_point=(5, 1),
        ).set_field("name", fn_name)
        tree_root = MockNode("program", children=[fn])
        counters = {"functions": 0, "classes": 0, "edges": 0}
        bash_parser._walk(tree_root, src, "deploy.sh", graph_store, counters)
        assert counters["functions"] == 1

    def test_walk_handles_variable_assignment(self):
        """_walk dispatches variable_assignment nodes to the variable handler."""
        bash_parser = _make_parser()
        graph_store = _make_store()
        src = b"VERSION"
        var_name = MockNode("variable_name", start_byte=0, end_byte=7)
        tree_root = MockNode("program")
        assignment = MockNode(
            "variable_assignment",
            start_point=(0, 0),
            end_point=(0, 13),
        ).set_field("name", var_name)
        tree_root.children = [assignment]
        for node in tree_root.children:
            node.parent = tree_root
        counters = {"functions": 0, "classes": 0, "edges": 0}
        bash_parser._walk(tree_root, src, "env.sh", graph_store, counters)
        assert counters["edges"] == 1

    def test_walk_handles_source_command(self):
        """_walk routes a top-level `source` command to the import handler."""
        bash_parser = _make_parser()
        graph_store = _make_store()
        src = b"source ./lib.sh"
        cmd_word = MockNode("word", start_byte=0, end_byte=6)
        path_word = MockNode("word", start_byte=7, end_byte=15)
        command = MockNode(
            "command",
            children=[cmd_word, path_word],
            start_point=(0, 0),
            end_point=(0, 15),
        ).set_field("name", cmd_word)
        tree_root = MockNode("program", children=[command])
        counters = {"functions": 0, "classes": 0, "edges": 0}
        bash_parser._walk(tree_root, src, "main.sh", graph_store, counters)
        assert counters["edges"] == 1

    def test_walk_recurses_into_children(self):
        """Functions nested below non-root nodes are still discovered."""
        bash_parser = _make_parser()
        graph_store = _make_store()
        src = b"deploy"
        fn_name = MockNode("word", start_byte=0, end_byte=6)
        fn = MockNode(
            "function_definition",
            start_point=(0, 0),
            end_point=(5, 1),
        ).set_field("name", fn_name)
        wrapper = MockNode("if_statement", children=[fn])
        tree_root = MockNode("program", children=[wrapper])
        counters = {"functions": 0, "classes": 0, "edges": 0}
        bash_parser._walk(tree_root, src, "deploy.sh", graph_store, counters)
        assert counters["functions"] == 1
| 510 | |
| 511 | |
class TestBashParseFile:
    def test_creates_file_node(self):
        """parse_file registers a File node tagged with language='bash'."""
        import tempfile
        from pathlib import Path

        bash_parser = _make_parser()
        graph_store = _make_store()
        fake_tree = MagicMock()
        fake_tree.root_node.type = "program"
        fake_tree.root_node.children = []
        bash_parser._parser.parse.return_value = fake_tree
        with tempfile.NamedTemporaryFile(suffix=".sh", delete=False) as tmp:
            tmp.write(b"#!/bin/bash\necho hello\n")
            script_path = Path(tmp.name)
        try:
            bash_parser.parse_file(script_path, script_path.parent, graph_store)
            graph_store.create_node.assert_called_once()
            node_label, node_props = graph_store.create_node.call_args[0][:2]
            assert node_label == NodeLabel.File
            assert node_props["language"] == "bash"
        finally:
            script_path.unlink()
| --- a/tests/test_chef_enricher.py | ||
| +++ b/tests/test_chef_enricher.py | ||
| @@ -0,0 +1,210 @@ | ||
| 1 | +"""Tests for navegador.enrichment.chef — ChefEnricher.""" | |
| 2 | + | |
| 3 | +from unittest.mock import MagicMock | |
| 4 | + | |
| 5 | +from navegador.enrichment.chef import ChefEnricher | |
| 6 | + | |
| 7 | + | |
| 8 | +def _make_store(query_results=None): | |
| 9 | + """Create a mock GraphStore. | |
| 10 | + | |
| 11 | + *query_results* maps Cypher query substrings to result_set lists. | |
| 12 | + Unmatched queries return an empty result_set. | |
| 13 | + """ | |
| 14 | + store = MagicMock() | |
| 15 | + mapping = query_results or {} | |
| 16 | + | |
| 17 | + def _side_effect(query, params=None): | |
| 18 | + result = MagicMock() | |
| 19 | + for substr, rows in mapping.items(): | |
| 20 | + if substr in query: | |
| 21 | + result.result_set = rows | |
| 22 | + return result | |
| 23 | + result.result_set = [] | |
| 24 | + return result | |
| 25 | + | |
| 26 | + store.query.side_effect = _side_effect | |
| 27 | + return store | |
| 28 | + | |
| 29 | + | |
class TestIdentity:
    """Framework identity properties."""

    def test_framework_name(self):
        chef = ChefEnricher(_make_store())
        assert chef.framework_name == "chef"

    def test_detection_files(self):
        chef = ChefEnricher(_make_store())
        assert "metadata.rb" in chef.detection_files
        assert "Berksfile" in chef.detection_files

    def test_detection_patterns(self):
        chef = ChefEnricher(_make_store())
        assert "chef" in chef.detection_patterns
| 48 | + | |
| 49 | + | |
class TestDetect:
    """Tests for detect() — framework presence detection."""

    def test_detect_true_when_metadata_rb_exists(self):
        graph_store = _make_store({"f.name = $name": [[1]]})
        assert ChefEnricher(graph_store).detect() is True

    def test_detect_false_when_no_markers(self):
        assert ChefEnricher(_make_store()).detect() is False

    def test_detect_true_via_import_pattern(self):
        graph_store = _make_store({"n.name = $name OR n.module = $name": [[1]]})
        assert ChefEnricher(graph_store).detect() is True
| 75 | + | |
| 76 | + | |
class TestEnrichRecipes:
    """Tests for enrich() promoting recipe files."""

    def test_promotes_recipe_files(self):
        recipe_rows = [
            ["default.rb", "cookbooks/web/recipes/default.rb"],
            ["install.rb", "cookbooks/web/recipes/install.rb"],
        ]
        graph_store = _make_store({"n.file_path CONTAINS $pattern": recipe_rows})
        outcome = ChefEnricher(graph_store).enrich()

        assert outcome.patterns_found["recipes"] == 2
        assert outcome.promoted >= 2

        # Promotion must have gone through store.query with a SET clause.
        promote_calls = [
            c for c in graph_store.query.call_args_list if "SET n.semantic_type" in str(c)
        ]
        assert len(promote_calls) >= 2
| 98 | + | |
| 99 | + | |
class TestEnrichResources:
    """Tests for enrich() promoting Chef resource calls."""

    def test_promotes_resource_functions(self):
        # _enrich_resources queries twice (recipes/ and libraries/),
        # so hand back data only on the first matching call.
        seen = {"resource": 0}
        first_batch = [
            ["package", "cookbooks/web/recipes/default.rb"],
            ["template", "cookbooks/web/recipes/default.rb"],
            ["not_a_resource", "cookbooks/web/recipes/default.rb"],
        ]

        def _dispatch(query, params=None):
            reply = MagicMock()
            reply.result_set = []
            if "(n:Function OR n:Method)" in query:
                seen["resource"] += 1
                if seen["resource"] == 1:
                    reply.result_set = first_batch
            return reply

        graph_store = MagicMock()
        graph_store.query.side_effect = _dispatch
        outcome = ChefEnricher(graph_store).enrich()

        # "package" and "template" match, "not_a_resource" does not
        assert outcome.patterns_found["resources"] == 2

    def test_skips_non_resource_functions(self):
        rows = [["my_helper", "cookbooks/web/libraries/helpers.rb"]]
        graph_store = _make_store({"(n:Function OR n:Method)": rows})
        outcome = ChefEnricher(graph_store).enrich()

        assert outcome.patterns_found["resources"] == 0
| 145 | + | |
| 146 | + | |
class TestEnrichIncludeRecipe:
    """Tests for enrich() handling include_recipe edges."""

    def test_creates_depends_on_edge(self):
        # Strategy 1: follow CALLS edges from include_recipe nodes
        def _dispatch(query, params=None):
            reply = MagicMock()
            reply.result_set = []
            if "[:CALLS]" in query and "n.name = $name" in query:
                reply.result_set = [
                    ["cookbooks/web/recipes/default.rb", "database::install"],
                ]
            elif "f.file_path CONTAINS $recipes" in query:
                reply.result_set = [["install.rb"]]
            elif "f.file_path = $path" in query:
                reply.result_set = [["default.rb"]]
            return reply

        graph_store = MagicMock()
        graph_store.query.side_effect = _dispatch
        outcome = ChefEnricher(graph_store).enrich()

        assert outcome.edges_added >= 1
        assert outcome.patterns_found["include_recipe"] >= 1

        # Verify a MERGE query was issued for the DEPENDS_ON edge
        merge_calls = [
            c
            for c in graph_store.query.call_args_list
            if "MERGE" in str(c) and "DEPENDS_ON" in str(c)
        ]
        assert len(merge_calls) >= 1

    def test_no_edges_when_no_include_recipe(self):
        outcome = ChefEnricher(_make_store()).enrich()

        assert outcome.edges_added == 0
        assert outcome.patterns_found["include_recipe"] == 0
| 192 | + | |
| 193 | + | |
class TestEnrichCookbooks:
    """Tests for enrich() promoting cookbook metadata files."""

    def test_promotes_metadata_rb(self):
        rows = [["metadata.rb", "cookbooks/web/metadata.rb"]]
        graph_store = _make_store({"n.name = $name": rows})
        outcome = ChefEnricher(graph_store).enrich()

        assert outcome.patterns_found["cookbooks"] == 1
        cookbook_sets = [
            c for c in graph_store.query.call_args_list if "chef_cookbook" in str(c)
        ]
        assert len(cookbook_sets) >= 1
| --- a/tests/test_chef_enricher.py | |
| +++ b/tests/test_chef_enricher.py | |
| @@ -0,0 +1,210 @@ | |
| --- a/tests/test_chef_enricher.py | |
| +++ b/tests/test_chef_enricher.py | |
| @@ -0,0 +1,210 @@ | |
| 1 | """Tests for navegador.enrichment.chef — ChefEnricher.""" |
| 2 | |
| 3 | from unittest.mock import MagicMock |
| 4 | |
| 5 | from navegador.enrichment.chef import ChefEnricher |
| 6 | |
| 7 | |
| 8 | def _make_store(query_results=None): |
| 9 | """Create a mock GraphStore. |
| 10 | |
| 11 | *query_results* maps Cypher query substrings to result_set lists. |
| 12 | Unmatched queries return an empty result_set. |
| 13 | """ |
| 14 | store = MagicMock() |
| 15 | mapping = query_results or {} |
| 16 | |
| 17 | def _side_effect(query, params=None): |
| 18 | result = MagicMock() |
| 19 | for substr, rows in mapping.items(): |
| 20 | if substr in query: |
| 21 | result.result_set = rows |
| 22 | return result |
| 23 | result.result_set = [] |
| 24 | return result |
| 25 | |
| 26 | store.query.side_effect = _side_effect |
| 27 | return store |
| 28 | |
| 29 | |
| 30 | class TestIdentity: |
| 31 | """Framework identity properties.""" |
| 32 | |
| 33 | def test_framework_name(self): |
| 34 | store = _make_store() |
| 35 | enricher = ChefEnricher(store) |
| 36 | assert enricher.framework_name == "chef" |
| 37 | |
| 38 | def test_detection_files(self): |
| 39 | store = _make_store() |
| 40 | enricher = ChefEnricher(store) |
| 41 | assert "metadata.rb" in enricher.detection_files |
| 42 | assert "Berksfile" in enricher.detection_files |
| 43 | |
| 44 | def test_detection_patterns(self): |
| 45 | store = _make_store() |
| 46 | enricher = ChefEnricher(store) |
| 47 | assert "chef" in enricher.detection_patterns |
| 48 | |
| 49 | |
| 50 | class TestDetect: |
| 51 | """Tests for detect() — framework presence detection.""" |
| 52 | |
| 53 | def test_detect_true_when_metadata_rb_exists(self): |
| 54 | store = _make_store( |
| 55 | { |
| 56 | "f.name = $name": [[1]], |
| 57 | } |
| 58 | ) |
| 59 | enricher = ChefEnricher(store) |
| 60 | assert enricher.detect() is True |
| 61 | |
| 62 | def test_detect_false_when_no_markers(self): |
| 63 | store = _make_store() |
| 64 | enricher = ChefEnricher(store) |
| 65 | assert enricher.detect() is False |
| 66 | |
| 67 | def test_detect_true_via_import_pattern(self): |
| 68 | store = _make_store( |
| 69 | { |
| 70 | "n.name = $name OR n.module = $name": [[1]], |
| 71 | } |
| 72 | ) |
| 73 | enricher = ChefEnricher(store) |
| 74 | assert enricher.detect() is True |
| 75 | |
| 76 | |
| 77 | class TestEnrichRecipes: |
| 78 | """Tests for enrich() promoting recipe files.""" |
| 79 | |
| 80 | def test_promotes_recipe_files(self): |
| 81 | store = _make_store( |
| 82 | { |
| 83 | "n.file_path CONTAINS $pattern": [ |
| 84 | ["default.rb", "cookbooks/web/recipes/default.rb"], |
| 85 | ["install.rb", "cookbooks/web/recipes/install.rb"], |
| 86 | ], |
| 87 | } |
| 88 | ) |
| 89 | enricher = ChefEnricher(store) |
| 90 | result = enricher.enrich() |
| 91 | |
| 92 | assert result.patterns_found["recipes"] == 2 |
| 93 | assert result.promoted >= 2 |
| 94 | |
| 95 | # Verify _promote_node was called via store.query SET |
| 96 | set_calls = [c for c in store.query.call_args_list if "SET n.semantic_type" in str(c)] |
| 97 | assert len(set_calls) >= 2 |
| 98 | |
| 99 | |
class TestEnrichResources:
    """Tests for enrich() promoting Chef resource calls."""

    def test_promotes_resource_functions(self):
        # _enrich_resources queries twice (recipes/ and libraries/), so the
        # fake query hands back rows only on the first matching call.
        seen = {"resource": 0}
        rows = [
            ["package", "cookbooks/web/recipes/default.rb"],
            ["template", "cookbooks/web/recipes/default.rb"],
            ["not_a_resource", "cookbooks/web/recipes/default.rb"],
        ]

        def fake_query(query, params=None):
            reply = MagicMock()
            reply.result_set = []
            if "(n:Function OR n:Method)" in query:
                seen["resource"] += 1
                if seen["resource"] == 1:
                    reply.result_set = rows
            return reply

        store = MagicMock()
        store.query.side_effect = fake_query
        result = ChefEnricher(store).enrich()

        # "package" and "template" are known Chef resources; the third is not.
        assert result.patterns_found["resources"] == 2

    def test_skips_non_resource_functions(self):
        store = _make_store(
            {
                "(n:Function OR n:Method)": [
                    ["my_helper", "cookbooks/web/libraries/helpers.rb"],
                ],
            }
        )
        result = ChefEnricher(store).enrich()

        assert result.patterns_found["resources"] == 0
| 145 | |
| 146 | |
class TestEnrichIncludeRecipe:
    """Tests for enrich() handling include_recipe edges."""

    def test_creates_depends_on_edge(self):
        # Strategy 1: follow CALLS edges out of include_recipe nodes.
        def fake_query(query, params=None):
            reply = MagicMock()
            if "[:CALLS]" in query and "n.name = $name" in query:
                reply.result_set = [
                    ["cookbooks/web/recipes/default.rb", "database::install"],
                ]
            elif "f.file_path CONTAINS $recipes" in query:
                reply.result_set = [["install.rb"]]
            elif "f.file_path = $path" in query:
                reply.result_set = [["default.rb"]]
            else:
                # Covers the MERGE query and any other lookup.
                reply.result_set = []
            return reply

        store = MagicMock()
        store.query.side_effect = fake_query
        result = ChefEnricher(store).enrich()

        assert result.edges_added >= 1
        assert result.patterns_found["include_recipe"] >= 1

        # The DEPENDS_ON edge must have been created through a MERGE query.
        merges = [
            call
            for call in store.query.call_args_list
            if "MERGE" in str(call) and "DEPENDS_ON" in str(call)
        ]
        assert len(merges) >= 1

    def test_no_edges_when_no_include_recipe(self):
        result = ChefEnricher(_make_store()).enrich()

        assert result.edges_added == 0
        assert result.patterns_found["include_recipe"] == 0
| 192 | |
| 193 | |
class TestEnrichCookbooks:
    """Tests for enrich() promoting cookbook metadata files."""

    def test_promotes_metadata_rb(self):
        store = _make_store(
            {
                "n.name = $name": [
                    ["metadata.rb", "cookbooks/web/metadata.rb"],
                ],
            }
        )
        result = ChefEnricher(store).enrich()

        assert result.patterns_found["cookbooks"] == 1
        # Promotion is visible as a query stamping the chef_cookbook type.
        promoted = [c for c in store.query.call_args_list if "chef_cookbook" in str(c)]
        assert len(promoted) >= 1
| --- a/tests/test_hcl_parser.py | ||
| +++ b/tests/test_hcl_parser.py | ||
| @@ -0,0 +1,503 @@ | ||
| 1 | +"""Tests for navegador.ingestion.hcl — HCLParser internal methods.""" | |
| 2 | + | |
| 3 | +from unittest.mock import MagicMock, patch | |
| 4 | + | |
| 5 | +import pytest | |
| 6 | + | |
| 7 | +from navegador.graph.schema import EdgeType, NodeLabel | |
| 8 | + | |
| 9 | + | |
class MockNode:
    """Lightweight stand-in for a tree-sitter Node, covering only the
    attributes and methods the HCL parser touches."""

    _id_counter = 0

    def __init__(
        self,
        type_: str,
        text: bytes = b"",
        children: list = None,
        start_byte: int = 0,
        end_byte: int = 0,
        start_point: tuple = (0, 0),
        end_point: tuple = (0, 0),
        parent=None,
    ):
        # Hand every node a unique, monotonically increasing id.
        MockNode._id_counter += 1
        self.id = MockNode._id_counter
        self.type = type_
        self._text = text
        self.start_byte = start_byte
        self.end_byte = end_byte
        self.start_point = start_point
        self.end_point = end_point
        self.parent = parent
        self.children = children or []
        self._fields: dict = {}
        # Wire children back to this node, mirroring tree-sitter parent links.
        for kid in self.children:
            kid.parent = self

    def child_by_field_name(self, name: str):
        """Return the node registered under *name*, or None when unset."""
        return self._fields.get(name)

    def set_field(self, name: str, node):
        """Register *node* as field *name*, adopt it, and return self for chaining."""
        self._fields[name] = node
        node.parent = self
        return self
| 45 | + | |
| 46 | + | |
def _text_node(text: bytes, type_: str = "identifier") -> MockNode:
    """Build a leaf MockNode whose byte span covers *text* exactly."""
    return MockNode(type_, text, start_byte=0, end_byte=len(text))
| 49 | + | |
| 50 | + | |
| 51 | +def _make_store(): | |
| 52 | + store = MagicMock() | |
| 53 | + store.query.return_value = MagicMock(result_set=[]) | |
| 54 | + return store | |
| 55 | + | |
| 56 | + | |
def _make_parser():
    """Instantiate HCLParser without running __init__ (no grammar required)."""
    from navegador.ingestion.hcl import HCLParser

    instance = HCLParser.__new__(HCLParser)
    instance._parser = MagicMock()
    return instance
| 63 | + | |
| 64 | + | |
class TestHCLGetLanguage:
    def test_raises_when_not_installed(self):
        from navegador.ingestion.hcl import _get_hcl_language

        # Mapping a module to None in sys.modules makes its import fail.
        blocked = {"tree_sitter_hcl": None, "tree_sitter": None}
        with patch.dict("sys.modules", blocked):
            with pytest.raises(ImportError, match="tree-sitter-hcl"):
                _get_hcl_language()

    def test_returns_language_object(self):
        from navegador.ingestion.hcl import _get_hcl_language

        fake_grammar = MagicMock()
        fake_ts = MagicMock()
        installed = {"tree_sitter_hcl": fake_grammar, "tree_sitter": fake_ts}
        with patch.dict("sys.modules", installed):
            assert _get_hcl_language() is fake_ts.Language.return_value
| 93 | + | |
| 94 | + | |
class TestHCLNodeText:
    def test_extracts_bytes(self):
        from navegador.ingestion.hcl import _node_text

        src = b'resource "aws_instance" "web" {}'
        # Bytes 10..22 cover the resource type inside the first quoted label.
        leaf = MockNode("identifier", start_byte=10, end_byte=22)
        assert _node_text(leaf, src) == "aws_instance"
| 102 | + | |
| 103 | + | |
class TestHCLHandleResource:
    def test_creates_class_node_with_semantic_type(self):
        parser = _make_parser()
        store = _make_store()
        src = b'resource "aws_instance" "web" {}'
        block = MockNode("block", start_point=(0, 0), end_point=(0, 30))
        stats = {"functions": 0, "classes": 0, "edges": 0}

        parser._handle_resource(
            block, src, "main.tf", store, stats, ["aws_instance", "web"], None
        )

        assert stats["classes"] == 1
        assert stats["edges"] == 1
        store.create_node.assert_called_once()
        label, props = store.create_node.call_args[0][:2]
        assert label == NodeLabel.Class
        assert props["name"] == "aws_instance.web"
        assert props["semantic_type"] == "terraform_resource"

    def test_extracts_references_from_body(self):
        parser = _make_parser()
        store = _make_store()
        src = b"var.region"
        body = MockNode("body", start_byte=0, end_byte=len(src))
        block = MockNode("block", start_point=(0, 0), end_point=(0, 30))
        stats = {"functions": 0, "classes": 0, "edges": 0}

        parser._handle_resource(
            block, src, "main.tf", store, stats, ["aws_instance", "web"], body
        )

        # One CONTAINS edge plus one REFERENCES edge for var.region.
        assert stats["edges"] == 2
| 141 | + | |
| 142 | + | |
class TestHCLHandleVariable:
    def test_creates_variable_node(self):
        parser = _make_parser()
        store = _make_store()
        src = b'variable "region" {}'
        block = MockNode("block", start_point=(0, 0), end_point=(0, 19))
        stats = {"functions": 0, "classes": 0, "edges": 0}

        parser._handle_variable(block, src, "vars.tf", store, stats, ["region"], None)

        assert stats["functions"] == 1
        assert stats["edges"] == 1
        label, props = store.create_node.call_args[0][:2]
        assert label == NodeLabel.Variable
        assert props["name"] == "region"
        assert props["semantic_type"] == "terraform_variable"
| 163 | + | |
| 164 | + | |
class TestHCLHandleModule:
    def test_creates_module_node(self):
        parser = _make_parser()
        store = _make_store()
        src = b'module "vpc" {}'
        block = MockNode("block", start_point=(0, 0), end_point=(0, 14))
        stats = {"functions": 0, "classes": 0, "edges": 0}

        parser._handle_module(block, src, "main.tf", store, stats, ["vpc"], None)

        assert stats["classes"] == 1
        assert stats["edges"] == 1
        label, props = store.create_node.call_args[0][:2]
        assert label == NodeLabel.Module
        assert props["name"] == "vpc"
        assert props["semantic_type"] == "terraform_module"

    def test_extracts_source_attribute(self):
        parser = _make_parser()
        store = _make_store()
        # Byte layout: "source" occupies 0..6, the path expression 6..19.
        src = b"source./modules/vpc"
        key = MockNode("identifier", start_byte=0, end_byte=6)
        value = MockNode("expression", start_byte=6, end_byte=19)
        value.is_named = True
        attribute = MockNode("attribute", children=[key, value])
        body = MockNode("body", children=[attribute])
        block = MockNode("block", start_point=(0, 0), end_point=(0, 30))
        stats = {"functions": 0, "classes": 0, "edges": 0}

        parser._handle_module(block, src, "main.tf", store, stats, ["vpc"], body)

        assert store.create_node.call_args[0][1]["source"] == "./modules/vpc"
| 216 | + | |
| 217 | + | |
class TestHCLHandleOutput:
    def test_creates_variable_node(self):
        parser = _make_parser()
        store = _make_store()
        src = b'output "vpc_id" {}'
        block = MockNode("block", start_point=(0, 0), end_point=(0, 17))
        stats = {"functions": 0, "classes": 0, "edges": 0}

        parser._handle_output(block, src, "outputs.tf", store, stats, ["vpc_id"], None)

        assert stats["functions"] == 1
        assert stats["edges"] == 1
        label, props = store.create_node.call_args[0][:2]
        assert label == NodeLabel.Variable
        assert props["semantic_type"] == "terraform_output"

    def test_extracts_references_from_body(self):
        parser = _make_parser()
        store = _make_store()
        src = b"module.vpc"
        body = MockNode("body", start_byte=0, end_byte=len(src))
        block = MockNode("block", start_point=(0, 0), end_point=(0, 17))
        stats = {"functions": 0, "classes": 0, "edges": 0}

        parser._handle_output(block, src, "outputs.tf", store, stats, ["vpc_id"], body)

        # One CONTAINS edge plus one REFERENCES edge for module.vpc.
        assert stats["edges"] == 2
| 253 | + | |
| 254 | + | |
class TestHCLHandleProvider:
    def test_creates_class_node(self):
        parser = _make_parser()
        store = _make_store()
        src = b'provider "aws" {}'
        block = MockNode("block", start_point=(0, 0), end_point=(0, 16))
        stats = {"functions": 0, "classes": 0, "edges": 0}

        parser._handle_provider(block, src, "provider.tf", store, stats, ["aws"], None)

        assert stats["classes"] == 1
        assert stats["edges"] == 1
        label, props = store.create_node.call_args[0][:2]
        assert label == NodeLabel.Class
        assert props["name"] == "aws"
        assert props["semantic_type"] == "terraform_provider"
| 275 | + | |
| 276 | + | |
class TestHCLHandleLocals:
    def test_creates_variable_nodes(self):
        parser = _make_parser()
        store = _make_store()
        src = b"region"
        name = MockNode("identifier", start_byte=0, end_byte=6)
        assignment = MockNode(
            "attribute",
            children=[name],
            start_point=(1, 0),
            end_point=(1, 20),
        )
        body = MockNode("body", children=[assignment])
        block = MockNode("block", start_point=(0, 0), end_point=(2, 1))
        stats = {"functions": 0, "classes": 0, "edges": 0}

        parser._handle_locals(block, src, "locals.tf", store, stats, body)

        assert stats["functions"] == 1
        assert stats["edges"] >= 1
        label, props = store.create_node.call_args[0][:2]
        assert label == NodeLabel.Variable
        assert props["semantic_type"] == "terraform_local"

    def test_skips_when_no_body(self):
        parser = _make_parser()
        store = _make_store()
        block = MockNode("block", start_point=(0, 0), end_point=(0, 5))
        stats = {"functions": 0, "classes": 0, "edges": 0}

        parser._handle_locals(block, b"", "locals.tf", store, stats, None)

        assert stats["functions"] == 0
        store.create_node.assert_not_called()
| 316 | + | |
| 317 | + | |
class TestHCLWalkDispatch:
    @staticmethod
    def _labeled_block(keyword_end, name_start, name_end, end_col):
        """Assemble a block node: identifier keyword + one quoted name label."""
        keyword = MockNode("identifier", start_byte=0, end_byte=keyword_end)
        quoted = MockNode(
            "string_lit",
            children=[
                MockNode("template_literal", start_byte=name_start, end_byte=name_end)
            ],
            start_byte=name_start - 1,
            end_byte=name_end + 1,
        )
        return MockNode(
            "block",
            children=[keyword, quoted],
            start_point=(0, 0),
            end_point=(0, end_col),
        )

    def test_walk_dispatches_block_in_body(self):
        parser = _make_parser()
        store = _make_store()
        # Tree shape: root > body > block(variable "region")
        src = b'variable "region" {}'
        block = self._labeled_block(8, 10, 16, 19)
        root = MockNode(
            "config_file", children=[MockNode("body", children=[block])]
        )
        stats = {"functions": 0, "classes": 0, "edges": 0}

        parser._walk(root, src, "vars.tf", store, stats)

        assert stats["functions"] == 1

    def test_walk_dispatches_top_level_block(self):
        parser = _make_parser()
        store = _make_store()
        src = b'provider "aws" {}'
        root = MockNode("config_file", children=[self._labeled_block(8, 10, 13, 16)])
        stats = {"functions": 0, "classes": 0, "edges": 0}

        parser._walk(root, src, "main.tf", store, stats)

        assert stats["classes"] == 1
| 382 | + | |
| 383 | + | |
class TestHCLExtractReferences:
    """Tests for _extract_references: var./local./module./data./resource refs.

    Each test scans one expression; the shared driver builds a body node
    spanning the whole source so the reference regex sees the full text.
    """

    def _run(self, source, origin_name, origin_label):
        """Call _extract_references over *source*; return (stats, store).

        The node span is derived from len(source): a hard-coded end_byte
        previously truncated b"aws_security_group.default" (26 bytes) to 25,
        so the resource test silently scanned "aws_security_group.defaul".
        """
        parser = _make_parser()
        store = _make_store()
        node = MockNode("body", start_byte=0, end_byte=len(source))
        stats = {"functions": 0, "classes": 0, "edges": 0}
        parser._extract_references(
            node,
            source,
            "main.tf",
            origin_name,
            origin_label,
            store,
            stats,
        )
        return stats, store

    def test_finds_var_reference(self):
        stats, store = self._run(b"var.region", "aws_instance.web", NodeLabel.Class)
        assert stats["edges"] == 1
        edge_call = store.create_edge.call_args[0]
        assert edge_call[2] == EdgeType.REFERENCES
        assert edge_call[4]["name"] == "region"

    def test_finds_resource_reference(self):
        stats, store = self._run(
            b"aws_security_group.default", "aws_instance.web", NodeLabel.Class
        )
        assert stats["edges"] == 1
        assert store.create_edge.call_args[0][2] == EdgeType.DEPENDS_ON

    def test_finds_local_reference(self):
        stats, _ = self._run(b"local.common_tags", "aws_instance.web", NodeLabel.Class)
        assert stats["edges"] == 1

    def test_finds_module_reference(self):
        stats, store = self._run(b"module.vpc", "output_vpc", NodeLabel.Variable)
        assert stats["edges"] == 1
        # Target label of the created edge must be the Module node kind.
        assert store.create_edge.call_args[0][3] == NodeLabel.Module

    def test_finds_data_reference(self):
        stats, store = self._run(b"data.http.myip", "aws_instance.web", NodeLabel.Class)
        assert stats["edges"] == 1
        edge_call = store.create_edge.call_args[0]
        assert edge_call[2] == EdgeType.DEPENDS_ON
        assert edge_call[4]["name"] == "http.myip"
| 479 | + | |
| 480 | + | |
class TestHCLParseFile:
    def test_creates_file_node(self):
        import tempfile
        from pathlib import Path

        parser = _make_parser()
        store = _make_store()
        fake_tree = MagicMock()
        fake_tree.root_node.type = "config_file"
        fake_tree.root_node.children = []
        parser._parser.parse.return_value = fake_tree

        with tempfile.TemporaryDirectory() as tmp:
            fpath = Path(tmp) / "block.tf"
            fpath.write_bytes(b'resource "aws_instance" "web" {}\n')
            parser.parse_file(fpath, fpath.parent, store)

        store.create_node.assert_called_once()
        label, props = store.create_node.call_args[0][:2]
        assert label == NodeLabel.File
        assert props["language"] == "hcl"
| --- a/tests/test_hcl_parser.py | |
| +++ b/tests/test_hcl_parser.py | |
| @@ -0,0 +1,503 @@ | |
| --- a/tests/test_hcl_parser.py | |
| +++ b/tests/test_hcl_parser.py | |
| @@ -0,0 +1,503 @@ | |
| 1 | """Tests for navegador.ingestion.hcl — HCLParser internal methods.""" |
| 2 | |
| 3 | from unittest.mock import MagicMock, patch |
| 4 | |
| 5 | import pytest |
| 6 | |
| 7 | from navegador.graph.schema import EdgeType, NodeLabel |
| 8 | |
| 9 | |
class MockNode:
    """Lightweight stand-in for a tree-sitter Node, covering only the
    attributes and methods the HCL parser touches."""

    _id_counter = 0

    def __init__(
        self,
        type_: str,
        text: bytes = b"",
        children: list = None,
        start_byte: int = 0,
        end_byte: int = 0,
        start_point: tuple = (0, 0),
        end_point: tuple = (0, 0),
        parent=None,
    ):
        # Hand every node a unique, monotonically increasing id.
        MockNode._id_counter += 1
        self.id = MockNode._id_counter
        self.type = type_
        self._text = text
        self.start_byte = start_byte
        self.end_byte = end_byte
        self.start_point = start_point
        self.end_point = end_point
        self.parent = parent
        self.children = children or []
        self._fields: dict = {}
        # Wire children back to this node, mirroring tree-sitter parent links.
        for kid in self.children:
            kid.parent = self

    def child_by_field_name(self, name: str):
        """Return the node registered under *name*, or None when unset."""
        return self._fields.get(name)

    def set_field(self, name: str, node):
        """Register *node* as field *name*, adopt it, and return self for chaining."""
        self._fields[name] = node
        node.parent = self
        return self
| 45 | |
| 46 | |
def _text_node(text: bytes, type_: str = "identifier") -> MockNode:
    """Build a leaf MockNode whose byte span covers *text* exactly."""
    return MockNode(type_, text, start_byte=0, end_byte=len(text))


def _make_store():
    """Return a MagicMock graph store whose queries yield no rows."""
    mock_store = MagicMock()
    mock_store.query.return_value = MagicMock(result_set=[])
    return mock_store


def _make_parser():
    """Instantiate HCLParser without running __init__ (no grammar required)."""
    from navegador.ingestion.hcl import HCLParser

    instance = HCLParser.__new__(HCLParser)
    instance._parser = MagicMock()
    return instance
| 63 | |
| 64 | |
class TestHCLGetLanguage:
    def test_raises_when_not_installed(self):
        from navegador.ingestion.hcl import _get_hcl_language

        # Mapping a module to None in sys.modules makes its import fail.
        blocked = {"tree_sitter_hcl": None, "tree_sitter": None}
        with patch.dict("sys.modules", blocked):
            with pytest.raises(ImportError, match="tree-sitter-hcl"):
                _get_hcl_language()

    def test_returns_language_object(self):
        from navegador.ingestion.hcl import _get_hcl_language

        fake_grammar = MagicMock()
        fake_ts = MagicMock()
        installed = {"tree_sitter_hcl": fake_grammar, "tree_sitter": fake_ts}
        with patch.dict("sys.modules", installed):
            assert _get_hcl_language() is fake_ts.Language.return_value
| 93 | |
| 94 | |
class TestHCLNodeText:
    def test_extracts_bytes(self):
        from navegador.ingestion.hcl import _node_text

        src = b'resource "aws_instance" "web" {}'
        # Bytes 10..22 cover the resource type inside the first quoted label.
        leaf = MockNode("identifier", start_byte=10, end_byte=22)
        assert _node_text(leaf, src) == "aws_instance"
| 102 | |
| 103 | |
class TestHCLHandleResource:
    def test_creates_class_node_with_semantic_type(self):
        parser = _make_parser()
        store = _make_store()
        src = b'resource "aws_instance" "web" {}'
        block = MockNode("block", start_point=(0, 0), end_point=(0, 30))
        stats = {"functions": 0, "classes": 0, "edges": 0}

        parser._handle_resource(
            block, src, "main.tf", store, stats, ["aws_instance", "web"], None
        )

        assert stats["classes"] == 1
        assert stats["edges"] == 1
        store.create_node.assert_called_once()
        label, props = store.create_node.call_args[0][:2]
        assert label == NodeLabel.Class
        assert props["name"] == "aws_instance.web"
        assert props["semantic_type"] == "terraform_resource"

    def test_extracts_references_from_body(self):
        parser = _make_parser()
        store = _make_store()
        src = b"var.region"
        body = MockNode("body", start_byte=0, end_byte=len(src))
        block = MockNode("block", start_point=(0, 0), end_point=(0, 30))
        stats = {"functions": 0, "classes": 0, "edges": 0}

        parser._handle_resource(
            block, src, "main.tf", store, stats, ["aws_instance", "web"], body
        )

        # One CONTAINS edge plus one REFERENCES edge for var.region.
        assert stats["edges"] == 2
| 141 | |
| 142 | |
class TestHCLHandleVariable:
    def test_creates_variable_node(self):
        parser = _make_parser()
        store = _make_store()
        src = b'variable "region" {}'
        block = MockNode("block", start_point=(0, 0), end_point=(0, 19))
        stats = {"functions": 0, "classes": 0, "edges": 0}

        parser._handle_variable(block, src, "vars.tf", store, stats, ["region"], None)

        assert stats["functions"] == 1
        assert stats["edges"] == 1
        label, props = store.create_node.call_args[0][:2]
        assert label == NodeLabel.Variable
        assert props["name"] == "region"
        assert props["semantic_type"] == "terraform_variable"
| 163 | |
| 164 | |
class TestHCLHandleModule:
    def test_creates_module_node(self):
        parser = _make_parser()
        store = _make_store()
        src = b'module "vpc" {}'
        block = MockNode("block", start_point=(0, 0), end_point=(0, 14))
        stats = {"functions": 0, "classes": 0, "edges": 0}

        parser._handle_module(block, src, "main.tf", store, stats, ["vpc"], None)

        assert stats["classes"] == 1
        assert stats["edges"] == 1
        label, props = store.create_node.call_args[0][:2]
        assert label == NodeLabel.Module
        assert props["name"] == "vpc"
        assert props["semantic_type"] == "terraform_module"

    def test_extracts_source_attribute(self):
        parser = _make_parser()
        store = _make_store()
        # Byte layout: "source" occupies 0..6, the path expression 6..19.
        src = b"source./modules/vpc"
        key = MockNode("identifier", start_byte=0, end_byte=6)
        value = MockNode("expression", start_byte=6, end_byte=19)
        value.is_named = True
        attribute = MockNode("attribute", children=[key, value])
        body = MockNode("body", children=[attribute])
        block = MockNode("block", start_point=(0, 0), end_point=(0, 30))
        stats = {"functions": 0, "classes": 0, "edges": 0}

        parser._handle_module(block, src, "main.tf", store, stats, ["vpc"], body)

        assert store.create_node.call_args[0][1]["source"] == "./modules/vpc"
| 216 | |
| 217 | |
class TestHCLHandleOutput:
    def test_creates_variable_node(self):
        parser = _make_parser()
        store = _make_store()
        src = b'output "vpc_id" {}'
        block = MockNode("block", start_point=(0, 0), end_point=(0, 17))
        stats = {"functions": 0, "classes": 0, "edges": 0}

        parser._handle_output(block, src, "outputs.tf", store, stats, ["vpc_id"], None)

        assert stats["functions"] == 1
        assert stats["edges"] == 1
        label, props = store.create_node.call_args[0][:2]
        assert label == NodeLabel.Variable
        assert props["semantic_type"] == "terraform_output"

    def test_extracts_references_from_body(self):
        parser = _make_parser()
        store = _make_store()
        src = b"module.vpc"
        body = MockNode("body", start_byte=0, end_byte=len(src))
        block = MockNode("block", start_point=(0, 0), end_point=(0, 17))
        stats = {"functions": 0, "classes": 0, "edges": 0}

        parser._handle_output(block, src, "outputs.tf", store, stats, ["vpc_id"], body)

        # One CONTAINS edge plus one REFERENCES edge for module.vpc.
        assert stats["edges"] == 2
| 253 | |
| 254 | |
class TestHCLHandleProvider:
    def test_creates_class_node(self):
        parser = _make_parser()
        store = _make_store()
        src = b'provider "aws" {}'
        block = MockNode("block", start_point=(0, 0), end_point=(0, 16))
        stats = {"functions": 0, "classes": 0, "edges": 0}

        parser._handle_provider(block, src, "provider.tf", store, stats, ["aws"], None)

        assert stats["classes"] == 1
        assert stats["edges"] == 1
        label, props = store.create_node.call_args[0][:2]
        assert label == NodeLabel.Class
        assert props["name"] == "aws"
        assert props["semantic_type"] == "terraform_provider"
| 275 | |
| 276 | |
class TestHCLHandleLocals:
    def test_creates_variable_nodes(self):
        parser = _make_parser()
        store = _make_store()
        src = b"region"
        name = MockNode("identifier", start_byte=0, end_byte=6)
        assignment = MockNode(
            "attribute",
            children=[name],
            start_point=(1, 0),
            end_point=(1, 20),
        )
        body = MockNode("body", children=[assignment])
        block = MockNode("block", start_point=(0, 0), end_point=(2, 1))
        stats = {"functions": 0, "classes": 0, "edges": 0}

        parser._handle_locals(block, src, "locals.tf", store, stats, body)

        assert stats["functions"] == 1
        assert stats["edges"] >= 1
        label, props = store.create_node.call_args[0][:2]
        assert label == NodeLabel.Variable
        assert props["semantic_type"] == "terraform_local"

    def test_skips_when_no_body(self):
        parser = _make_parser()
        store = _make_store()
        block = MockNode("block", start_point=(0, 0), end_point=(0, 5))
        stats = {"functions": 0, "classes": 0, "edges": 0}

        parser._handle_locals(block, b"", "locals.tf", store, stats, None)

        assert stats["functions"] == 0
        store.create_node.assert_not_called()
| 316 | |
| 317 | |
class TestHCLWalkDispatch:
    """`_walk` should route HCL block nodes to the matching handler."""

    def test_walk_dispatches_block_in_body(self):
        p, st = _make_parser(), _make_store()
        # Tree shape: config_file > body > block for `variable "region"`.
        src = b'variable "region" {}'
        kw = MockNode("identifier", start_byte=0, end_byte=8)
        inner = MockNode("template_literal", start_byte=10, end_byte=16)
        name_lit = MockNode("string_lit", children=[inner], start_byte=9, end_byte=17)
        blk = MockNode("block", children=[kw, name_lit], start_point=(0, 0), end_point=(0, 19))
        body = MockNode("body", children=[blk])
        root = MockNode("config_file", children=[body])
        counts = {"functions": 0, "classes": 0, "edges": 0}
        p._walk(root, src, "vars.tf", st, counts)
        assert counts["functions"] == 1

    def test_walk_dispatches_top_level_block(self):
        p, st = _make_parser(), _make_store()
        # Tree shape: config_file > block for `provider "aws"` (no body level).
        src = b'provider "aws" {}'
        kw = MockNode("identifier", start_byte=0, end_byte=8)
        inner = MockNode("template_literal", start_byte=10, end_byte=13)
        name_lit = MockNode("string_lit", children=[inner], start_byte=9, end_byte=14)
        blk = MockNode("block", children=[kw, name_lit], start_point=(0, 0), end_point=(0, 16))
        root = MockNode("config_file", children=[blk])
        counts = {"functions": 0, "classes": 0, "edges": 0}
        p._walk(root, src, "main.tf", st, counts)
        assert counts["classes"] == 1
| 382 | |
| 383 | |
class TestHCLExtractReferences:
    """`_extract_references` should detect var/local/module/data/resource refs.

    Fix: ``test_finds_resource_reference`` previously passed ``end_byte=25``
    for the 26-byte source ``b"aws_security_group.default"``, so the scanned
    text was silently truncated to ``aws_security_group.defaul``.  Every test
    now builds the body node with ``end_byte=len(source)`` so the reference
    name under test is exact.
    """

    def _run(self, source, origin_name, origin_label, store, stats):
        # Helper: scan the *entire* source as the body of `origin_name`.
        parser = _make_parser()
        node = MockNode("body", start_byte=0, end_byte=len(source))
        parser._extract_references(
            node, source, "main.tf", origin_name, origin_label, store, stats
        )

    def test_finds_var_reference(self):
        store = _make_store()
        stats = {"functions": 0, "classes": 0, "edges": 0}
        self._run(b"var.region", "aws_instance.web", NodeLabel.Class, store, stats)
        assert stats["edges"] == 1
        edge_call = store.create_edge.call_args[0]
        assert edge_call[2] == EdgeType.REFERENCES
        assert edge_call[4]["name"] == "region"

    def test_finds_resource_reference(self):
        store = _make_store()
        stats = {"functions": 0, "classes": 0, "edges": 0}
        # Full 26-byte span (was 25, which dropped the trailing "t").
        self._run(
            b"aws_security_group.default", "aws_instance.web", NodeLabel.Class, store, stats
        )
        assert stats["edges"] == 1
        edge_call = store.create_edge.call_args[0]
        assert edge_call[2] == EdgeType.DEPENDS_ON

    def test_finds_local_reference(self):
        store = _make_store()
        stats = {"functions": 0, "classes": 0, "edges": 0}
        self._run(b"local.common_tags", "aws_instance.web", NodeLabel.Class, store, stats)
        assert stats["edges"] == 1

    def test_finds_module_reference(self):
        store = _make_store()
        stats = {"functions": 0, "classes": 0, "edges": 0}
        self._run(b"module.vpc", "output_vpc", NodeLabel.Variable, store, stats)
        assert stats["edges"] == 1
        edge_call = store.create_edge.call_args[0]
        # Module references must target a Module-labelled node.
        assert edge_call[3] == NodeLabel.Module

    def test_finds_data_reference(self):
        store = _make_store()
        stats = {"functions": 0, "classes": 0, "edges": 0}
        self._run(b"data.http.myip", "aws_instance.web", NodeLabel.Class, store, stats)
        assert stats["edges"] == 1
        edge_call = store.create_edge.call_args[0]
        assert edge_call[2] == EdgeType.DEPENDS_ON
        assert edge_call[4]["name"] == "http.myip"
| 479 | |
| 480 | |
class TestHCLParseFile:
    """End-to-end `parse_file` on a real temp .tf file with a stubbed tree."""

    def test_creates_file_node(self):
        import tempfile
        from pathlib import Path

        p, st = _make_parser(), _make_store()
        fake_tree = MagicMock()
        fake_tree.root_node.type = "config_file"
        fake_tree.root_node.children = []
        p._parser.parse.return_value = fake_tree
        with tempfile.NamedTemporaryFile(suffix=".tf", delete=False) as handle:
            handle.write(b'resource "aws_instance" "web" {}\n')
            tf_path = Path(handle.name)
        try:
            p.parse_file(tf_path, tf_path.parent, st)
            st.create_node.assert_called_once()
            created_label, created_props = st.create_node.call_args[0][:2]
            assert created_label == NodeLabel.File
            assert created_props["language"] == "hcl"
        finally:
            # delete=False above, so clean up explicitly.
            tf_path.unlink()
| --- a/tests/test_puppet_parser.py | ||
| +++ b/tests/test_puppet_parser.py | ||
| @@ -0,0 +1,509 @@ | ||
| 1 | +"""Tests for navegador.ingestion.puppet — PuppetParser internal methods.""" | |
| 2 | + | |
| 3 | +from unittest.mock import MagicMock, patch | |
| 4 | + | |
| 5 | +import pytest | |
| 6 | + | |
| 7 | +from navegador.graph.schema import NodeLabel | |
| 8 | + | |
| 9 | + | |
class MockNode:
    """Lightweight stand-in for a tree-sitter syntax node.

    Carries a node type, raw byte text, byte/row-column spans, a children
    list and a parent back-link; named-field access is emulated with a
    private mapping populated via :meth:`set_field`.
    """

    # Monotonically increasing id shared across all instances.
    _id_counter = 0

    def __init__(
        self,
        type_: str,
        text: bytes = b"",
        children: list = None,
        start_byte: int = 0,
        end_byte: int = 0,
        start_point: tuple = (0, 0),
        end_point: tuple = (0, 0),
        parent=None,
    ):
        MockNode._id_counter += 1
        self.id = MockNode._id_counter
        self.type = type_
        self._text = text
        self.children = children or []
        self.start_byte, self.end_byte = start_byte, end_byte
        self.start_point, self.end_point = start_point, end_point
        self.parent = parent
        self._fields: dict = {}
        # Wire every child back to this node, mirroring tree-sitter.
        for kid in self.children:
            kid.parent = self

    def child_by_field_name(self, name: str):
        """Return the node registered under *name*, or ``None``."""
        return self._fields.get(name)

    def set_field(self, name: str, node):
        """Register *node* under *name*, re-parent it, and return ``self``."""
        self._fields[name] = node
        node.parent = self
        return self
| 45 | + | |
| 46 | + | |
def _text_node(text: bytes, type_: str = "identifier") -> MockNode:
    """Build a leaf MockNode whose byte span exactly covers *text*."""
    return MockNode(type_, text, start_byte=0, end_byte=len(text))
| 49 | + | |
| 50 | + | |
| 51 | +def _make_store(): | |
| 52 | + store = MagicMock() | |
| 53 | + store.query.return_value = MagicMock(result_set=[]) | |
| 54 | + return store | |
| 55 | + | |
| 56 | + | |
def _make_parser():
    """Instantiate PuppetParser bypassing __init__; stub the ts parser."""
    from navegador.ingestion.puppet import PuppetParser

    instance = PuppetParser.__new__(PuppetParser)
    instance._parser = MagicMock()
    return instance
| 63 | + | |
| 64 | + | |
class TestPuppetGetLanguage:
    """Import-time behaviour of `_get_puppet_language`."""

    def test_raises_when_not_installed(self):
        from navegador.ingestion.puppet import _get_puppet_language

        # Hiding both modules must surface a helpful ImportError.
        missing = {"tree_sitter_puppet": None, "tree_sitter": None}
        with patch.dict("sys.modules", missing):
            with pytest.raises(ImportError, match="tree-sitter-puppet"):
                _get_puppet_language()

    def test_returns_language_object(self):
        from navegador.ingestion.puppet import _get_puppet_language

        fake_grammar, fake_ts = MagicMock(), MagicMock()
        stubs = {"tree_sitter_puppet": fake_grammar, "tree_sitter": fake_ts}
        with patch.dict("sys.modules", stubs):
            assert _get_puppet_language() is fake_ts.Language.return_value
| 93 | + | |
| 94 | + | |
class TestPuppetHandleClass:
    """`_handle_class` emits a Class node tagged `puppet_class`."""

    def test_creates_class_with_puppet_class_semantic_type(self):
        p, st = _make_parser(), _make_store()
        src = b"nginx"
        ident = MockNode("identifier", start_byte=0, end_byte=5)
        cls_ident = MockNode("class_identifier", children=[ident], start_byte=0, end_byte=5)
        cls_def = MockNode(
            "class_definition", children=[cls_ident], start_point=(0, 0), end_point=(5, 1)
        )
        counts = {"functions": 0, "classes": 0, "edges": 0}
        p._handle_class(cls_def, src, "nginx.pp", st, counts)
        assert counts["classes"] == 1
        assert counts["edges"] == 1
        created_label, created_props = st.create_node.call_args[0][:2]
        assert created_label == NodeLabel.Class
        assert created_props["name"] == "nginx"
        assert created_props["semantic_type"] == "puppet_class"

    def test_skips_when_no_class_identifier(self):
        p, st = _make_parser(), _make_store()
        bare = MockNode("class_definition", children=[], start_point=(0, 0), end_point=(0, 5))
        counts = {"functions": 0, "classes": 0, "edges": 0}
        p._handle_class(bare, b"", "test.pp", st, counts)
        assert counts["classes"] == 0
        st.create_node.assert_not_called()
| 141 | + | |
| 142 | + | |
class TestPuppetHandleDefinedType:
    """`_handle_defined_type` emits a Class tagged `puppet_defined_type`."""

    def test_creates_class_with_puppet_defined_type(self):
        p, st = _make_parser(), _make_store()
        src = b"nginx::vhost"
        segments = [
            MockNode("identifier", start_byte=0, end_byte=5),
            MockNode("identifier", start_byte=7, end_byte=12),
        ]
        cls_ident = MockNode("class_identifier", children=segments, start_byte=0, end_byte=12)
        define = MockNode(
            "defined_resource_type", children=[cls_ident], start_point=(0, 0), end_point=(3, 1)
        )
        counts = {"functions": 0, "classes": 0, "edges": 0}
        p._handle_defined_type(define, src, "vhost.pp", st, counts)
        assert counts["classes"] == 1
        created_label, created_props = st.create_node.call_args[0][:2]
        assert created_label == NodeLabel.Class
        # Namespace segments come back joined with "::".
        assert created_props["name"] == "nginx::vhost"
        assert created_props["semantic_type"] == "puppet_defined_type"
| 179 | + | |
| 180 | + | |
class TestPuppetHandleNode:
    """`_handle_node` emits a Class tagged `puppet_node`."""

    def test_creates_class_with_puppet_node(self):
        p, st = _make_parser(), _make_store()
        src = b"'webserver'"
        name = MockNode("node_name", children=[MockNode("string", start_byte=0, end_byte=11)])
        node_def = MockNode(
            "node_definition", children=[name], start_point=(0, 0), end_point=(3, 1)
        )
        counts = {"functions": 0, "classes": 0, "edges": 0}
        p._handle_node(node_def, src, "nodes.pp", st, counts)
        assert counts["classes"] == 1
        created_label, created_props = st.create_node.call_args[0][:2]
        assert created_label == NodeLabel.Class
        # Surrounding quotes are stripped from the node name.
        assert created_props["name"] == "webserver"
        assert created_props["semantic_type"] == "puppet_node"

    def test_skips_when_no_node_name(self):
        p, st = _make_parser(), _make_store()
        bare = MockNode("node_definition", children=[], start_point=(0, 0), end_point=(0, 5))
        counts = {"functions": 0, "classes": 0, "edges": 0}
        p._handle_node(bare, b"", "nodes.pp", st, counts)
        assert counts["classes"] == 0
| 222 | + | |
| 223 | + | |
class TestPuppetHandleResource:
    """`_handle_resource` emits a Function node named `type[title]`."""

    def test_creates_function_with_puppet_resource(self):
        p, st = _make_parser(), _make_store()
        src = b"package 'nginx'"
        rtype = MockNode("identifier", start_byte=0, end_byte=7)
        title = MockNode("string", start_byte=8, end_byte=15)
        decl = MockNode(
            "resource_declaration", children=[rtype, title], start_point=(1, 0), end_point=(3, 1)
        )
        counts = {"functions": 0, "classes": 0, "edges": 0}
        p._handle_resource(decl, src, "nginx.pp", "nginx", st, counts)
        assert counts["functions"] == 1
        assert counts["edges"] == 1
        created_label, created_props = st.create_node.call_args[0][:2]
        assert created_label == NodeLabel.Function
        assert created_props["name"] == "package[nginx]"
        assert created_props["semantic_type"] == "puppet_resource"

    def test_skips_when_no_type_identifier(self):
        p, st = _make_parser(), _make_store()
        bare = MockNode(
            "resource_declaration", children=[], start_point=(0, 0), end_point=(0, 5)
        )
        counts = {"functions": 0, "classes": 0, "edges": 0}
        p._handle_resource(bare, b"", "test.pp", "myclass", st, counts)
        assert counts["functions"] == 0
| 267 | + | |
| 268 | + | |
class TestPuppetHandleInclude:
    """`_handle_include` emits an Import node tagged `puppet_include`."""

    def test_creates_import_node(self):
        p, st = _make_parser(), _make_store()
        src = b"stdlib"
        cls_ident = MockNode(
            "class_identifier", children=[MockNode("identifier", start_byte=0, end_byte=6)]
        )
        stmt = MockNode(
            "include_statement", children=[cls_ident], start_point=(0, 0), end_point=(0, 14)
        )
        counts = {"functions": 0, "classes": 0, "edges": 0}
        p._handle_include(stmt, src, "init.pp", st, counts)
        assert counts["edges"] == 1
        created_label, created_props = st.create_node.call_args[0][:2]
        assert created_label == NodeLabel.Import
        assert created_props["name"] == "stdlib"
        assert created_props["semantic_type"] == "puppet_include"

    def test_skips_when_no_class_identifier(self):
        p, st = _make_parser(), _make_store()
        stmt = MockNode("include_statement", children=[], start_point=(0, 0), end_point=(0, 7))
        counts = {"functions": 0, "classes": 0, "edges": 0}
        p._handle_include(stmt, b"", "init.pp", st, counts)
        assert counts["edges"] == 0
        st.create_node.assert_not_called()
| 312 | + | |
| 313 | + | |
class TestPuppetHandleParameters:
    """`_extract_parameters` emits Variable nodes for `$var` parameters."""

    def test_creates_variable_nodes(self):
        p, st = _make_parser(), _make_store()
        src = b"$port"
        dollar_var = MockNode("variable", start_byte=0, end_byte=5)
        param = MockNode(
            "parameter", children=[dollar_var], start_point=(1, 2), end_point=(1, 7)
        )
        params = MockNode("parameter_list", children=[param])
        ident = MockNode("identifier", start_byte=0, end_byte=5)
        cls_ident = MockNode("class_identifier", children=[ident], start_byte=0, end_byte=5)
        cls_def = MockNode(
            "class_definition",
            children=[cls_ident, params],
            start_point=(0, 0),
            end_point=(5, 1),
        )
        counts = {"functions": 0, "classes": 0, "edges": 0}
        p._extract_parameters(cls_def, src, "nginx.pp", "nginx", st, counts)
        st.create_node.assert_called_once()
        created_label, created_props = st.create_node.call_args[0][:2]
        assert created_label == NodeLabel.Variable
        # Leading "$" is stripped from the parameter name.
        assert created_props["name"] == "port"
        assert created_props["semantic_type"] == "puppet_parameter"
        assert counts["edges"] == 1

    def test_skips_param_without_variable(self):
        p, st = _make_parser(), _make_store()
        # A parameter whose only child is a type node has no variable to emit.
        param = MockNode(
            "parameter", children=[MockNode("type")], start_point=(1, 2), end_point=(1, 7)
        )
        params = MockNode("parameter_list", children=[param])
        cls_def = MockNode(
            "class_definition", children=[params], start_point=(0, 0), end_point=(5, 1)
        )
        counts = {"functions": 0, "classes": 0, "edges": 0}
        p._extract_parameters(cls_def, b"", "test.pp", "myclass", st, counts)
        st.create_node.assert_not_called()
| 384 | + | |
| 385 | + | |
class TestPuppetWalkDispatch:
    """`_walk` routes each top-level Puppet construct to its handler."""

    def test_walk_dispatches_class_definition(self):
        p, st = _make_parser(), _make_store()
        cls_ident = MockNode(
            "class_identifier", children=[MockNode("identifier", start_byte=0, end_byte=5)]
        )
        cls_def = MockNode(
            "class_definition", children=[cls_ident], start_point=(0, 0), end_point=(5, 1)
        )
        root = MockNode("program", children=[cls_def])
        counts = {"functions": 0, "classes": 0, "edges": 0}
        p._walk(root, b"nginx", "nginx.pp", st, counts)
        assert counts["classes"] == 1

    def test_walk_dispatches_defined_resource_type(self):
        p, st = _make_parser(), _make_store()
        cls_ident = MockNode(
            "class_identifier", children=[MockNode("identifier", start_byte=0, end_byte=5)]
        )
        define = MockNode(
            "defined_resource_type", children=[cls_ident], start_point=(0, 0), end_point=(3, 1)
        )
        root = MockNode("program", children=[define])
        counts = {"functions": 0, "classes": 0, "edges": 0}
        p._walk(root, b"vhost", "vhost.pp", st, counts)
        assert counts["classes"] == 1

    def test_walk_dispatches_node_definition(self):
        p, st = _make_parser(), _make_store()
        name = MockNode("node_name", children=[MockNode("string", start_byte=0, end_byte=11)])
        node_def = MockNode(
            "node_definition", children=[name], start_point=(0, 0), end_point=(3, 1)
        )
        root = MockNode("program", children=[node_def])
        counts = {"functions": 0, "classes": 0, "edges": 0}
        p._walk(root, b"'webserver'", "nodes.pp", st, counts)
        assert counts["classes"] == 1

    def test_walk_dispatches_include_statement(self):
        p, st = _make_parser(), _make_store()
        cls_ident = MockNode(
            "class_identifier", children=[MockNode("identifier", start_byte=0, end_byte=6)]
        )
        stmt = MockNode(
            "include_statement", children=[cls_ident], start_point=(0, 0), end_point=(0, 14)
        )
        root = MockNode("program", children=[stmt])
        counts = {"functions": 0, "classes": 0, "edges": 0}
        p._walk(root, b"stdlib", "init.pp", st, counts)
        assert counts["edges"] == 1
| 485 | + | |
| 486 | + | |
class TestPuppetParseFile:
    """End-to-end `parse_file` on a real temp .pp file with a stubbed tree."""

    def test_creates_file_node(self):
        import tempfile
        from pathlib import Path

        p, st = _make_parser(), _make_store()
        fake_tree = MagicMock()
        fake_tree.root_node.type = "program"
        fake_tree.root_node.children = []
        p._parser.parse.return_value = fake_tree
        with tempfile.NamedTemporaryFile(suffix=".pp", delete=False) as handle:
            handle.write(b"class nginx {}\n")
            pp_path = Path(handle.name)
        try:
            p.parse_file(pp_path, pp_path.parent, st)
            st.create_node.assert_called_once()
            created_label, created_props = st.create_node.call_args[0][:2]
            assert created_label == NodeLabel.File
            assert created_props["language"] == "puppet"
        finally:
            # delete=False above, so clean up explicitly.
            pp_path.unlink()
| --- a/tests/test_puppet_parser.py | |
| +++ b/tests/test_puppet_parser.py | |
| @@ -0,0 +1,509 @@ | |
| --- a/tests/test_puppet_parser.py | |
| +++ b/tests/test_puppet_parser.py | |
| @@ -0,0 +1,509 @@ | |
| 1 | """Tests for navegador.ingestion.puppet — PuppetParser internal methods.""" |
| 2 | |
| 3 | from unittest.mock import MagicMock, patch |
| 4 | |
| 5 | import pytest |
| 6 | |
| 7 | from navegador.graph.schema import NodeLabel |
| 8 | |
| 9 | |
class MockNode:
    """Minimal mock of a tree-sitter node for parser unit tests.

    Stores the node type, raw text, byte offsets, (row, col) points, a
    children list and parent back-link; named fields are kept in a private
    dict managed through :meth:`set_field`.
    """

    # Class-wide counter giving every instance a unique id.
    _id_counter = 0

    def __init__(
        self,
        type_: str,
        text: bytes = b"",
        children: list = None,
        start_byte: int = 0,
        end_byte: int = 0,
        start_point: tuple = (0, 0),
        end_point: tuple = (0, 0),
        parent=None,
    ):
        MockNode._id_counter += 1
        self.id = MockNode._id_counter
        self.type = type_
        self._text = text
        self.children = children or []
        self.start_byte, self.end_byte = start_byte, end_byte
        self.start_point, self.end_point = start_point, end_point
        self.parent = parent
        self._fields: dict = {}
        # Mirror tree-sitter by linking children back to their parent.
        for child_node in self.children:
            child_node.parent = self

    def child_by_field_name(self, name: str):
        """Look up a field child by name; ``None`` when absent."""
        return self._fields.get(name)

    def set_field(self, name: str, node):
        """Attach *node* as field *name* (re-parenting it) and return ``self``."""
        self._fields[name] = node
        node.parent = self
        return self
| 45 | |
| 46 | |
def _text_node(text: bytes, type_: str = "identifier") -> MockNode:
    """Shortcut for a leaf MockNode spanning bytes 0..len(text)."""
    return MockNode(type_, text, start_byte=0, end_byte=len(text))
| 49 | |
| 50 | |
| 51 | def _make_store(): |
| 52 | store = MagicMock() |
| 53 | store.query.return_value = MagicMock(result_set=[]) |
| 54 | return store |
| 55 | |
| 56 | |
def _make_parser():
    """Create a PuppetParser without invoking __init__; ts parser is mocked."""
    from navegador.ingestion.puppet import PuppetParser

    bare = PuppetParser.__new__(PuppetParser)
    bare._parser = MagicMock()
    return bare
| 63 | |
| 64 | |
class TestPuppetGetLanguage:
    """Behaviour of `_get_puppet_language` with and without the grammar."""

    def test_raises_when_not_installed(self):
        from navegador.ingestion.puppet import _get_puppet_language

        # Masking the modules as None must raise an actionable ImportError.
        hidden = {"tree_sitter_puppet": None, "tree_sitter": None}
        with patch.dict("sys.modules", hidden):
            with pytest.raises(ImportError, match="tree-sitter-puppet"):
                _get_puppet_language()

    def test_returns_language_object(self):
        from navegador.ingestion.puppet import _get_puppet_language

        grammar_stub, ts_stub = MagicMock(), MagicMock()
        with patch.dict(
            "sys.modules",
            {"tree_sitter_puppet": grammar_stub, "tree_sitter": ts_stub},
        ):
            assert _get_puppet_language() is ts_stub.Language.return_value
| 93 | |
| 94 | |
class TestPuppetHandleClass:
    """`_handle_class` produces a Class node with `puppet_class` semantics."""

    def test_creates_class_with_puppet_class_semantic_type(self):
        p, st = _make_parser(), _make_store()
        src = b"nginx"
        name_part = MockNode("identifier", start_byte=0, end_byte=5)
        cls_ident = MockNode(
            "class_identifier", children=[name_part], start_byte=0, end_byte=5
        )
        cls_def = MockNode(
            "class_definition", children=[cls_ident], start_point=(0, 0), end_point=(5, 1)
        )
        counts = {"functions": 0, "classes": 0, "edges": 0}
        p._handle_class(cls_def, src, "nginx.pp", st, counts)
        assert counts["classes"] == 1
        assert counts["edges"] == 1
        created_label, created_props = st.create_node.call_args[0][:2]
        assert created_label == NodeLabel.Class
        assert created_props["name"] == "nginx"
        assert created_props["semantic_type"] == "puppet_class"

    def test_skips_when_no_class_identifier(self):
        p, st = _make_parser(), _make_store()
        empty_def = MockNode(
            "class_definition", children=[], start_point=(0, 0), end_point=(0, 5)
        )
        counts = {"functions": 0, "classes": 0, "edges": 0}
        p._handle_class(empty_def, b"", "test.pp", st, counts)
        assert counts["classes"] == 0
        st.create_node.assert_not_called()
| 141 | |
| 142 | |
class TestPuppetHandleDefinedType:
    """`_handle_defined_type` produces a `puppet_defined_type` Class node."""

    def test_creates_class_with_puppet_defined_type(self):
        p, st = _make_parser(), _make_store()
        src = b"nginx::vhost"
        name_parts = [
            MockNode("identifier", start_byte=0, end_byte=5),
            MockNode("identifier", start_byte=7, end_byte=12),
        ]
        cls_ident = MockNode(
            "class_identifier", children=name_parts, start_byte=0, end_byte=12
        )
        define = MockNode(
            "defined_resource_type", children=[cls_ident], start_point=(0, 0), end_point=(3, 1)
        )
        counts = {"functions": 0, "classes": 0, "edges": 0}
        p._handle_defined_type(define, src, "vhost.pp", st, counts)
        assert counts["classes"] == 1
        created_label, created_props = st.create_node.call_args[0][:2]
        assert created_label == NodeLabel.Class
        # The two identifier segments are joined with "::".
        assert created_props["name"] == "nginx::vhost"
        assert created_props["semantic_type"] == "puppet_defined_type"
| 179 | |
| 180 | |
class TestPuppetHandleNode:
    """`_handle_node` produces a `puppet_node` Class node."""

    def test_creates_class_with_puppet_node(self):
        p, st = _make_parser(), _make_store()
        src = b"'webserver'"
        hostname = MockNode(
            "node_name", children=[MockNode("string", start_byte=0, end_byte=11)]
        )
        node_def = MockNode(
            "node_definition", children=[hostname], start_point=(0, 0), end_point=(3, 1)
        )
        counts = {"functions": 0, "classes": 0, "edges": 0}
        p._handle_node(node_def, src, "nodes.pp", st, counts)
        assert counts["classes"] == 1
        created_label, created_props = st.create_node.call_args[0][:2]
        assert created_label == NodeLabel.Class
        # Quotes around the hostname are stripped.
        assert created_props["name"] == "webserver"
        assert created_props["semantic_type"] == "puppet_node"

    def test_skips_when_no_node_name(self):
        p, st = _make_parser(), _make_store()
        empty_def = MockNode(
            "node_definition", children=[], start_point=(0, 0), end_point=(0, 5)
        )
        counts = {"functions": 0, "classes": 0, "edges": 0}
        p._handle_node(empty_def, b"", "nodes.pp", st, counts)
        assert counts["classes"] == 0
| 222 | |
| 223 | |
class TestPuppetHandleResource:
    def test_creates_function_with_puppet_resource(self):
        """A resource declaration becomes a Function node named
        ``type[title]`` plus one containment edge to the enclosing class."""
        parser, store = _make_parser(), _make_store()
        source = b"package 'nginx'"
        type_ident = MockNode("identifier", start_byte=0, end_byte=7)
        title_node = MockNode("string", start_byte=8, end_byte=15)
        declaration = MockNode(
            "resource_declaration",
            children=[type_ident, title_node],
            start_point=(1, 0),
            end_point=(3, 1),
        )
        stats = dict(functions=0, classes=0, edges=0)
        parser._handle_resource(declaration, source, "nginx.pp", "nginx", store, stats)
        assert stats["functions"] == 1
        assert stats["edges"] == 1
        label, props = store.create_node.call_args[0][:2]
        assert label == NodeLabel.Function
        assert props["name"] == "package[nginx]"
        assert props["semantic_type"] == "puppet_resource"

    def test_skips_when_no_type_identifier(self):
        """A declaration without a type identifier is ignored."""
        parser, store = _make_parser(), _make_store()
        declaration = MockNode(
            "resource_declaration",
            children=[],
            start_point=(0, 0),
            end_point=(0, 5),
        )
        stats = dict(functions=0, classes=0, edges=0)
        parser._handle_resource(declaration, b"", "test.pp", "myclass", store, stats)
        assert stats["functions"] == 0
| 267 | |
| 268 | |
class TestPuppetHandleInclude:
    def test_creates_import_node(self):
        """`include stdlib` yields an Import node plus one edge."""
        parser, store = _make_parser(), _make_store()
        source = b"stdlib"
        target = MockNode(
            "class_identifier",
            children=[MockNode("identifier", start_byte=0, end_byte=6)],
        )
        include_stmt = MockNode(
            "include_statement",
            children=[target],
            start_point=(0, 0),
            end_point=(0, 14),
        )
        stats = dict(functions=0, classes=0, edges=0)
        parser._handle_include(include_stmt, source, "init.pp", store, stats)
        assert stats["edges"] == 1
        label, props = store.create_node.call_args[0][:2]
        assert label == NodeLabel.Import
        assert props["name"] == "stdlib"
        assert props["semantic_type"] == "puppet_include"

    def test_skips_when_no_class_identifier(self):
        """An include without a class identifier creates nothing."""
        parser, store = _make_parser(), _make_store()
        include_stmt = MockNode(
            "include_statement",
            children=[],
            start_point=(0, 0),
            end_point=(0, 7),
        )
        stats = dict(functions=0, classes=0, edges=0)
        parser._handle_include(include_stmt, b"", "init.pp", store, stats)
        assert stats["edges"] == 0
        store.create_node.assert_not_called()
| 312 | |
| 313 | |
class TestPuppetHandleParameters:
    def test_creates_variable_nodes(self):
        """Each `$var` parameter in a class's parameter list becomes a
        Variable node (leading `$` stripped) with one edge to the class."""
        parser, store = _make_parser(), _make_store()
        source = b"$port"
        parameter = MockNode(
            "parameter",
            children=[MockNode("variable", start_byte=0, end_byte=5)],
            start_point=(1, 2),
            end_point=(1, 7),
        )
        params = MockNode("parameter_list", children=[parameter])
        ident = MockNode(
            "class_identifier",
            children=[MockNode("identifier", start_byte=0, end_byte=5)],
            start_byte=0,
            end_byte=5,
        )
        class_def = MockNode(
            "class_definition",
            children=[ident, params],
            start_point=(0, 0),
            end_point=(5, 1),
        )
        stats = dict(functions=0, classes=0, edges=0)
        parser._extract_parameters(class_def, source, "nginx.pp", "nginx", store, stats)
        store.create_node.assert_called_once()
        label, props = store.create_node.call_args[0][:2]
        assert label == NodeLabel.Variable
        assert props["name"] == "port"
        assert props["semantic_type"] == "puppet_parameter"
        assert stats["edges"] == 1

    def test_skips_param_without_variable(self):
        """Parameter entries with no `variable` child (e.g. a bare type
        annotation) are ignored."""
        parser, store = _make_parser(), _make_store()
        parameter = MockNode(
            "parameter",
            children=[MockNode("type")],
            start_point=(1, 2),
            end_point=(1, 7),
        )
        class_def = MockNode(
            "class_definition",
            children=[MockNode("parameter_list", children=[parameter])],
            start_point=(0, 0),
            end_point=(5, 1),
        )
        stats = dict(functions=0, classes=0, edges=0)
        parser._extract_parameters(class_def, b"", "test.pp", "myclass", store, stats)
        store.create_node.assert_not_called()
| 384 | |
| 385 | |
class TestPuppetWalkDispatch:
    """`_walk` should route each top-level node type to its handler."""

    @staticmethod
    def _stats():
        # Fresh counter dict so each test asserts against a clean slate.
        return {"functions": 0, "classes": 0, "edges": 0}

    def test_walk_dispatches_class_definition(self):
        parser, store = _make_parser(), _make_store()
        ident = MockNode(
            "class_identifier",
            children=[MockNode("identifier", start_byte=0, end_byte=5)],
        )
        class_def = MockNode(
            "class_definition",
            children=[ident],
            start_point=(0, 0),
            end_point=(5, 1),
        )
        stats = self._stats()
        parser._walk(
            MockNode("program", children=[class_def]),
            b"nginx",
            "nginx.pp",
            store,
            stats,
        )
        assert stats["classes"] == 1

    def test_walk_dispatches_defined_resource_type(self):
        parser, store = _make_parser(), _make_store()
        ident = MockNode(
            "class_identifier",
            children=[MockNode("identifier", start_byte=0, end_byte=5)],
        )
        defined = MockNode(
            "defined_resource_type",
            children=[ident],
            start_point=(0, 0),
            end_point=(3, 1),
        )
        stats = self._stats()
        parser._walk(
            MockNode("program", children=[defined]),
            b"vhost",
            "vhost.pp",
            store,
            stats,
        )
        assert stats["classes"] == 1

    def test_walk_dispatches_node_definition(self):
        parser, store = _make_parser(), _make_store()
        name = MockNode(
            "node_name",
            children=[MockNode("string", start_byte=0, end_byte=11)],
        )
        node_def = MockNode(
            "node_definition",
            children=[name],
            start_point=(0, 0),
            end_point=(3, 1),
        )
        stats = self._stats()
        parser._walk(
            MockNode("program", children=[node_def]),
            b"'webserver'",
            "nodes.pp",
            store,
            stats,
        )
        assert stats["classes"] == 1

    def test_walk_dispatches_include_statement(self):
        parser, store = _make_parser(), _make_store()
        ident = MockNode(
            "class_identifier",
            children=[MockNode("identifier", start_byte=0, end_byte=6)],
        )
        include_stmt = MockNode(
            "include_statement",
            children=[ident],
            start_point=(0, 0),
            end_point=(0, 14),
        )
        stats = self._stats()
        parser._walk(
            MockNode("program", children=[include_stmt]),
            b"stdlib",
            "init.pp",
            store,
            stats,
        )
        assert stats["edges"] == 1
| 485 | |
| 486 | |
class TestPuppetParseFile:
    def test_creates_file_node(self):
        """Parsing a real `.pp` file on disk creates exactly one File node
        tagged with language ``puppet``."""
        import tempfile
        from pathlib import Path

        parser, store = _make_parser(), _make_store()
        # Empty syntax tree: only the File node itself should be created.
        tree = MagicMock()
        tree.root_node.type = "program"
        tree.root_node.children = []
        parser._parser.parse.return_value = tree
        with tempfile.NamedTemporaryFile(suffix=".pp", delete=False) as handle:
            handle.write(b"class nginx {}\n")
        manifest = Path(handle.name)
        try:
            parser.parse_file(manifest, manifest.parent, store)
            store.create_node.assert_called_once()
            label, props = store.create_node.call_args[0][:2]
            assert label == NodeLabel.File
            assert props["language"] == "puppet"
        finally:
            manifest.unlink()