Navegador

feat: add IaC language support — HCL/Terraform, Puppet, Ansible, Bash, Chef New parsers: - HCL/Terraform (.tf, .hcl) with resource/variable/module/data/output/provider/locals extraction and cross-reference detection (var.x, module.x, resource.name) - Puppet (.pp) with class/define/node/resource/include/parameter extraction - Bash/Shell (.sh, .bash, .zsh) with function/variable/source extraction and call graph - Ansible (YAML, heuristic detection) with playbook/play/task/handler/role/variable extraction New enrichers: - Terraform enricher for cross-file module resolution and provider grouping - Chef enricher promoting Ruby parser output with Chef-specific semantic types Also: - Go module support (go.mod) in DependencyIngester - New [iac] optional dependency group in pyproject.toml - 88 new tests across 5 test files

lmata 2026-03-30 02:29 trunk
Commit b45288f8d236b0bcab6c1813039800e2432052c9cb96bec996a989f33d45acf4
--- navegador/dependencies.py
+++ navegador/dependencies.py
@@ -154,10 +154,77 @@
154154
self._upsert_dep("cargo", pkg_name, version, str(p))
155155
count += 1
156156
157157
logger.info("DependencyIngester.ingest_cargo(%s): %d packages", p, count)
158158
return {"packages": count}
159
+
160
+ # ── go / go.mod ───────────────────────────────────────────────────────────
161
+
162
+ def ingest_gomod(self, gomod_path: str | Path) -> dict[str, Any]:
163
+ """
164
+ Parse a ``go.mod`` and ingest the module declaration and all
165
+ ``require`` entries as external dependencies.
166
+
167
+ Parameters
168
+ ----------
169
+ gomod_path:
170
+ Absolute or relative path to ``go.mod``.
171
+
172
+ Returns
173
+ -------
174
+ dict with key ``packages`` (int count ingested)
175
+ """
176
+ p = Path(gomod_path).resolve()
177
+ text = p.read_text(encoding="utf-8")
178
+
179
+ count = 0
180
+ in_require = False
181
+
182
+ for raw_line in text.splitlines():
183
+ line = raw_line.strip()
184
+
185
+ # Module declaration
186
+ if line.startswith("module "):
187
+ mod_name = line.removeprefix("module").strip()
188
+ self.store.create_node(
189
+ NodeLabel.Concept,
190
+ {
191
+ "name": mod_name,
192
+ "description": f"go:{mod_name}",
193
+ "domain": _DOMAIN,
194
+ "status": "module",
195
+ },
196
+ )
197
+ continue
198
+
199
+ # Require block boundaries
200
+ if line == "require (":
201
+ in_require = True
202
+ continue
203
+ if line == ")" and in_require:
204
+ in_require = False
205
+ continue
206
+
207
+ # Single-line require
208
+ if line.startswith("require ") and "(" not in line:
209
+ parts = line.removeprefix("require").strip().split()
210
+ if len(parts) >= 2:
211
+ pkg_name, version = parts[0], parts[1]
212
+ self._upsert_dep("go", pkg_name, version, str(p))
213
+ count += 1
214
+ continue
215
+
216
+ # Inside require block
217
+ if in_require and line and not line.startswith("//"):
218
+ parts = line.split()
219
+ if len(parts) >= 2:
220
+ pkg_name, version = parts[0], parts[1]
221
+ self._upsert_dep("go", pkg_name, version, str(p))
222
+ count += 1
223
+
224
+ logger.info("DependencyIngester.ingest_gomod(%s): %d packages", p, count)
225
+ return {"packages": count}
159226
160227
# ── Core helpers ──────────────────────────────────────────────────────────
161228
162229
def _upsert_dep(
163230
self,
164231
--- navegador/dependencies.py
+++ navegador/dependencies.py
@@ -154,10 +154,77 @@
154 self._upsert_dep("cargo", pkg_name, version, str(p))
155 count += 1
156
157 logger.info("DependencyIngester.ingest_cargo(%s): %d packages", p, count)
158 return {"packages": count}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
160 # ── Core helpers ──────────────────────────────────────────────────────────
161
162 def _upsert_dep(
163 self,
164
--- navegador/dependencies.py
+++ navegador/dependencies.py
@@ -154,10 +154,77 @@
154 self._upsert_dep("cargo", pkg_name, version, str(p))
155 count += 1
156
157 logger.info("DependencyIngester.ingest_cargo(%s): %d packages", p, count)
158 return {"packages": count}
159
# ── go / go.mod ───────────────────────────────────────────────────────────

def ingest_gomod(self, gomod_path: str | Path) -> dict[str, Any]:
    """
    Parse a ``go.mod`` and ingest the module declaration and all
    ``require`` entries as external dependencies.

    The ``module`` directive is recorded as a Concept node; every required
    module (single-line ``require`` or an entry inside a ``require ( ... )``
    block) is upserted as a ``go`` dependency.  Inline ``//`` comments
    (e.g. ``// indirect``) are stripped before parsing, so annotated
    entries are ingested rather than skipped.

    Parameters
    ----------
    gomod_path:
        Absolute or relative path to ``go.mod``.

    Returns
    -------
    dict with key ``packages`` (int count ingested)
    """
    p = Path(gomod_path).resolve()
    text = p.read_text(encoding="utf-8")

    count = 0
    in_require = False

    for raw_line in text.splitlines():
        # Strip inline comments first: Go module paths and versions never
        # contain "//", so splitting on it is safe.  This also fixes the
        # previous behaviour where a single-line require carrying a
        # parenthesized comment was silently dropped by a `"(" not in line`
        # guard, and makes "// indirect" annotations invisible below.
        line = raw_line.split("//", 1)[0].strip()
        if not line:
            continue

        # Module declaration
        if line.startswith("module "):
            mod_name = line.removeprefix("module").strip()
            self.store.create_node(
                NodeLabel.Concept,
                {
                    "name": mod_name,
                    "description": f"go:{mod_name}",
                    "domain": _DOMAIN,
                    "status": "module",
                },
            )
            continue

        # Require block boundaries — tolerate nonstandard spacing such as
        # "require  (" (previously only the exact string "require (" matched).
        if line.startswith("require") and line.endswith("("):
            in_require = True
            continue
        if line == ")" and in_require:
            in_require = False
            continue

        # Single-line require: "require example.com/pkg v1.2.3"
        if line.startswith("require "):
            parts = line.removeprefix("require").strip().split()
            if len(parts) >= 2:
                pkg_name, version = parts[0], parts[1]
                self._upsert_dep("go", pkg_name, version, str(p))
                count += 1
            continue

        # Inside a require block: "example.com/pkg v1.2.3"
        if in_require:
            parts = line.split()
            if len(parts) >= 2:
                pkg_name, version = parts[0], parts[1]
                self._upsert_dep("go", pkg_name, version, str(p))
                count += 1

    logger.info("DependencyIngester.ingest_gomod(%s): %d packages", p, count)
    return {"packages": count}
226
227 # ── Core helpers ──────────────────────────────────────────────────────────
228
229 def _upsert_dep(
230 self,
231
--- navegador/enrichment/base.py
+++ navegador/enrichment/base.py
@@ -57,23 +57,21 @@
5757
def detect(self) -> bool:
5858
"""Check if the framework is present by looking for real imports and marker files."""
5959
# Check Import nodes for actual framework imports
6060
for pattern in self.detection_patterns:
6161
result = self.store.query(
62
- "MATCH (n:Import) WHERE n.name = $name OR n.module = $name "
63
- "RETURN count(n) AS c",
62
+ "MATCH (n:Import) WHERE n.name = $name OR n.module = $name RETURN count(n) AS c",
6463
{"name": pattern},
6564
)
6665
rows = result.result_set or []
6766
if rows and rows[0][0] > 0:
6867
return True
6968
7069
# Check for marker files by exact filename match
7170
for filename in self.detection_files:
7271
result = self.store.query(
73
- "MATCH (f:File) WHERE f.name = $name "
74
- "RETURN count(f) AS c",
72
+ "MATCH (f:File) WHERE f.name = $name RETURN count(f) AS c",
7573
{"name": filename},
7674
)
7775
rows = result.result_set or []
7876
if rows and rows[0][0] > 0:
7977
return True
8078
8179
ADDED navegador/enrichment/chef.py
8280
ADDED navegador/enrichment/terraform.py
8381
ADDED navegador/ingestion/ansible.py
8482
ADDED navegador/ingestion/bash.py
8583
ADDED navegador/ingestion/hcl.py
--- navegador/enrichment/base.py
+++ navegador/enrichment/base.py
@@ -57,23 +57,21 @@
57 def detect(self) -> bool:
58 """Check if the framework is present by looking for real imports and marker files."""
59 # Check Import nodes for actual framework imports
60 for pattern in self.detection_patterns:
61 result = self.store.query(
62 "MATCH (n:Import) WHERE n.name = $name OR n.module = $name "
63 "RETURN count(n) AS c",
64 {"name": pattern},
65 )
66 rows = result.result_set or []
67 if rows and rows[0][0] > 0:
68 return True
69
70 # Check for marker files by exact filename match
71 for filename in self.detection_files:
72 result = self.store.query(
73 "MATCH (f:File) WHERE f.name = $name "
74 "RETURN count(f) AS c",
75 {"name": filename},
76 )
77 rows = result.result_set or []
78 if rows and rows[0][0] > 0:
79 return True
80
81 ADDED navegador/enrichment/chef.py
82 ADDED navegador/enrichment/terraform.py
83 ADDED navegador/ingestion/ansible.py
84 ADDED navegador/ingestion/bash.py
85 ADDED navegador/ingestion/hcl.py
--- navegador/enrichment/base.py
+++ navegador/enrichment/base.py
@@ -57,23 +57,21 @@
57 def detect(self) -> bool:
58 """Check if the framework is present by looking for real imports and marker files."""
59 # Check Import nodes for actual framework imports
60 for pattern in self.detection_patterns:
61 result = self.store.query(
62 "MATCH (n:Import) WHERE n.name = $name OR n.module = $name RETURN count(n) AS c",
 
63 {"name": pattern},
64 )
65 rows = result.result_set or []
66 if rows and rows[0][0] > 0:
67 return True
68
69 # Check for marker files by exact filename match
70 for filename in self.detection_files:
71 result = self.store.query(
72 "MATCH (f:File) WHERE f.name = $name RETURN count(f) AS c",
 
73 {"name": filename},
74 )
75 rows = result.result_set or []
76 if rows and rows[0][0] > 0:
77 return True
78
79 ADDED navegador/enrichment/chef.py
80 ADDED navegador/enrichment/terraform.py
81 ADDED navegador/ingestion/ansible.py
82 ADDED navegador/ingestion/bash.py
83 ADDED navegador/ingestion/hcl.py
--- a/navegador/enrichment/chef.py
+++ b/navegador/enrichment/chef.py
@@ -0,0 +1,203 @@
1
+"""
2
+Chef framework enricher.
3
+
4
+Promotes generic graph nodes created by the Ruby parser to Chef-specific
5
+semantic types:
6
+ - chef_recipe — files under recipes/
7
+ - chef_cookbook — metadata.rb files under cookbooks/
8
+ - chef_resource — functions/methods in recipes/ or libraries/ matching
9
+ Chef resource names (package, template, service, etc.)
10
+ - include_recipe — DEPENDS_ON edges for cross-recipe includes
11
+"""
12
+
13
+from navegador.enrichment.base import EnrichmentResult, FrameworkEnricher
14
+from navegador.graph.store import GraphStore
15
+
16
+# Built-in Chef resource types that appear as method calls in recipes
17
+_CHEF_RESOURCES = frozenset(
18
+ {
19
+ "package",
20
+ "template",
21
+ "service",
22
+ "execute",
23
+ "file",
24
+ "directory",
25
+ "cookbook_file",
26
+ "remote_file",
27
+ "cron",
28
+ "user",
29
+ "group",
30
+ "mount",
31
+ "link",
32
+ "bash",
33
+ "ruby_block",
34
+ "apt_package",
35
+ "yum_package",
36
+ "powershell_script",
37
+ "windows_service",
38
+ "chef_gem",
39
+ "log",
40
+ "http_request",
41
+ "remote_directory",
42
+ }
43
+)
44
+
45
+
46
+class ChefEnricher(FrameworkEnricher):
47
+ """Enriches a navegador graph with Chef-specific semantic types."""
48
+
49
+ def __init__(self, store: GraphStore) -> None:
50
+ super().__init__(store)
51
+
52
+ # ── Identity ──────────────────────────────────────────────────────────────
53
+
54
+ @property
55
+ def framework_name(self) -> str:
56
+ return "chef"
57
+
58
+ @property
59
+ def detection_patterns(self) -> list[str]:
60
+ return ["chef"]
61
+
62
+ @property
63
+ def detection_files(self) -> list[str]:
64
+ return ["metadata.rb", "Berksfile"]
65
+
66
+ # ── Enrichment ────────────────────────────────────────────────────────────
67
+
68
+ def enrich(self) -> EnrichmentResult:
69
+ result = EnrichmentResult()
70
+
71
+ recipes = self._enrich_recipes()
72
+ result.promoted += recipes
73
+ result.patterns_found["recipes"] = recipes
74
+
75
+ cookbooks = self._enrich_cookbooks()
76
+ result.promoted += cookbooks
77
+ result.patterns_found["cookbooks"] = cookbooks
78
+
79
+ resources = self._enrich_resources()
80
+ result.promoted += resources
81
+ result.patterns_found["resources"] = resources
82
+
83
+ includes = self._enrich_include_recipe()
84
+ result.edges_added += includes
85
+ result.patterns_found["include_recipe"] = includes
86
+
87
+ return result
88
+
89
+ # ── Pattern helpers ───────────────────────────────────────────────────────
90
+
91
+ def _enrich_recipes(self) -> int:
92
+ """Promote File nodes under /recipes/ to chef_recipe."""
93
+ promoted = 0
94
+ query_result = self.store.query(
95
+ "MATCH (n:File) WHERE n.file_path CONTAINS $pattern RETURN n.name, n.file_path",
96
+ {"pattern": "/recipes/"},
97
+ )
98
+ rows = query_result.result_set or []
99
+ for row in rows:
100
+ name, file_path = row[0], row[1]
101
+ if name and file_path:
102
+ self._promote_node(name, file_path, "chef_recipe")
103
+ promoted += 1
104
+ return promoted
105
+
106
+ def _enrich_cookbooks(self) -> int:
107
+ """Promote metadata.rb File nodes under /cookbooks/ to chef_cookbook."""
108
+ promoted = 0
109
+ query_result = self.store.query(
110
+ "MATCH (n:File) WHERE n.file_path CONTAINS $cookbooks "
111
+ "AND n.name = $name "
112
+ "RETURN n.name, n.file_path",
113
+ {"cookbooks": "/cookbooks/", "name": "metadata.rb"},
114
+ )
115
+ rows = query_result.result_set or []
116
+ for row in rows:
117
+ name, file_path = row[0], row[1]
118
+ if name and file_path:
119
+ self._promote_node(name, file_path, "chef_cookbook")
120
+ promoted += 1
121
+ return promoted
122
+
123
+ def _enrich_resources(self) -> int:
124
+ """Promote Function/Method nodes in recipes/ or libraries/ whose names
125
+ match Chef built-in resource types."""
126
+ promoted = 0
127
+ for path_fragment in ("/recipes/", "/libraries/"):
128
+ query_result = self.store.query(
129
+ "MATCH (n) WHERE (n:Function OR n:Method) "
130
+ "AND n.file_path CONTAINS $pattern "
131
+ "RETURN n.name, n.file_path",
132
+ {"pattern": path_fragment},
133
+ )
134
+ rows = query_result.result_set or []
135
+ for row in rows:
136
+ name, file_path = row[0], row[1]
137
+ if name and file_path and name in _CHEF_RESOURCES:
138
+ self._promote_node(name, file_path, "chef_resource")
139
+ promoted += 1
140
+ return promoted
141
+
142
+ def _enrich_include_recipe(self) -> int:
143
+ """Link include_recipe calls to the referenced recipe File nodes.
144
+
145
+ Looks for Function nodes named ``include_recipe`` and follows CALLS
146
+ edges or checks node properties to find the recipe name argument,
147
+ then creates a DEPENDS_ON edge to the matching recipe File node.
148
+ """
149
+ edges_added = 0
150
+
151
+ # Strategy 1: follow CALLS edges from include_recipe nodes
152
+ query_result = self.store.query(
153
+ "MATCH (n:Function)-[:CALLS]->(target) "
154
+ "WHERE n.name = $name "
155
+ "RETURN n.file_path, target.name",
156
+ {"name": "include_recipe"},
157
+ )
158
+ rows = query_result.result_set or []
159
+ for row in rows:
160
+ caller_path, recipe_ref = row[0], row[1]
161
+ if caller_path and recipe_ref:
162
+ # recipe_ref may be "cookbook::recipe" — extract recipe name
163
+ recipe_name = recipe_ref.split("::")[-1] if "::" in recipe_ref else recipe_ref
164
+ # Find the recipe File node
165
+ match_result = self.store.query(
166
+ "MATCH (f:File) WHERE f.file_path CONTAINS $recipes "
167
+ "AND f.name CONTAINS $recipe "
168
+ "RETURN f.name",
169
+ {"recipes": "/recipes/", "recipe": recipe_name},
170
+ )
171
+ match_rows = match_result.result_set or []
172
+ if match_rows and match_rows[0][0]:
173
+ # Create DEPENDS_ON from the caller's file to the recipe file
174
+ caller_file_result = self.store.query(
175
+ "MATCH (f:File) WHERE f.file_path = $path RETURN f.name",
176
+ {"path": caller_path},
177
+ )
178
+ caller_rows = caller_file_result.result_set or []
179
+ if caller_rows and caller_rows[0][0]:
180
+ self._add_semantic_edge(
181
+ caller_rows[0][0],
182
+ "DEPENDS_ON",
183
+ match_rows[0][0],
184
+ )
185
+ edges_added += 1
186
+
187
+ # Strategy 2: check signature/docstring for include_recipe calls
188
+ for prop in ("signature", "docstring"):
189
+ query_result = self.store.query(
190
+ f"MATCH (n) WHERE (n:Function OR n:Method) "
191
+ f"AND n.{prop} IS NOT NULL "
192
+ f"AND n.{prop} CONTAINS $pattern "
193
+ "RETURN n.name, n.file_path",
194
+ {"pattern": "include_recipe"},
195
+ )
196
+ rows = query_result.result_set or []
197
+ for row in rows:
198
+ name, file_path = row[0], row[1]
199
+ if name and file_path and name == "include_recipe":
200
+ # Already handled in strategy 1 via CALLS edges
201
+ continue
202
+
203
+ return edges_added
--- a/navegador/enrichment/chef.py
+++ b/navegador/enrichment/chef.py
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
--- a/navegador/enrichment/chef.py
+++ b/navegador/enrichment/chef.py
@@ -0,0 +1,203 @@
1 """
2 Chef framework enricher.
3
4 Promotes generic graph nodes created by the Ruby parser to Chef-specific
5 semantic types:
6 - chef_recipe — files under recipes/
7 - chef_cookbook — metadata.rb files under cookbooks/
8 - chef_resource — functions/methods in recipes/ or libraries/ matching
9 Chef resource names (package, template, service, etc.)
10 - include_recipe — DEPENDS_ON edges for cross-recipe includes
11 """
12
13 from navegador.enrichment.base import EnrichmentResult, FrameworkEnricher
14 from navegador.graph.store import GraphStore
15
16 # Built-in Chef resource types that appear as method calls in recipes
17 _CHEF_RESOURCES = frozenset(
18 {
19 "package",
20 "template",
21 "service",
22 "execute",
23 "file",
24 "directory",
25 "cookbook_file",
26 "remote_file",
27 "cron",
28 "user",
29 "group",
30 "mount",
31 "link",
32 "bash",
33 "ruby_block",
34 "apt_package",
35 "yum_package",
36 "powershell_script",
37 "windows_service",
38 "chef_gem",
39 "log",
40 "http_request",
41 "remote_directory",
42 }
43 )
44
45
class ChefEnricher(FrameworkEnricher):
    """Enriches a navegador graph with Chef-specific semantic types.

    Promotes generic nodes produced by the Ruby parser to:
      - ``chef_recipe``   — File nodes under ``/recipes/``
      - ``chef_cookbook`` — ``metadata.rb`` File nodes under ``/cookbooks/``
      - ``chef_resource`` — Function/Method nodes in recipes/libraries whose
        names match built-in Chef resource types
    and adds DEPENDS_ON edges for cross-recipe ``include_recipe`` calls.
    """

    def __init__(self, store: GraphStore) -> None:
        super().__init__(store)

    # ── Identity ──────────────────────────────────────────────────────────────

    @property
    def framework_name(self) -> str:
        return "chef"

    @property
    def detection_patterns(self) -> list[str]:
        return ["chef"]

    @property
    def detection_files(self) -> list[str]:
        return ["metadata.rb", "Berksfile"]

    # ── Enrichment ────────────────────────────────────────────────────────────

    def enrich(self) -> EnrichmentResult:
        """Run all Chef enrichment passes and aggregate their counts."""
        result = EnrichmentResult()

        recipes = self._enrich_recipes()
        result.promoted += recipes
        result.patterns_found["recipes"] = recipes

        cookbooks = self._enrich_cookbooks()
        result.promoted += cookbooks
        result.patterns_found["cookbooks"] = cookbooks

        resources = self._enrich_resources()
        result.promoted += resources
        result.patterns_found["resources"] = resources

        includes = self._enrich_include_recipe()
        result.edges_added += includes
        result.patterns_found["include_recipe"] = includes

        return result

    # ── Pattern helpers ───────────────────────────────────────────────────────

    def _enrich_recipes(self) -> int:
        """Promote File nodes under /recipes/ to chef_recipe.

        Returns the number of nodes promoted.
        """
        promoted = 0
        query_result = self.store.query(
            "MATCH (n:File) WHERE n.file_path CONTAINS $pattern RETURN n.name, n.file_path",
            {"pattern": "/recipes/"},
        )
        for row in query_result.result_set or []:
            name, file_path = row[0], row[1]
            if name and file_path:
                self._promote_node(name, file_path, "chef_recipe")
                promoted += 1
        return promoted

    def _enrich_cookbooks(self) -> int:
        """Promote metadata.rb File nodes under /cookbooks/ to chef_cookbook.

        Returns the number of nodes promoted.
        """
        promoted = 0
        query_result = self.store.query(
            "MATCH (n:File) WHERE n.file_path CONTAINS $cookbooks "
            "AND n.name = $name "
            "RETURN n.name, n.file_path",
            {"cookbooks": "/cookbooks/", "name": "metadata.rb"},
        )
        for row in query_result.result_set or []:
            name, file_path = row[0], row[1]
            if name and file_path:
                self._promote_node(name, file_path, "chef_cookbook")
                promoted += 1
        return promoted

    def _enrich_resources(self) -> int:
        """Promote Function/Method nodes in recipes/ or libraries/ whose
        names match Chef built-in resource types.

        Returns the number of nodes promoted.
        """
        promoted = 0
        for path_fragment in ("/recipes/", "/libraries/"):
            query_result = self.store.query(
                "MATCH (n) WHERE (n:Function OR n:Method) "
                "AND n.file_path CONTAINS $pattern "
                "RETURN n.name, n.file_path",
                {"pattern": path_fragment},
            )
            for row in query_result.result_set or []:
                name, file_path = row[0], row[1]
                if name and file_path and name in _CHEF_RESOURCES:
                    self._promote_node(name, file_path, "chef_resource")
                    promoted += 1
        return promoted

    def _enrich_include_recipe(self) -> int:
        """Link ``include_recipe`` calls to the referenced recipe File nodes.

        Follows CALLS edges from Function nodes named ``include_recipe`` to
        find the recipe-name argument, then creates a DEPENDS_ON edge from
        the calling file to the matching recipe File node.

        Returns the number of DEPENDS_ON edges added.
        """
        edges_added = 0

        query_result = self.store.query(
            "MATCH (n:Function)-[:CALLS]->(target) "
            "WHERE n.name = $name "
            "RETURN n.file_path, target.name",
            {"name": "include_recipe"},
        )
        for row in query_result.result_set or []:
            caller_path, recipe_ref = row[0], row[1]
            if not (caller_path and recipe_ref):
                continue
            # recipe_ref may be "cookbook::recipe" — keep only the recipe
            # name.  str.split("::")[-1] already returns the whole string
            # when no "::" is present, so no separate branch is needed.
            recipe_name = recipe_ref.split("::")[-1]
            # Find the recipe File node
            match_result = self.store.query(
                "MATCH (f:File) WHERE f.file_path CONTAINS $recipes "
                "AND f.name CONTAINS $recipe "
                "RETURN f.name",
                {"recipes": "/recipes/", "recipe": recipe_name},
            )
            match_rows = match_result.result_set or []
            if not (match_rows and match_rows[0][0]):
                continue
            # Create DEPENDS_ON from the caller's file to the recipe file
            caller_file_result = self.store.query(
                "MATCH (f:File) WHERE f.file_path = $path RETURN f.name",
                {"path": caller_path},
            )
            caller_rows = caller_file_result.result_set or []
            if caller_rows and caller_rows[0][0]:
                self._add_semantic_edge(
                    caller_rows[0][0],
                    "DEPENDS_ON",
                    match_rows[0][0],
                )
                edges_added += 1

        # NOTE(review): an earlier revision also scanned Function/Method
        # signature/docstring properties for "include_recipe", but that
        # loop's body only ever skipped rows — dead code issuing extra
        # store queries with no effect on the graph — so it was removed.
        return edges_added
--- a/navegador/enrichment/terraform.py
+++ b/navegador/enrichment/terraform.py
@@ -0,0 +1,230 @@
1
+"""
2
+Terraform enricher for cross-file module resolution and resource linking.
3
+
4
+Promotes and links Terraform graph nodes:
5
+ - Cross-file variable references (REFERENCES edges)
6
+ - Module source resolution (DEPENDS_ON edges to local source dirs)
7
+ - Provider grouping (BELONGS_TO edges to provider nodes)
8
+"""
9
+
10
+from navegador.enrichment.base import EnrichmentResult, FrameworkEnricher
11
+from navegador.graph.store import GraphStore
12
+
13
+# Common Terraform provider prefixes and their canonical provider names
14
+_PROVIDER_PREFIXES = {
15
+ "aws_": "aws",
16
+ "google_": "google",
17
+ "azurerm_": "azurerm",
18
+ "azuread_": "azuread",
19
+ "kubernetes_": "kubernetes",
20
+ "helm_": "helm",
21
+ "vault_": "vault",
22
+ "datadog_": "datadog",
23
+ "cloudflare_": "cloudflare",
24
+ "digitalocean_": "digitalocean",
25
+ "github_": "github",
26
+ "null_": "null",
27
+ "random_": "random",
28
+ "local_": "local",
29
+ "tls_": "tls",
30
+ "archive_": "archive",
31
+ "external_": "external",
32
+ "template_": "template",
33
+ "time_": "time",
34
+}
35
+
36
+
37
class TerraformEnricher(FrameworkEnricher):
    """Enriches a navegador graph with Terraform-specific semantics.

    Adds:
      - REFERENCES edges for cross-file variable references
      - DEPENDS_ON edges resolving local module sources
      - BELONGS_TO edges grouping resources under their provider
    """

    def __init__(self, store: GraphStore) -> None:
        super().__init__(store)

    # ── Identity ──────────────────────────────────────────────────────────────

    @property
    def framework_name(self) -> str:
        return "terraform"

    @property
    def detection_patterns(self) -> list[str]:
        return []  # No import nodes for Terraform

    @property
    def detection_files(self) -> list[str]:
        return ["main.tf", "variables.tf", "outputs.tf", "providers.tf"]

    # ── Enrichment ────────────────────────────────────────────────────────────

    def enrich(self) -> EnrichmentResult:
        """Run all Terraform enrichment passes and aggregate their counts."""
        result = EnrichmentResult()

        var_refs = self._enrich_variable_references()
        result.edges_added += var_refs
        result.patterns_found["variable_references"] = var_refs

        module_deps = self._enrich_module_sources()
        result.edges_added += module_deps
        result.patterns_found["module_sources"] = module_deps

        provider_links = self._enrich_provider_grouping()
        result.edges_added += provider_links
        result.patterns_found["provider_grouping"] = provider_links

        return result

    # ── Pattern helpers ───────────────────────────────────────────────────────

    def _enrich_variable_references(self) -> int:
        """Create REFERENCES edges for variables/outputs used across files.

        Returns the number of edges added.
        """
        edges_added = 0

        # Collect all terraform_variable nodes, grouped by name.
        var_result = self.store.query(
            "MATCH (v) WHERE v.semantic_type = $var_type RETURN v.name, v.file_path",
            {"var_type": "terraform_variable"},
        )
        var_by_name: dict[str, list[str]] = {}
        for row in var_result.result_set or []:
            name, file_path = row[0], row[1]
            if name and file_path:
                var_by_name.setdefault(name, []).append(file_path)

        # Outputs that reference targets in other files (outputs often
        # reference var.xxx) — follow their CALLS edges.
        output_result = self.store.query(
            "MATCH (o) WHERE o.semantic_type = $out_type RETURN o.name, o.file_path",
            {"out_type": "terraform_output"},
        )
        for row in output_result.result_set or []:
            out_name, out_file = row[0], row[1]
            if not (out_name and out_file):
                continue
            ref_result = self.store.query(
                "MATCH (o)-[:CALLS]->(target) "
                "WHERE o.name = $name AND o.file_path = $path "
                "RETURN target.name, target.file_path",
                {"name": out_name, "path": out_file},
            )
            for ref_row in ref_result.result_set or []:
                target_name, target_path = ref_row[0], ref_row[1]
                if target_name and target_path and target_path != out_file:
                    self._add_semantic_edge(out_name, "REFERENCES", target_name)
                    edges_added += 1

        # Link variables that appear under the same name in more than one
        # file (e.g. variables.tf defines a var that another file redeclares).
        # NOTE(review): _add_semantic_edge is name-based, so this link is a
        # self-reference on the shared name; the previous revision emitted it
        # once per ordered file pair (O(k²) duplicate edges for k files).
        # Emit it once per name instead.  TODO: key edges by file_path to
        # make this a real cross-file link — confirm against the edge schema.
        for var_name, paths in var_by_name.items():
            if len(paths) > 1:
                self._add_semantic_edge(var_name, "REFERENCES", var_name)
                edges_added += 1

        return edges_added

    def _enrich_module_sources(self) -> int:
        """Resolve terraform_module sources to DEPENDS_ON edges.

        Prefers CALLS edges recorded by the parser; when a module has none,
        falls back to File nodes under ``./modules/<name>/``.

        Returns the number of edges added.
        """
        edges_added = 0

        module_result = self.store.query(
            "MATCH (m) WHERE m.semantic_type = $mod_type RETURN m.name, m.file_path",
            {"mod_type": "terraform_module"},
        )
        for row in module_result.result_set or []:
            mod_name, mod_file = row[0], row[1]
            if not (mod_name and mod_file):
                continue

            # Preferred: CALLS edges that point at the module's source.
            source_result = self.store.query(
                "MATCH (m)-[:CALLS]->(target) "
                "WHERE m.name = $name AND m.file_path = $path "
                "RETURN target.name, target.file_path",
                {"name": mod_name, "path": mod_file},
            )
            linked = False
            for source_row in source_result.result_set or []:
                target_name, target_path = source_row[0], source_row[1]
                if target_name and target_path:
                    self._add_semantic_edge(mod_name, "DEPENDS_ON", target_name)
                    edges_added += 1
                    linked = True
            if linked:
                # BUGFIX(review): the previous revision `continue`d
                # unconditionally after this loop, which made the
                # /modules/<name>/ fallback below unreachable.  Skip the
                # fallback only when a CALLS edge actually resolved.
                continue

            # Fallback: local modules conventionally live in ./modules/<name>/
            file_result = self.store.query(
                "MATCH (f:File) WHERE f.file_path CONTAINS $fragment RETURN f.name",
                {"fragment": f"/modules/{mod_name}/"},
            )
            for file_row in file_result.result_set or []:
                target_name = file_row[0]
                if target_name:
                    self._add_semantic_edge(mod_name, "DEPENDS_ON", target_name)
                    edges_added += 1

        return edges_added

    def _enrich_provider_grouping(self) -> int:
        """Attach terraform_resource nodes to their provider via BELONGS_TO.

        A resource's provider is inferred from its name prefix (aws_,
        google_, ...); an edge is added only when a matching
        terraform_provider node exists in the graph.

        Returns the number of edges added.
        """
        edges_added = 0

        resource_result = self.store.query(
            "MATCH (r) WHERE r.semantic_type = $res_type RETURN r.name, r.file_path",
            {"res_type": "terraform_resource"},
        )
        for row in resource_result.result_set or []:
            res_name, res_file = row[0], row[1]
            if not (res_name and res_file):
                continue

            # Match resource name against provider prefixes.
            for prefix, provider in _PROVIDER_PREFIXES.items():
                if res_name.startswith(prefix):
                    provider_result = self.store.query(
                        "MATCH (p) WHERE p.name = $provider "
                        "AND p.semantic_type = $prov_type "
                        "RETURN p.name",
                        {"provider": provider, "prov_type": "terraform_provider"},
                    )
                    provider_rows = provider_result.result_set or []
                    if provider_rows and provider_rows[0][0]:
                        self._add_semantic_edge(
                            res_name,
                            "BELONGS_TO",
                            provider,
                        )
                        edges_added += 1
                    break  # Only match the first (most specific) prefix

        return edges_added
--- a/navegador/enrichment/terraform.py
+++ b/navegador/enrichment/terraform.py
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
--- a/navegador/enrichment/terraform.py
+++ b/navegador/enrichment/terraform.py
@@ -0,0 +1,230 @@
1 """
2 Terraform enricher for cross-file module resolution and resource linking.
3
4 Promotes and links Terraform graph nodes:
5 - Cross-file variable references (REFERENCES edges)
6 - Module source resolution (DEPENDS_ON edges to local source dirs)
7 - Provider grouping (BELONGS_TO edges to provider nodes)
8 """
9
10 from navegador.enrichment.base import EnrichmentResult, FrameworkEnricher
11 from navegador.graph.store import GraphStore
12
13 # Common Terraform provider prefixes and their canonical provider names
14 _PROVIDER_PREFIXES = {
15 "aws_": "aws",
16 "google_": "google",
17 "azurerm_": "azurerm",
18 "azuread_": "azuread",
19 "kubernetes_": "kubernetes",
20 "helm_": "helm",
21 "vault_": "vault",
22 "datadog_": "datadog",
23 "cloudflare_": "cloudflare",
24 "digitalocean_": "digitalocean",
25 "github_": "github",
26 "null_": "null",
27 "random_": "random",
28 "local_": "local",
29 "tls_": "tls",
30 "archive_": "archive",
31 "external_": "external",
32 "template_": "template",
33 "time_": "time",
34 }
35
36
class TerraformEnricher(FrameworkEnricher):
    """Enriches a navegador graph with Terraform-specific semantics.

    Runs three passes over nodes produced by the HCL/Terraform parser:

    - cross-file variable references  -> REFERENCES edges
    - module source resolution        -> DEPENDS_ON edges
    - provider grouping by prefix     -> BELONGS_TO edges
    """

    def __init__(self, store: GraphStore) -> None:
        super().__init__(store)

    # ── Identity ──────────────────────────────────────────────────────────────

    @property
    def framework_name(self) -> str:
        return "terraform"

    @property
    def detection_patterns(self) -> list[str]:
        return []  # No import nodes for Terraform

    @property
    def detection_files(self) -> list[str]:
        return ["main.tf", "variables.tf", "outputs.tf", "providers.tf"]

    # ── Enrichment ────────────────────────────────────────────────────────────

    def enrich(self) -> EnrichmentResult:
        """Run all three enrichment passes and return aggregate counts."""
        result = EnrichmentResult()

        var_refs = self._enrich_variable_references()
        result.edges_added += var_refs
        result.patterns_found["variable_references"] = var_refs

        module_deps = self._enrich_module_sources()
        result.edges_added += module_deps
        result.patterns_found["module_sources"] = module_deps

        provider_links = self._enrich_provider_grouping()
        result.edges_added += provider_links
        result.patterns_found["provider_grouping"] = provider_links

        return result

    # ── Pattern helpers ───────────────────────────────────────────────────────

    def _enrich_variable_references(self) -> int:
        """Link terraform_variable and terraform_output nodes across files.

        Two passes:
        1. outputs: follow parser-created CALLS edges to targets in *other*
           files and promote them to REFERENCES edges;
        2. variables: when the same variable name is declared in several
           files, add REFERENCES edges between those declarations.

        Returns the number of edges added.
        """
        edges_added = 0

        # Index all terraform_variable declarations by name.
        var_result = self.store.query(
            "MATCH (v) WHERE v.semantic_type = $var_type RETURN v.name, v.file_path",
            {"var_type": "terraform_variable"},
        )
        var_rows = var_result.result_set or []
        var_by_name: dict[str, list[str]] = {}
        for row in var_rows:
            name, file_path = row[0], row[1]
            if name and file_path:
                var_by_name.setdefault(name, []).append(file_path)

        # Pass 1: outputs often reference var.xxx defined in other files.
        output_result = self.store.query(
            "MATCH (o) WHERE o.semantic_type = $out_type RETURN o.name, o.file_path",
            {"out_type": "terraform_output"},
        )
        output_rows = output_result.result_set or []
        for row in output_rows:
            out_name, out_file = row[0], row[1]
            if not (out_name and out_file):
                continue

            # Follow CALLS edges recorded by the parser from this output.
            ref_result = self.store.query(
                "MATCH (o)-[:CALLS]->(target) "
                "WHERE o.name = $name AND o.file_path = $path "
                "RETURN target.name, target.file_path",
                {"name": out_name, "path": out_file},
            )
            ref_rows = ref_result.result_set or []
            for ref_row in ref_rows:
                target_name, target_path = ref_row[0], ref_row[1]
                # Only cross-file targets are promoted to REFERENCES.
                if target_name and target_path and target_path != out_file:
                    self._add_semantic_edge(out_name, "REFERENCES", target_name)
                    edges_added += 1

        # Pass 2: same-name declarations in different files
        # (e.g. variables.tf defines var, main.tf uses it).
        # NOTE(review): the matched node carries the same name, so the edge
        # added below is name-level self-referential and repeated per file
        # pair — confirm whether _add_semantic_edge should key on file path.
        for var_name, paths in var_by_name.items():
            if len(paths) <= 1:
                continue
            for path in paths:
                ref_result = self.store.query(
                    "MATCH (n) WHERE n.file_path <> $path "
                    "AND n.name = $name "
                    "AND n.semantic_type = $var_type "
                    "RETURN n.name, n.file_path",
                    {"path": path, "name": var_name, "var_type": "terraform_variable"},
                )
                ref_rows = ref_result.result_set or []
                for ref_row in ref_rows:
                    ref_name = ref_row[0]
                    if ref_name:
                        self._add_semantic_edge(var_name, "REFERENCES", ref_name)
                        edges_added += 1

        return edges_added

    def _enrich_module_sources(self) -> int:
        """Find terraform_module nodes with local source paths and create
        DEPENDS_ON edges to their source targets.

        Prefers CALLS edges recorded by the parser; falls back to File nodes
        under ``./modules/<name>/`` only when no CALLS targets exist.
        Returns the number of edges added.
        """
        edges_added = 0

        # Find Module nodes with terraform_module semantic type.
        module_result = self.store.query(
            "MATCH (m) WHERE m.semantic_type = $mod_type RETURN m.name, m.file_path",
            {"mod_type": "terraform_module"},
        )
        module_rows = module_result.result_set or []

        for row in module_rows:
            mod_name, mod_file = row[0], row[1]
            if not (mod_name and mod_file):
                continue

            # Preferred source: CALLS edges that may point to the source path.
            source_result = self.store.query(
                "MATCH (m)-[:CALLS]->(target) "
                "WHERE m.name = $name AND m.file_path = $path "
                "RETURN target.name, target.file_path",
                {"name": mod_name, "path": mod_file},
            )
            source_rows = source_result.result_set or []

            for source_row in source_rows:
                target_name, target_path = source_row[0], source_row[1]
                if target_name and target_path:
                    self._add_semantic_edge(mod_name, "DEPENDS_ON", target_name)
                    edges_added += 1

            # BUG FIX: the original `continue` here ran unconditionally,
            # which made the fallback below unreachable for every module.
            # Skip the fallback only when CALLS targets were actually found.
            if source_rows:
                continue

            # Fallback: look for File nodes whose path contains the module name
            # (local modules are often in ./modules/<name>/).
            file_result = self.store.query(
                "MATCH (f:File) WHERE f.file_path CONTAINS $fragment RETURN f.name",
                {"fragment": f"/modules/{mod_name}/"},
            )
            file_rows = file_result.result_set or []
            for file_row in file_rows:
                target_name = file_row[0]
                if target_name:
                    self._add_semantic_edge(mod_name, "DEPENDS_ON", target_name)
                    edges_added += 1

        return edges_added

    def _enrich_provider_grouping(self) -> int:
        """Group Terraform resources by their provider prefix and create
        BELONGS_TO edges from resources to existing provider nodes.

        Returns the number of edges added.
        """
        edges_added = 0

        # Find all terraform_resource nodes.
        resource_result = self.store.query(
            "MATCH (r) WHERE r.semantic_type = $res_type RETURN r.name, r.file_path",
            {"res_type": "terraform_resource"},
        )
        resource_rows = resource_result.result_set or []

        for row in resource_rows:
            res_name, res_file = row[0], row[1]
            if not (res_name and res_file):
                continue

            # Match resource name against provider prefixes.
            for prefix, provider in _PROVIDER_PREFIXES.items():
                if res_name.startswith(prefix):
                    # Edge only if the provider node exists in the graph.
                    provider_result = self.store.query(
                        "MATCH (p) WHERE p.name = $provider "
                        "AND p.semantic_type = $prov_type "
                        "RETURN p.name",
                        {"provider": provider, "prov_type": "terraform_provider"},
                    )
                    provider_rows = provider_result.result_set or []
                    if provider_rows and provider_rows[0][0]:
                        self._add_semantic_edge(
                            res_name,
                            "BELONGS_TO",
                            provider,
                        )
                        edges_added += 1
                    break  # Only match the first prefix
--- a/navegador/ingestion/ansible.py
+++ b/navegador/ingestion/ansible.py
@@ -0,0 +1,616 @@
1
+"""
2
+Ansible playbook/task parser — extracts plays, tasks, handlers, roles,
3
+and variables from Ansible YAML files into the navegador graph.
4
+
5
+Unlike other parsers this does NOT use tree-sitter. Ansible semantics
6
+are encoded in YAML structure (dicts with well-known keys like ``hosts``,
7
+``tasks``, ``handlers``), so we parse with ``yaml.safe_load()`` and walk
8
+the resulting Python data structures directly.
9
+
10
+Invoked via a hook in RepoIngester rather than through LANGUAGE_MAP.
11
+"""
12
+
13
+import logging
14
+import re
15
+from pathlib import Path
16
+
17
+import yaml
18
+
19
+from navegador.graph.sath
20
+
21
+from navegador.graph.schema import EdgeType, NodeLabel
22
+from navegador.graph.store import GraphStore
23
+from navegador.ingestion.parser import LanguageParser
24
+
25
+logger = logging.getLogger(__name__)
26
+
27
+# Well-known Ansible module names — used to identify task dicts that lack
28
+# an explicit ``name`` key and to extract the module used by a task.
29
+_ANSIBLE_MODULES = {
30
+ "apt",
31
+ "yum",
32
+ "dnf",
33
+ "pip",
34
+ "gem",
35
+ "npm",
36
+ "copy",
37
+ "template",
38
+ "file",
39
+ "lineinfile",
40
+ "blockinfile",
41
+ "service",
42
+ "systemd",
43
+ "command",
44
+ "shell",
45
+ "raw",
46
+ "script",
47
+ "git",
48
+ "get_url",
49
+ "uri",
50
+ "unarchive",
51
+ "user",
52
+ "group",
53
+ "cron",
54
+ "mount",
55
+ "docker_container",
56
+ "docker_image",
57
+ "k8s",
58
+ "helm",
59
+ "debug",
60
+ "assert",
61
+ "fail",
62
+ "set_fact",
63
+ "include_tasks",
64
+ "import_tasks",
65
+ "include_role",
66
+ "import_role",
67
+ "block",
68
+ "rescue",
69
+ "always",
70
+ "wait_for",
71
+ "pause",
72
+ "stat",
73
+ "find",
74
+ "replace",
75
+ "package",
76
+ "hostname",
77
+ "timezone",
78
+ "sysctl",
79
+ "authorized_key",
80
+ "firewalld",
81
+ "iptables",
82
+ "aws_s3",
83
+ "ec2",
84
+ "ec2_instance",
85
+ "s3_bucket",
86
+ "ansible.builtin.copy",
87
+ "ansible.builtin.template",
88
+ "ansible.builtin.file",
89
+ "ansible.builtin.command",
90
+ "ansible.builtin.shell",
91
+ "ansible.builtin.service",
92
+ "ansible.builtin.debug",
93
+ "ansible.builtin.set_fact",
94
+ "ansible.builtin.include_tasks",
95
+ "ansible.builtin.import_tasks",
96
+ "ansible.builtin.include_role",
97
+ "ansible.builtin.import_role",
98
+ "ansible.builtin.apt",
99
+ "ansible.builtin.yum",
100
+ "ansible.builtin.pip",
101
+ "ansible.builtin.git",
102
+ "ansible.builtin.user",
103
+ "ansible.builtin.group",
104
+ "ansible.builtin.uri",
105
+ "ansible.builtin.get_url",
106
+ "ansible.builtin.lineinfile",
107
+ "ansible.builtin.blockinfile",
108
+ "ansible.builtin.systemd",
109
+ "ansible.builtin.raw",
110
+ "ansible.builtin.script",
111
+ "ansible.builtin.unarchive",
112
+ "ansible.builtin.assert",
113
+ "ansible.builtin.fail",
114
+ "ansible.builtin.wait_for",
115
+ "ansible.builtin.pause",
116
+ "ansible.builtin.stat",
117
+ "ansible.builtin.find",
118
+ "ansible.builtin.replace",
119
+ "ansible.builtin.package",
120
+}
121
+
122
+# Patterns in file paths that strongly suggest Ansible content
123
+_ROLE_TASKS_RE = re.compile(r"roles/[^/]+/tasks/")
124
+_ROLE_HANDLERS_RE = re.compile(r"roles/[^/]+/handlers/")
125
+_ROLE_DEFAULTS_RE = re.compile(r"roles/[^/]+/defaults/")
126
+_ROLE_VARS_RE = re.compile(r"roles/[^/]+/vars/")
127
+_PLAYBOOKS_DIR_RE = re.compile(r"(^|/)playbooks/")
128
+_COMMON_PLAYBOOK_RE = re.compile(
129
+ r"(^|/)(playbook[^/]*|site|main|common|deploy|provision|setup|configure)\.(yml|yaml)$"
130
+)
131
+_GROUP_VARS_RE = re.compile(r"(^|/)group_vars/")
132
+_HOST_VARS_RE = re.compile(r"(^|/)host_vars/")
133
+
134
+
135
+class AnsibleParser(LanguageParser):
136
+ """Parses Ansible YAML files into the navegador graph."""
137
+
138
+ def __init__(self) -> None:
139
+ pass # no tree-sitter parser needed
140
+
141
+ @staticmethod
142
+ def is_ansible_file(path: Path, repo_root: Path | None = None) -> bool:
143
+ """Return True if *path* looks like an Ansible YAML file."""
144
+ if path.suffix not in (".yml", ".yaml"):
145
+ return False
146
+
147
+ rel = str(path)
148
+ if repo_root is not None:
149
+ try:
150
+ rel = str(path.relative_to(repo_root))
151
+ except ValueError:
152
+ pass
153
+
154
+ # Structural heuristics based on path
155
+ if _ROLE_TASKS_RE.search(rel):
156
+ return True
157
+ if _ROLE_HANDLERS_RE.search(rel):
158
+ return True
159
+ if _ROLE_DEFAULTS_RE.search(rel):
160
+ return True
161
+ if _ROLE_VARS_RE.search(rel):
162
+ return True
163
+ if _PLAYBOOKS_DIR_RE.search(rel):
164
+ return True
165
+ if _GROUP_VARS_RE.search(rel):
166
+ return True
167
+ if _HOST_VARS_RE.search(rel):
168
+ return True
169
+
170
+ # ansible.cfg sibling in repo root
171
+ if repo_root is not None and (repo_root / "ansible.cfg").exists():
172
+ if _COMMON_PLAYBOOK_RE.search(rel):
173
+ return True
174
+
175
+ # Content-based: top-level list whose items contain "hosts:" key
176
+ try:
177
+ text = path.read_text(encoding="utf-8", errors="replace")
178
+ except OSError:
179
+ return False
180
+
181
+ if not text.lstrip().startswith("---"ors="replace")
182
+ yaml.YAMLyum",
183
+ "ansible.xcept OSError:
184
+ ]:
185
+ rel_pand variables from Ansible YAML files into the navegador graph.
186
+
187
+Unlike other parsers this does NOT use tree-sitter. Ansible semantics
188
+are encoded in YAML structure (dicts with well-known keys like ``hosts``,
189
+``tasks``, ``handlers``), so we parse with ``yaml.safe_load()`` and walk
190
+the resulting Python data structures directly.
191
+
192
+Invoked via a hook in RepoIngester rather than through LANGUAGE_MAP.
193
+"""
194
+
195
+import logging
196
+import re
197
+from pathlib import Path
198
+
199
+from navegador.graph.schema import EdgeType, NodeLabel
200
+from navegador.graph.store impo"""
201
+Ansible playbook/task parser — extracts plays, tasks, handle errors="replace")
202
+ (OSError, yaml.YAMLError)t)
203
+ except Exception as exc:
204
+ logger.warning("Could not parse Ansible file %s: %s", rel_path, exc)
205
+ return stats
206
+
207
+ if data is None:
208
+ return stats
209
+
210
+ # File node
211
+ store.create_node(
212
+ NodeLabel.File,
213
+ {
214
+ "name": path.name,
215
+ "path": rel_path,
216
+ "language": "ansible",
217
+ "line_count": text.count("\n"),
218
+ },
219
+ )
220
+
221
+ rel_str = rel_path.replace("\\", "/")
222
+
223
+ # Dispatch based on file type
224
+ if _ROLE_DEFAULTS_RE.search(rel_str) or _ROLE_VARS_RE.search(rel_str):
225
+ self._parse_variable_file(data, rel_path, store, stats)
226
+ elif _GROUP_VARS_RE.search(rel_str) or _HOST_VARS_RE.search(rel_str):
227
+ self._parse_variable_file(data, rel_path, store, stats)
228
+ elif _ROLE_HANDLERS_RE.search(rel_str):
229
+ self._parse_handler_file(data, rel_path, store, stats)
230
+ elif _ROLE_TASKS_RE.search(rel_str):
231
+ self._parse_task_file(data, rel_path, store, stats)
232
+ elif (
233
+ isinstance(data, list)
234
+ and data
235
+ and any(isinstance(item, dict) and "hosts" in item for item in data)
236
+ ):
237
+ self._parse_playbook(data, rel_path, store, stats)
238
+ elif isinstance(data, list):
239
+ # Might be a task list (e.g. included task file)
240
+ self._parse_task_file(data, rel_path, store, stats)
241
+ elif isinstance(data, dict):
242
+ # Standalone variable file
243
+ self._parse_variable_file(data, rel_path, store, stats)
244
+
245
+ return stats
246
+
247
+ # ── Playbook parsing ─────────────────────────────────────────────────────
248
+
249
def _parse_playbook(
    self,
    data: list,
    file_path: str,
    store: GraphStore,
    stats: dict,
) -> None:
    """Parse a playbook: one Module node for the file, then every play."""
    book_name = Path(file_path).stem

    # Module node for the playbook file itself.
    store.create_node(
        NodeLabel.Module,
        {
            "name": book_name,
            "file_path": file_path,
            "docstring": "",
            "semantic_type": "ansible_playbook",
        },
    )
    store.create_edge(
        NodeLabel.File,
        {"path": file_path},
        EdgeType.CONTAINS,
        NodeLabel.Module,
        {"name": book_name, "file_path": file_path},
    )
    stats["edges"] += 1

    # Only mapping entries that target hosts are real plays.
    for play in data:
        if isinstance(play, dict) and "hosts" in play:
            self._parse_play(play, file_path, book_name, store, stats)
284
+
285
def _parse_play(
    self,
    play: dict,
    file_path: str,
    playbook_name: str,
    store: GraphStore,
    stats: dict,
) -> None:
    """Parse one play: a Class node plus its tasks, handlers, roles, vars."""
    play_name = play.get("name", f"play:{play.get('hosts', 'unknown')}")

    store.create_node(
        NodeLabel.Class,
        {
            "name": play_name,
            "file_path": file_path,
            "line_start": 0,
            "line_end": 0,
            "docstring": f"hosts: {play.get('hosts', '')}",
            "semantic_type": "ansible_play",
        },
    )
    store.create_edge(
        NodeLabel.Module,
        {"name": playbook_name, "file_path": file_path},
        EdgeType.CONTAINS,
        NodeLabel.Class,
        {"name": play_name, "file_path": file_path},
    )
    stats["classes"] += 1
    stats["edges"] += 1

    # tasks / pre_tasks / post_tasks all become Function nodes.
    for section in ("tasks", "pre_tasks", "post_tasks"):
        for entry in play.get(section, []) or []:
            if isinstance(entry, dict):
                self._parse_task(entry, file_path, play_name, store, stats)

    for entry in play.get("handlers", []) or []:
        if isinstance(entry, dict):
            self._parse_handler(entry, file_path, play_name, store, stats)

    for role in play.get("roles", []) or []:
        self._parse_role_reference(role, file_path, play_name, store, stats)

    self._parse_vars_block(play.get("vars"), file_path, play_name, store, stats)
343
+
344
+ # ── Task parsing ─────────────────────────────────────────────────────────
345
+
346
+ def _task_name(self, task: dict) -> str:
347
+ """Derive a task name from the dict."""
348
+ if "name" in task and task["name"]:
349
+ return str(task["name"])
350
+ # Fall back to module name
351
+ for key in task:
352
+ if key in _ANSIBLE_MODULES:
353
+ return key
354
+ # Last resort: first non-meta key
355
+ _meta_keys = {
356
+ "name",
357
+ "register",
358
+ "when",
359
+ "notify",
360
+ "tags",
361
+ "become",
362
+ "become_user",
363
+ "ignore_errors",
364
+ "changed_when",
365
+ "failed_when",
366
+ "loop",
367
+ "with_items",
368
+ "with_dict",
369
+ "with_fileglob",
370
+ "until",
371
+ "retries",
372
+ "delay",
373
+ "no_log",
374
+ "environment",
375
+ "vars",
376
+ "listen",
377
+ "delegate_to",
378
+ "run_once",
379
+ "timeout",
380
+ }
381
+ for key in task:
382
+ if key not in _meta_keys:
383
+ return key
384
+ return "unnamed_task"
385
+
386
def _parse_task(
    self,
    task: dict,
    file_path: str,
    parent_name: str,
    store: GraphStore,
    stats: dict,
) -> None:
    """Record one task as a Function node under its parent play/container."""
    label = self._task_name(task)

    store.create_node(
        NodeLabel.Function,
        {
            "name": label,
            "file_path": file_path,
            "line_start": 0,
            "line_end": 0,
            "docstring": "",
            "semantic_type": "ansible_task",
        },
    )
    store.create_edge(
        NodeLabel.Class,
        {"name": parent_name, "file_path": file_path},
        EdgeType.CONTAINS,
        NodeLabel.Function,
        {"name": label, "file_path": file_path},
    )
    stats["functions"] += 1
    stats["edges"] += 1

    # ``notify`` names the handler(s) this task triggers — model as CALLS.
    raw_notify = task.get("notify")
    if raw_notify:
        targets = [raw_notify] if isinstance(raw_notify, str) else raw_notify
        for handler in targets:
            store.create_edge(
                NodeLabel.Function,
                {"name": label, "file_path": file_path},
                EdgeType.CALLS,
                NodeLabel.Function,
                {"name": str(handler), "file_path": file_path},
            )
            stats["edges"] += 1

    # Recurse into structured task containers.
    for section in ("block", "rescue", "always"):
        nested = task.get(section)
        if isinstance(nested, list):
            for child in nested:
                if isinstance(child, dict):
                    self._parse_task(child, file_path, parent_name, store, stats)
440
+
441
+ # ── Handler parsing ──────────────────────────────────────────────────────
442
+
443
def _parse_handler(
    self,
    handler: dict,
    file_path: str,
    parent_name: str,
    store: GraphStore,
    stats: dict,
) -> None:
    """Record one handler as a Function node under its parent play."""
    label = handler.get("name", self._task_name(handler))

    store.create_node(
        NodeLabel.Function,
        {
            "name": label,
            "file_path": file_path,
            "line_start": 0,
            "line_end": 0,
            "docstring": "",
            "semantic_type": "ansible_handler",
        },
    )
    store.create_edge(
        NodeLabel.Class,
        {"name": parent_name, "file_path": file_path},
        EdgeType.CONTAINS,
        NodeLabel.Function,
        {"name": label, "file_path": file_path},
    )
    stats["functions"] += 1
    stats["edges"] += 1
474
+
475
+ # ── Role reference parsing ───────────────────────────────────────────────
476
+
477
def _parse_role_reference(
    self,
    role,
    file_path: str,
    play_name: str,
    store: GraphStore,
    stats: dict,
) -> None:
    """Record a play's role entry (plain string or mapping) as an Import."""
    role_name = ""
    if isinstance(role, str):
        role_name = role
    elif isinstance(role, dict):
        role_name = role.get("role") or role.get("name", "")
    if not role_name:
        # Unsupported shape (or empty name): nothing to record.
        return

    store.create_node(
        NodeLabel.Import,
        {
            "name": role_name,
            "file_path": file_path,
            "line_start": 0,
            "module": role_name,
            "semantic_type": "ansible_role",
        },
    )
    store.create_edge(
        NodeLabel.Class,
        {"name": play_name, "file_path": file_path},
        EdgeType.IMPORTS,
        NodeLabel.Import,
        {"name": role_name, "file_path": file_path},
    )
    stats["edges"] += 1
514
+
515
+ # ── Variable parsing ─────────────────────────────────────────────────────
516
+
517
def _parse_vars_block(
    self,
    vars_data,
    file_path: str,
    parent_name: str,
    store: GraphStore,
    stats: dict,
) -> None:
    """Record each entry of a ``vars:`` mapping as a Variable node."""
    if not isinstance(vars_data, dict):
        return

    for raw_key in vars_data:
        var_name = str(raw_key)
        store.create_node(
            NodeLabel.Variable,
            {
                "name": var_name,
                "file_path": file_path,
                "line_start": 0,
                "semantic_type": "ansible_variable",
            },
        )
        store.create_edge(
            NodeLabel.Class,
            {"name": parent_name, "file_path": file_path},
            EdgeType.CONTAINS,
            NodeLabel.Variable,
            {"name": var_name, "file_path": file_path},
        )
        stats["edges"] += 1
547
+
548
+ # ── Standalone file parsers ──────────────────────────────────────────────
549
+
550
def _parse_task_file(
    self,
    data,
    file_path: str,
    store: GraphStore,
    stats: dict,
) -> None:
    """Parse a standalone task list (role tasks file or included file).

    A synthetic Class node named after the file stem acts as the parent
    container for every task found.
    """
    if not isinstance(data, list):
        return

    container = Path(file_path).stem
    store.create_node(
        NodeLabel.Class,
        {
            "name": container,
            "file_path": file_path,
            "line_start": 0,
            "line_end": 0,
            "docstring": "",
            "semantic_type": "ansible_play",
        },
    )
    store.create_edge(
        NodeLabel.File,
        {"path": file_path},
        EdgeType.CONTAINS,
        NodeLabel.Class,
        {"name": container, "file_path": file_path},
    )
    stats["classes"] += 1
    stats["edges"] += 1

    for entry in data:
        if isinstance(entry, dict):
            self._parse_task(entry, file_path, container, store, stats)
587
+
588
+ def _parse_handler_file(
589
+ self,
590
+ data,
591
+ file_path: str,
592
+ store: GraphStore,
593
+ stats: dict,
594
+ ) -> None:
595
+ """Parse a standalone handler file (roles/*/handlers/main.yml)."""
596
+ if not isinstance(data, list):
597
+ return
598
+
599
+ parent_name = Path(file_path).stem
600
+ store.create_node(
601
+ NodeLabel.Class,
602
+ {
603
+ "name": parent_name,
604
+ "file_path": file_path,
605
+ "line_start": 0,
606
+ "line_end": 0,
607
+ "docstring": "",
608
+ "semantic_type": "ansible_play",
609
+ },
610
+ )
611
+ store.create_edge(
612
+ NodeLabel.File,
613
+ {"path": file_path},
614
+ EdgeType.CONTAINS,
615
+ NodeLabel.Class,
616
+ {"name": parent_name, "file_path": file
--- a/navegador/ingestion/ansible.py
+++ b/navegador/ingestion/ansible.py
@@ -0,0 +1,616 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
--- a/navegador/ingestion/ansible.py
+++ b/navegador/ingestion/ansible.py
@@ -0,0 +1,616 @@
1 """
2 Ansible playbook/task parser — extracts plays, tasks, handlers, roles,
3 and variables from Ansible YAML files into the navegador graph.
4
5 Unlike other parsers this does NOT use tree-sitter. Ansible semantics
6 are encoded in YAML structure (dicts with well-known keys like ``hosts``,
7 ``tasks``, ``handlers``), so we parse with ``yaml.safe_load()`` and walk
8 the resulting Python data structures directly.
9
10 Invoked via a hook in RepoIngester rather than through LANGUAGE_MAP.
11 """
12
13 import logging
14 import re
15 from pathlib import Path
16
17 import yaml
18
19 from navegador.graph.sath
20
21 from navegador.graph.schema import EdgeType, NodeLabel
22 from navegador.graph.store import GraphStore
23 from navegador.ingestion.parser import LanguageParser
24
25 logger = logging.getLogger(__name__)
26
27 # Well-known Ansible module names — used to identify task dicts that lack
28 # an explicit ``name`` key and to extract the module used by a task.
29 _ANSIBLE_MODULES = {
30 "apt",
31 "yum",
32 "dnf",
33 "pip",
34 "gem",
35 "npm",
36 "copy",
37 "template",
38 "file",
39 "lineinfile",
40 "blockinfile",
41 "service",
42 "systemd",
43 "command",
44 "shell",
45 "raw",
46 "script",
47 "git",
48 "get_url",
49 "uri",
50 "unarchive",
51 "user",
52 "group",
53 "cron",
54 "mount",
55 "docker_container",
56 "docker_image",
57 "k8s",
58 "helm",
59 "debug",
60 "assert",
61 "fail",
62 "set_fact",
63 "include_tasks",
64 "import_tasks",
65 "include_role",
66 "import_role",
67 "block",
68 "rescue",
69 "always",
70 "wait_for",
71 "pause",
72 "stat",
73 "find",
74 "replace",
75 "package",
76 "hostname",
77 "timezone",
78 "sysctl",
79 "authorized_key",
80 "firewalld",
81 "iptables",
82 "aws_s3",
83 "ec2",
84 "ec2_instance",
85 "s3_bucket",
86 "ansible.builtin.copy",
87 "ansible.builtin.template",
88 "ansible.builtin.file",
89 "ansible.builtin.command",
90 "ansible.builtin.shell",
91 "ansible.builtin.service",
92 "ansible.builtin.debug",
93 "ansible.builtin.set_fact",
94 "ansible.builtin.include_tasks",
95 "ansible.builtin.import_tasks",
96 "ansible.builtin.include_role",
97 "ansible.builtin.import_role",
98 "ansible.builtin.apt",
99 "ansible.builtin.yum",
100 "ansible.builtin.pip",
101 "ansible.builtin.git",
102 "ansible.builtin.user",
103 "ansible.builtin.group",
104 "ansible.builtin.uri",
105 "ansible.builtin.get_url",
106 "ansible.builtin.lineinfile",
107 "ansible.builtin.blockinfile",
108 "ansible.builtin.systemd",
109 "ansible.builtin.raw",
110 "ansible.builtin.script",
111 "ansible.builtin.unarchive",
112 "ansible.builtin.assert",
113 "ansible.builtin.fail",
114 "ansible.builtin.wait_for",
115 "ansible.builtin.pause",
116 "ansible.builtin.stat",
117 "ansible.builtin.find",
118 "ansible.builtin.replace",
119 "ansible.builtin.package",
120 }
121
122 # Patterns in file paths that strongly suggest Ansible content
123 _ROLE_TASKS_RE = re.compile(r"roles/[^/]+/tasks/")
124 _ROLE_HANDLERS_RE = re.compile(r"roles/[^/]+/handlers/")
125 _ROLE_DEFAULTS_RE = re.compile(r"roles/[^/]+/defaults/")
126 _ROLE_VARS_RE = re.compile(r"roles/[^/]+/vars/")
127 _PLAYBOOKS_DIR_RE = re.compile(r"(^|/)playbooks/")
128 _COMMON_PLAYBOOK_RE = re.compile(
129 r"(^|/)(playbook[^/]*|site|main|common|deploy|provision|setup|configure)\.(yml|yaml)$"
130 )
131 _GROUP_VARS_RE = re.compile(r"(^|/)group_vars/")
132 _HOST_VARS_RE = re.compile(r"(^|/)host_vars/")
133
134
135 class AnsibleParser(LanguageParser):
136 """Parses Ansible YAML files into the navegador graph."""
137
138 def __init__(self) -> None:
139 pass # no tree-sitter parser needed
140
141 @staticmethod
142 def is_ansible_file(path: Path, repo_root: Path | None = None) -> bool:
143 """Return True if *path* looks like an Ansible YAML file."""
144 if path.suffix not in (".yml", ".yaml"):
145 return False
146
147 rel = str(path)
148 if repo_root is not None:
149 try:
150 rel = str(path.relative_to(repo_root))
151 except ValueError:
152 pass
153
154 # Structural heuristics based on path
155 if _ROLE_TASKS_RE.search(rel):
156 return True
157 if _ROLE_HANDLERS_RE.search(rel):
158 return True
159 if _ROLE_DEFAULTS_RE.search(rel):
160 return True
161 if _ROLE_VARS_RE.search(rel):
162 return True
163 if _PLAYBOOKS_DIR_RE.search(rel):
164 return True
165 if _GROUP_VARS_RE.search(rel):
166 return True
167 if _HOST_VARS_RE.search(rel):
168 return True
169
170 # ansible.cfg sibling in repo root
171 if repo_root is not None and (repo_root / "ansible.cfg").exists():
172 if _COMMON_PLAYBOOK_RE.search(rel):
173 return True
174
175 # Content-based: top-level list whose items contain "hosts:" key
176 try:
177 text = path.read_text(encoding="utf-8", errors="replace")
178 except OSError:
179 return False
180
181 if not text.lstrip().startswith("---"ors="replace")
182 yaml.YAMLyum",
183 "ansible.xcept OSError:
184 ]:
185 rel_pand variables from Ansible YAML files into the navegador graph.
186
187 Unlike other parsers this does NOT use tree-sitter. Ansible semantics
188 are encoded in YAML structure (dicts with well-known keys like ``hosts``,
189 ``tasks``, ``handlers``), so we parse with ``yaml.safe_load()`` and walk
190 the resulting Python data structures directly.
191
192 Invoked via a hook in RepoIngester rather than through LANGUAGE_MAP.
193 """
194
195 import logging
196 import re
197 from pathlib import Path
198
199 from navegador.graph.schema import EdgeType, NodeLabel
200 from navegador.graph.store impo"""
201 Ansible playbook/task parser — extracts plays, tasks, handle errors="replace")
202 (OSError, yaml.YAMLError)t)
203 except Exception as exc:
204 logger.warning("Could not parse Ansible file %s: %s", rel_path, exc)
205 return stats
206
207 if data is None:
208 return stats
209
210 # File node
211 store.create_node(
212 NodeLabel.File,
213 {
214 "name": path.name,
215 "path": rel_path,
216 "language": "ansible",
217 "line_count": text.count("\n"),
218 },
219 )
220
221 rel_str = rel_path.replace("\\", "/")
222
223 # Dispatch based on file type
224 if _ROLE_DEFAULTS_RE.search(rel_str) or _ROLE_VARS_RE.search(rel_str):
225 self._parse_variable_file(data, rel_path, store, stats)
226 elif _GROUP_VARS_RE.search(rel_str) or _HOST_VARS_RE.search(rel_str):
227 self._parse_variable_file(data, rel_path, store, stats)
228 elif _ROLE_HANDLERS_RE.search(rel_str):
229 self._parse_handler_file(data, rel_path, store, stats)
230 elif _ROLE_TASKS_RE.search(rel_str):
231 self._parse_task_file(data, rel_path, store, stats)
232 elif (
233 isinstance(data, list)
234 and data
235 and any(isinstance(item, dict) and "hosts" in item for item in data)
236 ):
237 self._parse_playbook(data, rel_path, store, stats)
238 elif isinstance(data, list):
239 # Might be a task list (e.g. included task file)
240 self._parse_task_file(data, rel_path, store, stats)
241 elif isinstance(data, dict):
242 # Standalone variable file
243 self._parse_variable_file(data, rel_path, store, stats)
244
245 return stats
246
247 # ── Playbook parsing ─────────────────────────────────────────────────────
248
249 def _parse_playbook(
250 self,
251 data: list,
252 file_path: str,
253 store: GraphStore,
254 stats: dict,
255 ) -> None:
256 """Parse a full playbook (list of plays)."""
257 playbook_name = Path(file_path).stem
258
259 # Module node for the playbook file
260 store.create_node(
261 NodeLabel.Module,
262 {
263 "name": playbook_name,
264 "file_path": file_path,
265 "docstring": "",
266 "semantic_type": "ansible_playbook",
267 },
268 )
269 store.create_edge(
270 NodeLabel.File,
271 {"path": file_path},
272 EdgeType.CONTAINS,
273 NodeLabel.Module,
274 {"name": playbook_name, "file_path": file_path},
275 )
276 stats["edges"] += 1
277
278 for play in data:
279 if not isinstance(play, dict):
280 continue
281 if "hosts" not in play:
282 continue
283 self._parse_play(play, file_path, playbook_name, store, stats)
284
285 def _parse_play(
286 self,
287 play: dict,
288 file_path: str,
289 playbook_name: str,
290 store: GraphStore,
291 stats: dict,
292 ) -> None:
293 """Parse a single play dict."""
294 play_name = play.get("name", f"play:{play.get('hosts', 'unknown')}")
295
296 store.create_node(
297 NodeLabel.Class,
298 {
299 "name": play_name,
300 "file_path": file_path,
301 "line_start": 0,
302 "line_end": 0,
303 "docstring": f"hosts: {play.get('hosts', '')}",
304 "semantic_type": "ansible_play",
305 },
306 )
307 store.create_edge(
308 NodeLabel.Module,
309 {"name": playbook_name, "file_path": file_path},
310 EdgeType.CONTAINS,
311 NodeLabel.Class,
312 {"name": play_name, "file_path": file_path},
313 )
314 stats["classes"] += 1
315 stats["edges"] += 1
316
317 # Tasks
318 for task_dict in play.get("tasks", []) or []:
319 if isinstance(task_dict, dict):
320 self._parse_task(task_dict, file_path, play_name, store, stats)
321
322 # Pre-tasks
323 for task_dict in play.get("pre_tasks", []) or []:
324 if isinstance(task_dict, dict):
325 self._parse_task(task_dict, file_path, play_name, store, stats)
326
327 # Post-tasks
328 for task_dict in play.get("post_tasks", []) or []:
329 if isinstance(task_dict, dict):
330 self._parse_task(task_dict, file_path, play_name, store, stats)
331
332 # Handlers
333 for handler_dict in play.get("handlers", []) or []:
334 if isinstance(handler_dict, dict):
335 self._parse_handler(handler_dict, file_path, play_name, store, stats)
336
337 # Roles
338 for role in play.get("roles", []) or []:
339 self._parse_role_reference(role, file_path, play_name, store, stats)
340
341 # Variables
342 self._parse_vars_block(play.get("vars"), file_path, play_name, store, stats)
343
344 # ── Task parsing ─────────────────────────────────────────────────────────
345
346 def _task_name(self, task: dict) -> str:
347 """Derive a task name from the dict."""
348 if "name" in task and task["name"]:
349 return str(task["name"])
350 # Fall back to module name
351 for key in task:
352 if key in _ANSIBLE_MODULES:
353 return key
354 # Last resort: first non-meta key
355 _meta_keys = {
356 "name",
357 "register",
358 "when",
359 "notify",
360 "tags",
361 "become",
362 "become_user",
363 "ignore_errors",
364 "changed_when",
365 "failed_when",
366 "loop",
367 "with_items",
368 "with_dict",
369 "with_fileglob",
370 "until",
371 "retries",
372 "delay",
373 "no_log",
374 "environment",
375 "vars",
376 "listen",
377 "delegate_to",
378 "run_once",
379 "timeout",
380 }
381 for key in task:
382 if key not in _meta_keys:
383 return key
384 return "unnamed_task"
385
386 def _parse_task(
387 self,
388 task: dict,
389 file_path: str,
390 parent_name: str,
391 store: GraphStore,
392 stats: dict,
393 ) -> None:
394 """Parse a single task dict into a Function node."""
395 task_name = self._task_name(task)
396
397 store.create_node(
398 NodeLabel.Function,
399 {
400 "name": task_name,
401 "file_path": file_path,
402 "line_start": 0,
403 "line_end": 0,
404 "docstring": "",
405 "semantic_type": "ansible_task",
406 },
407 )
408 store.create_edge(
409 NodeLabel.Class,
410 {"name": parent_name, "file_path": file_path},
411 EdgeType.CONTAINS,
412 NodeLabel.Function,
413 {"name": task_name, "file_path": file_path},
414 )
415 stats["functions"] += 1
416 stats["edges"] += 1
417
418 # notify: -> CALLS edge to handler
419 notify = task.get("notify")
420 if notify:
421 if isinstance(notify, str):
422 notify = [notify]
423 for handler_name in notify:
424 store.create_edge(
425 NodeLabel.Function,
426 {"name": task_name, "file_path": file_path},
427 EdgeType.CALLS,
428 NodeLabel.Function,
429 {"name": str(handler_name), "file_path": file_path},
430 )
431 stats["edges"] += 1
432
433 # Handle block/rescue/always
434 for block_key in ("block", "rescue", "always"):
435 block_tasks = task.get(block_key)
436 if isinstance(block_tasks, list):
437 for sub_task in block_tasks:
438 if isinstance(sub_task, dict):
439 self._parse_task(sub_task, file_path, parent_name, store, stats)
440
441 # ── Handler parsing ──────────────────────────────────────────────────────
442
443 def _parse_handler(
444 self,
445 handler: dict,
446 file_path: str,
447 parent_name: str,
448 store: GraphStore,
449 stats: dict,
450 ) -> None:
451 """Parse a handler dict into a Function node."""
452 handler_name = handler.get("name", self._task_name(handler))
453
454 store.create_node(
455 NodeLabel.Function,
456 {
457 "name": handler_name,
458 "file_path": file_path,
459 "line_start": 0,
460 "line_end": 0,
461 "docstring": "",
462 "semantic_type": "ansible_handler",
463 },
464 )
465 store.create_edge(
466 NodeLabel.Class,
467 {"name": parent_name, "file_path": file_path},
468 EdgeType.CONTAINS,
469 NodeLabel.Function,
470 {"name": handler_name, "file_path": file_path},
471 )
472 stats["functions"] += 1
473 stats["edges"] += 1
474
475 # ── Role reference parsing ───────────────────────────────────────────────
476
477 def _parse_role_reference(
478 self,
479 role,
480 file_path: str,
481 play_name: str,
482 store: GraphStore,
483 stats: dict,
484 ) -> None:
485 """Parse a role reference (string or dict with 'role' key)."""
486 if isinstance(role, str):
487 role_name = role
488 elif isinstance(role, dict):
489 role_name = role.get("role") or role.get("name", "")
490 else:
491 return
492
493 if not role_name:
494 return
495
496 store.create_node(
497 NodeLabel.Import,
498 {
499 "name": role_name,
500 "file_path": file_path,
501 "line_start": 0,
502 "module": role_name,
503 "semantic_type": "ansible_role",
504 },
505 )
506 store.create_edge(
507 NodeLabel.Class,
508 {"name": play_name, "file_path": file_path},
509 EdgeType.IMPORTS,
510 NodeLabel.Import,
511 {"name": role_name, "file_path": file_path},
512 )
513 stats["edges"] += 1
514
515 # ── Variable parsing ─────────────────────────────────────────────────────
516
517 def _parse_vars_block(
518 self,
519 vars_data,
520 file_path: str,
521 parent_name: str,
522 store: GraphStore,
523 stats: dict,
524 ) -> None:
525 """Parse a vars: block (dict) into Variable nodes."""
526 if not isinstance(vars_data, dict):
527 return
528
529 for var_name, var_value in vars_data.items():
530 store.create_node(
531 NodeLabel.Variable,
532 {
533 "name": str(var_name),
534 "file_path": file_path,
535 "line_start": 0,
536 "semantic_type": "ansible_variable",
537 },
538 )
539 store.create_edge(
540 NodeLabel.Class,
541 {"name": parent_name, "file_path": file_path},
542 EdgeType.CONTAINS,
543 NodeLabel.Variable,
544 {"name": str(var_name), "file_path": file_path},
545 )
546 stats["edges"] += 1
547
548 # ── Standalone file parsers ──────────────────────────────────────────────
549
550 def _parse_task_file(
551 self,
552 data,
553 file_path: str,
554 store: GraphStore,
555 stats: dict,
556 ) -> None:
557 """Parse a standalone task file (roles/*/tasks/main.yml or included file)."""
558 if not isinstance(data, list):
559 return
560
561 # Use file stem as a synthetic parent class
562 parent_name = Path(file_path).stem
563 store.create_node(
564 NodeLabel.Class,
565 {
566 "name": parent_name,
567 "file_path": file_path,
568 "line_start": 0,
569 "line_end": 0,
570 "docstring": "",
571 "semantic_type": "ansible_play",
572 },
573 )
574 store.create_edge(
575 NodeLabel.File,
576 {"path": file_path},
577 EdgeType.CONTAINS,
578 NodeLabel.Class,
579 {"name": parent_name, "file_path": file_path},
580 )
581 stats["classes"] += 1
582 stats["edges"] += 1
583
584 for task_dict in data:
585 if isinstance(task_dict, dict):
586 self._parse_task(task_dict, file_path, parent_name, store, stats)
587
588 def _parse_handler_file(
589 self,
590 data,
591 file_path: str,
592 store: GraphStore,
593 stats: dict,
594 ) -> None:
595 """Parse a standalone handler file (roles/*/handlers/main.yml)."""
596 if not isinstance(data, list):
597 return
598
599 parent_name = Path(file_path).stem
600 store.create_node(
601 NodeLabel.Class,
602 {
603 "name": parent_name,
604 "file_path": file_path,
605 "line_start": 0,
606 "line_end": 0,
607 "docstring": "",
608 "semantic_type": "ansible_play",
609 },
610 )
611 store.create_edge(
612 NodeLabel.File,
613 {"path": file_path},
614 EdgeType.CONTAINS,
615 NodeLabel.Class,
616 {"name": parent_name, "file_path": file
--- a/navegador/ingestion/bash.py
+++ b/navegador/ingestion/bash.py
@@ -0,0 +1,263 @@
1
+"""
2
+Bash/Shell script parser — extracts functions, top-level variables,
3
+source/. imports, and call edges from .sh/.bash files using tree-sitter.
4
+"""
5
+
6
+import logging
7
+from pathlib import Path
8
+
9
+from navegador.graph.schema import EdgeType, NodeLabel
10
+from navegador.graph.store import GraphStore
11
+from navegador.ingestion.parser import LanguageParser
12
+
13
+logger = logging.getLogger(__name__)
14
+
15
+
16
+def _get_bash_language():
17
+ try:
18
+ import tree_sitter_bash as tsbash # type: ignore[import]
19
+ from tree_sitter import Language
20
+
21
+ return Language(tsbash.language())
22
+ except ImportError as e:
23
+ raise ImportError("Install tree-sitter-bash: pip install tree-sitter-bash") from e
24
+
25
+
26
+def _node_text(node, source: bytes) -> str:
27
+ return source[node.start_byte : node.end_byte].decode("utf-8", errors="replace")
28
+
29
+
30
+class BashParser(LanguageParser):
31
+ """Parses Bash/Shell script files into the navegador graph."""
32
+
33
+ def __init__(self) -> None:
34
+ from tree_sitter import Parser # type: ignore[import]
35
+
36
+ self._parser = Parser(_get_bash_language())
37
+
38
+ def parse_file(self, path: Path, repo_root: Path, store: GraphStore) -> dict[str, int]:
39
+ source = path.read_bytes()
40
+ tree = self._parser.parse(source)
41
+ rel_path = str(path.relative_to(repo_root))
42
+
43
+ store.create_node(
44
+ NodeLabel.File,
45
+ {
46
+ "name": path.name,
47
+ "path": rel_path,
48
+ "language": "bash",
49
+ "line_count": source.count(b"\n"),
50
+ },
51
+ )
52
+
53
+ stats = {"functions": 0, "classes": 0, "edges": 0}
54
+ self._walk(tree.root_node, source, rel_path, store, stats)
55
+ return stats
56
+
57
+ # ── AST walker ────────────────────────────────────────────────────────────
58
+
59
+ def _walk(self, node, source: bytes, file_path: str, store: GraphStore, stats: dict) -> None:
60
+ if node.type == "function_definition":
61
+ self._handle_function(node, source, file_path, store, stats)
62
+ return
63
+ if node.type == "variable_assignment":
64
+ self._handle_variable(node, source, file_path, store, stats)
65
+ return
66
+ if node.type == "command":
67
+ self._handle_command(node, source, file_path, store, stats)
68
+ return
69
+ for child in node.children:
70
+ self._walk(child, source, file_path, store, stats)
71
+
72
+ # ── Handlers ──────────────────────────────────────────────────────────────
73
+
74
+ def _handle_function(
75
+ self,
76
+ node,
77
+ source: bytes,
78
+ file_path: str,
79
+ store: GraphStore,
80
+ stats: dict,
81
+ ) -> None:
82
+ name_node = node.child_by_field_name("name")
83
+ if not name_node:
84
+ return
85
+ name = _node_text(name_node, source)
86
+
87
+ store.create_node(
88
+ NodeLabel.Function,
89
+ {
90
+ "name": name,
91
+ "file_path": file_path,
92
+ "line_start": node.start_point[0] + 1,
93
+ "line_end": node.end_point[0] + 1,
94
+ "docstring": "",
95
+ "semantic_type": "shell_function",
96
+ },
97
+ )
98
+
99
+ store.create_edge(
100
+ NodeLabel.File,
101
+ {"path": file_path},
102
+ EdgeType.CONTAINS,
103
+ NodeLabel.Function,
104
+ {"name": name, "file_path": file_path},
105
+ )
106
+ stats["functions"] += 1
107
+ stats["edges"] += 1
108
+
109
+ self._extract_calls(node, source, file_path, name, store, stats)
110
+
111
+ def _handle_variable(
112
+ self,
113
+ node,
114
+ source: bytes,
115
+ file_path: str,
116
+ store: GraphStore,
117
+ stats: dict,
118
+ ) -> None:
119
+ # Only track top-level variable assignments (parent is program)
120
+ if node.parent is None or node.parent.type not in ("program", "source_file"):
121
+ return
122
+
123
+ name_node = node.child_by_field_name("name")
124
+ if not name_node:
125
+ return
126
+ name = _node_text(name_node, source)
127
+
128
+ value_node = node.child_by_field_name("value")
129
+ value = _node_text(value_node, source) if value_node else ""
130
+
131
+ store.create_node(
132
+ NodeLabel.Variable,
133
+ {
134
+ "name": name,
135
+ "file_path": file_path,
136
+ "line_start": node.start_point[0] + 1,
137
+ "line_end": node.end_point[0] + 1,
138
+ "semantic_type": "shell_variable",
139
+ "value": value,
140
+ },
141
+ )
142
+
143
+ store.create_edge(
144
+ NodeLabel.File,
145
+ {"path": file_path},
146
+ EdgeType.CONTAINS,
147
+ NodeLabel.Variable,
148
+ {"name": name, "file_path": file_path},
149
+ )
150
+ stats["edges"] += 1
151
+
152
+ def _handle_command(
153
+ self,
154
+ node,
155
+ source: bytes,
156
+ file_path: str,
157
+ store: GraphStore,
158
+ stats: dict,
159
+ ) -> None:
160
+ """Handle source/. commands as imports."""
161
+ name_node = node.child_by_field_name("name")
162
+ if not name_node:
163
+ return
164
+ cmd_name = _node_text(name_node, source)
165
+
166
+ # Only handle source and . (dot-source) commands
167
+ if cmd_name not in ("source", "."):
168
+ return
169
+
170
+ # The sourced file path is the first argument
171
+ arg_types = ("word", "string", "raw_string", "concatenation")
172
+ args = [child for child in node.children if child != name_node and child.type in arg_types]
173
+ if not args:
174
+ return
175
+ sourced_path = _node_text(args[0], source).strip("'\"")
176
+
177
+ store.create_node(
178
+ NodeLabel.Import,
179
+ {
180
+ "name": sourced_path,
181
+ "file_path": file_path,
182
+ "line_start": node.start_point[0] + 1,
183
+ "module": sourced_path,
184
+ "semantic_type": "shell_source",
185
+ },
186
+ )
187
+
188
+ store.create_edge(
189
+ NodeLabel.File,
190
+ {"path": file_path},
191
+ EdgeType.IMPORTS,
192
+ NodeLabel.Import,
193
+ {"name": sourced_path, "file_path": file_path},
194
+ )
195
+ stats["edges"] += 1
196
+
197
+ # ── Call extraction ───────────────────────────────────────────────────────
198
+
199
+ def _extract_calls(
200
+ self,
201
+ fn_node,
202
+ source: bytes,
203
+ file_path: str,
204
+ fn_name: str,
205
+ store: GraphStore,
206
+ stats: dict,
207
+ ) -> None:
208
+ def walk(node):
209
+ if node.type == "command":
210
+ name_node = node.child_by_field_name("name")
211
+ if name_node:
212
+ callee = _node_text(name_node, source)
213
+ # Skip builtins and source commands — only track function calls
214
+ if callee not in (
215
+ "source",
216
+ ".",
217
+ "echo",
218
+ "printf",
219
+ "cd",
220
+ "exit",
221
+ "return",
222
+ "export",
223
+ "local",
224
+ "readonly",
225
+ "declare",
226
+ "typeset",
227
+ "unset",
228
+ "shift",
229
+ "set",
230
+ "eval",
231
+ "exec",
232
+ "test",
233
+ "[",
234
+ "[[",
235
+ "true",
236
+ "false",
237
+ ":",
238
+ "read",
239
+ "if",
240
+ "then",
241
+ "else",
242
+ "fi",
243
+ "for",
244
+ "while",
245
+ "do",
246
+ "done",
247
+ "case",
248
+ "esac",
249
+ ):
250
+ store.create_edge(
251
+ NodeLabel.Function,
252
+ {"name": fn_name, "file_path": file_path},
253
+ EdgeType.CALLS,
254
+ NodeLabel.Function,
255
+ {"name": callee, "file_path": file_path},
256
+ )
257
+ stats["edges"] += 1
258
+ for child in node.children:
259
+ walk(child)
260
+
261
+ body = fn_node.child_by_field_name("body")
262
+ if body:
263
+ walk(body)
--- a/navegador/ingestion/bash.py
+++ b/navegador/ingestion/bash.py
@@ -0,0 +1,263 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
--- a/navegador/ingestion/bash.py
+++ b/navegador/ingestion/bash.py
@@ -0,0 +1,263 @@
1 """
2 Bash/Shell script parser — extracts functions, top-level variables,
3 source/. imports, and call edges from .sh/.bash files using tree-sitter.
4 """
5
6 import logging
7 from pathlib import Path
8
9 from navegador.graph.schema import EdgeType, NodeLabel
10 from navegador.graph.store import GraphStore
11 from navegador.ingestion.parser import LanguageParser
12
13 logger = logging.getLogger(__name__)
14
15
16 def _get_bash_language():
17 try:
18 import tree_sitter_bash as tsbash # type: ignore[import]
19 from tree_sitter import Language
20
21 return Language(tsbash.language())
22 except ImportError as e:
23 raise ImportError("Install tree-sitter-bash: pip install tree-sitter-bash") from e
24
25
26 def _node_text(node, source: bytes) -> str:
27 return source[node.start_byte : node.end_byte].decode("utf-8", errors="replace")
28
29
class BashParser(LanguageParser):
    """Parses Bash/Shell script files into the navegador graph.

    Produces one File node per script, Function nodes for function
    definitions, Variable nodes for top-level assignments, Import nodes
    for ``source``/``.`` commands, and CALLS edges from each function to
    the commands invoked in its body.
    """

    # Shell builtins / keywords that must never become CALLS edges.  Kept as
    # a class-level frozenset so membership tests are O(1) and the collection
    # is built once — the original rebuilt a tuple literal and scanned it
    # linearly for every command node encountered during call extraction.
    _SKIP_COMMANDS = frozenset({
        "source", ".", "echo", "printf", "cd", "exit", "return",
        "export", "local", "readonly", "declare", "typeset", "unset",
        "shift", "set", "eval", "exec", "test", "[", "[[",
        "true", "false", ":", "read",
        "if", "then", "else", "fi", "for", "while", "do", "done",
        "case", "esac",
    })

    def __init__(self) -> None:
        from tree_sitter import Parser  # type: ignore[import]

        self._parser = Parser(_get_bash_language())

    def parse_file(self, path: Path, repo_root: Path, store: GraphStore) -> dict[str, int]:
        """Parse one shell script into the graph.

        Returns a stats dict with ``functions``, ``classes`` (always 0 for
        shell) and ``edges`` counts.
        """
        source = path.read_bytes()
        tree = self._parser.parse(source)
        rel_path = str(path.relative_to(repo_root))

        store.create_node(
            NodeLabel.File,
            {
                "name": path.name,
                "path": rel_path,
                "language": "bash",
                "line_count": source.count(b"\n"),
            },
        )

        stats = {"functions": 0, "classes": 0, "edges": 0}
        self._walk(tree.root_node, source, rel_path, store, stats)
        return stats

    # ── AST walker ────────────────────────────────────────────────────────────

    def _walk(self, node, source: bytes, file_path: str, store: GraphStore, stats: dict) -> None:
        """Depth-first dispatch; handled node types are not descended into."""
        if node.type == "function_definition":
            self._handle_function(node, source, file_path, store, stats)
            return
        if node.type == "variable_assignment":
            self._handle_variable(node, source, file_path, store, stats)
            return
        if node.type == "command":
            self._handle_command(node, source, file_path, store, stats)
            return
        for child in node.children:
            self._walk(child, source, file_path, store, stats)

    # ── Handlers ──────────────────────────────────────────────────────────────

    def _handle_function(
        self,
        node,
        source: bytes,
        file_path: str,
        store: GraphStore,
        stats: dict,
    ) -> None:
        """Create a Function node plus CONTAINS edge, then extract its calls."""
        name_node = node.child_by_field_name("name")
        if not name_node:
            return
        name = _node_text(name_node, source)

        store.create_node(
            NodeLabel.Function,
            {
                "name": name,
                "file_path": file_path,
                "line_start": node.start_point[0] + 1,  # tree-sitter rows are 0-based
                "line_end": node.end_point[0] + 1,
                "docstring": "",
                "semantic_type": "shell_function",
            },
        )

        store.create_edge(
            NodeLabel.File,
            {"path": file_path},
            EdgeType.CONTAINS,
            NodeLabel.Function,
            {"name": name, "file_path": file_path},
        )
        stats["functions"] += 1
        stats["edges"] += 1

        self._extract_calls(node, source, file_path, name, store, stats)

    def _handle_variable(
        self,
        node,
        source: bytes,
        file_path: str,
        store: GraphStore,
        stats: dict,
    ) -> None:
        """Create a Variable node for a script-level assignment."""
        # Only track top-level variable assignments (parent is program)
        if node.parent is None or node.parent.type not in ("program", "source_file"):
            return

        name_node = node.child_by_field_name("name")
        if not name_node:
            return
        name = _node_text(name_node, source)

        value_node = node.child_by_field_name("value")
        value = _node_text(value_node, source) if value_node else ""

        store.create_node(
            NodeLabel.Variable,
            {
                "name": name,
                "file_path": file_path,
                "line_start": node.start_point[0] + 1,
                "line_end": node.end_point[0] + 1,
                "semantic_type": "shell_variable",
                "value": value,
            },
        )

        store.create_edge(
            NodeLabel.File,
            {"path": file_path},
            EdgeType.CONTAINS,
            NodeLabel.Variable,
            {"name": name, "file_path": file_path},
        )
        stats["edges"] += 1

    def _handle_command(
        self,
        node,
        source: bytes,
        file_path: str,
        store: GraphStore,
        stats: dict,
    ) -> None:
        """Handle source/. commands as imports."""
        name_node = node.child_by_field_name("name")
        if not name_node:
            return
        cmd_name = _node_text(name_node, source)

        # Only handle source and . (dot-source) commands
        if cmd_name not in ("source", "."):
            return

        # The sourced file path is the first argument
        arg_types = ("word", "string", "raw_string", "concatenation")
        args = [child for child in node.children if child != name_node and child.type in arg_types]
        if not args:
            return
        sourced_path = _node_text(args[0], source).strip("'\"")

        store.create_node(
            NodeLabel.Import,
            {
                "name": sourced_path,
                "file_path": file_path,
                "line_start": node.start_point[0] + 1,
                "module": sourced_path,
                "semantic_type": "shell_source",
            },
        )

        store.create_edge(
            NodeLabel.File,
            {"path": file_path},
            EdgeType.IMPORTS,
            NodeLabel.Import,
            {"name": sourced_path, "file_path": file_path},
        )
        stats["edges"] += 1

    # ── Call extraction ───────────────────────────────────────────────────────

    def _extract_calls(
        self,
        fn_node,
        source: bytes,
        file_path: str,
        fn_name: str,
        store: GraphStore,
        stats: dict,
    ) -> None:
        """Emit a CALLS edge for every non-builtin command in the function body.

        NOTE(review): commands inside nested function definitions are
        attributed to the outer function — acceptable for a heuristic call
        graph, flagged here for visibility.
        """
        skip = self._SKIP_COMMANDS

        def walk(node):
            if node.type == "command":
                name_node = node.child_by_field_name("name")
                if name_node:
                    callee = _node_text(name_node, source)
                    if callee not in skip:
                        store.create_edge(
                            NodeLabel.Function,
                            {"name": fn_name, "file_path": file_path},
                            EdgeType.CALLS,
                            NodeLabel.Function,
                            {"name": callee, "file_path": file_path},
                        )
                        stats["edges"] += 1
            for child in node.children:
                walk(child)

        body = fn_node.child_by_field_name("body")
        if body:
            walk(body)
--- a/navegador/ingestion/hcl.py
+++ b/navegador/ingestion/hcl.py
@@ -0,0 +1,490 @@
1
+"""
2
+HCL/Terraform parser — extracts resources, data sources, providers,
3
+variables, outputs, modules, and locals from .tf files using tree-sitter.
4
+"""
5
+
6
+import logging
7
+import re
8
+from pathlib import Path
9
+
10
+from navegador.graph.schema import EdgeType, NodeLabel
11
+from navegador.graph.store import GraphStore
12
+from navegador.ingestion.parser import LanguageParser
13
+
14
+logger = logging.getLogger(__name__)
15
+
16
+# Patterns for reference extraction from expression text
17
+_VAR_REF = re.compile(r"\bvar\.(\w+)")
18
+_LOCAL_REF = re.compile(r"\blocal\.(\w+)")
19
+_MODULE_REF = re.compile(r"\bmodule\.(\w+)")
20
+_DATA_REF = re.compile(r"\bdata\.(\w+)\.(\w+)")
21
+_RESOURCE_REF = re.compile(
22
+ r"(?<!\bdata\.)" # exclude data.resource_type references (handled by _DATA_REF)
23
+ r"\b(aws_\w+|google_\w+|azurerm_\w+|azuread_\w+|oci_\w+|digitalocean_\w+"
24
+ r"|cloudflare_\w+|helm_\w+|kubernetes_\w+|null_\w+|random_\w+"
25
+ r"|local_\w+|tls_\w+|template_\w+|archive_\w+|external_\w+)\.(\w+)"
26
+)
27
+
28
+
29
+def _get_hcl_language():
30
+ try:
31
+ import tree_sitter_hcl as tshcl # type: ignore[import]
32
+ from tree_sitter import Language
33
+
34
+ return Language(tshcl.language())
35
+ except ImportError as e:
36
+ raise ImportError("Install tree-sitter-hcl: pip install tree-sitter-hcl") from e
37
+
38
+
39
+def _node_text(node, source: bytes) -> str:
40
+ return source[node.start_byte : node.end_byte].decode("utf-8", errors="replace")
41
+
42
+
43
+def _string_lit_text(node, source: bytes) -> str:
44
+ """Extract the inner text from a string_lit node (strips quotes)."""
45
+ for child in node.children:
46
+ if child.type == "template_literal":
47
+ return _node_text(child, source)
48
+ # Fallback: strip surrounding quotes from the full text
49
+ text = _node_text(node, source)
50
+ return text.strip('"').strip("'")
51
+
52
+
53
+class HCLParser(LanguageParser):
54
+ """Parses HCL/Terraform files into the navegador graph."""
55
+
56
+ def __init__(self) -> None:
57
+ from tree_sitter import Parser # type: ignore[import]
58
+
59
+ self._parser = Parser(_get_hcl_language())
60
+
61
+ def parse_file(self, path: Path, repo_root: Path, store: GraphStore) -> dict[str, int]:
62
+ source = path.read_bytes()
63
+ tree = self._parser.parse(source)
64
+ rel_path = str(path.relative_to(repo_root))
65
+
66
+ store.create_node(
67
+ NodeLabel.File,
68
+ {
69
+ "name": path.name,
70
+ "path": rel_path,
71
+ "language": "hcl",
72
+ "line_count": source.count(b"\n"),
73
+ },
74
+ )
75
+
76
+ stats = {"functions": 0, "classes": 0, "edges": 0}
77
+ self._walk(tree.root_node, source, rel_path, store, stats)
78
+ return stats
79
+
80
+ # ── AST walker ────────────────────────────────────────────────────────────
81
+
82
+ def _walk(self, node, source: bytes, file_path: str, store: GraphStore, stats: dict) -> None:
83
+ """Walk the top-level body looking for block nodes."""
84
+ for child in node.children:
85
+ if child.type == "body":
86
+ for body_child in child.children:
87
+ if body_child.type == "block":
88
+ self._handle_block(body_child, source, file_path, store, stats)
89
+ elif child.type == "block":
90
+ self._handle_block(child, source, file_path, store, stats)
91
+
92
+ def _handle_block(
93
+ self, node, source: bytes, file_path: str, store: GraphStore, stats: dict
94
+ ) -> None:
95
+ """Dispatch a block based on its block-type identifier."""
96
+ block_type = None
97
+ labels: list[str] = []
98
+ body_node = None
99
+
100
+ for child in node.children:
101
+ if child.type == "identifier" and block_type is None:
102
+ block_type = _node_text(child, source)
103
+ elif child.type == "string_lit":
104
+ labels.append(_string_lit_text(child, source))
105
+ elif child.type == "body":
106
+ body_node = child
107
+
108
+ if not block_type:
109
+ return
110
+
111
+ if block_type == "resource" and len(labels) >= 2:
112
+ self._handle_resource(node, source, file_path, store, stats, labels, body_node)
113
+ elif block_type == "data" and len(labels) >= 2:
114
+ self._handle_data(node, source, file_path, store, stats, labels, body_node)
115
+ elif block_type == "provider" and len(labels) >= 1:
116
+ self._handle_provider(node, source, file_path, store, stats, labels, body_node)
117
+ elif block_type == "variable" and len(labels) >= 1:
118
+ self._handle_variable(node, source, file_path, store, stats, labels, body_node)
119
+ elif block_type == "output" and len(labels) >= 1:
120
+ self._handle_output(node, source, file_path, store, stats, labels, body_node)
121
+ elif block_type == "module" and len(labels) >= 1:
122
+ self._handle_module(node, source, file_path, store, stats, labels, body_node)
123
+ elif block_type == "locals":
124
+ self._handle_locals(node, source, file_path, store, stats, body_node)
125
+ elif block_type == "terraform":
126
+ pass # Configuration block, skip
127
+ else:
128
+ logger.debug("Skipping unknown HCL block type: %s", block_type)
129
+
130
+ # ── Handlers ──────────────────────────────────────────────────────────────
131
+
132
+ def _handle_resource(
133
+ self,
134
+ node,
135
+ source: bytes,
136
+ file_path: str,
137
+ store: GraphStore,
138
+ stats: dict,
139
+ labels: list[str],
140
+ body_node,
141
+ ) -> None:
142
+ name = f"{labels[0]}.{labels[1]}"
143
+ store.create_node(
144
+ NodeLabel.Class,
145
+ {
146
+ "name": name,
147
+ "file_path": file_path,
148
+ "line_start": node.start_point[0] + 1,
149
+ "line_end": node.end_point[0] + 1,
150
+ "docstring": "",
151
+ "semantic_type": "terraform_resource",
152
+ },
153
+ )
154
+ store.create_edge(
155
+ NodeLabel.File,
156
+ {"path": file_path},
157
+ EdgeType.CONTAINS,
158
+ NodeLabel.Class,
159
+ {"name": name, "file_path": file_path},
160
+ )
161
+ stats["classes"] += 1
162
+ stats["edges"] += 1
163
+
164
+ if body_node:
165
+ self._extract_references(
166
+ body_node, source, file_path, name, NodeLabel.Class, store, stats
167
+ )
168
+
169
+ def _handle_data(
170
+ self,
171
+ node,
172
+ source: bytes,
173
+ file_path: str,
174
+ store: GraphStore,
175
+ stats: dict,
176
+ labels: list[str],
177
+ body_node,
178
+ ) -> None:
179
+ name = f"{labels[0]}.{labels[1]}"
180
+ store.create_node(
181
+ NodeLabel.Class,
182
+ {
183
+ "name": name,
184
+ "file_path": file_path,
185
+ "line_start": node.start_point[0] + 1,
186
+ "line_end": node.end_point[0] + 1,
187
+ "docstring": "",
188
+ "semantic_type": "terraform_data",
189
+ },
190
+ )
191
+ store.create_edge(
192
+ NodeLabel.File,
193
+ {"path": file_path},
194
+ EdgeType.CONTAINS,
195
+ NodeLabel.Class,
196
+ {"name": name, "file_path": file_path},
197
+ )
198
+ stats["classes"] += 1
199
+ stats["edges"] += 1
200
+
201
+ if body_node:
202
+ self._extract_references(
203
+ body_node, source, file_path, name, NodeLabel.Class, store, stats
204
+ )
205
+
206
+ def _handle_provider(
207
+ self,
208
+ node,
209
+ source: bytes,
210
+ file_path: str,
211
+ store: GraphStore,
212
+ stats: dict,
213
+ labels: list[str],
214
+ body_node,
215
+ ) -> None:
216
+ name = labels[0]
217
+ store.create_node(
218
+ NodeLabel.Class,
219
+ {
220
+ "name": name,
221
+ "file_path": file_path,
222
+ "line_start": node.start_point[0] + 1,
223
+ "line_end": node.end_point[0] + 1,
224
+ "docstring": "",
225
+ "semantic_type": "terraform_provider",
226
+ },
227
+ )
228
+ store.create_edge(
229
+ NodeLabel.File,
230
+ {"path": file_path},
231
+ EdgeType.CONTAINS,
232
+ NodeLabel.Class,
233
+ {"name": name, "file_path": file_path},
234
+ )
235
+ stats["classes"] += 1
236
+ stats["edges"] += 1
237
+
238
+ if body_node:
239
+ self._extract_references(
240
+ body_node, source, file_path, name, NodeLabel.Class, store, stats
241
+ )
242
+
243
+ def _handle_variable(
244
+ self,
245
+ node,
246
+ source: bytes,
247
+ file_path: str,
248
+ store: GraphStore,
249
+ stats: dict,
250
+ labels: list[str],
251
+ body_node,
252
+ ) -> None:
253
+ name = labels[0]
254
+ store.create_node(
255
+ NodeLabel.Variable,
256
+ {
257
+ "name": name,
258
+ "file_path": file_path,
259
+ "line_start": node.start_point[0] + 1,
260
+ "line_end": node.end_point[0] + 1,
261
+ "semantic_type": "terraform_variable",
262
+ },
263
+ )
264
+ store.create_edge(
265
+ NodeLabel.File,
266
+ {"path": file_path},
267
+ EdgeType.CONTAINS,
268
+ NodeLabel.Variable,
269
+ {"name": name, "file_path": file_path},
270
+ )
271
+ stats["functions"] += 1
272
+ stats["edges"] += 1
273
+
274
+ def _handle_output(
275
+ self,
276
+ node,
277
+ source: bytes,
278
+ file_path: str,
279
+ store: GraphStore,
280
+ stats: dict,
281
+ labels: list[str],
282
+ body_node,
283
+ ) -> None:
284
+ name = labels[0]
285
+ store.create_node(
286
+ NodeLabel.Variable,
287
+ {
288
+ "name": name,
289
+ "file_path": file_path,
290
+ "line_start": node.start_point[0] + 1,
291
+ "line_end": node.end_point[0] + 1,
292
+ "semantic_type": "terraform_output",
293
+ },
294
+ )
295
+ store.create_edge(
296
+ NodeLabel.File,
297
+ {"path": file_path},
298
+ EdgeType.CONTAINS,
299
+ NodeLabel.Variable,
300
+ {"name": name, "file_path": file_path},
301
+ )
302
+ stats["functions"] += 1
303
+ stats["edges"] += 1
304
+
305
+ if body_node:
306
+ self._extract_references(
307
+ body_node, source, file_path, name, NodeLabel.Variable, store, stats
308
+ )
309
+
310
+ def _handle_module(
311
+ self,
312
+ node,
313
+ source: bytes,
314
+ file_path: str,
315
+ store: GraphStore,
316
+ stats: dict,
317
+ labels: list[str],
318
+ body_node,
319
+ ) -> None:
320
+ name = labels[0]
321
+ source_attr = ""
322
+ if body_node:
323
+ source_attr = self._get_attribute_value(body_node, "source", source)
324
+
325
+ store.create_node(
326
+ NodeLabel.Module,
327
+ {
328
+ "name": name,
329
+ "file_path": file_path,
330
+ "line_start": node.start_point[0] + 1,
331
+ "line_end": node.end_point[0] + 1,
332
+ "semantic_type": "terraform_module",
333
+ "source": source_attr,
334
+ },
335
+ )
336
+ store.create_edge(
337
+ NodeLabel.File,
338
+ {"path": file_path},
339
+ EdgeType.CONTAINS,
340
+ NodeLabel.Module,
341
+ {"name": name, "file_path": file_path},
342
+ )
343
+ stats["classes"] += 1
344
+ stats["edges"] += 1
345
+
346
+ if body_node:
347
+ self._extract_references(
348
+ body_node, source, file_path, name, NodeLabel.Module, store, stats
349
+ )
350
+
351
+ def _handle_locals(
352
+ self,
353
+ node,
354
+ source: bytes,
355
+ file_path: str,
356
+ store: GraphStore,
357
+ stats: dict,
358
+ body_node,
359
+ ) -> None:
360
+ if not body_node:
361
+ return
362
+
363
+ for child in body_node.children:
364
+ if child.type == "attribute":
365
+ attr_name = None
366
+ for attr_child in child.children:
367
+ if attr_child.type == "identifier":
368
+ attr_name = _node_text(attr_child, source)
369
+ break
370
+
371
+ if not attr_name:
372
+ continue
373
+
374
+ store.create_node(
375
+ NodeLabel.Variable,
376
+ {
377
+ "name": attr_name,
378
+ "file_path": file_path,
379
+ "line_start": child.start_point[0] + 1,
380
+ "line_end": child.end_point[0] + 1,
381
+ "semantic_type": "terraform_local",
382
+ },
383
+ )
384
+ store.create_edge(
385
+ NodeLabel.File,
386
+ {"path": file_path},
387
+ EdgeType.CONTAINS,
388
+ NodeLabel.Variable,
389
+ {"name": attr_name, "file_path": file_path},
390
+ )
391
+ stats["functions"] += 1
392
+ stats["edges"] += 1
393
+
394
+ # Extract references from the attribute expression
395
+ self._extract_references(
396
+ child, source, file_path, attr_name, NodeLabel.Variable, store, stats
397
+ )
398
+
399
+ # ── Reference extraction ──────────────────────────────────────────────────
400
+
401
+ def _extract_references(
402
+ self,
403
+ node,
404
+ source: bytes,
405
+ file_path: str,
406
+ from_name: str,
407
+ from_label: str,
408
+ store: GraphStore,
409
+ stats: dict,
410
+ ) -> None:
411
+ """Scan expression text for var.X, local.X, module.X, data.T.N, and resource references."""
412
+ text = _node_text(node, source)
413
+
414
+ # var.xxx → REFERENCES edge to terraform_variable
415
+ for match in _VAR_REF.finditer(text):
416
+ var_name = match.group(1)
417
+ store.create_edge(
418
+ from_label,
419
+ {"name": from_name, "file_path": file_path},
420
+ EdgeType.REFERENCES,
421
+ NodeLabel.Variable,
422
+ {"name": var_name, "file_path": file_path},
423
+ )
424
+ stats["edges"] += 1
425
+
426
+ # local.xxx → REFERENCES edge to terraform_local
427
+ for match in _LOCAL_REF.finditer(text):
428
+ local_name = match.group(1)
429
+ store.create_edge(
430
+ from_label,
431
+ {"name": from_name, "file_path": file_path},
432
+ EdgeType.REFERENCES,
433
+ NodeLabel.Variable,
434
+ {"name": local_name, "file_path": file_path},
435
+ )
436
+ stats["edges"] += 1
437
+
438
+ # module.xxx → REFERENCES edge to terraform_module
439
+ for match in _MODULE_REF.finditer(text):
440
+ mod_name = match.group(1)
441
+ store.create_edge(
442
+ from_label,
443
+ {"name": from_name, "file_path": file_path},
444
+ EdgeType.REFERENCES,
445
+ NodeLabel.Module,
446
+ {"name": mod_name, "file_path": file_path},
447
+ )
448
+ stats["edges"] += 1
449
+
450
+ # data.type.name → DEPENDS_ON edge to terraform_data
451
+ for match in _DATA_REF.finditer(text):
452
+ data_name = f"{match.group(1)}.{match.group(2)}"
453
+ store.create_edge(
454
+ from_label,
455
+ {"name": from_name, "file_path": file_path},
456
+ EdgeType.DEPENDS_ON,
457
+ NodeLabel.Class,
458
+ {"name": data_name, "file_path": file_path},
459
+ )
460
+ stats["edges"] += 1
461
+
462
+ # resource_type.resource_name → DEPENDS_ON edge to terraform_resource
463
+ for match in _RESOURCE_REF.finditer(text):
464
+ resource_name = f"{match.group(1)}.{match.group(2)}"
465
+ store.create_edge(
466
+ from_label,
467
+ {"name": from_name, "file_path": file_path},
468
+ EdgeType.DEPENDS_ON,
469
+ NodeLabel.Class,
470
+ {"name": resource_name, "file_path": file_path},
471
+ )
472
+ stats["edges"] += 1
473
+
474
+ # ── Helpers ───────────────────────────────────────────────────────────────
475
+
476
+ def _get_attribute_value(self, body_node, attr_name: str, source: bytes) -> str:
477
+ """Extract the string value of a named attribute from a body node."""
478
+ for child in body_node.children:
479
+ if child.type == "attribute":
480
+ ident = None
481
+ expr = None
482
+ for attr_child in child.children:
483
+ if attr_child.type == "identifier":
484
+ ident = _node_text(attr_child, source)
485
+ elif attr_child.type == "expression" or attr_child.is_named:
486
+ expr = attr_child
487
+ if ident == attr_name and expr is not None:
488
+ text = _node_text(expr, source).strip().strip('"').strip("'")
489
+ return text
490
+ return ""
--- a/navegador/ingestion/hcl.py
+++ b/navegador/ingestion/hcl.py
@@ -0,0 +1,490 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
--- a/navegador/ingestion/hcl.py
+++ b/navegador/ingestion/hcl.py
@@ -0,0 +1,490 @@
1 """
2 HCL/Terraform parser — extracts resources, data sources, providers,
3 variables, outputs, modules, and locals from .tf files using tree-sitter.
4 """
5
6 import logging
7 import re
8 from pathlib import Path
9
10 from navegador.graph.schema import EdgeType, NodeLabel
11 from navegador.graph.store import GraphStore
12 from navegador.ingestion.parser import LanguageParser
13
14 logger = logging.getLogger(__name__)
15
16 # Patterns for reference extraction from expression text
17 _VAR_REF = re.compile(r"\bvar\.(\w+)")
18 _LOCAL_REF = re.compile(r"\blocal\.(\w+)")
19 _MODULE_REF = re.compile(r"\bmodule\.(\w+)")
20 _DATA_REF = re.compile(r"\bdata\.(\w+)\.(\w+)")
21 _RESOURCE_REF = re.compile(
22 r"(?<!\bdata\.)" # exclude data.resource_type references (handled by _DATA_REF)
23 r"\b(aws_\w+|google_\w+|azurerm_\w+|azuread_\w+|oci_\w+|digitalocean_\w+"
24 r"|cloudflare_\w+|helm_\w+|kubernetes_\w+|null_\w+|random_\w+"
25 r"|local_\w+|tls_\w+|template_\w+|archive_\w+|external_\w+)\.(\w+)"
26 )
27
28
def _get_hcl_language():
    """Load and return the tree-sitter ``Language`` for the HCL grammar.

    Raises an ImportError with install instructions when the optional
    ``tree-sitter-hcl`` package is not available.
    """
    try:
        from tree_sitter import Language

        import tree_sitter_hcl as tshcl  # type: ignore[import]
    except ImportError as e:
        raise ImportError("Install tree-sitter-hcl: pip install tree-sitter-hcl") from e

    return Language(tshcl.language())
37
38
39 def _node_text(node, source: bytes) -> str:
40 return source[node.start_byte : node.end_byte].decode("utf-8", errors="replace")
41
42
43 def _string_lit_text(node, source: bytes) -> str:
44 """Extract the inner text from a string_lit node (strips quotes)."""
45 for child in node.children:
46 if child.type == "template_literal":
47 return _node_text(child, source)
48 # Fallback: strip surrounding quotes from the full text
49 text = _node_text(node, source)
50 return text.strip('"').strip("'")
51
52
class HCLParser(LanguageParser):
    """Parses HCL/Terraform files into the navegador graph.

    Terraform blocks are mapped onto the generic code-graph schema:

    * ``resource`` / ``data`` / ``provider``  -> Class nodes
    * ``variable`` / ``output`` / ``locals``  -> Variable nodes
    * ``module``                              -> Module nodes

    ``var.X`` / ``local.X`` / ``module.X`` occurrences in block bodies become
    REFERENCES edges; ``data.T.N`` and ``<provider>_<type>.<name>``
    occurrences become DEPENDS_ON edges.
    """

    def __init__(self) -> None:
        from tree_sitter import Parser  # type: ignore[import]

        self._parser = Parser(_get_hcl_language())

    def parse_file(self, path: Path, repo_root: Path, store: GraphStore) -> dict[str, int]:
        """Parse one ``.tf``/``.hcl`` file and ingest its blocks into *store*.

        Returns a stats dict with ``functions``, ``classes`` and ``edges``
        counts (variables/outputs/locals are tallied under ``functions``,
        resources/data/providers/modules under ``classes``).
        """
        source = path.read_bytes()
        tree = self._parser.parse(source)
        rel_path = str(path.relative_to(repo_root))

        store.create_node(
            NodeLabel.File,
            {
                "name": path.name,
                "path": rel_path,
                "language": "hcl",
                "line_count": source.count(b"\n"),
            },
        )

        stats = {"functions": 0, "classes": 0, "edges": 0}
        self._walk(tree.root_node, source, rel_path, store, stats)
        return stats

    # ── AST walker ────────────────────────────────────────────────────────────

    def _walk(self, node, source: bytes, file_path: str, store: GraphStore, stats: dict) -> None:
        """Walk the top-level body looking for block nodes."""
        for child in node.children:
            if child.type == "body":
                for body_child in child.children:
                    if body_child.type == "block":
                        self._handle_block(body_child, source, file_path, store, stats)
            elif child.type == "block":
                self._handle_block(child, source, file_path, store, stats)

    def _handle_block(
        self, node, source: bytes, file_path: str, store: GraphStore, stats: dict
    ) -> None:
        """Dispatch a block based on its block-type identifier and labels."""
        block_type = None
        labels: list[str] = []
        body_node = None

        for child in node.children:
            if child.type == "identifier" and block_type is None:
                block_type = _node_text(child, source)
            elif child.type == "string_lit":
                labels.append(_string_lit_text(child, source))
            elif child.type == "body":
                body_node = child

        if not block_type:
            return

        if block_type == "resource" and len(labels) >= 2:
            self._handle_resource(node, source, file_path, store, stats, labels, body_node)
        elif block_type == "data" and len(labels) >= 2:
            self._handle_data(node, source, file_path, store, stats, labels, body_node)
        elif block_type == "provider" and len(labels) >= 1:
            self._handle_provider(node, source, file_path, store, stats, labels, body_node)
        elif block_type == "variable" and len(labels) >= 1:
            self._handle_variable(node, source, file_path, store, stats, labels, body_node)
        elif block_type == "output" and len(labels) >= 1:
            self._handle_output(node, source, file_path, store, stats, labels, body_node)
        elif block_type == "module" and len(labels) >= 1:
            self._handle_module(node, source, file_path, store, stats, labels, body_node)
        elif block_type == "locals":
            self._handle_locals(node, source, file_path, store, stats, body_node)
        elif block_type == "terraform":
            pass  # Configuration block, skip
        else:
            logger.debug("Skipping unknown HCL block type: %s", block_type)

    # ── Shared node/edge emission ─────────────────────────────────────────────

    def _ingest_named_block(
        self,
        node,
        file_path: str,
        store: GraphStore,
        stats: dict,
        *,
        name: str,
        label,
        semantic_type: str,
        stat_key: str,
        with_docstring: bool = False,
        extra: dict | None = None,
    ) -> None:
        """Create a graph node plus a File CONTAINS edge for one named block.

        Factors out the boilerplate previously duplicated across the
        resource/data/provider/variable/output/module/locals handlers.

        Parameters
        ----------
        name / label / semantic_type:
            Node identity, graph label, and ``semantic_type`` property.
        stat_key:
            Which stats counter to bump (``"classes"`` or ``"functions"``).
        with_docstring:
            Include an empty ``docstring`` property (Class-labelled nodes).
        extra:
            Additional node properties (e.g. a module's ``source``).
        """
        props: dict = {
            "name": name,
            "file_path": file_path,
            "line_start": node.start_point[0] + 1,
            "line_end": node.end_point[0] + 1,
        }
        if with_docstring:
            props["docstring"] = ""
        props["semantic_type"] = semantic_type
        if extra:
            props.update(extra)

        store.create_node(label, props)
        store.create_edge(
            NodeLabel.File,
            {"path": file_path},
            EdgeType.CONTAINS,
            label,
            {"name": name, "file_path": file_path},
        )
        stats[stat_key] += 1
        stats["edges"] += 1

    # ── Handlers ──────────────────────────────────────────────────────────────

    def _handle_resource(
        self,
        node,
        source: bytes,
        file_path: str,
        store: GraphStore,
        stats: dict,
        labels: list[str],
        body_node,
    ) -> None:
        """Ingest a ``resource "TYPE" "NAME"`` block as a Class node."""
        name = f"{labels[0]}.{labels[1]}"
        self._ingest_named_block(
            node, file_path, store, stats,
            name=name,
            label=NodeLabel.Class,
            semantic_type="terraform_resource",
            stat_key="classes",
            with_docstring=True,
        )
        if body_node:
            self._extract_references(
                body_node, source, file_path, name, NodeLabel.Class, store, stats
            )

    def _handle_data(
        self,
        node,
        source: bytes,
        file_path: str,
        store: GraphStore,
        stats: dict,
        labels: list[str],
        body_node,
    ) -> None:
        """Ingest a ``data "TYPE" "NAME"`` block as a Class node."""
        name = f"{labels[0]}.{labels[1]}"
        self._ingest_named_block(
            node, file_path, store, stats,
            name=name,
            label=NodeLabel.Class,
            semantic_type="terraform_data",
            stat_key="classes",
            with_docstring=True,
        )
        if body_node:
            self._extract_references(
                body_node, source, file_path, name, NodeLabel.Class, store, stats
            )

    def _handle_provider(
        self,
        node,
        source: bytes,
        file_path: str,
        store: GraphStore,
        stats: dict,
        labels: list[str],
        body_node,
    ) -> None:
        """Ingest a ``provider "NAME"`` block as a Class node."""
        name = labels[0]
        self._ingest_named_block(
            node, file_path, store, stats,
            name=name,
            label=NodeLabel.Class,
            semantic_type="terraform_provider",
            stat_key="classes",
            with_docstring=True,
        )
        if body_node:
            self._extract_references(
                body_node, source, file_path, name, NodeLabel.Class, store, stats
            )

    def _handle_variable(
        self,
        node,
        source: bytes,
        file_path: str,
        store: GraphStore,
        stats: dict,
        labels: list[str],
        body_node,
    ) -> None:
        """Ingest a ``variable "NAME"`` block as a Variable node.

        NOTE: the body (type/default/description) is intentionally not
        scanned for references, preserving the original behavior.
        """
        name = labels[0]
        self._ingest_named_block(
            node, file_path, store, stats,
            name=name,
            label=NodeLabel.Variable,
            semantic_type="terraform_variable",
            stat_key="functions",
        )

    def _handle_output(
        self,
        node,
        source: bytes,
        file_path: str,
        store: GraphStore,
        stats: dict,
        labels: list[str],
        body_node,
    ) -> None:
        """Ingest an ``output "NAME"`` block as a Variable node."""
        name = labels[0]
        self._ingest_named_block(
            node, file_path, store, stats,
            name=name,
            label=NodeLabel.Variable,
            semantic_type="terraform_output",
            stat_key="functions",
        )
        if body_node:
            self._extract_references(
                body_node, source, file_path, name, NodeLabel.Variable, store, stats
            )

    def _handle_module(
        self,
        node,
        source: bytes,
        file_path: str,
        store: GraphStore,
        stats: dict,
        labels: list[str],
        body_node,
    ) -> None:
        """Ingest a ``module "NAME"`` block as a Module node.

        The module's ``source`` attribute is stored on the node so the
        Terraform enricher can later resolve cross-file module references.
        """
        name = labels[0]
        source_attr = (
            self._get_attribute_value(body_node, "source", source) if body_node else ""
        )
        self._ingest_named_block(
            node, file_path, store, stats,
            name=name,
            label=NodeLabel.Module,
            semantic_type="terraform_module",
            stat_key="classes",
            extra={"source": source_attr},
        )
        if body_node:
            self._extract_references(
                body_node, source, file_path, name, NodeLabel.Module, store, stats
            )

    def _handle_locals(
        self,
        node,
        source: bytes,
        file_path: str,
        store: GraphStore,
        stats: dict,
        body_node,
    ) -> None:
        """Ingest each attribute of a ``locals`` block as a Variable node."""
        if not body_node:
            return

        for child in body_node.children:
            if child.type != "attribute":
                continue
            # First identifier child is the local's name.
            attr_name = next(
                (_node_text(c, source) for c in child.children if c.type == "identifier"),
                None,
            )
            if not attr_name:
                continue

            self._ingest_named_block(
                child, file_path, store, stats,
                name=attr_name,
                label=NodeLabel.Variable,
                semantic_type="terraform_local",
                stat_key="functions",
            )
            # Extract references from the attribute expression
            self._extract_references(
                child, source, file_path, attr_name, NodeLabel.Variable, store, stats
            )

    # ── Reference extraction ──────────────────────────────────────────────────

    def _extract_references(
        self,
        node,
        source: bytes,
        file_path: str,
        from_name: str,
        from_label: str,
        store: GraphStore,
        stats: dict,
    ) -> None:
        """Scan expression text for var.X, local.X, module.X, data.T.N, and resource references."""
        text = _node_text(node, source)

        # (pattern, edge type, target label, name builder), scanned in the
        # same order as the original sequential loops.  Duplicate matches
        # intentionally emit duplicate edges, as before.
        specs = (
            (_VAR_REF, EdgeType.REFERENCES, NodeLabel.Variable,
             lambda m: m.group(1)),
            (_LOCAL_REF, EdgeType.REFERENCES, NodeLabel.Variable,
             lambda m: m.group(1)),
            (_MODULE_REF, EdgeType.REFERENCES, NodeLabel.Module,
             lambda m: m.group(1)),
            (_DATA_REF, EdgeType.DEPENDS_ON, NodeLabel.Class,
             lambda m: f"{m.group(1)}.{m.group(2)}"),
            (_RESOURCE_REF, EdgeType.DEPENDS_ON, NodeLabel.Class,
             lambda m: f"{m.group(1)}.{m.group(2)}"),
        )
        for pattern, edge_type, to_label, make_name in specs:
            for match in pattern.finditer(text):
                store.create_edge(
                    from_label,
                    {"name": from_name, "file_path": file_path},
                    edge_type,
                    to_label,
                    {"name": make_name(match), "file_path": file_path},
                )
                stats["edges"] += 1

    # ── Helpers ───────────────────────────────────────────────────────────────

    def _get_attribute_value(self, body_node, attr_name: str, source: bytes) -> str:
        """Extract the string value of a named attribute from a body node.

        Returns ``""`` when the attribute is absent.  When an attribute has
        several named non-identifier children, the last one is taken as the
        value expression (matching the original behavior).
        """
        for child in body_node.children:
            if child.type != "attribute":
                continue
            ident = None
            expr = None
            for attr_child in child.children:
                if attr_child.type == "identifier":
                    ident = _node_text(attr_child, source)
                elif attr_child.type == "expression" or attr_child.is_named:
                    expr = attr_child
            if ident == attr_name and expr is not None:
                return _node_text(expr, source).strip().strip('"').strip("'")
        return ""
--- navegador/ingestion/parser.py
+++ navegador/ingestion/parser.py
@@ -14,10 +14,16 @@
1414
PHP .php
1515
Ruby .rb
1616
Swift .swift
1717
C .c .h
1818
C++ .cpp .hpp .cc .cxx
19
+
20
+Infrastructure-as-Code:
21
+ HCL .tf .hcl (Terraform / OpenTofu)
22
+ Puppet .pp
23
+ Bash .sh .bash .zsh
24
+ Ansible .yml .yaml (detected heuristically, not via extension)
1925
"""
2026
2127
import hashlib
2228
import logging
2329
import time
@@ -49,10 +55,16 @@
4955
".h": "c",
5056
".cpp": "cpp",
5157
".hpp": "cpp",
5258
".cc": "cpp",
5359
".cxx": "cpp",
60
+ ".tf": "hcl",
61
+ ".hcl": "hcl",
62
+ ".pp": "puppet",
63
+ ".sh": "bash",
64
+ ".bash": "bash",
65
+ ".zsh": "bash",
5466
}
5567
5668
5769
class RepoIngester:
5870
"""
@@ -154,10 +166,13 @@
154166
# Remove the temporary redacted directory if one was created
155167
if effective_root is not repo_path:
156168
import shutil
157169
158170
shutil.rmtree(effective_root, ignore_errors=True)
171
+
172
+ # Ansible pass — heuristically detect and parse Ansible YAML files
173
+ self._ingest_ansible(repo_path, stats, incremental)
159174
160175
logger.info(
161176
"Ingested %s: %d files, %d functions, %d classes, %d skipped",
162177
repo_path.name,
163178
stats["files"],
@@ -266,10 +281,79 @@
266281
for path in repo_path.rglob("*"):
267282
if path.is_file() and path.suffix in LANGUAGE_MAP:
268283
if not any(part in skip_dirs for part in path.parts):
269284
yield path
270285
286
+ def _ingest_ansible(self, repo_path: Path, stats: dict[str, int], incremental: bool) -> None:
287
+ """Detect and parse Ansible YAML files (playbooks, roles, tasks)."""
288
+ from navegador.ingestion.ansible import AnsibleParser
289
+
290
+ is_ansible_file = AnsibleParser.is_ansible_file
291
+
292
+ ansible_parser: AnsibleParser | None = None
293
+
294
+ for path in repo_path.rglob("*.yml"):
295
+ if not path.is_file():
296
+ continue
297
+ if any(part in (".git", ".venv", "venv", "node_modules") for part in path.parts):
298
+ continue
299
+ if not is_ansible_file(path, repo_path):
300
+ continue
301
+
302
+ rel_path = str(path.relative_to(repo_path))
303
+ content_hash = _file_hash(path)
304
+
305
+ if incremental and self._file_unchanged(rel_path, content_hash):
306
+ stats["skipped"] += 1
307
+ continue
308
+
309
+ if incremental:
310
+ self._clear_file_subgraph(rel_path)
311
+
312
+ if ansible_parser is None:
313
+ ansible_parser = AnsibleParser()
314
+ try:
315
+ file_stats = ansible_parser.parse_file(path, repo_path, self.store)
316
+ stats["files"] += 1
317
+ stats["functions"] += file_stats.get("functions", 0)
318
+ stats["classes"] += file_stats.get("classes", 0)
319
+ stats["edges"] += file_stats.get("edges", 0)
320
+ self._store_file_hash(rel_path, content_hash)
321
+ except Exception:
322
+ logger.exception("Failed to parse Ansible file %s", path)
323
+
324
+ # Also check .yaml extension
325
+ for path in repo_path.rglob("*.yaml"):
326
+ if not path.is_file():
327
+ continue
328
+ if any(part in (".git", ".venv", "venv", "node_modules") for part in path.parts):
329
+ continue
330
+ if not is_ansible_file(path, repo_path):
331
+ continue
332
+
333
+ rel_path = str(path.relative_to(repo_path))
334
+ content_hash = _file_hash(path)
335
+
336
+ if incremental and self._file_unchanged(rel_path, content_hash):
337
+ stats["skipped"] += 1
338
+ continue
339
+
340
+ if incremental:
341
+ self._clear_file_subgraph(rel_path)
342
+
343
+ if ansible_parser is None:
344
+ ansible_parser = AnsibleParser()
345
+ try:
346
+ file_stats = ansible_parser.parse_file(path, repo_path, self.store)
347
+ stats["files"] += 1
348
+ stats["functions"] += file_stats.get("functions", 0)
349
+ stats["classes"] += file_stats.get("classes", 0)
350
+ stats["edges"] += file_stats.get("edges", 0)
351
+ self._store_file_hash(rel_path, content_hash)
352
+ except Exception:
353
+ logger.exception("Failed to parse Ansible file %s", path)
354
+
271355
def _get_parser(self, language: str) -> "LanguageParser":
272356
if language not in self._parsers:
273357
if language == "python":
274358
from navegador.ingestion.python import PythonParser
275359
@@ -316,10 +400,22 @@
316400
self._parsers[language] = CParser()
317401
elif language == "cpp":
318402
from navegador.ingestion.cpp import CppParser
319403
320404
self._parsers[language] = CppParser()
405
+ elif language == "hcl":
406
+ from navegador.ingestion.hcl import HCLParser
407
+
408
+ self._parsers[language] = HCLParser()
409
+ elif language == "puppet":
410
+ from navegador.ingestion.puppet import PuppetParser
411
+
412
+ self._parsers[language] = PuppetParser()
413
+ elif language == "bash":
414
+ from navegador.ingestion.bash import BashParser
415
+
416
+ self._parsers[language] = BashParser()
321417
else:
322418
raise ValueError(f"Unsupported language: {language}")
323419
return self._parsers[language]
324420
325421
326422
327423
ADDED navegador/ingestion/puppet.py
--- navegador/ingestion/parser.py
+++ navegador/ingestion/parser.py
@@ -14,10 +14,16 @@
14 PHP .php
15 Ruby .rb
16 Swift .swift
17 C .c .h
18 C++ .cpp .hpp .cc .cxx
 
 
 
 
 
 
19 """
20
21 import hashlib
22 import logging
23 import time
@@ -49,10 +55,16 @@
49 ".h": "c",
50 ".cpp": "cpp",
51 ".hpp": "cpp",
52 ".cc": "cpp",
53 ".cxx": "cpp",
 
 
 
 
 
 
54 }
55
56
57 class RepoIngester:
58 """
@@ -154,10 +166,13 @@
154 # Remove the temporary redacted directory if one was created
155 if effective_root is not repo_path:
156 import shutil
157
158 shutil.rmtree(effective_root, ignore_errors=True)
 
 
 
159
160 logger.info(
161 "Ingested %s: %d files, %d functions, %d classes, %d skipped",
162 repo_path.name,
163 stats["files"],
@@ -266,10 +281,79 @@
266 for path in repo_path.rglob("*"):
267 if path.is_file() and path.suffix in LANGUAGE_MAP:
268 if not any(part in skip_dirs for part in path.parts):
269 yield path
270
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271 def _get_parser(self, language: str) -> "LanguageParser":
272 if language not in self._parsers:
273 if language == "python":
274 from navegador.ingestion.python import PythonParser
275
@@ -316,10 +400,22 @@
316 self._parsers[language] = CParser()
317 elif language == "cpp":
318 from navegador.ingestion.cpp import CppParser
319
320 self._parsers[language] = CppParser()
 
 
 
 
 
 
 
 
 
 
 
 
321 else:
322 raise ValueError(f"Unsupported language: {language}")
323 return self._parsers[language]
324
325
326
327 ADDED navegador/ingestion/puppet.py
--- navegador/ingestion/parser.py
+++ navegador/ingestion/parser.py
@@ -14,10 +14,16 @@
14 PHP .php
15 Ruby .rb
16 Swift .swift
17 C .c .h
18 C++ .cpp .hpp .cc .cxx
19
20 Infrastructure-as-Code:
21 HCL .tf .hcl (Terraform / OpenTofu)
22 Puppet .pp
23 Bash .sh .bash .zsh
24 Ansible .yml .yaml (detected heuristically, not via extension)
25 """
26
27 import hashlib
28 import logging
29 import time
@@ -49,10 +55,16 @@
55 ".h": "c",
56 ".cpp": "cpp",
57 ".hpp": "cpp",
58 ".cc": "cpp",
59 ".cxx": "cpp",
60 ".tf": "hcl",
61 ".hcl": "hcl",
62 ".pp": "puppet",
63 ".sh": "bash",
64 ".bash": "bash",
65 ".zsh": "bash",
66 }
67
68
69 class RepoIngester:
70 """
@@ -154,10 +166,13 @@
166 # Remove the temporary redacted directory if one was created
167 if effective_root is not repo_path:
168 import shutil
169
170 shutil.rmtree(effective_root, ignore_errors=True)
171
172 # Ansible pass — heuristically detect and parse Ansible YAML files
173 self._ingest_ansible(repo_path, stats, incremental)
174
175 logger.info(
176 "Ingested %s: %d files, %d functions, %d classes, %d skipped",
177 repo_path.name,
178 stats["files"],
@@ -266,10 +281,79 @@
281 for path in repo_path.rglob("*"):
282 if path.is_file() and path.suffix in LANGUAGE_MAP:
283 if not any(part in skip_dirs for part in path.parts):
284 yield path
285
286 def _ingest_ansible(self, repo_path: Path, stats: dict[str, int], incremental: bool) -> None:
287 """Detect and parse Ansible YAML files (playbooks, roles, tasks)."""
288 from navegador.ingestion.ansible import AnsibleParser
289
290 is_ansible_file = AnsibleParser.is_ansible_file
291
292 ansible_parser: AnsibleParser | None = None
293
294 for path in repo_path.rglob("*.yml"):
295 if not path.is_file():
296 continue
297 if any(part in (".git", ".venv", "venv", "node_modules") for part in path.parts):
298 continue
299 if not is_ansible_file(path, repo_path):
300 continue
301
302 rel_path = str(path.relative_to(repo_path))
303 content_hash = _file_hash(path)
304
305 if incremental and self._file_unchanged(rel_path, content_hash):
306 stats["skipped"] += 1
307 continue
308
309 if incremental:
310 self._clear_file_subgraph(rel_path)
311
312 if ansible_parser is None:
313 ansible_parser = AnsibleParser()
314 try:
315 file_stats = ansible_parser.parse_file(path, repo_path, self.store)
316 stats["files"] += 1
317 stats["functions"] += file_stats.get("functions", 0)
318 stats["classes"] += file_stats.get("classes", 0)
319 stats["edges"] += file_stats.get("edges", 0)
320 self._store_file_hash(rel_path, content_hash)
321 except Exception:
322 logger.exception("Failed to parse Ansible file %s", path)
323
324 # Also check .yaml extension
325 for path in repo_path.rglob("*.yaml"):
326 if not path.is_file():
327 continue
328 if any(part in (".git", ".venv", "venv", "node_modules") for part in path.parts):
329 continue
330 if not is_ansible_file(path, repo_path):
331 continue
332
333 rel_path = str(path.relative_to(repo_path))
334 content_hash = _file_hash(path)
335
336 if incremental and self._file_unchanged(rel_path, content_hash):
337 stats["skipped"] += 1
338 continue
339
340 if incremental:
341 self._clear_file_subgraph(rel_path)
342
343 if ansible_parser is None:
344 ansible_parser = AnsibleParser()
345 try:
346 file_stats = ansible_parser.parse_file(path, repo_path, self.store)
347 stats["files"] += 1
348 stats["functions"] += file_stats.get("functions", 0)
349 stats["classes"] += file_stats.get("classes", 0)
350 stats["edges"] += file_stats.get("edges", 0)
351 self._store_file_hash(rel_path, content_hash)
352 except Exception:
353 logger.exception("Failed to parse Ansible file %s", path)
354
355 def _get_parser(self, language: str) -> "LanguageParser":
356 if language not in self._parsers:
357 if language == "python":
358 from navegador.ingestion.python import PythonParser
359
@@ -316,10 +400,22 @@
400 self._parsers[language] = CParser()
401 elif language == "cpp":
402 from navegador.ingestion.cpp import CppParser
403
404 self._parsers[language] = CppParser()
405 elif language == "hcl":
406 from navegador.ingestion.hcl import HCLParser
407
408 self._parsers[language] = HCLParser()
409 elif language == "puppet":
410 from navegador.ingestion.puppet import PuppetParser
411
412 self._parsers[language] = PuppetParser()
413 elif language == "bash":
414 from navegador.ingestion.bash import BashParser
415
416 self._parsers[language] = BashParser()
417 else:
418 raise ValueError(f"Unsupported language: {language}")
419 return self._parsers[language]
420
421
422
423 ADDED navegador/ingestion/puppet.py
--- a/navegador/ingestion/puppet.py
+++ b/navegador/ingestion/puppet.py
@@ -0,0 +1,339 @@
1
+"""
2
+Puppet manifest parser — extracts classes, defined types, node definitions,
3
+resource declarations, includes, and parameters from .pp files using tree-sitter.
4
+"""
5
+
6
+import logging
7
+from pathlib import Path
8
+
9
+from navegador.graph.schema import EdgeType, NodeLabel
10
+from navegador.graph.store import GraphStore
11
+from navegador.ingestion.parser import LanguageParser
12
+
13
+logger = logging.getLogger(__name__)
14
+
15
+
16
+def _get_puppet_language():
17
+ try:
18
+ import tree_sitter_puppet as tspuppet # type: ignore[import]
19
+ from tree_sitter import Language
20
+
21
+ return Language(tspuppet.language())
22
+ except ImportError as e:
23
+ raise ImportError("Install tree-sitter-puppet: pip install tree-sitter-puppet") from e
24
+
25
+
26
+def _node_text(node, source: bytes) -> str:
27
+ return source[node.start_byte : node.end_byte].decode("utf-8", errors="replace")
28
+
29
+
30
+def _class_identifier_text(node, source: bytes) -> str:
31
+ """Join identifier children of a class_identifier with '::'."""
32
+ parts = [_node_text(child, source) for child in node.children if child.type == "identifier"]
33
+ return "::".join(parts) if parts else _node_text(node, source)
34
+
35
+
36
+class PuppetParser(LanguageParser):
37
+ """Parses Puppet manifest files into the navegador graph."""
38
+
39
+ def __init__(self) -> None:
40
+ from tree_sitter import Parser # type: ignore[import]
41
+
42
+ self._parser = Parser(_get_puppet_language())
43
+
44
+ def parse_file(self, path: Path, repo_root: Path, store: GraphStore) -> dict[str, int]:
45
+ source = path.read_bytes()
46
+ tree = self._parser.parse(source)
47
+ rel_path = str(path.relative_to(repo_root))
48
+
49
+ store.create_node(
50
+ NodeLabel.File,
51
+ {
52
+ "name": path.name,
53
+ "path": rel_path,
54
+ "language": "puppet",
55
+ "line_count": source.count(b"\n"),
56
+ },
57
+ )
58
+
59
+ stats = {"functions": 0, "classes": 0, "edges": 0}
60
+ self._walk(tree.root_node, source, rel_path, store, stats)
61
+ return stats
62
+
63
+ # ── AST walker ────────────────────────────────────────────────────────────
64
+
65
+ def _walk(self, node, source: bytes, file_path: str, store: GraphStore, stats: dict) -> None:
66
+ if node.type == "class_definition":
67
+ self._handle_class(node, source, file_path, store, stats)
68
+ return
69
+ if node.type == "defined_resource_type":
70
+ self._handle_defined_type(node, source, file_path, store, stats)
71
+ return
72
+ if node.type == "node_definition":
73
+ self._handle_node(node, source, file_path, store, stats)
74
+ return
75
+ if node.type == "include_statement":
76
+ self._handle_include(node, source, file_path, store, stats)
77
+ return
78
+ for child in node.children:
79
+ self._walk(child, source, file_path, store, stats)
80
+
81
+ # ── Handlers ──────────────────────────────────────────────────────────────
82
+
83
+ def _handle_class(
84
+ self, node, source: bytes, file_path: str, store: GraphStore, stats: dict
85
+ ) -> None:
86
+ name = self._extract_class_identifier(node, source)
87
+ if not name:
88
+ return
89
+
90
+ store.create_node(
91
+ NodeLabel.Class,
92
+ {
93
+ "name": name,
94
+ "file_path": file_path,
95
+ "line_start": node.start_point[0] + 1,
96
+ "line_end": node.end_point[0] + 1,
97
+ "docstring": "",
98
+ "semantic_type": "puppet_class",
99
+ },
100
+ )
101
+ store.create_edge(
102
+ NodeLabel.File,
103
+ {"path": file_path},
104
+ EdgeType.CONTAINS,
105
+ NodeLabel.Class,
106
+ {"name": name, "file_path": file_path},
107
+ )
108
+ stats["classes"] += 1
109
+ stats["edges"] += 1
110
+
111
+ self._extract_parameters(node, source, file_path, name, store, stats)
112
+ self._extract_resources(node, source, file_path, name, store, stats)
113
+
114
+ def _handle_defined_type(
115
+ self, node, source: bytes, file_path: str, store: GraphStore, stats: dict
116
+ ) -> None:
117
+ name = self._extract_class_identifier(node, source)
118
+ if not name:
119
+ return
120
+
121
+ store.create_node(
122
+ NodeLabel.Class,
123
+ {
124
+ "name": name,
125
+ "file_path": file_path,
126
+ "line_start": node.start_point[0] + 1,
127
+ "line_end": node.end_point[0] + 1,
128
+ "docstring": "",
129
+ "semantic_type": "puppet_defined_type",
130
+ },
131
+ )
132
+ store.create_edge(
133
+ NodeLabel.File,
134
+ {"path": file_path},
135
+ EdgeType.CONTAINS,
136
+ NodeLabel.Class,
137
+ {"name": name, "file_path": file_path},
138
+ )
139
+ stats["classes"] += 1
140
+ stats["edges"] += 1
141
+
142
+ self._extract_parameters(node, source, file_path, name, store, stats)
143
+ self._extract_resources(node, source, file_path, name, store, stats)
144
+
145
+ def _handle_node(
146
+ self, node, source: bytes, file_path: str, store: GraphStore, stats: dict
147
+ ) -> None:
148
+ name = self._extract_node_name(node, source)
149
+ if not name:
150
+ return
151
+
152
+ store.create_node(
153
+ NodeLabel.Class,
154
+ {
155
+ "name": name,
156
+ "file_path": file_path,
157
+ "line_start": node.start_point[0] + 1,
158
+ "line_end": node.end_point[0] + 1,
159
+ "docstring": "",
160
+ "semantic_type": "puppet_node",
161
+ },
162
+ )
163
+ store.create_edge(
164
+ NodeLabel.File,
165
+ {"path": file_path},
166
+ EdgeType.CONTAINS,
167
+ NodeLabel.Class,
168
+ {"name": name, "file_path": file_path},
169
+ )
170
+ stats["classes"] += 1
171
+ stats["edges"] += 1
172
+
173
+ self._extract_resources(node, source, file_path, name, store, stats)
174
+
175
+ def _handle_include(
176
+ self, node, source: bytes, file_path: str, store: GraphStore, stats: dict
177
+ ) -> None:
178
+ ident_node = None
179
+ for child in node.children:
180
+ if child.type == "class_identifier":
181
+ ident_node = child
182
+ break
183
+ if not ident_node:
184
+ return
185
+
186
+ module = _class_identifier_text(ident_node, source)
187
+ store.create_node(
188
+ NodeLabel.Import,
189
+ {
190
+ "name": module,
191
+ "file_path": file_path,
192
+ "line_start": node.start_point[0] + 1,
193
+ "module": module,
194
+ "semantic_type": "puppet_include",
195
+ },
196
+ )
197
+ store.create_edge(
198
+ NodeLabel.File,
199
+ {"path": file_path},
200
+ EdgeType.IMPORTS,
201
+ NodeLabel.Import,
202
+ {"name": module, "file_path": file_path},
203
+ )
204
+ stats["edges"] += 1
205
+
206
+ # ── Extractors ────────────────────────────────────────────────────────────
207
+
208
+ def _extract_class_identifier(self, node, source: bytes) -> str | None:
209
+ """Find and return the class_identifier text from a class/define node."""
210
+ for child in node.children:
211
+ if child.type == "class_identifier":
212
+ return _class_identifier_text(child, source)
213
+ return None
214
+
215
+ def _extract_node_name(self, node, source: bytes) -> str | None:
216
+ """Extract the node name from a node_definition (string child of node_name)."""
217
+ for child in node.children:
218
+ if child.type == "node_name":
219
+ for grandchild in child.children:
220
+ if grandchild.type == "string":
221
+ return _node_text(grandchild, source).strip("'\"")
222
+ return _node_text(child, source).strip("'\"")
223
+ return None
224
+
225
+ def _extract_parameters(
226
+ self,
227
+ node,
228
+ source: bytes,
229
+ file_path: str,
230
+ class_name: str,
231
+ store: GraphStore,
232
+ stats: dict,
233
+ ) -> None:
234
+ """Extract parameters from a parameter_list inside a class/define."""
235
+ for child in node.children:
236
+ if child.type != "parameter_list":
237
+ continue
238
+ for param in child.children:
239
+ if param.type != "parameter":
240
+ continue
241
+ var_node = None
242
+ for pc in param.children:
243
+ if pc.type == "variable":
244
+ var_node = pc
245
+ break
246
+ if not var_node:
247
+ continue
248
+ var_name = _node_text(var_node, source).lstrip("$")
249
+ store.create_node(
250
+ NodeLabel.Variable,
251
+ {
252
+ "name": var_name,
253
+ "file_path": file_path,
254
+ "line_start": param.start_point[0] + 1,
255
+ "semantic_type": "puppet_parameter",
256
+ },
257
+ )
258
+ store.create_edge(
259
+ NodeLabel.Class,
260
+ {"name": class_name, "file_path": file_path},
261
+ EdgeType.CONTAINS,
262
+ NodeLabel.Variable,
263
+ {"name": var_name, "file_path": file_path},
264
+ )
265
+ stats["edges"] += 1
266
+
267
+ def _extract_resources(
268
+ self,
269
+ node,
270
+ source: bytes,
271
+ file_path: str,
272
+ class_name: str,
273
+ store: GraphStore,
274
+ stats: dict,
275
+ ) -> None:
276
+ """Walk the block of a class/define/node to find resource declarations."""
277
+ for child in node.children:
278
+ if child.type == "block":
279
+ self._walk_block_for_resources(child, source, file_path, class_name, store, stats)
280
+ break
281
+
282
+ def _walk_block_for_resources(
283
+ self,
284
+ node,
285
+ source: bytes,
286
+ file_path: str,
287
+ class_name: str,
288
+ store: GraphStore,
289
+ stats: dict,
290
+ ) -> None:
291
+ """Recursively find resource_declaration nodes inside a block."""
292
+ if node.type == "resource_declaration":
293
+ self._handle_resource(node, source, file_path, class_name, store, stats)
294
+ return
295
+ for child in node.children:
296
+ self._walk_block_for_resources(child, source, file_path, class_name, store, stats)
297
+
298
+ def _handle_resource(
299
+ self,
300
+ node,
301
+ source: bytes,
302
+ file_path: str,
303
+ class_name: str,
304
+ store: GraphStore,
305
+ stats: dict,
306
+ ) -> None:
307
+ """Handle a resource_declaration: first identifier = type, first string = title."""
308
+ res_type = None
309
+ res_title = None
310
+ for child in node.children:
311
+ if child.type == "identifier" and res_type is None:
312
+ res_type = _node_text(child, source)
313
+ if child.type == "string" and res_title is None:
314
+ res_title = _node_text(child, source).strip("'\"")
315
+ if not res_type:
316
+ return
317
+
318
+ name = f"{res_type}[{res_title}]" if res_title else res_type
319
+ store.create_node(
320
+ NodeLabel.Function,
321
+ {
322
+ "name": name,
323
+ "file_path": file_path,
324
+ "line_start": node.start_point[0] + 1,
325
+ "line_end": node.end_point[0] + 1,
326
+ "docstring": "",
327
+ "class_name": class_name,
328
+ "semantic_type": "puppet_resource",
329
+ },
330
+ )
331
+ store.create_edge(
332
+ NodeLabel.Class,
333
+ {"name": class_name, "file_path": file_path},
334
+ EdgeType.CONTAINS,
335
+ NodeLabel.Function,
336
+ {"name": name, "file_path": file_path},
337
+ )
338
+ stats["functions"] += 1
339
+ stats["edges"] += 1
--- a/navegador/ingestion/puppet.py
+++ b/navegador/ingestion/puppet.py
@@ -0,0 +1,339 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
--- a/navegador/ingestion/puppet.py
+++ b/navegador/ingestion/puppet.py
@@ -0,0 +1,339 @@
1 """
2 Puppet manifest parser — extracts classes, defined types, node definitions,
3 resource declarations, includes, and parameters from .pp files using tree-sitter.
4 """
5
6 import logging
7 from pathlib import Path
8
9 from navegador.graph.schema import EdgeType, NodeLabel
10 from navegador.graph.store import GraphStore
11 from navegador.ingestion.parser import LanguageParser
12
13 logger = logging.getLogger(__name__)
14
15
16 def _get_puppet_language():
17 try:
18 import tree_sitter_puppet as tspuppet # type: ignore[import]
19 from tree_sitter import Language
20
21 return Language(tspuppet.language())
22 except ImportError as e:
23 raise ImportError("Install tree-sitter-puppet: pip install tree-sitter-puppet") from e
24
25
26 def _node_text(node, source: bytes) -> str:
27 return source[node.start_byte : node.end_byte].decode("utf-8", errors="replace")
28
29
30 def _class_identifier_text(node, source: bytes) -> str:
31 """Join identifier children of a class_identifier with '::'."""
32 parts = [_node_text(child, source) for child in node.children if child.type == "identifier"]
33 return "::".join(parts) if parts else _node_text(node, source)
34
35
class PuppetParser(LanguageParser):
    """Parses Puppet manifest files (.pp) into the navegador graph.

    Mapping of Puppet constructs to graph entities:

    * ``class`` / ``define`` / ``node`` definitions -> ``Class`` nodes,
      distinguished by ``semantic_type`` (``puppet_class`` /
      ``puppet_defined_type`` / ``puppet_node``)
    * resource declarations -> ``Function`` nodes (``puppet_resource``)
    * ``include`` statements -> ``Import`` nodes (``puppet_include``)
    * class/define parameters -> ``Variable`` nodes (``puppet_parameter``)
    """

    def __init__(self) -> None:
        from tree_sitter import Parser  # type: ignore[import]

        self._parser = Parser(_get_puppet_language())

    def parse_file(self, path: Path, repo_root: Path, store: GraphStore) -> dict[str, int]:
        """Parse one ``.pp`` file into *store*.

        Creates the File node, then walks the AST emitting entity nodes and
        CONTAINS/IMPORTS edges.

        Returns
        -------
        dict with ``functions`` (resources), ``classes``, and ``edges`` counts.
        """
        source = path.read_bytes()
        tree = self._parser.parse(source)
        rel_path = str(path.relative_to(repo_root))

        store.create_node(
            NodeLabel.File,
            {
                "name": path.name,
                "path": rel_path,
                "language": "puppet",
                # Newline count; matches the convention of the other parsers.
                "line_count": source.count(b"\n"),
            },
        )

        stats = {"functions": 0, "classes": 0, "edges": 0}
        self._walk(tree.root_node, source, rel_path, store, stats)
        return stats

    # ── AST walker ────────────────────────────────────────────────────────────

    def _walk(self, node, source: bytes, file_path: str, store: GraphStore, stats: dict) -> None:
        """Dispatch interesting AST nodes to handlers; recurse into everything else.

        Handlers perform their own scoped sub-walks, so dispatch deliberately
        does not recurse into a handled node.
        """
        if node.type == "class_definition":
            self._handle_class(node, source, file_path, store, stats)
            return
        if node.type == "defined_resource_type":
            self._handle_defined_type(node, source, file_path, store, stats)
            return
        if node.type == "node_definition":
            self._handle_node(node, source, file_path, store, stats)
            return
        if node.type == "include_statement":
            self._handle_include(node, source, file_path, store, stats)
            return
        for child in node.children:
            self._walk(child, source, file_path, store, stats)

    # ── Handlers ──────────────────────────────────────────────────────────────

    def _emit_class_entity(
        self,
        node,
        name: str,
        file_path: str,
        store: GraphStore,
        stats: dict,
        semantic_type: str,
    ) -> None:
        """Shared emitter for the three class-like entities (class/define/node).

        Creates one ``Class`` node tagged with *semantic_type* plus a
        File-CONTAINS-Class edge, and bumps the ``classes``/``edges`` stats.
        Factored out because the three handlers previously triplicated this
        block verbatim.
        """
        store.create_node(
            NodeLabel.Class,
            {
                "name": name,
                "file_path": file_path,
                "line_start": node.start_point[0] + 1,
                "line_end": node.end_point[0] + 1,
                "docstring": "",
                "semantic_type": semantic_type,
            },
        )
        store.create_edge(
            NodeLabel.File,
            {"path": file_path},
            EdgeType.CONTAINS,
            NodeLabel.Class,
            {"name": name, "file_path": file_path},
        )
        stats["classes"] += 1
        stats["edges"] += 1

    def _handle_class(
        self, node, source: bytes, file_path: str, store: GraphStore, stats: dict
    ) -> None:
        """Handle a ``class foo { ... }`` definition."""
        name = self._extract_class_identifier(node, source)
        if not name:
            return
        self._emit_class_entity(node, name, file_path, store, stats, "puppet_class")
        self._extract_parameters(node, source, file_path, name, store, stats)
        self._extract_resources(node, source, file_path, name, store, stats)

    def _handle_defined_type(
        self, node, source: bytes, file_path: str, store: GraphStore, stats: dict
    ) -> None:
        """Handle a ``define foo(...) { ... }`` defined resource type."""
        name = self._extract_class_identifier(node, source)
        if not name:
            return
        self._emit_class_entity(node, name, file_path, store, stats, "puppet_defined_type")
        self._extract_parameters(node, source, file_path, name, store, stats)
        self._extract_resources(node, source, file_path, name, store, stats)

    def _handle_node(
        self, node, source: bytes, file_path: str, store: GraphStore, stats: dict
    ) -> None:
        """Handle a ``node 'host' { ... }`` definition (no parameter list)."""
        name = self._extract_node_name(node, source)
        if not name:
            return
        self._emit_class_entity(node, name, file_path, store, stats, "puppet_node")
        self._extract_resources(node, source, file_path, name, store, stats)

    def _handle_include(
        self, node, source: bytes, file_path: str, store: GraphStore, stats: dict
    ) -> None:
        """Handle ``include foo::bar``: emit an Import node and IMPORTS edge."""
        ident_node = None
        for child in node.children:
            if child.type == "class_identifier":
                ident_node = child
                break
        if not ident_node:
            return

        module = _class_identifier_text(ident_node, source)
        store.create_node(
            NodeLabel.Import,
            {
                "name": module,
                "file_path": file_path,
                "line_start": node.start_point[0] + 1,
                "module": module,
                "semantic_type": "puppet_include",
            },
        )
        store.create_edge(
            NodeLabel.File,
            {"path": file_path},
            EdgeType.IMPORTS,
            NodeLabel.Import,
            {"name": module, "file_path": file_path},
        )
        stats["edges"] += 1

    # ── Extractors ────────────────────────────────────────────────────────────

    def _extract_class_identifier(self, node, source: bytes) -> str | None:
        """Find and return the class_identifier text from a class/define node."""
        for child in node.children:
            if child.type == "class_identifier":
                return _class_identifier_text(child, source)
        return None

    def _extract_node_name(self, node, source: bytes) -> str | None:
        """Extract the node name from a node_definition (string child of node_name)."""
        for child in node.children:
            if child.type == "node_name":
                for grandchild in child.children:
                    if grandchild.type == "string":
                        return _node_text(grandchild, source).strip("'\"")
                # No string grandchild (e.g. a bare name): use the raw text.
                return _node_text(child, source).strip("'\"")
        return None

    def _extract_parameters(
        self,
        node,
        source: bytes,
        file_path: str,
        class_name: str,
        store: GraphStore,
        stats: dict,
    ) -> None:
        """Extract parameters from a parameter_list inside a class/define.

        Each parameter becomes a ``Variable`` node linked to its owning class
        via a CONTAINS edge.  Only edges are counted (there is no variable
        counter in the stats dict).
        """
        for child in node.children:
            if child.type != "parameter_list":
                continue
            for param in child.children:
                if param.type != "parameter":
                    continue
                var_node = None
                for pc in param.children:
                    if pc.type == "variable":
                        var_node = pc
                        break
                if not var_node:
                    continue
                # Puppet variables carry a '$' sigil; store the bare name.
                var_name = _node_text(var_node, source).lstrip("$")
                store.create_node(
                    NodeLabel.Variable,
                    {
                        "name": var_name,
                        "file_path": file_path,
                        "line_start": param.start_point[0] + 1,
                        "semantic_type": "puppet_parameter",
                    },
                )
                store.create_edge(
                    NodeLabel.Class,
                    {"name": class_name, "file_path": file_path},
                    EdgeType.CONTAINS,
                    NodeLabel.Variable,
                    {"name": var_name, "file_path": file_path},
                )
                stats["edges"] += 1

    def _extract_resources(
        self,
        node,
        source: bytes,
        file_path: str,
        class_name: str,
        store: GraphStore,
        stats: dict,
    ) -> None:
        """Walk the block of a class/define/node to find resource declarations."""
        for child in node.children:
            if child.type == "block":
                self._walk_block_for_resources(child, source, file_path, class_name, store, stats)
                break

    def _walk_block_for_resources(
        self,
        node,
        source: bytes,
        file_path: str,
        class_name: str,
        store: GraphStore,
        stats: dict,
    ) -> None:
        """Recursively find resource_declaration nodes inside a block."""
        if node.type == "resource_declaration":
            self._handle_resource(node, source, file_path, class_name, store, stats)
            return
        for child in node.children:
            self._walk_block_for_resources(child, source, file_path, class_name, store, stats)

    def _handle_resource(
        self,
        node,
        source: bytes,
        file_path: str,
        class_name: str,
        store: GraphStore,
        stats: dict,
    ) -> None:
        """Handle a resource_declaration: first identifier = type, first string = title.

        Emits a ``Function`` node named ``type[title]`` (or bare ``type`` when
        no title is found) plus a Class-CONTAINS-Function edge.
        """
        res_type = None
        res_title = None
        for child in node.children:
            if child.type == "identifier" and res_type is None:
                res_type = _node_text(child, source)
            if child.type == "string" and res_title is None:
                res_title = _node_text(child, source).strip("'\"")
        if not res_type:
            return

        name = f"{res_type}[{res_title}]" if res_title else res_type
        store.create_node(
            NodeLabel.Function,
            {
                "name": name,
                "file_path": file_path,
                "line_start": node.start_point[0] + 1,
                "line_end": node.end_point[0] + 1,
                "docstring": "",
                "class_name": class_name,
                "semantic_type": "puppet_resource",
            },
        )
        store.create_edge(
            NodeLabel.Class,
            {"name": class_name, "file_path": file_path},
            EdgeType.CONTAINS,
            NodeLabel.Function,
            {"name": name, "file_path": file_path},
        )
        stats["functions"] += 1
        stats["edges"] += 1
+9 -2
--- pyproject.toml
+++ pyproject.toml
@@ -2,19 +2,19 @@
22
requires = ["setuptools>=69.0", "wheel"]
33
build-backend = "setuptools.build_meta"
44
55
[project]
66
name = "navegador"
7
-version = "0.7.4"
7
+version = "0.8.0"
88
description = "AST + knowledge graph context engine for AI coding agents"
99
readme = "README.md"
1010
license = "MIT"
1111
requires-python = ">=3.12"
1212
authors = [
1313
{ name = "CONFLICT LLC" },
1414
]
15
-keywords = ["ast", "knowledge-graph", "code-analysis", "ai-agents", "mcp", "context-management", "falkordb", "go", "rust", "java", "typescript", "kotlin", "csharp", "php", "ruby", "swift", "c", "cpp"]
15
+keywords = ["ast", "knowledge-graph", "code-analysis", "ai-agents", "mcp", "context-management", "falkordb", "go", "rust", "java", "typescript", "kotlin", "csharp", "php", "ruby", "swift", "c", "cpp", "terraform", "hcl", "puppet", "ansible", "chef", "bash", "iac"]
1616
classifiers = [
1717
"Development Status :: 3 - Alpha",
1818
"Intended Audience :: Developers",
1919
"Operating System :: OS Independent",
2020
"Programming Language :: Python :: 3",
@@ -61,10 +61,16 @@
6161
"tree-sitter-ruby>=0.23.0",
6262
"tree-sitter-swift>=0.23.0",
6363
"tree-sitter-c>=0.23.0",
6464
"tree-sitter-cpp>=0.23.0",
6565
]
66
+iac = [
67
+ # Infrastructure-as-Code tree-sitter grammars
68
+ "tree-sitter-hcl>=1.2.0",
69
+ "tree-sitter-puppet>=1.3.0",
70
+ "tree-sitter-bash>=0.25.0",
71
+]
6672
llm = [
6773
# LLM provider SDKs (install the ones you use)
6874
"anthropic>=0.39.0",
6975
"openai>=1.0.0",
7076
]
@@ -81,10 +87,11 @@
8187
"pymdown-extensions>=10.0",
8288
]
8389
all = [
8490
"navegador[redis]",
8591
"navegador[languages]",
92
+ "navegador[iac]",
8693
"navegador[llm]",
8794
"navegador[dev]",
8895
"navegador[docs]",
8996
]
9097
9198
9299
ADDED tests/test_ansible_parser.py
93100
ADDED tests/test_bash_parser.py
94101
ADDED tests/test_chef_enricher.py
95102
ADDED tests/test_hcl_parser.py
96103
ADDED tests/test_puppet_parser.py
--- pyproject.toml
+++ pyproject.toml
@@ -2,19 +2,19 @@
2 requires = ["setuptools>=69.0", "wheel"]
3 build-backend = "setuptools.build_meta"
4
5 [project]
6 name = "navegador"
7 version = "0.7.4"
8 description = "AST + knowledge graph context engine for AI coding agents"
9 readme = "README.md"
10 license = "MIT"
11 requires-python = ">=3.12"
12 authors = [
13 { name = "CONFLICT LLC" },
14 ]
15 keywords = ["ast", "knowledge-graph", "code-analysis", "ai-agents", "mcp", "context-management", "falkordb", "go", "rust", "java", "typescript", "kotlin", "csharp", "php", "ruby", "swift", "c", "cpp"]
16 classifiers = [
17 "Development Status :: 3 - Alpha",
18 "Intended Audience :: Developers",
19 "Operating System :: OS Independent",
20 "Programming Language :: Python :: 3",
@@ -61,10 +61,16 @@
61 "tree-sitter-ruby>=0.23.0",
62 "tree-sitter-swift>=0.23.0",
63 "tree-sitter-c>=0.23.0",
64 "tree-sitter-cpp>=0.23.0",
65 ]
 
 
 
 
 
 
66 llm = [
67 # LLM provider SDKs (install the ones you use)
68 "anthropic>=0.39.0",
69 "openai>=1.0.0",
70 ]
@@ -81,10 +87,11 @@
81 "pymdown-extensions>=10.0",
82 ]
83 all = [
84 "navegador[redis]",
85 "navegador[languages]",
 
86 "navegador[llm]",
87 "navegador[dev]",
88 "navegador[docs]",
89 ]
90
91
92 ADDED tests/test_ansible_parser.py
93 ADDED tests/test_bash_parser.py
94 ADDED tests/test_chef_enricher.py
95 ADDED tests/test_hcl_parser.py
96 ADDED tests/test_puppet_parser.py
--- pyproject.toml
+++ pyproject.toml
@@ -2,19 +2,19 @@
2 requires = ["setuptools>=69.0", "wheel"]
3 build-backend = "setuptools.build_meta"
4
5 [project]
6 name = "navegador"
7 version = "0.8.0"
8 description = "AST + knowledge graph context engine for AI coding agents"
9 readme = "README.md"
10 license = "MIT"
11 requires-python = ">=3.12"
12 authors = [
13 { name = "CONFLICT LLC" },
14 ]
15 keywords = ["ast", "knowledge-graph", "code-analysis", "ai-agents", "mcp", "context-management", "falkordb", "go", "rust", "java", "typescript", "kotlin", "csharp", "php", "ruby", "swift", "c", "cpp", "terraform", "hcl", "puppet", "ansible", "chef", "bash", "iac"]
16 classifiers = [
17 "Development Status :: 3 - Alpha",
18 "Intended Audience :: Developers",
19 "Operating System :: OS Independent",
20 "Programming Language :: Python :: 3",
@@ -61,10 +61,16 @@
61 "tree-sitter-ruby>=0.23.0",
62 "tree-sitter-swift>=0.23.0",
63 "tree-sitter-c>=0.23.0",
64 "tree-sitter-cpp>=0.23.0",
65 ]
66 iac = [
67 # Infrastructure-as-Code tree-sitter grammars
68 "tree-sitter-hcl>=1.2.0",
69 "tree-sitter-puppet>=1.3.0",
70 "tree-sitter-bash>=0.25.0",
71 ]
72 llm = [
73 # LLM provider SDKs (install the ones you use)
74 "anthropic>=0.39.0",
75 "openai>=1.0.0",
76 ]
@@ -81,10 +87,11 @@
87 "pymdown-extensions>=10.0",
88 ]
89 all = [
90 "navegador[redis]",
91 "navegador[languages]",
92 "navegador[iac]",
93 "navegador[llm]",
94 "navegador[dev]",
95 "navegador[docs]",
96 ]
97
98
99 ADDED tests/test_ansible_parser.py
100 ADDED tests/test_bash_parser.py
101 ADDED tests/test_chef_enricher.py
102 ADDED tests/test_hcl_parser.py
103 ADDED tests/test_puppet_parser.py
--- a/tests/test_ansible_parser.py
+++ b/tests/test_ansible_parser.py
@@ -0,0 +1,234 @@
1
+"""Tests for navegador.ingestion.ansible — AnsibleParser."""
2
+
3
+import tempfile
4
+from pathlib import Path
5
+from unittest.mock import MagicMock
6
+
7
+from navegador.graph.schema import EdgeType, NodeLabel
8
+from navegador.ingestion.ansible import AnsibleParser  # noqa: E402
9
+
10
+
11
+def _make_store():
12
+ store = MagicMock()
13
+ store.query.return_value = MagicMock(result_set=[])
14
+ return store
15
+
16
+
17
+class TestIsAnsibleFile:
18
+ """Tests for AnsibleParser.is_ansible_file() path detection."""
19
+
20
+ def test_role_tasks_detected(self):
21
+ with tempfile.TemporaryDirectory() as tmp:
22
+ p = Path(tmp) / "roles" / "webserver" / "tasks" / "main.yml"
23
+ p.parent.mkdir(parents=True)
24
+ p.write_text("---\n- name: test\n debug:\n")
25
+ assert AnsibleParser.is_ansible_file(p, Path(tmp)) is True
26
+
27
+ def test_role_handlers_detected(self):
28
+ with tempfile.TemporaryDirectory() as tmp:
29
+ p = Path(tmp) / "roles" / "webserver" / "handlers" / "main.yml"
30
+ p.parent.mkdir(parents=True)
31
+ p.write_text("---\n- name: restart nginx\n service:\n")
32
+ assert AnsibleParser.is_ansible_file(p, Path(tmp)) is True
33
+
34
+ def test_playbooks_dir_detected(self):
35
+ with tempfile.TemporaryDirectory() as tmp:
36
+ p = Path(tmp) / "playbooks" / "deploy.yml"
37
+ p.parent.mkdir(parents=True)
38
+ p.write_text("---\n- hosts: all\n tasks: []\n")
39
+ assert AnsibleParser.is_ansible_file(p, Path(tmp)) is True
40
+
41
+ def test_group_vars_detected(self):
42
+ with tempfile.TemporaryDirectory() as tmp:
43
+ p = Path(tmp) / "group_vars" / "all.yml"
44
+ p.parent.mkdir(parents=True)
45
+ p.write_text("---\nhttp_port: 80\n")
46
+ assert AnsibleParser.is_ansible_file(p, Path(tmp)) is True
47
+
48
+ def test_random_yaml_not_detected(self):
49
+ with tempfile.TemporaryDirectory() as tmp:
50
+ p = Path(tmp) / "random" / "config.yml"
51
+ p.parent.mkdir(parents=True)
52
+ p.write_text("---\nkey: value\n")
53
+ assert AnsibleParser.is_ansible_file(p, Path(tmp)) is False
54
+
55
+ def test_non_yaml_not_detected(self):
56
+ with tempfile.TemporaryDirectory() as tmp:
57
+ p = Path(tmp) / "some_file.py"
58
+ p.write_text("print('hello')\n")
59
+ assert AnsibleParser.is_ansible_file(p, Path(tmp)) is False
60
+
61
+
62
+class TestParsePlaybook:
63
+ """Tests for parse_file() with a full playbook (list with hosts)."""
64
+
65
+ def test_creates_module_class_and_function_nodes(self):
66
+ store = _make_store()
67
+ parser = AnsibleParser()
68
+ with tempfile.TemporaryDirectory() as tmp:
69
+ tmp_path = Path(tmp)
70
+ playbook = tmp_path / "playbooks" / "deploy.yml"
71
+ playbook.parent.mkdir(parents=True)
72
+ playbook.write_text(
73
+ "---\n"
74
+ "- name: Deploy web app\n"
75
+ " hosts: webservers\n"
76
+ " tasks:\n"
77
+ " - name: Install nginx\n"
78
+ " apt:\n"
79
+ " name: nginx\n"
80
+ " state: present\n"
81
+ " - name: Start nginx\n"
82
+ " service:\n"
83
+ " name: nginx\n"
84
+ " state: started\n"
85
+ )
86
+ stats = parser.parse_file(playbook, tmp_path, store)
87
+
88
+ assert stats["functions"] >= 2
89
+ assert stats["classes"] >= 1
90
+
91
+ # Verify Module node created for playbook
92
+ create_calls = store.create_node.call_args_list
93
+ labels = [c[0][0] for c in create_calls]
94
+ assert NodeLabel.Module in labels
95
+ assert NodeLabel.Class in labels
96
+ assert NodeLabel.Function in labels
97
+
98
+ def test_edges_created_for_containment(self):
99
+ store = _make_store()
100
+ parser = AnsibleParser()
101
+ with tempfile.TemporaryDirectory() as tmp:
102
+ tmp_path = Path(tmp)
103
+ playbook = tmp_path / "playbooks" / "site.yml"
104
+ playbook.parent.mkdir(parents=True)
105
+ playbook.write_text(
106
+ "---\n- name: Main play\n hosts: all\n tasks:\n - name: Ping\n ping:\n"
107
+ )
108
+ stats = parser.parse_file(playbook, tmp_path, store)
109
+
110
+ assert stats["edges"] >= 3 # File->Module, Module->Class, Class->Func
111
+
112
+
113
+class TestParseTaskFile:
114
+ """Tests for parse_file() with a standalone task file."""
115
+
116
+ def test_task_file_creates_class_and_functions(self):
117
+ store = _make_store()
118
+ parser = AnsibleParser()
119
+ with tempfile.TemporaryDirectory() as tmp:
120
+ tmp_path = Path(tmp)
121
+ task_file = tmp_path / "roles" / "web" / "tasks" / "main.yml"
122
+ task_file.parent.mkdir(parents=True)
123
+ task_file.write_text(
124
+ "---\n"
125
+ "- name: Install packages\n"
126
+ " apt:\n"
127
+ " name: curl\n"
128
+ "- name: Copy config\n"
129
+ " copy:\n"
130
+ " src: app.conf\n"
131
+ " dest: /etc/app.conf\n"
132
+ )
133
+ stats = parser.parse_file(task_file, tmp_path, store)
134
+
135
+ assert stats["classes"] == 1 # synthetic parent
136
+ assert stats["functions"] == 2
137
+
138
+
139
+class TestParseVariableFile:
140
+ """Tests for parse_file() with a variable file."""
141
+
142
+ def test_variable_file_creates_variables(self):
143
+ store = _make_store()
144
+ parser = AnsibleParser()
145
+ with tempfile.TemporaryDirectory() as tmp:
146
+ tmp_path = Path(tmp)
147
+ var_file = tmp_path / "roles" / "web" / "defaults" / "main.yml"
148
+ var_file.parent.mkdir(parents=True)
149
+ var_file.write_text("---\nhttp_port: 80\nmax_clients: 200\napp_env: production\n")
150
+ stats = parser.parse_file(var_file, tmp_path, store)
151
+
152
+ # Each variable creates a CONTAINS edge
153
+ assert stats["edges"] >= 3
154
+ create_calls = store.create_node.call_args_list
155
+ labels = [c[0][0] for c in create_calls]
156
+ assert labels.count(NodeLabel.Variable) == 3
157
+
158
+
159
+class TestHandlerAndNotify:
160
+ """Tests for handler detection and CALLS edges from notify."""
161
+
162
+ def test_notify_creates_calls_edge(self):
163
+ store = _make_store()
164
+ parser = AnsibleParser()
165
+ with tempfile.TemporaryDirectory() as tmp:
166
+ tmp_path = Path(tmp)
167
+ playbook = tmp_path / "playbooks" / "handlers.yml"
168
+ playbook.parent.mkdir(parents=True)
169
+ playbook.write_text(
170
+ "---\n"
171
+ "- name: Handler play\n"
172
+ " hosts: all\n"
173
+ " tasks:\n"
174
+ " - name: Update config\n"
175
+ " copy:\n"
176
+ " src: app.conf\n"
177
+ " dest: /etc/app.conf\n"
178
+ " notify: Restart app\n"
179
+ " handlers:\n"
180
+ " - name: Restart app\n"
181
+ " service:\n"
182
+ " name: app\n"
183
+ " state: restarted\n"
184
+ )
185
+ parser.parse_file(playbook, tmp_path, store)
186
+
187
+ # Should have a CALLS edge from task to handler
188
+ edge_calls = store.create_edge.call_args_list
189
+ calls_edges = [c for c in edge_calls if c[0][2] == EdgeType.CALLS]
190
+ assert len(calls_edges) >= 1
191
+ # The CALLS edge target should be the handler name
192
+ target_props = calls_edges[0][0][4]
193
+ assert target_props["name"] == "Restart app"
194
+
195
+ def test_handler_file_creates_handler_functions(self):
196
+ store = _make_store()
197
+ parser = AnsibleParser()
198
+ with tempfile.TemporaryDirectory() as tmp:
199
+ tmp_path = Path(tmp)
200
+ handler_file = tmp_path / "roles" / "web" / "handlers" / "main.yml"
201
+ handler_file.parent.mkdir(parents=True)
202
+ handler_file.write_text(
203
+ "---\n"
204
+ "- name: Restart nginx\n"
205
+ " service:\n"
206
+ " name: nginx\n"
207
+ " state: restarted\n"
208
+ "- name: Reload nginx\n"
209
+ " service:\n"
210
+ " name: nginx\n"
211
+ " state: reloaded\n"
212
+ )
213
+ stats = parser.parse_file(handler_file, tmp_path, store)
214
+
215
+ assert stats["functions"] == 2
216
+ assert stats["classes"] == 1
217
+
218
+
219
+class TestRoleImport:
220
+ """Tests for role import extraction."""
221
+
222
+ def test_role_references_create_import_nodes(self):
223
+ store = _make_store()
224
+ parser = AnsibleParser()
225
+ with tempfile.TemporaryDirectory() as tmp:
226
+ tmp_path = Path(tmp)
227
+ playbook = tmp_path / "playbooks" / "roles.yml"
228
+ playbook.parent.mkdir(parents=True)
229
+ playbook.write_text(
230
+ "---\n"
231
+ "- name: Apply roles\n"
232
+ " hosts: all\n"
233
+ " roles:\n"
234
+ " -
--- a/tests/test_ansible_parser.py
+++ b/tests/test_ansible_parser.py
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
--- a/tests/test_ansible_parser.py
+++ b/tests/test_ansible_parser.py
@@ -0,0 +1,234 @@
1 """Tests for navegador.ingestion.ansible — AnsibleParser."""
2
3 import tempfile
4 from pathlib import Path
5 from unittest.mock import MagicMock
6
7 from navegador.graph.schema import EdgeType, NodeLabel
8 from navegador.ingestion.ansible import AnsibleParser  # noqa: E402
9
10
11 def _make_store():
12 store = MagicMock()
13 store.query.return_value = MagicMock(result_set=[])
14 return store
15
16
17 class TestIsAnsibleFile:
18 """Tests for AnsibleParser.is_ansible_file() path detection."""
19
20 def test_role_tasks_detected(self):
21 with tempfile.TemporaryDirectory() as tmp:
22 p = Path(tmp) / "roles" / "webserver" / "tasks" / "main.yml"
23 p.parent.mkdir(parents=True)
24 p.write_text("---\n- name: test\n debug:\n")
25 assert AnsibleParser.is_ansible_file(p, Path(tmp)) is True
26
27 def test_role_handlers_detected(self):
28 with tempfile.TemporaryDirectory() as tmp:
29 p = Path(tmp) / "roles" / "webserver" / "handlers" / "main.yml"
30 p.parent.mkdir(parents=True)
31 p.write_text("---\n- name: restart nginx\n service:\n")
32 assert AnsibleParser.is_ansible_file(p, Path(tmp)) is True
33
34 def test_playbooks_dir_detected(self):
35 with tempfile.TemporaryDirectory() as tmp:
36 p = Path(tmp) / "playbooks" / "deploy.yml"
37 p.parent.mkdir(parents=True)
38 p.write_text("---\n- hosts: all\n tasks: []\n")
39 assert AnsibleParser.is_ansible_file(p, Path(tmp)) is True
40
41 def test_group_vars_detected(self):
42 with tempfile.TemporaryDirectory() as tmp:
43 p = Path(tmp) / "group_vars" / "all.yml"
44 p.parent.mkdir(parents=True)
45 p.write_text("---\nhttp_port: 80\n")
46 assert AnsibleParser.is_ansible_file(p, Path(tmp)) is True
47
48 def test_random_yaml_not_detected(self):
49 with tempfile.TemporaryDirectory() as tmp:
50 p = Path(tmp) / "random" / "config.yml"
51 p.parent.mkdir(parents=True)
52 p.write_text("---\nkey: value\n")
53 assert AnsibleParser.is_ansible_file(p, Path(tmp)) is False
54
55 def test_non_yaml_not_detected(self):
56 with tempfile.TemporaryDirectory() as tmp:
57 p = Path(tmp) / "some_file.py"
58 p.write_text("print('hello')\n")
59 assert AnsibleParser.is_ansible_file(p, Path(tmp)) is False
60
61
62 class TestParsePlaybook:
63 """Tests for parse_file() with a full playbook (list with hosts)."""
64
65 def test_creates_module_class_and_function_nodes(self):
66 store = _make_store()
67 parser = AnsibleParser()
68 with tempfile.TemporaryDirectory() as tmp:
69 tmp_path = Path(tmp)
70 playbook = tmp_path / "playbooks" / "deploy.yml"
71 playbook.parent.mkdir(parents=True)
72 playbook.write_text(
73 "---\n"
74 "- name: Deploy web app\n"
75 " hosts: webservers\n"
76 " tasks:\n"
77 " - name: Install nginx\n"
78 " apt:\n"
79 " name: nginx\n"
80 " state: present\n"
81 " - name: Start nginx\n"
82 " service:\n"
83 " name: nginx\n"
84 " state: started\n"
85 )
86 stats = parser.parse_file(playbook, tmp_path, store)
87
88 assert stats["functions"] >= 2
89 assert stats["classes"] >= 1
90
91 # Verify Module node created for playbook
92 create_calls = store.create_node.call_args_list
93 labels = [c[0][0] for c in create_calls]
94 assert NodeLabel.Module in labels
95 assert NodeLabel.Class in labels
96 assert NodeLabel.Function in labels
97
98 def test_edges_created_for_containment(self):
99 store = _make_store()
100 parser = AnsibleParser()
101 with tempfile.TemporaryDirectory() as tmp:
102 tmp_path = Path(tmp)
103 playbook = tmp_path / "playbooks" / "site.yml"
104 playbook.parent.mkdir(parents=True)
105 playbook.write_text(
106 "---\n- name: Main play\n hosts: all\n tasks:\n - name: Ping\n ping:\n"
107 )
108 stats = parser.parse_file(playbook, tmp_path, store)
109
110 assert stats["edges"] >= 3 # File->Module, Module->Class, Class->Func
111
112
113 class TestParseTaskFile:
114 """Tests for parse_file() with a standalone task file."""
115
116 def test_task_file_creates_class_and_functions(self):
117 store = _make_store()
118 parser = AnsibleParser()
119 with tempfile.TemporaryDirectory() as tmp:
120 tmp_path = Path(tmp)
121 task_file = tmp_path / "roles" / "web" / "tasks" / "main.yml"
122 task_file.parent.mkdir(parents=True)
123 task_file.write_text(
124 "---\n"
125 "- name: Install packages\n"
126 " apt:\n"
127 " name: curl\n"
128 "- name: Copy config\n"
129 " copy:\n"
130 " src: app.conf\n"
131 " dest: /etc/app.conf\n"
132 )
133 stats = parser.parse_file(task_file, tmp_path, store)
134
135 assert stats["classes"] == 1 # synthetic parent
136 assert stats["functions"] == 2
137
138
139 class TestParseVariableFile:
140 """Tests for parse_file() with a variable file."""
141
142 def test_variable_file_creates_variables(self):
143 store = _make_store()
144 parser = AnsibleParser()
145 with tempfile.TemporaryDirectory() as tmp:
146 tmp_path = Path(tmp)
147 var_file = tmp_path / "roles" / "web" / "defaults" / "main.yml"
148 var_file.parent.mkdir(parents=True)
149 var_file.write_text("---\nhttp_port: 80\nmax_clients: 200\napp_env: production\n")
150 stats = parser.parse_file(var_file, tmp_path, store)
151
152 # Each variable creates a CONTAINS edge
153 assert stats["edges"] >= 3
154 create_calls = store.create_node.call_args_list
155 labels = [c[0][0] for c in create_calls]
156 assert labels.count(NodeLabel.Variable) == 3
157
158
159 class TestHandlerAndNotify:
160 """Tests for handler detection and CALLS edges from notify."""
161
162 def test_notify_creates_calls_edge(self):
163 store = _make_store()
164 parser = AnsibleParser()
165 with tempfile.TemporaryDirectory() as tmp:
166 tmp_path = Path(tmp)
167 playbook = tmp_path / "playbooks" / "handlers.yml"
168 playbook.parent.mkdir(parents=True)
169 playbook.write_text(
170 "---\n"
171 "- name: Handler play\n"
172 " hosts: all\n"
173 " tasks:\n"
174 " - name: Update config\n"
175 " copy:\n"
176 " src: app.conf\n"
177 " dest: /etc/app.conf\n"
178 " notify: Restart app\n"
179 " handlers:\n"
180 " - name: Restart app\n"
181 " service:\n"
182 " name: app\n"
183 " state: restarted\n"
184 )
185 parser.parse_file(playbook, tmp_path, store)
186
187 # Should have a CALLS edge from task to handler
188 edge_calls = store.create_edge.call_args_list
189 calls_edges = [c for c in edge_calls if c[0][2] == EdgeType.CALLS]
190 assert len(calls_edges) >= 1
191 # The CALLS edge target should be the handler name
192 target_props = calls_edges[0][0][4]
193 assert target_props["name"] == "Restart app"
194
195 def test_handler_file_creates_handler_functions(self):
196 store = _make_store()
197 parser = AnsibleParser()
198 with tempfile.TemporaryDirectory() as tmp:
199 tmp_path = Path(tmp)
200 handler_file = tmp_path / "roles" / "web" / "handlers" / "main.yml"
201 handler_file.parent.mkdir(parents=True)
202 handler_file.write_text(
203 "---\n"
204 "- name: Restart nginx\n"
205 " service:\n"
206 " name: nginx\n"
207 " state: restarted\n"
208 "- name: Reload nginx\n"
209 " service:\n"
210 " name: nginx\n"
211 " state: reloaded\n"
212 )
213 stats = parser.parse_file(handler_file, tmp_path, store)
214
215 assert stats["functions"] == 2
216 assert stats["classes"] == 1
217
218
219 class TestRoleImport:
220 """Tests for role import extraction."""
221
222 def test_role_references_create_import_nodes(self):
223 store = _make_store()
224 parser = AnsibleParser()
225 with tempfile.TemporaryDirectory() as tmp:
226 tmp_path = Path(tmp)
227 playbook = tmp_path / "playbooks" / "roles.yml"
228 playbook.parent.mkdir(parents=True)
229 playbook.write_text(
230 "---\n"
231 "- name: Apply roles\n"
232 " hosts: all\n"
233 " roles:\n"
234 " -
--- a/tests/test_bash_parser.py
+++ b/tests/test_bash_parser.py
@@ -0,0 +1,534 @@
1
+"""Tests for navegador.ingestion.bash — BashParser internal methods."""
2
+
3
+from unittest.mock import MagicMock, patch
4
+
5
+import pytest
6
+
7
+from navegador.graph.schema import EdgeType, NodeLabel
8
+
9
+
10
+class MockNode:
11
+ _id_counter = 0
12
+
13
+ def __init__(
14
+ self,
15
+ type_: str,
16
+ text: bytes = b"",
17
+ children: list = None,
18
+ start_byte: int = 0,
19
+ end_byte: int = 0,
20
+ start_point: tuple = (0, 0),
21
+ end_point: tuple = (0, 0),
22
+ parent=None,
23
+ ):
24
+ MockNode._id_counter += 1
25
+ self.id = MockNode._id_counter
26
+ self.type = type_
27
+ self._text = text
28
+ self.children = children or []
29
+ self.start_byte = start_byte
30
+ self.end_byte = end_byte
31
+ self.start_point = start_point
32
+ self.end_point = end_point
33
+ self.parent = parent
34
+ self._fields: dict = {}
35
+ for child in self.children:
36
+ child.parent = self
37
+
38
+ def child_by_field_name(self, name: str):
39
+ return self._fields.get(name)
40
+
41
+ def set_field(self, name: str, node):
42
+ self._fields[name] = node
43
+ node.parent = self
44
+ return self
45
+
46
+
47
+def _text_node(text: bytes, type_: str = "identifier") -> MockNode:
48
+ return MockNode(type_, text, start_byte=0, end_byte=len(text))
49
+
50
+
51
+def _make_store():
52
+ store = MagicMock()
53
+ store.query.return_value = MagicMock(result_set=[])
54
+ return store
55
+
56
+
57
+def _make_parser():
58
+ from navegador.ingestion.bash import BashParser
59
+
60
+ parser = BashParser.__new__(BashParser)
61
+ parser._parser = MagicMock()
62
+ return parser
63
+
64
+
65
+class TestBashGetLanguage:
66
+ def test_raises_when_not_installed(self):
67
+ from navegador.ingestion.bash import _get_bash_language
68
+
69
+ with patch.dict(
70
+ "sys.modules",
71
+ {
72
+ "tree_sitter_bash": None,
73
+ "tree_sitter": None,
74
+ },
75
+ ):
76
+ with pytest.raises(ImportError, match="tree-sitter-bash"):
77
+ _get_bash_language()
78
+
79
+ def test_returns_language_object(self):
80
+ from navegador.ingestion.bash import _get_bash_language
81
+
82
+ mock_tsbash = MagicMock()
83
+ mock_ts = MagicMock()
84
+ with patch.dict(
85
+ "sys.modules",
86
+ {
87
+ "tree_sitter_bash": mock_tsbash,
88
+ "tree_sitter": mock_ts,
89
+ },
90
+ ):
91
+ result = _get_bash_language()
92
+ assert result is mock_ts.Language.return_value
93
+
94
+
95
+class TestBashNodeText:
96
+ def test_extracts_bytes(self):
97
+ from navegador.ingestion.bash import _node_text
98
+
99
+ source = b"#!/bin/bash\nmy_func() {"
100
+ node = MockNode(
101
+ "identifier",
102
+ start_byte=12,
103
+ end_byte=19,
104
+ )
105
+ assert _node_text(node, source) == "my_func"
106
+
107
+
108
+class TestBashHandleFunction:
109
+ def test_creates_function_node(self):
110
+ parser = _make_parser()
111
+ store = _make_store()
112
+ source = b"deploy"
113
+ name_node = MockNode(
114
+ "word",
115
+ start_byte=0,
116
+ end_byte=6,
117
+ )
118
+ node = MockNode(
119
+ "function_definition",
120
+ start_point=(0, 0),
121
+ end_point=(5, 1),
122
+ )
123
+ node.set_field("name", name_node)
124
+ stats = {"functions": 0, "classes": 0, "edges": 0}
125
+ parser._handle_function(node, source, "deploy.sh", store, stats)
126
+ assert stats["functions"] == 1
127
+ assert stats["edges"] == 1
128
+ label = store.create_node.call_args[0][0]
129
+ props = store.create_node.call_args[0][1]
130
+ assert label == NodeLabel.Function
131
+ assert props["name"] == "deploy"
132
+ assert props["semantic_type"] == "shell_function"
133
+
134
+ def test_skips_if_no_name_node(self):
135
+ parser = _make_parser()
136
+ store = _make_store()
137
+ node = MockNode(
138
+ "function_definition",
139
+ start_point=(0, 0),
140
+ end_point=(0, 5),
141
+ )
142
+ stats = {"functions": 0, "classes": 0, "edges": 0}
143
+ parser._handle_function(node, b"", "test.sh", store, stats)
144
+ assert stats["functions"] == 0
145
+ store.create_node.assert_not_called()
146
+
147
+ def test_extracts_calls_from_body(self):
148
+ parser = _make_parser()
149
+ store = _make_store()
150
+ source = b"deploy helper"
151
+ name_node = MockNode(
152
+ "word",
153
+ start_byte=0,
154
+ end_byte=6,
155
+ )
156
+ callee_name = MockNode(
157
+ "word",
158
+ start_byte=7,
159
+ end_byte=13,
160
+ )
161
+ cmd = MockNode("command")
162
+ cmd.set_field("name", callee_name)
163
+ body = MockNode(
164
+ "compound_statement",
165
+ children=[cmd],
166
+ )
167
+ node = MockNode(
168
+ "function_definition",
169
+ start_point=(0, 0),
170
+ end_point=(5, 1),
171
+ )
172
+ node.set_field("name", name_node)
173
+ node.set_field("body", body)
174
+ stats = {"functions": 0, "classes": 0, "edges": 0}
175
+ parser._handle_function(node, source, "deploy.sh", store, stats)
176
+ # 1 CONTAINS edge + 1 CALLS edge
177
+ assert stats["edges"] == 2
178
+
179
+
180
+class TestBashHandleVariable:
181
+ def test_creates_variable_node_for_top_level(self):
182
+ parser = _make_parser()
183
+ store = _make_store()
184
+ source = b'VERSION="1.0"'
185
+ name_node = MockNode(
186
+ "variable_name",
187
+ start_byte=0,
188
+ end_byte=7,
189
+ )
190
+ value_node = MockNode(
191
+ "string",
192
+ start_byte=8,
193
+ end_byte=13,
194
+ )
195
+ program = MockNode("program")
196
+ node = MockNode(
197
+ "variable_assignment",
198
+ start_point=(0, 0),
199
+ end_point=(0, 13),
200
+ parent=program,
201
+ )
202
+ node.set_field("name", name_node)
203
+ node.set_field("value", value_node)
204
+ # Re-set parent after construction since constructor
205
+ # overwrites it
206
+ node.parent = program
207
+ stats = {"functions": 0, "classes": 0, "edges": 0}
208
+ parser._handle_variable(node, source, "env.sh", store, stats)
209
+ assert stats["edges"] == 1
210
+ label = store.create_node.call_args[0][0]
211
+ props = store.create_node.call_args[0][1]
212
+ assert label == NodeLabel.Variable
213
+ assert props["name"] == "VERSION"
214
+ assert props["semantic_type"] == "shell_variable"
215
+
216
+ def test_skips_non_top_level_variable(self):
217
+ parser = _make_parser()
218
+ store = _make_store()
219
+ source = b"x=1"
220
+ name_node = MockNode(
221
+ "variable_name",
222
+ start_byte=0,
223
+ end_byte=1,
224
+ )
225
+ func_parent = MockNode("function_definition")
226
+ node = MockNode(
227
+ "variable_assignment",
228
+ start_point=(0, 0),
229
+ end_point=(0, 3),
230
+ parent=func_parent,
231
+ )
232
+ node.set_field("name", name_node)
233
+ node.parent = func_parent
234
+ stats = {"functions": 0, "classes": 0, "edges": 0}
235
+ parser._handle_variable(node, source, "test.sh", store, stats)
236
+ assert stats["edges"] == 0
237
+ store.create_node.assert_not_called()
238
+
239
+ def test_skips_variable_without_name(self):
240
+ parser = _make_parser()
241
+ store = _make_store()
242
+ program = MockNode("program")
243
+ node = MockNode(
244
+ "variable_assignment",
245
+ start_point=(0, 0),
246
+ end_point=(0, 3),
247
+ parent=program,
248
+ )
249
+ node.parent = program
250
+ stats = {"functions": 0, "classes": 0, "edges": 0}
251
+ parser._handle_variable(node, b"", "test.sh", store, stats)
252
+ store.create_node.assert_not_called()
253
+
254
+
255
+class TestBashHandleSource:
256
+ def test_creates_import_for_source_command(self):
257
+ parser = _make_parser()
258
+ store = _make_store()
259
+ source = b"source ./lib.sh"
260
+ name_node = MockNode(
261
+ "word",
262
+ start_byte=0,
263
+ end_byte=6,
264
+ )
265
+ arg_node = MockNode(
266
+ "word",
267
+ start_byte=7,
268
+ end_byte=15,
269
+ )
270
+ node = MockNode(
271
+ "command",
272
+ children=[name_node, arg_node],
273
+ start_point=(0, 0),
274
+ end_point=(0, 15),
275
+ )
276
+ node.set_field("name", name_node)
277
+ stats = {"functions": 0, "classes": 0, "edges": 0}
278
+ parser._handle_command(node, source, "main.sh", store, stats)
279
+ assert stats["edges"] == 1
280
+ label = store.create_node.call_args[0][0]
281
+ props = store.create_node.call_args[0][1]
282
+ assert label == NodeLabel.Import
283
+ assert props["name"] == "./lib.sh"
284
+ assert props["semantic_type"] == "shell_source"
285
+
286
+ def test_creates_import_for_dot_command(self):
287
+ parser = _make_parser()
288
+ store = _make_store()
289
+ source = b". /etc/profile"
290
+ name_node = MockNode(
291
+ "word",
292
+ start_byte=0,
293
+ end_byte=1,
294
+ )
295
+ arg_node = MockNode(
296
+ "word",
297
+ start_byte=2,
298
+ end_byte=14,
299
+ )
300
+ node = MockNode(
301
+ "command",
302
+ children=[name_node, arg_node],
303
+ start_point=(0, 0),
304
+ end_point=(0, 14),
305
+ )
306
+ node.set_field("name", name_node)
307
+ stats = {"functions": 0, "classes": 0, "edges": 0}
308
+ parser._handle_command(node, source, "main.sh", store, stats)
309
+ assert stats["edges"] == 1
310
+ props = store.create_node.call_args[0][1]
311
+ assert props["name"] == "/etc/profile"
312
+
313
+ def test_ignores_non_source_commands(self):
314
+ parser = _make_parser()
315
+ store = _make_store()
316
+ source = b"echo hello"
317
+ name_node = MockNode(
318
+ "word",
319
+ start_byte=0,
320
+ end_byte=4,
321
+ )
322
+ node = MockNode(
323
+ "command",
324
+ children=[name_node],
325
+ start_point=(0, 0),
326
+ end_point=(0, 10),
327
+ )
328
+ node.set_field("name", name_node)
329
+ stats = {"functions": 0, "classes": 0, "edges": 0}
330
+ parser._handle_command(node, source, "main.sh", store, stats)
331
+ assert stats["edges"] == 0
332
+ store.create_node.assert_not_called()
333
+
334
+ def test_skips_source_without_arguments(self):
335
+ parser = _make_parser()
336
+ store = _make_store()
337
+ source = b"source"
338
+ name_node = MockNode(
339
+ "word",
340
+ start_byte=0,
341
+ end_byte=6,
342
+ )
343
+ node = MockNode(
344
+ "command",
345
+ children=[name_node],
346
+ start_point=(0, 0),
347
+ end_point=(0, 6),
348
+ )
349
+ node.set_field("name", name_node)
350
+ stats = {"functions": 0, "classes": 0, "edges": 0}
351
+ parser._handle_command(node, source, "main.sh", store, stats)
352
+ assert stats["edges"] == 0
353
+ store.create_node.assert_not_called()
354
+
355
+
356
+class TestBashExtractCalls:
357
+ def test_finds_command_calls(self):
358
+ parser = _make_parser()
359
+ store = _make_store()
360
+ source = b"build_app"
361
+ callee = MockNode(
362
+ "word",
363
+ start_byte=0,
364
+ end_byte=9,
365
+ )
366
+ cmd = MockNode("command")
367
+ cmd.set_field("name", callee)
368
+ body = MockNode(
369
+ "compound_statement",
370
+ children=[cmd],
371
+ )
372
+ fn_node = MockNode("function_definition")
373
+ fn_node.set_field("body", body)
374
+ stats = {"functions": 0, "classes": 0, "edges": 0}
375
+ parser._extract_calls(fn_node, source, "deploy.sh", "deploy", store, stats)
376
+ assert stats["edges"] == 1
377
+ edge_call = store.create_edge.call_args[0]
378
+ assert edge_call[2] == EdgeType.CALLS
379
+ assert edge_call[4]["name"] == "build_app"
380
+
381
+ def test_skips_builtins(self):
382
+ parser = _make_parser()
383
+ store = _make_store()
384
+ source = b"echo"
385
+ callee = MockNode(
386
+ "word",
387
+ start_byte=0,
388
+ end_byte=4,
389
+ )
390
+ cmd = MockNode("command")
391
+ cmd.set_field("name", callee)
392
+ body = MockNode(
393
+ "compound_statement",
394
+ children=[cmd],
395
+ )
396
+ fn_node = MockNode("function_definition")
397
+ fn_node.set_field("body", body)
398
+ stats = {"functions": 0, "classes": 0, "edges": 0}
399
+ parser._extract_calls(fn_node, source, "test.sh", "myfunc", store, stats)
400
+ assert stats["edges"] == 0
401
+
402
+ def test_no_calls_in_empty_body(self):
403
+ parser = _make_parser()
404
+ store = _make_store()
405
+ fn_node = MockNode("function_definition")
406
+ fn_node.set_field("body", MockNode("compound_statement"))
407
+ stats = {"functions": 0, "classes": 0, "edges": 0}
408
+ parser._extract_calls(fn_node, b"", "test.sh", "myfunc", store, stats)
409
+ assert stats["edges"] == 0
410
+
411
+ def test_no_body_means_no_calls(self):
412
+ parser = _make_parser()
413
+ store = _make_store()
414
+ fn_node = MockNode("function_definition")
415
+ stats = {"functions": 0, "classes": 0, "edges": 0}
416
+ parser._extract_calls(fn_node, b"", "test.sh", "myfunc", store, stats)
417
+ assert stats["edges"] == 0
418
+
419
+
420
+class TestBashWalkDispatch:
421
+ def test_walk_handles_function_definition(self):
422
+ parser = _make_parser()
423
+ store = _make_store()
424
+ source = b"deploy"
425
+ name_node = MockNode(
426
+ "word",
427
+ start_byte=0,
428
+ end_byte=6,
429
+ )
430
+ fn = MockNode(
431
+ "function_definition",
432
+ start_point=(0, 0),
433
+ end_point=(5, 1),
434
+ )
435
+ fn.set_field("name", name_node)
436
+ root = MockNode("program", children=[fn])
437
+ stats = {"functions": 0, "classes": 0, "edges": 0}
438
+ parser._walk(root, source, "deploy.sh", store, stats)
439
+ assert stats["functions"] == 1
440
+
441
+ def test_walk_handles_variable_assignment(self):
442
+ parser = _make_parser()
443
+ store = _make_store()
444
+ source = b"VERSION"
445
+ name_node = MockNode(
446
+ "variable_name",
447
+ start_byte=0,
448
+ end_byte=7,
449
+ )
450
+ program = MockNode("program")
451
+ var = MockNode(
452
+ "variable_assignment",
453
+ start_point=(0, 0),
454
+ end_point=(0, 13),
455
+ )
456
+ var.set_field("name", name_node)
457
+ program.children = [var]
458
+ for child in program.children:
459
+ child.parent = program
460
+ stats = {"functions": 0, "classes": 0, "edges": 0}
461
+ parser._walk(program, source, "env.sh", store, stats)
462
+ assert stats["edges"] == 1
463
+
464
+ def test_walk_handles_source_command(self):
465
+ parser = _make_parser()
466
+ store = _make_store()
467
+ source = b"source ./lib.sh"
468
+ name_node = MockNode(
469
+ "word",
470
+ start_byte=0,
471
+ end_byte=6,
472
+ )
473
+ arg_node = MockNode(
474
+ "word",
475
+ start_byte=7,
476
+ end_byte=15,
477
+ )
478
+ cmd = MockNode(
479
+ "command",
480
+ children=[name_node, arg_node],
481
+ start_point=(0, 0),
482
+ end_point=(0, 15),
483
+ )
484
+ cmd.set_field("name", name_node)
485
+ root = MockNode("program", children=[cmd])
486
+ stats = {"functions": 0, "classes": 0, "edges": 0}
487
+ parser._walk(root, source, "main.sh", store, stats)
488
+ assert stats["edges"] == 1
489
+
490
+ def test_walk_recurses_into_children(self):
491
+ parser = _make_parser()
492
+ store = _make_store()
493
+ source = b"deploy"
494
+ name_node = MockNode(
495
+ "word",
496
+ start_byte=0,
497
+ end_byte=6,
498
+ )
499
+ fn = MockNode(
500
+ "function_definition",
501
+ start_point=(0, 0),
502
+ end_point=(5, 1),
503
+ )
504
+ fn.set_field("name", name_node)
505
+ wrapper = MockNode("if_statement", children=[fn])
506
+ root = MockNode("program", children=[wrapper])
507
+ stats = {"functions": 0, "classes": 0, "edges": 0}
508
+ parser._walk(root, source, "deploy.sh", store, stats)
509
+ assert stats["functions"] == 1
510
+
511
+
512
+class TestBashParseFile:
513
+ def test_creates_file_node(self):
514
+ import tempfile
515
+ from pathlib import Path
516
+
517
+ parser = _make_parser()
518
+ store = _make_store()
519
+ mock_tree = MagicMock()
520
+ mock_tree.root_node.type = "program"
521
+ mock_tree.root_node.children = []
522
+ parser._parser.parse.return_value = mock_tree
523
+ with tempfile.NamedTemporaryFile(suffix=".sh", delete=False) as f:
524
+ f.write(b"#!/bin/bash\necho hello\n")
525
+ fpath = Path(f.name)
526
+ try:
527
+ parser.parse_file(fpath, fpath.parent, store)
528
+ store.create_node.assert_called_once()
529
+ label = store.create_node.call_args[0][0]
530
+ props = store.create_node.call_args[0][1]
531
+ assert label == NodeLabel.File
532
+ assert props["language"] == "bash"
533
+ finally:
534
+ fpath.unlink()
--- a/tests/test_bash_parser.py
+++ b/tests/test_bash_parser.py
@@ -0,0 +1,534 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
--- a/tests/test_bash_parser.py
+++ b/tests/test_bash_parser.py
@@ -0,0 +1,534 @@
1 """Tests for navegador.ingestion.bash — BashParser internal methods."""
2
3 from unittest.mock import MagicMock, patch
4
5 import pytest
6
7 from navegador.graph.schema import EdgeType, NodeLabel
8
9
10 class MockNode:
11 _id_counter = 0
12
13 def __init__(
14 self,
15 type_: str,
16 text: bytes = b"",
17 children: list = None,
18 start_byte: int = 0,
19 end_byte: int = 0,
20 start_point: tuple = (0, 0),
21 end_point: tuple = (0, 0),
22 parent=None,
23 ):
24 MockNode._id_counter += 1
25 self.id = MockNode._id_counter
26 self.type = type_
27 self._text = text
28 self.children = children or []
29 self.start_byte = start_byte
30 self.end_byte = end_byte
31 self.start_point = start_point
32 self.end_point = end_point
33 self.parent = parent
34 self._fields: dict = {}
35 for child in self.children:
36 child.parent = self
37
38 def child_by_field_name(self, name: str):
39 return self._fields.get(name)
40
41 def set_field(self, name: str, node):
42 self._fields[name] = node
43 node.parent = self
44 return self
45
46
def _text_node(text: bytes, type_: str = "identifier") -> MockNode:
    """Build a MockNode whose byte span covers exactly *text*."""
    span = len(text)
    return MockNode(type_, text, start_byte=0, end_byte=span)
49
50
51 def _make_store():
52 store = MagicMock()
53 store.query.return_value = MagicMock(result_set=[])
54 return store
55
56
def _make_parser():
    """Instantiate BashParser without running __init__; the tree-sitter backend is mocked."""
    from navegador.ingestion.bash import BashParser

    instance = BashParser.__new__(BashParser)
    instance._parser = MagicMock()
    return instance
63
64
class TestBashGetLanguage:
    """Behaviour of the lazy tree-sitter-bash language loader."""

    def test_raises_when_not_installed(self):
        from navegador.ingestion.bash import _get_bash_language

        missing = {"tree_sitter_bash": None, "tree_sitter": None}
        with patch.dict("sys.modules", missing):
            with pytest.raises(ImportError, match="tree-sitter-bash"):
                _get_bash_language()

    def test_returns_language_object(self):
        from navegador.ingestion.bash import _get_bash_language

        fake_grammar = MagicMock()
        fake_ts = MagicMock()
        installed = {"tree_sitter_bash": fake_grammar, "tree_sitter": fake_ts}
        with patch.dict("sys.modules", installed):
            assert _get_bash_language() is fake_ts.Language.return_value
93
94
class TestBashNodeText:
    """_node_text decodes the byte span a node covers."""

    def test_extracts_bytes(self):
        from navegador.ingestion.bash import _node_text

        script = b"#!/bin/bash\nmy_func() {"
        span = MockNode("identifier", start_byte=12, end_byte=19)
        assert _node_text(span, script) == "my_func"
106
107
class TestBashHandleFunction:
    """_handle_function: Function-node creation and call-edge extraction."""

    def test_creates_function_node(self):
        parser = _make_parser()
        store = _make_store()
        fn = MockNode("function_definition", start_point=(0, 0), end_point=(5, 1))
        fn.set_field("name", MockNode("word", start_byte=0, end_byte=6))
        stats = dict(functions=0, classes=0, edges=0)
        parser._handle_function(fn, b"deploy", "deploy.sh", store, stats)
        assert stats["functions"] == 1
        assert stats["edges"] == 1
        label = store.create_node.call_args[0][0]
        props = store.create_node.call_args[0][1]
        assert label == NodeLabel.Function
        assert props["name"] == "deploy"
        assert props["semantic_type"] == "shell_function"

    def test_skips_if_no_name_node(self):
        parser = _make_parser()
        store = _make_store()
        nameless = MockNode("function_definition", start_point=(0, 0), end_point=(0, 5))
        stats = dict(functions=0, classes=0, edges=0)
        parser._handle_function(nameless, b"", "test.sh", store, stats)
        assert stats["functions"] == 0
        store.create_node.assert_not_called()

    def test_extracts_calls_from_body(self):
        parser = _make_parser()
        store = _make_store()
        inner_cmd = MockNode("command")
        inner_cmd.set_field("name", MockNode("word", start_byte=7, end_byte=13))
        body = MockNode("compound_statement", children=[inner_cmd])
        fn = MockNode("function_definition", start_point=(0, 0), end_point=(5, 1))
        fn.set_field("name", MockNode("word", start_byte=0, end_byte=6))
        fn.set_field("body", body)
        stats = dict(functions=0, classes=0, edges=0)
        parser._handle_function(fn, b"deploy helper", "deploy.sh", store, stats)
        # One CONTAINS edge for the function plus one CALLS edge.
        assert stats["edges"] == 2
178
179
class TestBashHandleVariable:
    """_handle_variable: only top-level assignments become Variable nodes."""

    def test_creates_variable_node_for_top_level(self):
        parser = _make_parser()
        store = _make_store()
        top = MockNode("program")
        assign = MockNode(
            "variable_assignment",
            start_point=(0, 0),
            end_point=(0, 13),
            parent=top,
        )
        assign.set_field("name", MockNode("variable_name", start_byte=0, end_byte=7))
        assign.set_field("value", MockNode("string", start_byte=8, end_byte=13))
        # set_field re-parents only the field nodes; make sure the assignment
        # itself still hangs off the program node.
        assign.parent = top
        stats = dict(functions=0, classes=0, edges=0)
        parser._handle_variable(assign, b'VERSION="1.0"', "env.sh", store, stats)
        assert stats["edges"] == 1
        label = store.create_node.call_args[0][0]
        props = store.create_node.call_args[0][1]
        assert label == NodeLabel.Variable
        assert props["name"] == "VERSION"
        assert props["semantic_type"] == "shell_variable"

    def test_skips_non_top_level_variable(self):
        parser = _make_parser()
        store = _make_store()
        enclosing = MockNode("function_definition")
        assign = MockNode(
            "variable_assignment",
            start_point=(0, 0),
            end_point=(0, 3),
            parent=enclosing,
        )
        assign.set_field("name", MockNode("variable_name", start_byte=0, end_byte=1))
        assign.parent = enclosing
        stats = dict(functions=0, classes=0, edges=0)
        parser._handle_variable(assign, b"x=1", "test.sh", store, stats)
        assert stats["edges"] == 0
        store.create_node.assert_not_called()

    def test_skips_variable_without_name(self):
        parser = _make_parser()
        store = _make_store()
        top = MockNode("program")
        assign = MockNode(
            "variable_assignment",
            start_point=(0, 0),
            end_point=(0, 3),
            parent=top,
        )
        assign.parent = top
        stats = dict(functions=0, classes=0, edges=0)
        parser._handle_variable(assign, b"", "test.sh", store, stats)
        store.create_node.assert_not_called()
253
254
class TestBashHandleSource:
    """_handle_command: `source`/`.` create Import nodes; other commands are ignored."""

    def test_creates_import_for_source_command(self):
        parser = _make_parser()
        store = _make_store()
        head = MockNode("word", start_byte=0, end_byte=6)
        target = MockNode("word", start_byte=7, end_byte=15)
        cmd = MockNode(
            "command",
            children=[head, target],
            start_point=(0, 0),
            end_point=(0, 15),
        )
        cmd.set_field("name", head)
        stats = dict(functions=0, classes=0, edges=0)
        parser._handle_command(cmd, b"source ./lib.sh", "main.sh", store, stats)
        assert stats["edges"] == 1
        label = store.create_node.call_args[0][0]
        props = store.create_node.call_args[0][1]
        assert label == NodeLabel.Import
        assert props["name"] == "./lib.sh"
        assert props["semantic_type"] == "shell_source"

    def test_creates_import_for_dot_command(self):
        parser = _make_parser()
        store = _make_store()
        head = MockNode("word", start_byte=0, end_byte=1)
        target = MockNode("word", start_byte=2, end_byte=14)
        cmd = MockNode(
            "command",
            children=[head, target],
            start_point=(0, 0),
            end_point=(0, 14),
        )
        cmd.set_field("name", head)
        stats = dict(functions=0, classes=0, edges=0)
        parser._handle_command(cmd, b". /etc/profile", "main.sh", store, stats)
        assert stats["edges"] == 1
        assert store.create_node.call_args[0][1]["name"] == "/etc/profile"

    def test_ignores_non_source_commands(self):
        parser = _make_parser()
        store = _make_store()
        head = MockNode("word", start_byte=0, end_byte=4)
        cmd = MockNode(
            "command",
            children=[head],
            start_point=(0, 0),
            end_point=(0, 10),
        )
        cmd.set_field("name", head)
        stats = dict(functions=0, classes=0, edges=0)
        parser._handle_command(cmd, b"echo hello", "main.sh", store, stats)
        assert stats["edges"] == 0
        store.create_node.assert_not_called()

    def test_skips_source_without_arguments(self):
        parser = _make_parser()
        store = _make_store()
        head = MockNode("word", start_byte=0, end_byte=6)
        cmd = MockNode(
            "command",
            children=[head],
            start_point=(0, 0),
            end_point=(0, 6),
        )
        cmd.set_field("name", head)
        stats = dict(functions=0, classes=0, edges=0)
        parser._handle_command(cmd, b"source", "main.sh", store, stats)
        assert stats["edges"] == 0
        store.create_node.assert_not_called()
354
355
class TestBashExtractCalls:
    """_extract_calls: CALLS edges from commands inside a function body."""

    def test_finds_command_calls(self):
        parser = _make_parser()
        store = _make_store()
        call_cmd = MockNode("command")
        call_cmd.set_field("name", MockNode("word", start_byte=0, end_byte=9))
        fn = MockNode("function_definition")
        fn.set_field("body", MockNode("compound_statement", children=[call_cmd]))
        stats = dict(functions=0, classes=0, edges=0)
        parser._extract_calls(fn, b"build_app", "deploy.sh", "deploy", store, stats)
        assert stats["edges"] == 1
        edge_args = store.create_edge.call_args[0]
        assert edge_args[2] == EdgeType.CALLS
        assert edge_args[4]["name"] == "build_app"

    def test_skips_builtins(self):
        parser = _make_parser()
        store = _make_store()
        call_cmd = MockNode("command")
        call_cmd.set_field("name", MockNode("word", start_byte=0, end_byte=4))
        fn = MockNode("function_definition")
        fn.set_field("body", MockNode("compound_statement", children=[call_cmd]))
        stats = dict(functions=0, classes=0, edges=0)
        parser._extract_calls(fn, b"echo", "test.sh", "myfunc", store, stats)
        assert stats["edges"] == 0

    def test_no_calls_in_empty_body(self):
        parser = _make_parser()
        store = _make_store()
        fn = MockNode("function_definition")
        fn.set_field("body", MockNode("compound_statement"))
        stats = dict(functions=0, classes=0, edges=0)
        parser._extract_calls(fn, b"", "test.sh", "myfunc", store, stats)
        assert stats["edges"] == 0

    def test_no_body_means_no_calls(self):
        parser = _make_parser()
        store = _make_store()
        fn = MockNode("function_definition")
        stats = dict(functions=0, classes=0, edges=0)
        parser._extract_calls(fn, b"", "test.sh", "myfunc", store, stats)
        assert stats["edges"] == 0
418
419
class TestBashWalkDispatch:
    """_walk: dispatches to the right handler and recurses through the tree."""

    def test_walk_handles_function_definition(self):
        parser = _make_parser()
        store = _make_store()
        fn = MockNode("function_definition", start_point=(0, 0), end_point=(5, 1))
        fn.set_field("name", MockNode("word", start_byte=0, end_byte=6))
        root = MockNode("program", children=[fn])
        stats = dict(functions=0, classes=0, edges=0)
        parser._walk(root, b"deploy", "deploy.sh", store, stats)
        assert stats["functions"] == 1

    def test_walk_handles_variable_assignment(self):
        parser = _make_parser()
        store = _make_store()
        assign = MockNode("variable_assignment", start_point=(0, 0), end_point=(0, 13))
        assign.set_field("name", MockNode("variable_name", start_byte=0, end_byte=7))
        top = MockNode("program")
        top.children = [assign]
        for node in top.children:
            node.parent = top
        stats = dict(functions=0, classes=0, edges=0)
        parser._walk(top, b"VERSION", "env.sh", store, stats)
        assert stats["edges"] == 1

    def test_walk_handles_source_command(self):
        parser = _make_parser()
        store = _make_store()
        head = MockNode("word", start_byte=0, end_byte=6)
        target = MockNode("word", start_byte=7, end_byte=15)
        cmd = MockNode(
            "command",
            children=[head, target],
            start_point=(0, 0),
            end_point=(0, 15),
        )
        cmd.set_field("name", head)
        root = MockNode("program", children=[cmd])
        stats = dict(functions=0, classes=0, edges=0)
        parser._walk(root, b"source ./lib.sh", "main.sh", store, stats)
        assert stats["edges"] == 1

    def test_walk_recurses_into_children(self):
        parser = _make_parser()
        store = _make_store()
        fn = MockNode("function_definition", start_point=(0, 0), end_point=(5, 1))
        fn.set_field("name", MockNode("word", start_byte=0, end_byte=6))
        nested = MockNode("if_statement", children=[fn])
        root = MockNode("program", children=[nested])
        stats = dict(functions=0, classes=0, edges=0)
        parser._walk(root, b"deploy", "deploy.sh", store, stats)
        assert stats["functions"] == 1
510
511
class TestBashParseFile:
    """parse_file end-to-end with a mocked tree-sitter backend."""

    def test_creates_file_node(self):
        import tempfile
        from pathlib import Path

        parser = _make_parser()
        store = _make_store()
        fake_tree = MagicMock()
        fake_tree.root_node.type = "program"
        fake_tree.root_node.children = []
        parser._parser.parse.return_value = fake_tree
        with tempfile.NamedTemporaryFile(suffix=".sh", delete=False) as handle:
            handle.write(b"#!/bin/bash\necho hello\n")
            script = Path(handle.name)
        try:
            parser.parse_file(script, script.parent, store)
            store.create_node.assert_called_once()
            label = store.create_node.call_args[0][0]
            props = store.create_node.call_args[0][1]
            assert label == NodeLabel.File
            assert props["language"] == "bash"
        finally:
            script.unlink()
--- a/tests/test_chef_enricher.py
+++ b/tests/test_chef_enricher.py
@@ -0,0 +1,210 @@
1
+"""Tests for navegador.enrichment.chef — ChefEnricher."""
2
+
3
+from unittest.mock import MagicMock
4
+
5
+from navegador.enrichment.chef import ChefEnricher
6
+
7
+
8
+def _make_store(query_results=None):
9
+ """Create a mock GraphStore.
10
+
11
+ *query_results* maps Cypher query substrings to result_set lists.
12
+ Unmatched queries return an empty result_set.
13
+ """
14
+ store = MagicMock()
15
+ mapping = query_results or {}
16
+
17
+ def _side_effect(query, params=None):
18
+ result = MagicMock()
19
+ for substr, rows in mapping.items():
20
+ if substr in query:
21
+ result.result_set = rows
22
+ return result
23
+ result.result_set = []
24
+ return result
25
+
26
+ store.query.side_effect = _side_effect
27
+ return store
28
+
29
+
30
+class TestIdentity:
31
+ """Framework identity properties."""
32
+
33
+ def test_framework_name(self):
34
+ store = _make_store()
35
+ enricher = ChefEnricher(store)
36
+ assert enricher.framework_name == "chef"
37
+
38
+ def test_detection_files(self):
39
+ store = _make_store()
40
+ enricher = ChefEnricher(store)
41
+ assert "metadata.rb" in enricher.detection_files
42
+ assert "Berksfile" in enricher.detection_files
43
+
44
+ def test_detection_patterns(self):
45
+ store = _make_store()
46
+ enricher = ChefEnricher(store)
47
+ assert "chef" in enricher.detection_patterns
48
+
49
+
50
+class TestDetect:
51
+ """Tests for detect() — framework presence detection."""
52
+
53
+ def test_detect_true_when_metadata_rb_exists(self):
54
+ store = _make_store(
55
+ {
56
+ "f.name = $name": [[1]],
57
+ }
58
+ )
59
+ enricher = ChefEnricher(store)
60
+ assert enricher.detect() is True
61
+
62
+ def test_detect_false_when_no_markers(self):
63
+ store = _make_store()
64
+ enricher = ChefEnricher(store)
65
+ assert enricher.detect() is False
66
+
67
+ def test_detect_true_via_import_pattern(self):
68
+ store = _make_store(
69
+ {
70
+ "n.name = $name OR n.module = $name": [[1]],
71
+ }
72
+ )
73
+ enricher = ChefEnricher(store)
74
+ assert enricher.detect() is True
75
+
76
+
77
+class TestEnrichRecipes:
78
+ """Tests for enrich() promoting recipe files."""
79
+
80
+ def test_promotes_recipe_files(self):
81
+ store = _make_store(
82
+ {
83
+ "n.file_path CONTAINS $pattern": [
84
+ ["default.rb", "cookbooks/web/recipes/default.rb"],
85
+ ["install.rb", "cookbooks/web/recipes/install.rb"],
86
+ ],
87
+ }
88
+ )
89
+ enricher = ChefEnricher(store)
90
+ result = enricher.enrich()
91
+
92
+ assert result.patterns_found["recipes"] == 2
93
+ assert result.promoted >= 2
94
+
95
+ # Verify _promote_node was called via store.query SET
96
+ set_calls = [c for c in store.query.call_args_list if "SET n.semantic_type" in str(c)]
97
+ assert len(set_calls) >= 2
98
+
99
+
100
+class TestEnrichResources:
101
+ """Tests for enrich() promoting Chef resource calls."""
102
+
103
+ def test_promotes_resource_functions(self):
104
+ # _enrich_resources queries twice (recipes/ and libraries/),
105
+ # so we use a custom side_effect to return data only once.
106
+ call_count = {"resource": 0}
107
+ original_results = [
108
+ ["package", "cookbooks/web/recipes/default.rb"],
109
+ ["template", "cookbooks/web/recipes/default.rb"],
110
+ ["not_a_resource", "cookbooks/web/recipes/default.rb"],
111
+ ]
112
+
113
+ def _side_effect(query, params=None):
114
+ result = MagicMock()
115
+ if "(n:Function OR n:Method)" in query:
116
+ call_count["resource"] += 1
117
+ if call_count["resource"] == 1:
118
+ result.result_set = original_results
119
+ else:
120
+ result.result_set = []
121
+ else:
122
+ result.result_set = []
123
+ return result
124
+
125
+ store = MagicMock()
126
+ store.query.side_effect = _side_effect
127
+ enricher = ChefEnricher(store)
128
+ result = enricher.enrich()
129
+
130
+ # "package" and "template" match, "not_a_resource" does not
131
+ assert result.patterns_found["resources"] == 2
132
+
133
+ def test_skips_non_resource_functions(self):
134
+ store = _make_store(
135
+ {
136
+ "(n:Function OR n:Method)": [
137
+ ["my_helper", "cookbooks/web/libraries/helpers.rb"],
138
+ ],
139
+ }
140
+ )
141
+ enricher = ChefEnricher(store)
142
+ result = enricher.enrich()
143
+
144
+ assert result.patterns_found["resources"] == 0
145
+
146
+
147
+class TestEnrichIncludeRecipe:
148
+ """Tests for enrich() handling include_recipe edges."""
149
+
150
+ def test_creates_depends_on_edge(self):
151
+ # Strategy 1: follow CALLS edges from include_recipe nodes
152
+ def _query_side_effect(query, params=None):
153
+ result = MagicMock()
154
+ if "[:CALLS]" in query and "n.name = $name" in query:
155
+ result.result_set = [
156
+ [
157
+ "cookbooks/web/recipes/default.rb",
158
+ "database::install",
159
+ ],
160
+ ]
161
+ elif "f.file_path CONTAINS $recipes" in query:
162
+ result.result_set = [["install.rb"]]
163
+ elif "f.file_path = $path" in query:
164
+ result.result_set = [["default.rb"]]
165
+ elif "MERGE" in query:
166
+ result.result_set = []
167
+ else:
168
+ result.result_set = []
169
+ return result
170
+
171
+ store = MagicMock()
172
+ store.query.side_effect = _query_side_effect
173
+ enricher = ChefEnricher(store)
174
+ result = enricher.enrich()
175
+
176
+ assert result.edges_added >= 1
177
+ assert result.patterns_found["include_recipe"] >= 1
178
+
179
+ # Verify MERGE query was issued for the DEPENDS_ON edge
180
+ merge_calls = [
181
+ c for c in store.query.call_args_list if "MERGE" in str(c) and "DEPENDS_ON" in str(c)
182
+ ]
183
+ assert len(merge_calls) >= 1
184
+
185
+ def test_no_edges_when_no_include_recipe(self):
186
+ store = _make_store()
187
+ enricher = ChefEnricher(store)
188
+ result = enricher.enrich()
189
+
190
+ assert result.edges_added == 0
191
+ assert result.patterns_found["include_recipe"] == 0
192
+
193
+
194
+class TestEnrichCookbooks:
195
+ """Tests for enrich() promoting cookbook metadata files."""
196
+
197
+ def test_promotes_metadata_rb(self):
198
+ store = _make_store(
199
+ {
200
+ "n.name = $name": [
201
+ ["metadata.rb", "cookbooks/web/metadata.rb"],
202
+ ],
203
+ }
204
+ )
205
+ enricher = ChefEnricher(store)
206
+ result = enricher.enrich()
207
+
208
+ assert result.patterns_found["cookbooks"] == 1
209
+ set_calls = [c for c in store.query.call_args_list if "chef_cookbook" in str(c)]
210
+ assert len(set_calls) >= 1
--- a/tests/test_chef_enricher.py
+++ b/tests/test_chef_enricher.py
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
--- a/tests/test_chef_enricher.py
+++ b/tests/test_chef_enricher.py
@@ -0,0 +1,210 @@
1 """Tests for navegador.enrichment.chef — ChefEnricher."""
2
3 from unittest.mock import MagicMock
4
5 from navegador.enrichment.chef import ChefEnricher
6
7
8 def _make_store(query_results=None):
9 """Create a mock GraphStore.
10
11 *query_results* maps Cypher query substrings to result_set lists.
12 Unmatched queries return an empty result_set.
13 """
14 store = MagicMock()
15 mapping = query_results or {}
16
17 def _side_effect(query, params=None):
18 result = MagicMock()
19 for substr, rows in mapping.items():
20 if substr in query:
21 result.result_set = rows
22 return result
23 result.result_set = []
24 return result
25
26 store.query.side_effect = _side_effect
27 return store
28
29
class TestIdentity:
    """Static identity attributes of the Chef enricher."""

    def test_framework_name(self):
        assert ChefEnricher(_make_store()).framework_name == "chef"

    def test_detection_files(self):
        enricher = ChefEnricher(_make_store())
        assert "metadata.rb" in enricher.detection_files
        assert "Berksfile" in enricher.detection_files

    def test_detection_patterns(self):
        assert "chef" in ChefEnricher(_make_store()).detection_patterns
48
49
class TestDetect:
    """detect(): framework presence checks against the graph."""

    def test_detect_true_when_metadata_rb_exists(self):
        store = _make_store({"f.name = $name": [[1]]})
        assert ChefEnricher(store).detect() is True

    def test_detect_false_when_no_markers(self):
        assert ChefEnricher(_make_store()).detect() is False

    def test_detect_true_via_import_pattern(self):
        store = _make_store({"n.name = $name OR n.module = $name": [[1]]})
        assert ChefEnricher(store).detect() is True
75
76
class TestEnrichRecipes:
    """enrich(): recipe files get promoted to Chef semantic types."""

    def test_promotes_recipe_files(self):
        rows = [
            ["default.rb", "cookbooks/web/recipes/default.rb"],
            ["install.rb", "cookbooks/web/recipes/install.rb"],
        ]
        store = _make_store({"n.file_path CONTAINS $pattern": rows})
        outcome = ChefEnricher(store).enrich()

        assert outcome.patterns_found["recipes"] == 2
        assert outcome.promoted >= 2

        # Promotion happens through a Cypher SET on semantic_type.
        promotions = [c for c in store.query.call_args_list if "SET n.semantic_type" in str(c)]
        assert len(promotions) >= 2
98
99
class TestEnrichResources:
    """enrich(): Chef resource calls inside recipes are promoted."""

    def test_promotes_resource_functions(self):
        # _enrich_resources queries twice (recipes/ and libraries/); hand
        # the rows back only for the first matching query.
        seen = {"count": 0}
        rows = [
            ["package", "cookbooks/web/recipes/default.rb"],
            ["template", "cookbooks/web/recipes/default.rb"],
            ["not_a_resource", "cookbooks/web/recipes/default.rb"],
        ]

        def _dispatch(query, params=None):
            reply = MagicMock()
            reply.result_set = []
            if "(n:Function OR n:Method)" in query:
                seen["count"] += 1
                if seen["count"] == 1:
                    reply.result_set = rows
            return reply

        store = MagicMock()
        store.query.side_effect = _dispatch
        outcome = ChefEnricher(store).enrich()

        # "package" and "template" are known resources; the third is not.
        assert outcome.patterns_found["resources"] == 2

    def test_skips_non_resource_functions(self):
        rows = [["my_helper", "cookbooks/web/libraries/helpers.rb"]]
        store = _make_store({"(n:Function OR n:Method)": rows})
        outcome = ChefEnricher(store).enrich()

        assert outcome.patterns_found["resources"] == 0
145
146
class TestEnrichIncludeRecipe:
    """enrich(): include_recipe calls produce DEPENDS_ON edges."""

    def test_creates_depends_on_edge(self):
        # Strategy 1: follow CALLS edges from include_recipe nodes.
        def _dispatch(query, params=None):
            reply = MagicMock()
            reply.result_set = []
            if "[:CALLS]" in query and "n.name = $name" in query:
                reply.result_set = [
                    ["cookbooks/web/recipes/default.rb", "database::install"],
                ]
            elif "f.file_path CONTAINS $recipes" in query:
                reply.result_set = [["install.rb"]]
            elif "f.file_path = $path" in query:
                reply.result_set = [["default.rb"]]
            return reply

        store = MagicMock()
        store.query.side_effect = _dispatch
        outcome = ChefEnricher(store).enrich()

        assert outcome.edges_added >= 1
        assert outcome.patterns_found["include_recipe"] >= 1

        # The DEPENDS_ON edge is created through a MERGE query.
        merges = [
            c for c in store.query.call_args_list if "MERGE" in str(c) and "DEPENDS_ON" in str(c)
        ]
        assert len(merges) >= 1

    def test_no_edges_when_no_include_recipe(self):
        outcome = ChefEnricher(_make_store()).enrich()

        assert outcome.edges_added == 0
        assert outcome.patterns_found["include_recipe"] == 0
192
193
class TestEnrichCookbooks:
    """enrich(): metadata.rb files are promoted to cookbook nodes."""

    def test_promotes_metadata_rb(self):
        rows = [["metadata.rb", "cookbooks/web/metadata.rb"]]
        store = _make_store({"n.name = $name": rows})
        outcome = ChefEnricher(store).enrich()

        assert outcome.patterns_found["cookbooks"] == 1
        promotions = [c for c in store.query.call_args_list if "chef_cookbook" in str(c)]
        assert len(promotions) >= 1
--- a/tests/test_hcl_parser.py
+++ b/tests/test_hcl_parser.py
@@ -0,0 +1,503 @@
1
+"""Tests for navegador.ingestion.hcl — HCLParser internal methods."""
2
+
3
+from unittest.mock import MagicMock, patch
4
+
5
+import pytest
6
+
7
+from navegador.graph.schema import EdgeType, NodeLabel
8
+
9
+
10
+class MockNode:
11
+ _id_counter = 0
12
+
13
+ def __init__(
14
+ self,
15
+ type_: str,
16
+ text: bytes = b"",
17
+ children: list = None,
18
+ start_byte: int = 0,
19
+ end_byte: int = 0,
20
+ start_point: tuple = (0, 0),
21
+ end_point: tuple = (0, 0),
22
+ parent=None,
23
+ ):
24
+ MockNode._id_counter += 1
25
+ self.id = MockNode._id_counter
26
+ self.type = type_
27
+ self._text = text
28
+ self.children = children or []
29
+ self.start_byte = start_byte
30
+ self.end_byte = end_byte
31
+ self.start_point = start_point
32
+ self.end_point = end_point
33
+ self.parent = parent
34
+ self._fields: dict = {}
35
+ for child in self.children:
36
+ child.parent = self
37
+
38
+ def child_by_field_name(self, name: str):
39
+ return self._fields.get(name)
40
+
41
+ def set_field(self, name: str, node):
42
+ self._fields[name] = node
43
+ node.parent = self
44
+ return self
45
+
46
+
47
+def _text_node(text: bytes, type_: str = "identifier") -> MockNode:
48
+ return MockNode(type_, text, start_byte=0, end_byte=len(text))
49
+
50
+
51
+def _make_store():
52
+ store = MagicMock()
53
+ store.query.return_value = MagicMock(result_set=[])
54
+ return store
55
+
56
+
57
+def _make_parser():
58
+ from navegador.ingestion.hcl import HCLParser
59
+
60
+ parser = HCLParser.__new__(HCLParser)
61
+ parser._parser = MagicMock()
62
+ return parser
63
+
64
+
65
+class TestHCLGetLanguage:
66
+ def test_raises_when_not_installed(self):
67
+ from navegador.ingestion.hcl import _get_hcl_language
68
+
69
+ with patch.dict(
70
+ "sys.modules",
71
+ {
72
+ "tree_sitter_hcl": None,
73
+ "tree_sitter": None,
74
+ },
75
+ ):
76
+ with pytest.raises(ImportError, match="tree-sitter-hcl"):
77
+ _get_hcl_language()
78
+
79
+ def test_returns_language_object(self):
80
+ from navegador.ingestion.hcl import _get_hcl_language
81
+
82
+ mock_tshcl = MagicMock()
83
+ mock_ts = MagicMock()
84
+ with patch.dict(
85
+ "sys.modules",
86
+ {
87
+ "tree_sitter_hcl": mock_tshcl,
88
+ "tree_sitter": mock_ts,
89
+ },
90
+ ):
91
+ result = _get_hcl_language()
92
+ assert result is mock_ts.Language.return_value
93
+
94
+
95
+class TestHCLNodeText:
96
+ def test_extracts_bytes(self):
97
+ from navegador.ingestion.hcl import _node_text
98
+
99
+ source = b'resource "aws_instance" "web" {}'
100
+ node = MockNode("identifier", start_byte=10, end_byte=22)
101
+ assert _node_text(node, source) == "aws_instance"
102
+
103
+
104
+class TestHCLHandleResource:
105
+ def test_creates_class_node_with_semantic_type(self):
106
+ parser = _make_parser()
107
+ store = _make_store()
108
+ source = b'resource "aws_instance" "web" {}'
109
+ node = MockNode(
110
+ "block",
111
+ start_point=(0, 0),
112
+ end_point=(0, 30),
113
+ )
114
+ labels = ["aws_instance", "web"]
115
+ stats = {"functions": 0, "classes": 0, "edges": 0}
116
+ parser._handle_resource(node, source, "main.tf", store, stats, labels, None)
117
+ assert stats["classes"] == 1
118
+ assert stats["edges"] == 1
119
+ store.create_node.assert_called_once()
120
+ label = store.create_node.call_args[0][0]
121
+ props = store.create_node.call_args[0][1]
122
+ assert label == NodeLabel.Class
123
+ assert props["name"] == "aws_instance.web"
124
+ assert props["semantic_type"] == "terraform_resource"
125
+
126
+ def test_extracts_references_from_body(self):
127
+ parser = _make_parser()
128
+ store = _make_store()
129
+ source = b"var.region"
130
+ body = MockNode("body", start_byte=0, end_byte=10)
131
+ node = MockNode(
132
+ "block",
133
+ start_point=(0, 0),
134
+ end_point=(0, 30),
135
+ )
136
+ labels = ["aws_instance", "web"]
137
+ stats = {"functions": 0, "classes": 0, "edges": 0}
138
+ parser._handle_resource(node, source, "main.tf", store, stats, labels, body)
139
+ # 1 CONTAINS edge + 1 REFERENCES edge from var.region
140
+ assert stats["edges"] == 2
141
+
142
+
143
+class TestHCLHandleVariable:
144
+ def test_creates_variable_node(self):
145
+ parser = _make_parser()
146
+ store = _make_store()
147
+ source = b'variable "region" {}'
148
+ node = MockNode(
149
+ "block",
150
+ start_point=(0, 0),
151
+ end_point=(0, 19),
152
+ )
153
+ labels = ["region"]
154
+ stats = {"functions": 0, "classes": 0, "edges": 0}
155
+ parser._handle_variable(node, source, "vars.tf", store, stats, labels, None)
156
+ assert stats["functions"] == 1
157
+ assert stats["edges"] == 1
158
+ label = store.create_node.call_args[0][0]
159
+ props = store.create_node.call_args[0][1]
160
+ assert label == NodeLabel.Variable
161
+ assert props["name"] == "region"
162
+ assert props["semantic_type"] == "terraform_variable"
163
+
164
+
165
+class TestHCLHandleModule:
166
+ def test_creates_module_node(self):
167
+ parser = _make_parser()
168
+ store = _make_store()
169
+ source = b'module "vpc" {}'
170
+ node = MockNode(
171
+ "block",
172
+ start_point=(0, 0),
173
+ end_point=(0, 14),
174
+ )
175
+ labels = ["vpc"]
176
+ stats = {"functions": 0, "classes": 0, "edges": 0}
177
+ parser._handle_module(node, source, "main.tf", store, stats, labels, None)
178
+ assert stats["classes"] == 1
179
+ assert stats["edges"] == 1
180
+ label = store.create_node.call_args[0][0]
181
+ props = store.create_node.call_args[0][1]
182
+ assert label == NodeLabel.Module
183
+ assert props["name"] == "vpc"
184
+ assert props["semantic_type"] == "terraform_module"
185
+
186
+ def test_extracts_source_attribute(self):
187
+ parser = _make_parser()
188
+ store = _make_store()
189
+ full_src = b"source./modules/vpc"
190
+ ident_node = MockNode(
191
+ "identifier",
192
+ start_byte=0,
193
+ end_byte=6,
194
+ )
195
+ expr_node = MockNode(
196
+ "expression",
197
+ start_byte=6,
198
+ end_byte=19,
199
+ )
200
+ expr_node.is_named = True
201
+ attr_node = MockNode(
202
+ "attribute",
203
+ children=[ident_node, expr_node],
204
+ )
205
+ body_node = MockNode("body", children=[attr_node])
206
+ node = MockNode(
207
+ "block",
208
+ start_point=(0, 0),
209
+ end_point=(0, 30),
210
+ )
211
+ labels = ["vpc"]
212
+ stats = {"functions": 0, "classes": 0, "edges": 0}
213
+ parser._handle_module(node, full_src, "main.tf", store, stats, labels, body_node)
214
+ props = store.create_node.call_args[0][1]
215
+ assert props["source"] == "./modules/vpc"
216
+
217
+
218
+class TestHCLHandleOutput:
219
+ def test_creates_variable_node(self):
220
+ parser = _make_parser()
221
+ store = _make_store()
222
+ source = b'output "vpc_id" {}'
223
+ node = MockNode(
224
+ "block",
225
+ start_point=(0, 0),
226
+ end_point=(0, 17),
227
+ )
228
+ labels = ["vpc_id"]
229
+ stats = {"functions": 0, "classes": 0, "edges": 0}
230
+ parser._handle_output(node, source, "outputs.tf", store, stats, labels, None)
231
+ assert stats["functions"] == 1
232
+ assert stats["edges"] == 1
233
+ label = store.create_node.call_args[0][0]
234
+ props = store.create_node.call_args[0][1]
235
+ assert label == NodeLabel.Variable
236
+ assert props["semantic_type"] == "terraform_output"
237
+
238
+ def test_extracts_references_from_body(self):
239
+ parser = _make_parser()
240
+ store = _make_store()
241
+ source = b"module.vpc"
242
+ body = MockNode("body", start_byte=0, end_byte=10)
243
+ node = MockNode(
244
+ "block",
245
+ start_point=(0, 0),
246
+ end_point=(0, 17),
247
+ )
248
+ labels = ["vpc_id"]
249
+ stats = {"functions": 0, "classes": 0, "edges": 0}
250
+ parser._handle_output(node, source, "outputs.tf", store, stats, labels, body)
251
+ # 1 CONTAINS + 1 REFERENCES (module.vpc)
252
+ assert stats["edges"] == 2
253
+
254
+
255
+class TestHCLHandleProvider:
256
+ def test_creates_class_node(self):
257
+ parser = _make_parser()
258
+ store = _make_store()
259
+ source = b'provider "aws" {}'
260
+ node = MockNode(
261
+ "block",
262
+ start_point=(0, 0),
263
+ end_point=(0, 16),
264
+ )
265
+ labels = ["aws"]
266
+ stats = {"functions": 0, "classes": 0, "edges": 0}
267
+ parser._handle_provider(node, source, "provider.tf", store, stats, labels, None)
268
+ assert stats["classes"] == 1
269
+ assert stats["edges"] == 1
270
+ label = store.create_node.call_args[0][0]
271
+ props = store.create_node.call_args[0][1]
272
+ assert label == NodeLabel.Class
273
+ assert props["name"] == "aws"
274
+ assert props["semantic_type"] == "terraform_provider"
275
+
276
+
277
+class TestHCLHandleLocals:
278
+ def test_creates_variable_nodes(self):
279
+ parser = _make_parser()
280
+ store = _make_store()
281
+ source = b"region"
282
+ ident = MockNode(
283
+ "identifier",
284
+ start_byte=0,
285
+ end_byte=6,
286
+ )
287
+ attr = MockNode(
288
+ "attribute",
289
+ children=[ident],
290
+ start_point=(1, 0),
291
+ end_point=(1, 20),
292
+ )
293
+ body = MockNode("body", children=[attr])
294
+ node = MockNode(
295
+ "block",
296
+ start_point=(0, 0),
297
+ end_point=(2, 1),
298
+ )
299
+ stats = {"functions": 0, "classes": 0, "edges": 0}
300
+ parser._handle_locals(node, source, "locals.tf", store, stats, body)
301
+ assert stats["functions"] == 1
302
+ assert stats["edges"] >= 1
303
+ label = store.create_node.call_args[0][0]
304
+ props = store.create_node.call_args[0][1]
305
+ assert label == NodeLabel.Variable
306
+ assert props["semantic_type"] == "terraform_local"
307
+
308
+ def test_skips_when_no_body(self):
309
+ parser = _make_parser()
310
+ store = _make_store()
311
+ node = MockNode("block", start_point=(0, 0), end_point=(0, 5))
312
+ stats = {"functions": 0, "classes": 0, "edges": 0}
313
+ parser._handle_locals(node, b"", "locals.tf", store, stats, None)
314
+ assert stats["functions"] == 0
315
+ store.create_node.assert_not_called()
316
+
317
+
318
+class TestHCLWalkDispatch:
319
+ def test_walk_dispatches_block_in_body(self):
320
+ parser = _make_parser()
321
+ store = _make_store()
322
+ # Build: root > body > block(variable "region")
323
+ source = b'variable "region" {}'
324
+ ident = MockNode(
325
+ "identifier",
326
+ start_byte=0,
327
+ end_byte=8,
328
+ )
329
+ string_lit_inner = MockNode(
330
+ "template_literal",
331
+ start_byte=10,
332
+ end_byte=16,
333
+ )
334
+ string_lit = MockNode(
335
+ "string_lit",
336
+ children=[string_lit_inner],
337
+ start_byte=9,
338
+ end_byte=17,
339
+ )
340
+ block = MockNode(
341
+ "block",
342
+ children=[ident, string_lit],
343
+ start_point=(0, 0),
344
+ end_point=(0, 19),
345
+ )
346
+ body = MockNode("body", children=[block])
347
+ root = MockNode("config_file", children=[body])
348
+ stats = {"functions": 0, "classes": 0, "edges": 0}
349
+ parser._walk(root, source, "vars.tf", store, stats)
350
+ assert stats["functions"] == 1
351
+
352
+ def test_walk_dispatches_top_level_block(self):
353
+ parser = _make_parser()
354
+ store = _make_store()
355
+ source = b'provider "aws" {}'
356
+ ident = MockNode(
357
+ "identifier",
358
+ start_byte=0,
359
+ end_byte=8,
360
+ )
361
+ string_lit_inner = MockNode(
362
+ "template_literal",
363
+ start_byte=10,
364
+ end_byte=13,
365
+ )
366
+ string_lit = MockNode(
367
+ "string_lit",
368
+ children=[string_lit_inner],
369
+ start_byte=9,
370
+ end_byte=14,
371
+ )
372
+ block = MockNode(
373
+ "block",
374
+ children=[ident, string_lit],
375
+ start_point=(0, 0),
376
+ end_point=(0, 16),
377
+ )
378
+ root = MockNode("config_file", children=[block])
379
+ stats = {"functions": 0, "classes": 0, "edges": 0}
380
+ parser._walk(root, source, "main.tf", store, stats)
381
+ assert stats["classes"] == 1
382
+
383
+
384
+class TestHCLExtractReferences:
385
+ def test_finds_var_reference(self):
386
+ parser = _make_parser()
387
+ store = _make_store()
388
+ source = b"var.region"
389
+ node = MockNode("body", start_byte=0, end_byte=10)
390
+ stats = {"functions": 0, "classes": 0, "edges": 0}
391
+ parser._extract_references(
392
+ node,
393
+ source,
394
+ "main.tf",
395
+ "aws_instance.web",
396
+ NodeLabel.Class,
397
+ store,
398
+ stats,
399
+ )
400
+ assert stats["edges"] == 1
401
+ edge_call = store.create_edge.call_args[0]
402
+ assert edge_call[2] == EdgeType.REFERENCES
403
+ assert edge_call[4]["name"] == "region"
404
+
405
+ def test_finds_resource_reference(self):
406
+ parser = _make_parser()
407
+ store = _make_store()
408
+ source = b"aws_security_group.default"
409
+ node = MockNode("body", start_byte=0, end_byte=25)
410
+ stats = {"functions": 0, "classes": 0, "edges": 0}
411
+ parser._extract_references(
412
+ node,
413
+ source,
414
+ "main.tf",
415
+ "aws_instance.web",
416
+ NodeLabel.Class,
417
+ store,
418
+ stats,
419
+ )
420
+ assert stats["edges"] == 1
421
+ edge_call = store.create_edge.call_args[0]
422
+ assert edge_call[2] == EdgeType.DEPENDS_ON
423
+
424
+ def test_finds_local_reference(self):
425
+ parser = _make_parser()
426
+ store = _make_store()
427
+ source = b"local.common_tags"
428
+ node = MockNode("body", start_byte=0, end_byte=17)
429
+ stats = {"functions": 0, "classes": 0, "edges": 0}
430
+ parser._extract_references(
431
+ node,
432
+ source,
433
+ "main.tf",
434
+ "aws_instance.web",
435
+ NodeLabel.Class,
436
+ store,
437
+ stats,
438
+ )
439
+ assert stats["edges"] == 1
440
+
441
+ def test_finds_module_reference(self):
442
+ parser = _make_parser()
443
+ store = _make_store()
444
+ source = b"module.vpc"
445
+ node = MockNode("body", start_byte=0, end_byte=10)
446
+ stats = {"functions": 0, "classes": 0, "edges": 0}
447
+ parser._extract_references(
448
+ node,
449
+ source,
450
+ "main.tf",
451
+ "output_vpc",
452
+ NodeLabel.Variable,
453
+ store,
454
+ stats,
455
+ )
456
+ assert stats["edges"] == 1
457
+ edge_call = store.create_edge.call_args[0]
458
+ assert edge_call[3] == NodeLabel.Module
459
+
460
+ def test_finds_data_reference(self):
461
+ parser = _make_parser()
462
+ store = _make_store()
463
+ source = b"data.http.myip"
464
+ node = MockNode("body", start_byte=0, end_byte=14)
465
+ stats = {"functions": 0, "classes": 0, "edges": 0}
466
+ parser._extract_references(
467
+ node,
468
+ source,
469
+ "main.tf",
470
+ "aws_instance.web",
471
+ NodeLabel.Class,
472
+ store,
473
+ stats,
474
+ )
475
+ assert stats["edges"] == 1
476
+ edge_call = store.create_edge.call_args[0]
477
+ assert edge_call[2] == EdgeType.DEPENDS_ON
478
+ assert edge_call[4]["name"] == "http.myip"
479
+
480
+
481
+class TestHCLParseFile:
482
+ def test_creates_file_node(self):
483
+ import tempfile
484
+ from pathlib import Path
485
+
486
+ parser = _make_parser()
487
+ store = _make_store()
488
+ mock_tree = MagicMock()
489
+ mock_tree.root_node.type = "config_file"
490
+ mock_tree.root_node.children = []
491
+ parser._parser.parse.return_value = mock_tree
492
+ with tempfile.NamedTemporaryFile(suffix=".tf", delete=False) as f:
493
+ f.write(b'resource "aws_instance" "web" {}\n')
494
+ fpath = Path(f.name)
495
+ try:
496
+ parser.parse_file(fpath, fpath.parent, store)
497
+ store.create_node.assert_called_once()
498
+ label = store.create_node.call_args[0][0]
499
+ props = store.create_node.call_args[0][1]
500
+ assert label == NodeLabel.File
501
+ assert props["language"] == "hcl"
502
+ finally:
503
+ fpath.unlink()
--- a/tests/test_hcl_parser.py
+++ b/tests/test_hcl_parser.py
@@ -0,0 +1,503 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
--- a/tests/test_hcl_parser.py
+++ b/tests/test_hcl_parser.py
@@ -0,0 +1,503 @@
1 """Tests for navegador.ingestion.hcl — HCLParser internal methods."""
2
3 from unittest.mock import MagicMock, patch
4
5 import pytest
6
7 from navegador.graph.schema import EdgeType, NodeLabel
8
9
class MockNode:
    """Minimal stand-in for a tree-sitter node, just rich enough for the parser."""

    _id_counter = 0  # class-wide counter so every node gets a distinct id

    def __init__(
        self,
        type_: str,
        text: bytes = b"",
        children: list = None,
        start_byte: int = 0,
        end_byte: int = 0,
        start_point: tuple = (0, 0),
        end_point: tuple = (0, 0),
        parent=None,
    ):
        MockNode._id_counter += 1
        self.id = MockNode._id_counter
        self.type = type_
        self._text = text
        self.children = [] if not children else children
        self.start_byte, self.end_byte = start_byte, end_byte
        self.start_point, self.end_point = start_point, end_point
        self.parent = parent
        self._fields: dict = {}
        # Wire up back-references the way tree-sitter does.
        for kid in self.children:
            kid.parent = self

    def child_by_field_name(self, name: str):
        """Mimic tree-sitter field lookup; returns None when the field is unset."""
        return self._fields.get(name)

    def set_field(self, name: str, node):
        """Register *node* under *name* and adopt it; returns self for chaining."""
        self._fields[name] = node
        node.parent = self
        return self
45
46
def _text_node(text: bytes, type_: str = "identifier") -> MockNode:
    """Build a leaf node whose byte span covers exactly *text*."""
    return MockNode(type_, text, end_byte=len(text))


def _make_store():
    """Graph-store double whose queries always return an empty result set."""
    store = MagicMock()
    empty = MagicMock()
    empty.result_set = []
    store.query.return_value = empty
    return store


def _make_parser():
    """HCLParser with a mocked tree-sitter parser, skipping real grammar setup."""
    from navegador.ingestion.hcl import HCLParser

    instance = HCLParser.__new__(HCLParser)  # bypass __init__ (loads the grammar)
    instance._parser = MagicMock()
    return instance
63
64
class TestHCLGetLanguage:
    """_get_hcl_language(): optional-dependency handling."""

    def test_raises_when_not_installed(self):
        from navegador.ingestion.hcl import _get_hcl_language

        missing = {"tree_sitter_hcl": None, "tree_sitter": None}
        with patch.dict("sys.modules", missing):
            with pytest.raises(ImportError, match="tree-sitter-hcl"):
                _get_hcl_language()

    def test_returns_language_object(self):
        from navegador.ingestion.hcl import _get_hcl_language

        fake_grammar = MagicMock()
        fake_ts = MagicMock()
        installed = {"tree_sitter_hcl": fake_grammar, "tree_sitter": fake_ts}
        with patch.dict("sys.modules", installed):
            # The wrapper should hand back whatever tree_sitter.Language built.
            assert _get_hcl_language() is fake_ts.Language.return_value
93
94
class TestHCLNodeText:
    """_node_text(): byte-span extraction from source."""

    def test_extracts_bytes(self):
        from navegador.ingestion.hcl import _node_text

        src = b'resource "aws_instance" "web" {}'
        ident = MockNode("identifier", start_byte=10, end_byte=22)
        assert _node_text(ident, src) == "aws_instance"
102
103
class TestHCLHandleResource:
    """_handle_resource(): resource blocks become Class nodes."""

    def test_creates_class_node_with_semantic_type(self):
        parser, store = _make_parser(), _make_store()
        src = b'resource "aws_instance" "web" {}'
        block = MockNode("block", end_point=(0, 30))
        counts = {"functions": 0, "classes": 0, "edges": 0}
        parser._handle_resource(
            block, src, "main.tf", store, counts, ["aws_instance", "web"], None
        )
        assert counts["classes"] == 1
        assert counts["edges"] == 1
        store.create_node.assert_called_once()
        label, props = store.create_node.call_args[0][:2]
        assert label == NodeLabel.Class
        assert props["name"] == "aws_instance.web"
        assert props["semantic_type"] == "terraform_resource"

    def test_extracts_references_from_body(self):
        parser, store = _make_parser(), _make_store()
        src = b"var.region"
        body = MockNode("body", end_byte=10)
        block = MockNode("block", end_point=(0, 30))
        counts = {"functions": 0, "classes": 0, "edges": 0}
        parser._handle_resource(
            block, src, "main.tf", store, counts, ["aws_instance", "web"], body
        )
        # One CONTAINS edge plus one REFERENCES edge from var.region.
        assert counts["edges"] == 2
141
142
class TestHCLHandleVariable:
    """_handle_variable(): variable blocks become Variable nodes."""

    def test_creates_variable_node(self):
        parser, store = _make_parser(), _make_store()
        src = b'variable "region" {}'
        block = MockNode("block", end_point=(0, 19))
        counts = {"functions": 0, "classes": 0, "edges": 0}
        parser._handle_variable(block, src, "vars.tf", store, counts, ["region"], None)
        assert counts["functions"] == 1
        assert counts["edges"] == 1
        label, props = store.create_node.call_args[0][:2]
        assert label == NodeLabel.Variable
        assert props["name"] == "region"
        assert props["semantic_type"] == "terraform_variable"
163
164
class TestHCLHandleModule:
    """_handle_module(): module blocks become Module nodes."""

    def test_creates_module_node(self):
        parser, store = _make_parser(), _make_store()
        src = b'module "vpc" {}'
        block = MockNode("block", end_point=(0, 14))
        counts = {"functions": 0, "classes": 0, "edges": 0}
        parser._handle_module(block, src, "main.tf", store, counts, ["vpc"], None)
        assert counts["classes"] == 1
        assert counts["edges"] == 1
        label, props = store.create_node.call_args[0][:2]
        assert label == NodeLabel.Module
        assert props["name"] == "vpc"
        assert props["semantic_type"] == "terraform_module"

    def test_extracts_source_attribute(self):
        parser, store = _make_parser(), _make_store()
        # Source bytes laid out so the attribute spans "source" + "./modules/vpc".
        src = b"source./modules/vpc"
        ident = MockNode("identifier", end_byte=6)
        expr = MockNode("expression", start_byte=6, end_byte=19)
        expr.is_named = True
        attribute = MockNode("attribute", children=[ident, expr])
        body = MockNode("body", children=[attribute])
        block = MockNode("block", end_point=(0, 30))
        counts = {"functions": 0, "classes": 0, "edges": 0}
        parser._handle_module(block, src, "main.tf", store, counts, ["vpc"], body)
        assert store.create_node.call_args[0][1]["source"] == "./modules/vpc"
216
217
class TestHCLHandleOutput:
    """_handle_output(): output blocks become Variable nodes."""

    def test_creates_variable_node(self):
        parser, store = _make_parser(), _make_store()
        src = b'output "vpc_id" {}'
        block = MockNode("block", end_point=(0, 17))
        counts = {"functions": 0, "classes": 0, "edges": 0}
        parser._handle_output(block, src, "outputs.tf", store, counts, ["vpc_id"], None)
        assert counts["functions"] == 1
        assert counts["edges"] == 1
        label, props = store.create_node.call_args[0][:2]
        assert label == NodeLabel.Variable
        assert props["semantic_type"] == "terraform_output"

    def test_extracts_references_from_body(self):
        parser, store = _make_parser(), _make_store()
        src = b"module.vpc"
        body = MockNode("body", end_byte=10)
        block = MockNode("block", end_point=(0, 17))
        counts = {"functions": 0, "classes": 0, "edges": 0}
        parser._handle_output(block, src, "outputs.tf", store, counts, ["vpc_id"], body)
        # One CONTAINS edge plus one REFERENCES edge (module.vpc).
        assert counts["edges"] == 2
253
254
class TestHCLHandleProvider:
    """_handle_provider(): provider blocks become Class nodes."""

    def test_creates_class_node(self):
        parser, store = _make_parser(), _make_store()
        src = b'provider "aws" {}'
        block = MockNode("block", end_point=(0, 16))
        counts = {"functions": 0, "classes": 0, "edges": 0}
        parser._handle_provider(block, src, "provider.tf", store, counts, ["aws"], None)
        assert counts["classes"] == 1
        assert counts["edges"] == 1
        label, props = store.create_node.call_args[0][:2]
        assert label == NodeLabel.Class
        assert props["name"] == "aws"
        assert props["semantic_type"] == "terraform_provider"
275
276
class TestHCLHandleLocals:
    """_handle_locals(): each attribute in a locals block becomes a Variable."""

    def test_creates_variable_nodes(self):
        parser, store = _make_parser(), _make_store()
        src = b"region"
        ident = MockNode("identifier", end_byte=6)
        attribute = MockNode(
            "attribute", children=[ident], start_point=(1, 0), end_point=(1, 20)
        )
        body = MockNode("body", children=[attribute])
        block = MockNode("block", end_point=(2, 1))
        counts = {"functions": 0, "classes": 0, "edges": 0}
        parser._handle_locals(block, src, "locals.tf", store, counts, body)
        assert counts["functions"] == 1
        assert counts["edges"] >= 1
        label, props = store.create_node.call_args[0][:2]
        assert label == NodeLabel.Variable
        assert props["semantic_type"] == "terraform_local"

    def test_skips_when_no_body(self):
        parser, store = _make_parser(), _make_store()
        block = MockNode("block", end_point=(0, 5))
        counts = {"functions": 0, "classes": 0, "edges": 0}
        parser._handle_locals(block, b"", "locals.tf", store, counts, None)
        assert counts["functions"] == 0
        store.create_node.assert_not_called()
316
317
class TestHCLWalkDispatch:
    """_walk(): blocks are dispatched whether nested in a body or top-level."""

    def _labelled_block(self, kw_end, lit_start, lit_end, block_end):
        """Assemble a block node shaped like `<keyword> "<label>" {}`."""
        keyword = MockNode("identifier", end_byte=kw_end)
        inner = MockNode("template_literal", start_byte=lit_start, end_byte=lit_end)
        label = MockNode(
            "string_lit",
            children=[inner],
            start_byte=lit_start - 1,
            end_byte=lit_end + 1,
        )
        return MockNode(
            "block", children=[keyword, label], start_point=(0, 0), end_point=block_end
        )

    def test_walk_dispatches_block_in_body(self):
        parser, store = _make_parser(), _make_store()
        # Build: root > body > block(variable "region")
        src = b'variable "region" {}'
        block = self._labelled_block(8, 10, 16, (0, 19))
        body = MockNode("body", children=[block])
        root = MockNode("config_file", children=[body])
        counts = {"functions": 0, "classes": 0, "edges": 0}
        parser._walk(root, src, "vars.tf", store, counts)
        assert counts["functions"] == 1

    def test_walk_dispatches_top_level_block(self):
        parser, store = _make_parser(), _make_store()
        src = b'provider "aws" {}'
        root = MockNode(
            "config_file", children=[self._labelled_block(8, 10, 13, (0, 16))]
        )
        counts = {"functions": 0, "classes": 0, "edges": 0}
        parser._walk(root, src, "main.tf", store, counts)
        assert counts["classes"] == 1
382
383
class TestHCLExtractReferences:
    """_extract_references(): cross-reference patterns inside a block body."""

    def _scan(self, source, end_byte, owner, owner_label):
        """Run _extract_references over *source*; return (stats, store)."""
        parser, store = _make_parser(), _make_store()
        span = MockNode("body", start_byte=0, end_byte=end_byte)
        counts = {"functions": 0, "classes": 0, "edges": 0}
        parser._extract_references(
            span, source, "main.tf", owner, owner_label, store, counts
        )
        return counts, store

    def test_finds_var_reference(self):
        counts, store = self._scan(
            b"var.region", 10, "aws_instance.web", NodeLabel.Class
        )
        assert counts["edges"] == 1
        args = store.create_edge.call_args[0]
        assert args[2] == EdgeType.REFERENCES
        assert args[4]["name"] == "region"

    def test_finds_resource_reference(self):
        counts, store = self._scan(
            b"aws_security_group.default", 25, "aws_instance.web", NodeLabel.Class
        )
        assert counts["edges"] == 1
        assert store.create_edge.call_args[0][2] == EdgeType.DEPENDS_ON

    def test_finds_local_reference(self):
        counts, _ = self._scan(
            b"local.common_tags", 17, "aws_instance.web", NodeLabel.Class
        )
        assert counts["edges"] == 1

    def test_finds_module_reference(self):
        counts, store = self._scan(b"module.vpc", 10, "output_vpc", NodeLabel.Variable)
        assert counts["edges"] == 1
        # module.x references resolve against Module nodes.
        assert store.create_edge.call_args[0][3] == NodeLabel.Module

    def test_finds_data_reference(self):
        counts, store = self._scan(
            b"data.http.myip", 14, "aws_instance.web", NodeLabel.Class
        )
        assert counts["edges"] == 1
        args = store.create_edge.call_args[0]
        assert args[2] == EdgeType.DEPENDS_ON
        assert args[4]["name"] == "http.myip"
479
480
class TestHCLParseFile:
    """parse_file(): a File node is created for every parsed .tf file."""

    def test_creates_file_node(self):
        import tempfile
        from pathlib import Path

        parser, store = _make_parser(), _make_store()
        tree = MagicMock()
        tree.root_node.type = "config_file"
        tree.root_node.children = []
        parser._parser.parse.return_value = tree
        with tempfile.NamedTemporaryFile(suffix=".tf", delete=False) as handle:
            handle.write(b'resource "aws_instance" "web" {}\n')
            path = Path(handle.name)
        try:
            parser.parse_file(path, path.parent, store)
            store.create_node.assert_called_once()
            label, props = store.create_node.call_args[0][:2]
            assert label == NodeLabel.File
            assert props["language"] == "hcl"
        finally:
            path.unlink()  # clean up the delete=False temp file
--- a/tests/test_puppet_parser.py
+++ b/tests/test_puppet_parser.py
@@ -0,0 +1,509 @@
1
+"""Tests for navegador.ingestion.puppet — PuppetParser internal methods."""
2
+
3
+from unittest.mock import MagicMock, patch
4
+
5
+import pytest
6
+
7
+from navegador.graph.schema import NodeLabel
8
+
9
+
10
+class MockNode:
11
+ _id_counter = 0
12
+
13
+ def __init__(
14
+ self,
15
+ type_: str,
16
+ text: bytes = b"",
17
+ children: list = None,
18
+ start_byte: int = 0,
19
+ end_byte: int = 0,
20
+ start_point: tuple = (0, 0),
21
+ end_point: tuple = (0, 0),
22
+ parent=None,
23
+ ):
24
+ MockNode._id_counter += 1
25
+ self.id = MockNode._id_counter
26
+ self.type = type_
27
+ self._text = text
28
+ self.children = children or []
29
+ self.start_byte = start_byte
30
+ self.end_byte = end_byte
31
+ self.start_point = start_point
32
+ self.end_point = end_point
33
+ self.parent = parent
34
+ self._fields: dict = {}
35
+ for child in self.children:
36
+ child.parent = self
37
+
38
+ def child_by_field_name(self, name: str):
39
+ return self._fields.get(name)
40
+
41
+ def set_field(self, name: str, node):
42
+ self._fields[name] = node
43
+ node.parent = self
44
+ return self
45
+
46
+
47
+def _text_node(text: bytes, type_: str = "identifier") -> MockNode:
48
+ return MockNode(type_, text, start_byte=0, end_byte=len(text))
49
+
50
+
51
+def _make_store():
52
+ store = MagicMock()
53
+ store.query.return_value = MagicMock(result_set=[])
54
+ return store
55
+
56
+
57
+def _make_parser():
58
+ from navegador.ingestion.puppet import PuppetParser
59
+
60
+ parser = PuppetParser.__new__(PuppetParser)
61
+ parser._parser = MagicMock()
62
+ return parser
63
+
64
+
65
+class TestPuppetGetLanguage:
66
+ def test_raises_when_not_installed(self):
67
+ from navegador.ingestion.puppet import _get_puppet_language
68
+
69
+ with patch.dict(
70
+ "sys.modules",
71
+ {
72
+ "tree_sitter_puppet": None,
73
+ "tree_sitter": None,
74
+ },
75
+ ):
76
+ with pytest.raises(ImportError, match="tree-sitter-puppet"):
77
+ _get_puppet_language()
78
+
79
+ def test_returns_language_object(self):
80
+ from navegador.ingestion.puppet import _get_puppet_language
81
+
82
+ mock_tspuppet = MagicMock()
83
+ mock_ts = MagicMock()
84
+ with patch.dict(
85
+ "sys.modules",
86
+ {
87
+ "tree_sitter_puppet": mock_tspuppet,
88
+ "tree_sitter": mock_ts,
89
+ },
90
+ ):
91
+ result = _get_puppet_language()
92
+ assert result is mock_ts.Language.return_value
93
+
94
+
95
+class TestPuppetHandleClass:
96
+ def test_creates_class_with_puppet_class_semantic_type(self):
97
+ parser = _make_parser()
98
+ store = _make_store()
99
+ source = b"nginx"
100
+ class_ident = MockNode(
101
+ "class_identifier",
102
+ children=[
103
+ MockNode(
104
+ "identifier",
105
+ start_byte=0,
106
+ end_byte=5,
107
+ ),
108
+ ],
109
+ start_byte=0,
110
+ end_byte=5,
111
+ )
112
+ node = MockNode(
113
+ "class_definition",
114
+ children=[class_ident],
115
+ start_point=(0, 0),
116
+ end_point=(5, 1),
117
+ )
118
+ stats = {"functions": 0, "classes": 0, "edges": 0}
119
+ parser._handle_class(node, source, "nginx.pp", store, stats)
120
+ assert stats["classes"] == 1
121
+ assert stats["edges"] == 1
122
+ label = store.create_node.call_args[0][0]
123
+ props = store.create_node.call_args[0][1]
124
+ assert label == NodeLabel.Class
125
+ assert props["name"] == "nginx"
126
+ assert props["semantic_type"] == "puppet_class"
127
+
128
+ def test_skips_when_no_class_identifier(self):
129
+ parser = _make_parser()
130
+ store = _make_store()
131
+ node = MockNode(
132
+ "class_definition",
133
+ children=[],
134
+ start_point=(0, 0),
135
+ end_point=(0, 5),
136
+ )
137
+ stats = {"functions": 0, "classes": 0, "edges": 0}
138
+ parser._handle_class(node, b"", "test.pp", store, stats)
139
+ assert stats["classes"] == 0
140
+ store.create_node.assert_not_called()
141
+
142
+
143
+class TestPuppetHandleDefinedType:
144
+ def test_creates_class_with_puppet_defined_type(self):
145
+ parser = _make_parser()
146
+ store = _make_store()
147
+ source = b"nginx::vhost"
148
+ class_ident = MockNode(
149
+ "class_identifier",
150
+ children=[
151
+ MockNode(
152
+ "identifier",
153
+ start_byte=0,
154
+ end_byte=5,
155
+ ),
156
+ MockNode(
157
+ "identifier",
158
+ start_byte=7,
159
+ end_byte=12,
160
+ ),
161
+ ],
162
+ start_byte=0,
163
+ end_byte=12,
164
+ )
165
+ node = MockNode(
166
+ "defined_resource_type",
167
+ children=[class_ident],
168
+ start_point=(0, 0),
169
+ end_point=(3, 1),
170
+ )
171
+ stats = {"functions": 0, "classes": 0, "edges": 0}
172
+ parser._handle_defined_type(node, source, "vhost.pp", store, stats)
173
+ assert stats["classes"] == 1
174
+ label = store.create_node.call_args[0][0]
175
+ props = store.create_node.call_args[0][1]
176
+ assert label == NodeLabel.Class
177
+ assert props["name"] == "nginx::vhost"
178
+ assert props["semantic_type"] == "puppet_defined_type"
179
+
180
+
181
+class TestPuppetHandleNode:
182
+ def test_creates_class_with_puppet_node(self):
183
+ parser = _make_parser()
184
+ store = _make_store()
185
+ source = b"'webserver'"
186
+ string_node = MockNode(
187
+ "string",
188
+ start_byte=0,
189
+ end_byte=11,
190
+ )
191
+ node_name = MockNode(
192
+ "node_name",
193
+ children=[string_node],
194
+ )
195
+ node = MockNode(
196
+ "node_definition",
197
+ children=[node_name],
198
+ start_point=(0, 0),
199
+ end_point=(3, 1),
200
+ )
201
+ stats = {"functions": 0, "classes": 0, "edges": 0}
202
+ parser._handle_node(node, source, "nodes.pp", store, stats)
203
+ assert stats["classes"] == 1
204
+ label = store.create_node.call_args[0][0]
205
+ props = store.create_node.call_args[0][1]
206
+ assert label == NodeLabel.Class
207
+ assert props["name"] == "webserver"
208
+ assert props["semantic_type"] == "puppet_node"
209
+
210
+ def test_skips_when_no_node_name(self):
211
+ parser = _make_parser()
212
+ store = _make_store()
213
+ node = MockNode(
214
+ "node_definition",
215
+ children=[],
216
+ start_point=(0, 0),
217
+ end_point=(0, 5),
218
+ )
219
+ stats = {"functions": 0, "classes": 0, "edges": 0}
220
+ parser._handle_node(node, b"", "nodes.pp", store, stats)
221
+ assert stats["classes"] == 0
222
+
223
+
224
+class TestPuppetHandleResource:
225
+ def test_creates_function_with_puppet_resource(self):
226
+ parser = _make_parser()
227
+ store = _make_store()
228
+ source = b"package 'nginx'"
229
+ ident = MockNode(
230
+ "identifier",
231
+ start_byte=0,
232
+ end_byte=7,
233
+ )
234
+ title = MockNode(
235
+ "string",
236
+ start_byte=8,
237
+ end_byte=15,
238
+ )
239
+ node = MockNode(
240
+ "resource_declaration",
241
+ children=[ident, title],
242
+ start_point=(1, 0),
243
+ end_point=(3, 1),
244
+ )
245
+ stats = {"functions": 0, "classes": 0, "edges": 0}
246
+ parser._handle_resource(node, source, "nginx.pp", "nginx", store, stats)
247
+ assert stats["functions"] == 1
248
+ assert stats["edges"] == 1
249
+ label = store.create_node.call_args[0][0]
250
+ props = store.create_node.call_args[0][1]
251
+ assert label == NodeLabel.Function
252
+ assert props["name"] == "package[nginx]"
253
+ assert props["semantic_type"] == "puppet_resource"
254
+
255
+ def test_skips_when_no_type_identifier(self):
256
+ parser = _make_parser()
257
+ store = _make_store()
258
+ node = MockNode(
259
+ "resource_declaration",
260
+ children=[],
261
+ start_point=(0, 0),
262
+ end_point=(0, 5),
263
+ )
264
+ stats = {"functions": 0, "classes": 0, "edges": 0}
265
+ parser._handle_resource(node, b"", "test.pp", "myclass", store, stats)
266
+ assert stats["functions"] == 0
267
+
268
+
269
+class TestPuppetHandleInclude:
270
+ def test_creates_import_node(self):
271
+ parser = _make_parser()
272
+ store = _make_store()
273
+ source = b"stdlib"
274
+ class_ident = MockNode(
275
+ "class_identifier",
276
+ children=[
277
+ MockNode(
278
+ "identifier",
279
+ start_byte=0,
280
+ end_byte=6,
281
+ ),
282
+ ],
283
+ )
284
+ node = MockNode(
285
+ "include_statement",
286
+ children=[class_ident],
287
+ start_point=(0, 0),
288
+ end_point=(0, 14),
289
+ )
290
+ stats = {"functions": 0, "classes": 0, "edges": 0}
291
+ parser._handle_include(node, source, "init.pp", store, stats)
292
+ assert stats["edges"] == 1
293
+ label = store.create_node.call_args[0][0]
294
+ props = store.create_node.call_args[0][1]
295
+ assert label == NodeLabel.Import
296
+ assert props["name"] == "stdlib"
297
+ assert props["semantic_type"] == "puppet_include"
298
+
299
+ def test_skips_when_no_class_identifier(self):
300
+ parser = _make_parser()
301
+ store = _make_store()
302
+ node = MockNode(
303
+ "include_statement",
304
+ children=[],
305
+ start_point=(0, 0),
306
+ end_point=(0, 7),
307
+ )
308
+ stats = {"functions": 0, "classes": 0, "edges": 0}
309
+ parser._handle_include(node, b"", "init.pp", store, stats)
310
+ assert stats["edges"] == 0
311
+ store.create_node.assert_not_called()
312
+
313
+
314
+class TestPuppetHandleParameters:
315
+ def test_creates_variable_nodes(self):
316
+ parser = _make_parser()
317
+ store = _make_store()
318
+ source = b"$port"
319
+ var_node = MockNode(
320
+ "variable",
321
+ start_byte=0,
322
+ end_byte=5,
323
+ )
324
+ param = MockNode(
325
+ "parameter",
326
+ children=[var_node],
327
+ start_point=(1, 2),
328
+ end_point=(1, 7),
329
+ )
330
+ param_list = MockNode(
331
+ "parameter_list",
332
+ children=[param],
333
+ )
334
+ class_ident = MockNode(
335
+ "class_identifier",
336
+ children=[
337
+ MockNode(
338
+ "identifier",
339
+ start_byte=0,
340
+ end_byte=5,
341
+ ),
342
+ ],
343
+ start_byte=0,
344
+ end_byte=5,
345
+ )
346
+ node = MockNode(
347
+ "class_definition",
348
+ children=[class_ident, param_list],
349
+ start_point=(0, 0),
350
+ end_point=(5, 1),
351
+ )
352
+ stats = {"functions": 0, "classes": 0, "edges": 0}
353
+ parser._extract_parameters(node, source, "nginx.pp", "nginx", store, stats)
354
+ store.create_node.assert_called_once()
355
+ label = store.create_node.call_args[0][0]
356
+ props = store.create_node.call_args[0][1]
357
+ assert label == NodeLabel.Variable
358
+ assert props["name"] == "port"
359
+ assert props["semantic_type"] == "puppet_parameter"
360
+ assert stats["edges"] == 1
361
+
362
+ def test_skips_param_without_variable(self):
363
+ parser = _make_parser()
364
+ store = _make_store()
365
+ param = MockNode(
366
+ "parameter",
367
+ children=[MockNode("type")],
368
+ start_point=(1, 2),
369
+ end_point=(1, 7),
370
+ )
371
+ param_list = MockNode(
372
+ "parameter_list",
373
+ children=[param],
374
+ )
375
+ node = MockNode(
376
+ "class_definition",
377
+ children=[param_list],
378
+ start_point=(0, 0),
379
+ end_point=(5, 1),
380
+ )
381
+ stats = {"functions": 0, "classes": 0, "edges": 0}
382
+ parser._extract_parameters(node, b"", "test.pp", "myclass", store, stats)
383
+ store.create_node.assert_not_called()
384
+
385
+
386
+class TestPuppetWalkDispatch:
387
+ def test_walk_dispatches_class_definition(self):
388
+ parser = _make_parser()
389
+ store = _make_store()
390
+ source = b"nginx"
391
+ class_ident = MockNode(
392
+ "class_identifier",
393
+ children=[
394
+ MockNode(
395
+ "identifier",
396
+ start_byte=0,
397
+ end_byte=5,
398
+ ),
399
+ ],
400
+ )
401
+ class_def = MockNode(
402
+ "class_definition",
403
+ children=[class_ident],
404
+ start_point=(0, 0),
405
+ end_point=(5, 1),
406
+ )
407
+ root = MockNode("program", children=[class_def])
408
+ stats = {"functions": 0, "classes": 0, "edges": 0}
409
+ parser._walk(root, source, "nginx.pp", store, stats)
410
+ assert stats["classes"] == 1
411
+
412
+ def test_walk_dispatches_defined_resource_type(self):
413
+ parser = _make_parser()
414
+ store = _make_store()
415
+ source = b"vhost"
416
+ class_ident = MockNode(
417
+ "class_identifier",
418
+ children=[
419
+ MockNode(
420
+ "identifier",
421
+ start_byte=0,
422
+ end_byte=5,
423
+ ),
424
+ ],
425
+ )
426
+ define_node = MockNode(
427
+ "defined_resource_type",
428
+ children=[class_ident],
429
+ start_point=(0, 0),
430
+ end_point=(3, 1),
431
+ )
432
+ root = MockNode("program", children=[define_node])
433
+ stats = {"functions": 0, "classes": 0, "edges": 0}
434
+ parser._walk(root, source, "vhost.pp", store, stats)
435
+ assert stats["classes"] == 1
436
+
437
+ def test_walk_dispatches_node_definition(self):
438
+ parser = _make_parser()
439
+ store = _make_store()
440
+ source = b"'webserver'"
441
+ string_node = MockNode(
442
+ "string",
443
+ start_byte=0,
444
+ end_byte=11,
445
+ )
446
+ node_name = MockNode(
447
+ "node_name",
448
+ children=[string_node],
449
+ )
450
+ node_def = MockNode(
451
+ "node_definition",
452
+ children=[node_name],
453
+ start_point=(0, 0),
454
+ end_point=(3, 1),
455
+ )
456
+ root = MockNode("program", children=[node_def])
457
+ stats = {"functions": 0, "classes": 0, "edges": 0}
458
+ parser._walk(root, source, "nodes.pp", store, stats)
459
+ assert stats["classes"] == 1
460
+
461
+ def test_walk_dispatches_include_statement(self):
462
+ parser = _make_parser()
463
+ store = _make_store()
464
+ source = b"stdlib"
465
+ class_ident = MockNode(
466
+ "class_identifier",
467
+ children=[
468
+ MockNode(
469
+ "identifier",
470
+ start_byte=0,
471
+ end_byte=6,
472
+ ),
473
+ ],
474
+ )
475
+ include = MockNode(
476
+ "include_statement",
477
+ children=[class_ident],
478
+ start_point=(0, 0),
479
+ end_point=(0, 14),
480
+ )
481
+ root = MockNode("program", children=[include])
482
+ stats = {"functions": 0, "classes": 0, "edges": 0}
483
+ parser._walk(root, source, "init.pp", store, stats)
484
+ assert stats["edges"] == 1
485
+
486
+
487
+class TestPuppetParseFile:
488
+ def test_creates_file_node(self):
489
+ import tempfile
490
+ from pathlib import Path
491
+
492
+ parser = _make_parser()
493
+ store = _make_store()
494
+ mock_tree = MagicMock()
495
+ mock_tree.root_node.type = "program"
496
+ mock_tree.root_node.children = []
497
+ parser._parser.parse.return_value = mock_tree
498
+ with tempfile.NamedTemporaryFile(suffix=".pp", delete=False) as f:
499
+ f.write(b"class nginx {}\n")
500
+ fpath = Path(f.name)
501
+ try:
502
+ parser.parse_file(fpath, fpath.parent, store)
503
+ store.create_node.assert_called_once()
504
+ label = store.create_node.call_args[0][0]
505
+ props = store.create_node.call_args[0][1]
506
+ assert label == NodeLabel.File
507
+ assert props["language"] == "puppet"
508
+ finally:
509
+ fpath.unlink()
--- a/tests/test_puppet_parser.py
+++ b/tests/test_puppet_parser.py
@@ -0,0 +1,509 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
--- a/tests/test_puppet_parser.py
+++ b/tests/test_puppet_parser.py
@@ -0,0 +1,509 @@
1 """Tests for navegador.ingestion.puppet — PuppetParser internal methods."""
2
3 from unittest.mock import MagicMock, patch
4
5 import pytest
6
7 from navegador.graph.schema import NodeLabel
8
9
10 class MockNode:
11 _id_counter = 0
12
13 def __init__(
14 self,
15 type_: str,
16 text: bytes = b"",
17 children: list = None,
18 start_byte: int = 0,
19 end_byte: int = 0,
20 start_point: tuple = (0, 0),
21 end_point: tuple = (0, 0),
22 parent=None,
23 ):
24 MockNode._id_counter += 1
25 self.id = MockNode._id_counter
26 self.type = type_
27 self._text = text
28 self.children = children or []
29 self.start_byte = start_byte
30 self.end_byte = end_byte
31 self.start_point = start_point
32 self.end_point = end_point
33 self.parent = parent
34 self._fields: dict = {}
35 for child in self.children:
36 child.parent = self
37
38 def child_by_field_name(self, name: str):
39 return self._fields.get(name)
40
41 def set_field(self, name: str, node):
42 self._fields[name] = node
43 node.parent = self
44 return self
45
46
47 def _text_node(text: bytes, type_: str = "identifier") -> MockNode:
48 return MockNode(type_, text, start_byte=0, end_byte=len(text))
49
50
51 def _make_store():
52 store = MagicMock()
53 store.query.return_value = MagicMock(result_set=[])
54 return store
55
56
57 def _make_parser():
58 from navegador.ingestion.puppet import PuppetParser
59
60 parser = PuppetParser.__new__(PuppetParser)
61 parser._parser = MagicMock()
62 return parser
63
64
65 class TestPuppetGetLanguage:
66 def test_raises_when_not_installed(self):
67 from navegador.ingestion.puppet import _get_puppet_language
68
69 with patch.dict(
70 "sys.modules",
71 {
72 "tree_sitter_puppet": None,
73 "tree_sitter": None,
74 },
75 ):
76 with pytest.raises(ImportError, match="tree-sitter-puppet"):
77 _get_puppet_language()
78
79 def test_returns_language_object(self):
80 from navegador.ingestion.puppet import _get_puppet_language
81
82 mock_tspuppet = MagicMock()
83 mock_ts = MagicMock()
84 with patch.dict(
85 "sys.modules",
86 {
87 "tree_sitter_puppet": mock_tspuppet,
88 "tree_sitter": mock_ts,
89 },
90 ):
91 result = _get_puppet_language()
92 assert result is mock_ts.Language.return_value
93
94
95 class TestPuppetHandleClass:
96 def test_creates_class_with_puppet_class_semantic_type(self):
97 parser = _make_parser()
98 store = _make_store()
99 source = b"nginx"
100 class_ident = MockNode(
101 "class_identifier",
102 children=[
103 MockNode(
104 "identifier",
105 start_byte=0,
106 end_byte=5,
107 ),
108 ],
109 start_byte=0,
110 end_byte=5,
111 )
112 node = MockNode(
113 "class_definition",
114 children=[class_ident],
115 start_point=(0, 0),
116 end_point=(5, 1),
117 )
118 stats = {"functions": 0, "classes": 0, "edges": 0}
119 parser._handle_class(node, source, "nginx.pp", store, stats)
120 assert stats["classes"] == 1
121 assert stats["edges"] == 1
122 label = store.create_node.call_args[0][0]
123 props = store.create_node.call_args[0][1]
124 assert label == NodeLabel.Class
125 assert props["name"] == "nginx"
126 assert props["semantic_type"] == "puppet_class"
127
128 def test_skips_when_no_class_identifier(self):
129 parser = _make_parser()
130 store = _make_store()
131 node = MockNode(
132 "class_definition",
133 children=[],
134 start_point=(0, 0),
135 end_point=(0, 5),
136 )
137 stats = {"functions": 0, "classes": 0, "edges": 0}
138 parser._handle_class(node, b"", "test.pp", store, stats)
139 assert stats["classes"] == 0
140 store.create_node.assert_not_called()
141
142
143 class TestPuppetHandleDefinedType:
144 def test_creates_class_with_puppet_defined_type(self):
145 parser = _make_parser()
146 store = _make_store()
147 source = b"nginx::vhost"
148 class_ident = MockNode(
149 "class_identifier",
150 children=[
151 MockNode(
152 "identifier",
153 start_byte=0,
154 end_byte=5,
155 ),
156 MockNode(
157 "identifier",
158 start_byte=7,
159 end_byte=12,
160 ),
161 ],
162 start_byte=0,
163 end_byte=12,
164 )
165 node = MockNode(
166 "defined_resource_type",
167 children=[class_ident],
168 start_point=(0, 0),
169 end_point=(3, 1),
170 )
171 stats = {"functions": 0, "classes": 0, "edges": 0}
172 parser._handle_defined_type(node, source, "vhost.pp", store, stats)
173 assert stats["classes"] == 1
174 label = store.create_node.call_args[0][0]
175 props = store.create_node.call_args[0][1]
176 assert label == NodeLabel.Class
177 assert props["name"] == "nginx::vhost"
178 assert props["semantic_type"] == "puppet_defined_type"
179
180
181 class TestPuppetHandleNode:
182 def test_creates_class_with_puppet_node(self):
183 parser = _make_parser()
184 store = _make_store()
185 source = b"'webserver'"
186 string_node = MockNode(
187 "string",
188 start_byte=0,
189 end_byte=11,
190 )
191 node_name = MockNode(
192 "node_name",
193 children=[string_node],
194 )
195 node = MockNode(
196 "node_definition",
197 children=[node_name],
198 start_point=(0, 0),
199 end_point=(3, 1),
200 )
201 stats = {"functions": 0, "classes": 0, "edges": 0}
202 parser._handle_node(node, source, "nodes.pp", store, stats)
203 assert stats["classes"] == 1
204 label = store.create_node.call_args[0][0]
205 props = store.create_node.call_args[0][1]
206 assert label == NodeLabel.Class
207 assert props["name"] == "webserver"
208 assert props["semantic_type"] == "puppet_node"
209
210 def test_skips_when_no_node_name(self):
211 parser = _make_parser()
212 store = _make_store()
213 node = MockNode(
214 "node_definition",
215 children=[],
216 start_point=(0, 0),
217 end_point=(0, 5),
218 )
219 stats = {"functions": 0, "classes": 0, "edges": 0}
220 parser._handle_node(node, b"", "nodes.pp", store, stats)
221 assert stats["classes"] == 0
222
223
224 class TestPuppetHandleResource:
225 def test_creates_function_with_puppet_resource(self):
226 parser = _make_parser()
227 store = _make_store()
228 source = b"package 'nginx'"
229 ident = MockNode(
230 "identifier",
231 start_byte=0,
232 end_byte=7,
233 )
234 title = MockNode(
235 "string",
236 start_byte=8,
237 end_byte=15,
238 )
239 node = MockNode(
240 "resource_declaration",
241 children=[ident, title],
242 start_point=(1, 0),
243 end_point=(3, 1),
244 )
245 stats = {"functions": 0, "classes": 0, "edges": 0}
246 parser._handle_resource(node, source, "nginx.pp", "nginx", store, stats)
247 assert stats["functions"] == 1
248 assert stats["edges"] == 1
249 label = store.create_node.call_args[0][0]
250 props = store.create_node.call_args[0][1]
251 assert label == NodeLabel.Function
252 assert props["name"] == "package[nginx]"
253 assert props["semantic_type"] == "puppet_resource"
254
255 def test_skips_when_no_type_identifier(self):
256 parser = _make_parser()
257 store = _make_store()
258 node = MockNode(
259 "resource_declaration",
260 children=[],
261 start_point=(0, 0),
262 end_point=(0, 5),
263 )
264 stats = {"functions": 0, "classes": 0, "edges": 0}
265 parser._handle_resource(node, b"", "test.pp", "myclass", store, stats)
266 assert stats["functions"] == 0
267
268
269 class TestPuppetHandleInclude:
270 def test_creates_import_node(self):
271 parser = _make_parser()
272 store = _make_store()
273 source = b"stdlib"
274 class_ident = MockNode(
275 "class_identifier",
276 children=[
277 MockNode(
278 "identifier",
279 start_byte=0,
280 end_byte=6,
281 ),
282 ],
283 )
284 node = MockNode(
285 "include_statement",
286 children=[class_ident],
287 start_point=(0, 0),
288 end_point=(0, 14),
289 )
290 stats = {"functions": 0, "classes": 0, "edges": 0}
291 parser._handle_include(node, source, "init.pp", store, stats)
292 assert stats["edges"] == 1
293 label = store.create_node.call_args[0][0]
294 props = store.create_node.call_args[0][1]
295 assert label == NodeLabel.Import
296 assert props["name"] == "stdlib"
297 assert props["semantic_type"] == "puppet_include"
298
299 def test_skips_when_no_class_identifier(self):
300 parser = _make_parser()
301 store = _make_store()
302 node = MockNode(
303 "include_statement",
304 children=[],
305 start_point=(0, 0),
306 end_point=(0, 7),
307 )
308 stats = {"functions": 0, "classes": 0, "edges": 0}
309 parser._handle_include(node, b"", "init.pp", store, stats)
310 assert stats["edges"] == 0
311 store.create_node.assert_not_called()
312
313
314 class TestPuppetHandleParameters:
315 def test_creates_variable_nodes(self):
316 parser = _make_parser()
317 store = _make_store()
318 source = b"$port"
319 var_node = MockNode(
320 "variable",
321 start_byte=0,
322 end_byte=5,
323 )
324 param = MockNode(
325 "parameter",
326 children=[var_node],
327 start_point=(1, 2),
328 end_point=(1, 7),
329 )
330 param_list = MockNode(
331 "parameter_list",
332 children=[param],
333 )
334 class_ident = MockNode(
335 "class_identifier",
336 children=[
337 MockNode(
338 "identifier",
339 start_byte=0,
340 end_byte=5,
341 ),
342 ],
343 start_byte=0,
344 end_byte=5,
345 )
346 node = MockNode(
347 "class_definition",
348 children=[class_ident, param_list],
349 start_point=(0, 0),
350 end_point=(5, 1),
351 )
352 stats = {"functions": 0, "classes": 0, "edges": 0}
353 parser._extract_parameters(node, source, "nginx.pp", "nginx", store, stats)
354 store.create_node.assert_called_once()
355 label = store.create_node.call_args[0][0]
356 props = store.create_node.call_args[0][1]
357 assert label == NodeLabel.Variable
358 assert props["name"] == "port"
359 assert props["semantic_type"] == "puppet_parameter"
360 assert stats["edges"] == 1
361
362 def test_skips_param_without_variable(self):
363 parser = _make_parser()
364 store = _make_store()
365 param = MockNode(
366 "parameter",
367 children=[MockNode("type")],
368 start_point=(1, 2),
369 end_point=(1, 7),
370 )
371 param_list = MockNode(
372 "parameter_list",
373 children=[param],
374 )
375 node = MockNode(
376 "class_definition",
377 children=[param_list],
378 start_point=(0, 0),
379 end_point=(5, 1),
380 )
381 stats = {"functions": 0, "classes": 0, "edges": 0}
382 parser._extract_parameters(node, b"", "test.pp", "myclass", store, stats)
383 store.create_node.assert_not_called()
384
385
386 class TestPuppetWalkDispatch:
387 def test_walk_dispatches_class_definition(self):
388 parser = _make_parser()
389 store = _make_store()
390 source = b"nginx"
391 class_ident = MockNode(
392 "class_identifier",
393 children=[
394 MockNode(
395 "identifier",
396 start_byte=0,
397 end_byte=5,
398 ),
399 ],
400 )
401 class_def = MockNode(
402 "class_definition",
403 children=[class_ident],
404 start_point=(0, 0),
405 end_point=(5, 1),
406 )
407 root = MockNode("program", children=[class_def])
408 stats = {"functions": 0, "classes": 0, "edges": 0}
409 parser._walk(root, source, "nginx.pp", store, stats)
410 assert stats["classes"] == 1
411
412 def test_walk_dispatches_defined_resource_type(self):
413 parser = _make_parser()
414 store = _make_store()
415 source = b"vhost"
416 class_ident = MockNode(
417 "class_identifier",
418 children=[
419 MockNode(
420 "identifier",
421 start_byte=0,
422 end_byte=5,
423 ),
424 ],
425 )
426 define_node = MockNode(
427 "defined_resource_type",
428 children=[class_ident],
429 start_point=(0, 0),
430 end_point=(3, 1),
431 )
432 root = MockNode("program", children=[define_node])
433 stats = {"functions": 0, "classes": 0, "edges": 0}
434 parser._walk(root, source, "vhost.pp", store, stats)
435 assert stats["classes"] == 1
436
437 def test_walk_dispatches_node_definition(self):
438 parser = _make_parser()
439 store = _make_store()
440 source = b"'webserver'"
441 string_node = MockNode(
442 "string",
443 start_byte=0,
444 end_byte=11,
445 )
446 node_name = MockNode(
447 "node_name",
448 children=[string_node],
449 )
450 node_def = MockNode(
451 "node_definition",
452 children=[node_name],
453 start_point=(0, 0),
454 end_point=(3, 1),
455 )
456 root = MockNode("program", children=[node_def])
457 stats = {"functions": 0, "classes": 0, "edges": 0}
458 parser._walk(root, source, "nodes.pp", store, stats)
459 assert stats["classes"] == 1
460
461 def test_walk_dispatches_include_statement(self):
462 parser = _make_parser()
463 store = _make_store()
464 source = b"stdlib"
465 class_ident = MockNode(
466 "class_identifier",
467 children=[
468 MockNode(
469 "identifier",
470 start_byte=0,
471 end_byte=6,
472 ),
473 ],
474 )
475 include = MockNode(
476 "include_statement",
477 children=[class_ident],
478 start_point=(0, 0),
479 end_point=(0, 14),
480 )
481 root = MockNode("program", children=[include])
482 stats = {"functions": 0, "classes": 0, "edges": 0}
483 parser._walk(root, source, "init.pp", store, stats)
484 assert stats["edges"] == 1
485
486
487 class TestPuppetParseFile:
488 def test_creates_file_node(self):
489 import tempfile
490 from pathlib import Path
491
492 parser = _make_parser()
493 store = _make_store()
494 mock_tree = MagicMock()
495 mock_tree.root_node.type = "program"
496 mock_tree.root_node.children = []
497 parser._parser.parse.return_value = mock_tree
498 with tempfile.NamedTemporaryFile(suffix=".pp", delete=False) as f:
499 f.write(b"class nginx {}\n")
500 fpath = Path(f.name)
501 try:
502 parser.parse_file(fpath, fpath.parent, store)
503 store.create_node.assert_called_once()
504 label = store.create_node.call_args[0][0]
505 props = store.create_node.call_args[0][1]
506 assert label == NodeLabel.File
507 assert props["language"] == "puppet"
508 finally:
509 fpath.unlink()

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button