| | @@ -1,10 +1,11 @@ |
| 1 | 1 | """ |
| 2 | 2 | AI-powered HTML → Hugo template conversion. |
| 3 | 3 | |
| 4 | 4 | For already-Hugo themes, use hugoify_dir() to validate/augment. |
| 5 | 5 | For raw HTML, use hugoify_html() to produce Hugo layout files. |
| 6 | +For Next.js apps, use hugoify_nextjs() to convert React components to Hugo layouts. |
| 6 | 7 | """ |
| 7 | 8 | |
| 8 | 9 | import json |
| 9 | 10 | import logging |
| 10 | 11 | import os |
| | @@ -15,60 +16,421 @@ |
| 15 | 16 | SYSTEM = ( |
| 16 | 17 | "You are an expert Hugo theme developer. Convert HTML templates to valid Hugo Go template files. " |
| 17 | 18 | "Output only valid Hugo template syntax — no explanations, no markdown fences." |
| 18 | 19 | ) |
| 19 | 20 | |
| 21 | +NEXTJS_SYSTEM = ( |
| 22 | + "You are an expert at converting React/Next.js components to Hugo Go template files. " |
| 23 | + "You understand JSX, TSX, React component composition, and Hugo template syntax. " |
| 24 | + "Convert React components to static Hugo HTML templates, preserving all CSS classes and visual structure. " |
| 25 | + "Output only valid Hugo template syntax — no explanations, no markdown fences." |
| 26 | +) |
| 27 | + |
| 20 | 28 | |
| 21 | 29 | def hugoify_html(html_path: str) -> dict: |
| 22 | 30 | """ |
| 23 | 31 | Convert a raw HTML file to a set of Hugo layout files. |
| 24 | 32 | |
| 25 | | - Returns dict mapping relative layout paths to their content, e.g.: |
| 26 | | - { |
| 27 | | - "_default/baseof.html": "<!DOCTYPE html>...", |
| 28 | | - "partials/header.html": "<header>...", |
| 29 | | - "partials/footer.html": "<footer>...", |
| 30 | | - "index.html": "{{ define \"main\" }}...", |
| 31 | | - } |
| 33 | + Uses direct HTML extraction (no AI) to preserve content exactly as-is. |
| 34 | + Splits the HTML into Hugo's baseof.html (head/shell) and index.html (body content). |
| 35 | + |
| 36 | + Returns dict mapping relative layout paths to their content. |
| 32 | 37 | """ |
| 33 | 38 | logging.info(f"Hugoifying {html_path} ...") |
| 34 | 39 | |
| 35 | 40 | with open(html_path, 'r', errors='replace') as f: |
| 36 | 41 | html = f.read() |
| 37 | 42 | |
| 38 | | - # Truncate very large files to avoid token limits |
| 39 | | - if len(html) > 30000: |
| 40 | | - logging.warning(f"HTML is large ({len(html)} chars), truncating to 30000 for AI analysis") |
| 41 | | - html = html[:30000] |
| 42 | | - |
| 43 | | - prompt = f"""Convert the following HTML file into Hugo layout files. |
| 44 | | - |
| 45 | | -Return a JSON object where keys are relative file paths under layouts/ and values are the Hugo template content. |
| 46 | | - |
| 47 | | -Required keys to produce: |
| 48 | | -- "_default/baseof.html" — base template with blocks for head, header, main, footer |
| 49 | | -- "partials/header.html" — site header/nav extracted as partial |
| 50 | | -- "partials/footer.html" — footer extracted as partial |
| 51 | | -- "index.html" — homepage using {{ define "main" }} ... {{ end }} |
| 52 | | - |
| 53 | | -Rules: |
| 54 | | -- Replace hardcoded page titles with {{ .Title }} |
| 55 | | -- Replace hardcoded site name with {{ .Site.Title }} |
| 56 | | -- Replace hardcoded URLs with {{ .Site.BaseURL }} or {{ .Permalink }} |
| 57 | | -- Replace nav links with {{ range .Site.Menus.main }}<a href="{{ .URL }}">{{ .Name }}</a>{{ end }} |
| 58 | | -- Replace blog post lists with {{ range .Pages }} ... {{ end }} |
| 59 | | -- Replace copyright year with {{ now.Year }} |
| 60 | | -- Keep all CSS classes and HTML structure intact |
| 61 | | -- Use {{ partial "header.html" . }} and {{ partial "footer.html" . }} in baseof.html |
| 62 | | - |
| 63 | | -HTML to convert: |
| 64 | | -{html} |
| 65 | | - |
| 66 | | -Return ONLY a valid JSON object, no explanation.""" |
| 67 | | - |
| 68 | | - response = call_ai(prompt, SYSTEM) |
| 69 | | - return _parse_layout_json(response) |
| 43 | + logging.info(f"Read {len(html)} chars from {html_path}") |
| 44 | + |
| 45 | +    # Extract font links and image preloads from <head> (NOTE: head_extras is not used below) |
| 46 | + head_extras = _extract_head_content(html) |
| 47 | + |
| 48 | +    # Collect stylesheet <link> and external <script> tags (NOTE: not rewritten or used below) |
| 49 | + css_links = re.findall(r'<link[^>]+rel=["\']stylesheet["\'][^>]*/?>', |
| 50 | + html, re.DOTALL | re.IGNORECASE) |
| 51 | + js_links = re.findall(r'<script[^>]+src=["\'][^"\']+["\'][^>]*>.*?</script>', |
| 52 | + html, re.DOTALL) |
| 53 | + |
| 54 | + # Extract <body> content |
| 55 | + body_match = re.search(r'<body[^>]*>(.*?)</body>', html, re.DOTALL) |
| 56 | + body_content = body_match.group(1).strip() if body_match else html |
| 57 | + |
| 58 | + # Extract body attributes (class, style, etc.) |
| 59 | + body_attrs_match = re.search(r'<body([^>]*)>', html) |
| 60 | + body_attrs = body_attrs_match.group(1).strip() if body_attrs_match else '' |
| 61 | + |
| 62 | + # Build baseof.html preserving the original <head> structure |
| 63 | + head_match = re.search(r'<head[^>]*>(.*?)</head>', html, re.DOTALL) |
| 64 | + if head_match: |
| 65 | + head_content = head_match.group(1).strip() |
| 66 | + # Replace hardcoded <title> with Hugo template |
| 67 | + head_content = re.sub( |
| 68 | + r'<title>[^<]*</title>', |
| 69 | + '<title>{{ if .IsHome }}{{ .Site.Title }}{{ else }}{{ .Title }} | {{ .Site.Title }}{{ end }}</title>', |
| 70 | + head_content |
| 71 | + ) |
| 72 | + baseof = f'''<!DOCTYPE html> |
| 73 | +<html lang="{{{{ with .Site.LanguageCode }}}}{{{{ . }}}}{{{{ else }}}}en{{{{ end }}}}"> |
| 74 | +<head> |
| 75 | +{head_content} |
| 76 | +</head> |
| 77 | +<body{" " + body_attrs if body_attrs else ""}> |
| 78 | + {{{{- block "main" . }}}}{{{{- end }}}} |
| 79 | +</body> |
| 80 | +</html>''' |
| 81 | + else: |
| 82 | + baseof = _fallback_baseof() |
| 83 | + |
| 84 | + index_html = f'{{{{ define "main" }}}}\n{body_content}\n{{{{ end }}}}' |
| 85 | + |
| 86 | + layouts = { |
| 87 | + "_default/baseof.html": baseof, |
| 88 | + "index.html": index_html, |
| 89 | + } |
| 90 | + |
| 91 | + logging.info(f"Extracted {len(layouts)} layout files directly from HTML (no AI)") |
| 92 | + return layouts |
| 93 | + |
| 94 | + |
| 95 | +def hugoify_nextjs(info: dict, dev_url: str = None) -> dict: |
| 96 | + """ |
| 97 | + Convert a Next.js app to a set of Hugo layout files. |
| 98 | + |
| 99 | + If dev_url is provided (or auto-detected), captures the actual rendered HTML |
| 100 | + from the running Next.js dev server for pixel-perfect conversion. |
| 101 | + Otherwise falls back to AI-powered TSX source conversion. |
| 102 | + |
| 103 | + Args: |
| 104 | + info: dict from find_nextjs_app() with app_dir, router_type, etc. |
| 105 | + dev_url: URL of a running Next.js dev server (e.g. http://localhost:3000) |
| 106 | + |
| 107 | + Returns: |
| 108 | + dict mapping relative layout paths to their content, plus |
| 109 | +        a '_captured_css' key holding the downloaded compiled CSS, if any. |
| 110 | + """ |
| 111 | + app_dir = info['app_dir'] |
| 112 | + logging.info(f"Hugoifying Next.js app at {app_dir} ...") |
| 113 | + |
| 114 | + # Try to auto-detect a running dev server |
| 115 | + if not dev_url: |
| 116 | + dev_url = _detect_nextjs_server(info) |
| 117 | + |
| 118 | + if dev_url: |
| 119 | + return _capture_rendered_html(dev_url, info) |
| 120 | + |
| 121 | + # Fallback: AI-powered source conversion (less faithful) |
| 122 | + return _ai_convert_nextjs_sources(info) |
| 123 | + |
| 124 | + |
| 125 | +def _detect_nextjs_server(info: dict) -> str | None: |
| 126 | +    """Return the first localhost URL (ports 3000-3002) answering HEAD with 200, or None; does not verify it is Next.js.""" |
| 127 | + import urllib.request |
| 128 | + for port in [3000, 3001, 3002]: |
| 129 | + url = f"http://localhost:{port}" |
| 130 | + try: |
| 131 | + req = urllib.request.Request(url, method='HEAD') |
| 132 | + resp = urllib.request.urlopen(req, timeout=2) |
| 133 | + if resp.status == 200: |
| 134 | + logging.info(f"Detected running Next.js server at {url}") |
| 135 | + return url |
| 136 | + except Exception: |
| 137 | + continue |
| 138 | + return None |
| 139 | + |
| 140 | + |
| 141 | +def _capture_rendered_html(dev_url: str, info: dict) -> dict: |
| 142 | + """ |
| 143 | + Capture the actual server-rendered HTML from a running Next.js app |
| 144 | + and convert it into Hugo layout files. This gives pixel-perfect results. |
| 145 | + """ |
| 146 | + import urllib.request |
| 147 | + import urllib.parse |
| 148 | + |
| 149 | + logging.info(f"Capturing rendered HTML from {dev_url} ...") |
| 150 | + |
| 151 | + # Fetch the full rendered page |
| 152 | + resp = urllib.request.urlopen(dev_url) |
| 153 | + html = resp.read().decode('utf-8') |
| 154 | + logging.info(f"Captured {len(html)} chars of rendered HTML") |
| 155 | + |
| 156 | + # Download compiled CSS |
| 157 | + css_urls = re.findall(r'href="(/_next/static/[^"]+\.css)"', html) |
| 158 | + captured_css = {} |
| 159 | + for css_path in css_urls: |
| 160 | + css_url = f"{dev_url}{css_path}" |
| 161 | + try: |
| 162 | + css_resp = urllib.request.urlopen(css_url) |
| 163 | + css_content = css_resp.read().decode('utf-8') |
| 164 | + captured_css['compiled.css'] = css_content |
| 165 | + logging.info(f"Captured CSS: {len(css_content)} chars") |
| 166 | + break # Usually just one CSS file |
| 167 | + except Exception as e: |
| 168 | + logging.warning(f"Failed to fetch CSS {css_url}: {e}") |
| 169 | + |
| 170 | + # Strip Next.js scripts, dev tooling, and React hydration markers |
| 171 | + body_html = _extract_and_clean_body(html) |
| 172 | + |
| 173 | + # Extract <head> content we want to keep (fonts, meta, etc.) |
| 174 | + head_extras = _extract_head_content(html) |
| 175 | + |
| 176 | + # Build Hugo layouts |
| 177 | + baseof = f'''<!DOCTYPE html> |
| 178 | +<html lang="en"> |
| 179 | +<head> |
| 180 | + <meta charset="utf-8"> |
| 181 | + <meta name="viewport" content="width=device-width, initial-scale=1"> |
| 182 | + <title>{{{{ if .IsHome }}}}{{{{ .Site.Title }}}}{{{{ else }}}}{{{{ .Title }}}} | {{{{ .Site.Title }}}}{{{{ end }}}}</title> |
| 183 | +{head_extras} |
| 184 | + <link rel="stylesheet" href="/css/compiled.css"> |
| 185 | + <link rel="stylesheet" href="/css/globals.css"> |
| 186 | +</head> |
| 187 | +<body class="antialiased"> |
| 188 | + {{{{- block "main" . }}}}{{{{- end }}}} |
| 189 | +</body> |
| 190 | +</html>''' |
| 191 | + |
| 192 | + index_html = f'{{{{ define "main" }}}}\n{body_html}\n{{{{ end }}}}' |
| 193 | + |
| 194 | + layouts = { |
| 195 | + "_default/baseof.html": baseof, |
| 196 | + "index.html": index_html, |
| 197 | + } |
| 198 | + |
| 199 | + # Attach captured CSS as metadata for the pipeline to handle |
| 200 | + if captured_css: |
| 201 | + layouts['_captured_css'] = captured_css |
| 202 | + |
| 203 | + return layouts |
| 204 | + |
| 205 | + |
| 206 | +def _extract_and_clean_body(html: str) -> str: |
| 207 | + """Extract <body> content and strip Next.js scripts/dev tooling.""" |
| 208 | + # Extract body content |
| 209 | + body_match = re.search(r'<body[^>]*>(.*?)</body>', html, re.DOTALL) |
| 210 | + if not body_match: |
| 211 | + return html |
| 212 | + |
| 213 | + body = body_match.group(1) |
| 214 | + |
| 215 | + # Strip all <script> tags (Next.js runtime, React hydration, HMR, etc.) |
| 216 | + body = re.sub(r'<script\b[^>]*>.*?</script>', '', body, flags=re.DOTALL) |
| 217 | + body = re.sub(r'<script\b[^>]*/?>', '', body) |
| 218 | + |
| 219 | +    # Strip Next.js <next-route-announcer> and <nextjs-portal> elements |
| 220 | + body = re.sub(r'<next-route-announcer[^>]*>.*?</next-route-announcer>', '', body, flags=re.DOTALL) |
| 221 | + body = re.sub(r'<nextjs-portal[^>]*>.*?</nextjs-portal>', '', body, flags=re.DOTALL) |
| 222 | + |
| 223 | + # Strip data-reactroot, data-nextjs, and other React/Next.js attributes |
| 224 | + body = re.sub(r'\s*data-(?:reactroot|nextjs[^=]*|rsc[^=]*)(?:="[^"]*")?', '', body) |
| 225 | + |
| 226 | + # Fix FadeIn components: they render with opacity:0 and translateY(32px) |
| 227 | + # because the IntersectionObserver JS isn't running. Force them visible. |
| 228 | + body = re.sub(r'opacity:\s*0', 'opacity:1', body) |
| 229 | + body = re.sub(r'translateY\(32px\)', 'translateY(0px)', body) |
| 230 | + |
| 231 | +    # Rewrite /_next/static/media/<file> references to root-relative /<file> paths |
| 232 | + body = re.sub(r'/_next/static/media/([^"]+)', r'/\1', body) |
| 233 | + |
| 234 | + return body.strip() |
| 235 | + |
| 236 | + |
| 237 | +def _extract_head_content(html: str) -> str: |
| 238 | + """Extract useful <head> elements (fonts, preloads) from rendered HTML.""" |
| 239 | + head_match = re.search(r'<head[^>]*>(.*?)</head>', html, re.DOTALL) |
| 240 | + if not head_match: |
| 241 | + return "" |
| 242 | + |
| 243 | + head = head_match.group(1) |
| 244 | + lines = [] |
| 245 | + |
| 246 | + # Keep font preload/stylesheet links |
| 247 | + for match in re.finditer(r'<link[^>]+(?:fonts\.googleapis|fonts\.gstatic|preload[^>]+font)[^>]*/?>', |
| 248 | + head, re.DOTALL): |
| 249 | + lines.append(f" {match.group(0)}") |
| 250 | + |
| 251 | + # Keep image preloads |
| 252 | + for match in re.finditer(r'<link[^>]+rel="preload"[^>]+as="image"[^>]*/?>', |
| 253 | + head, re.DOTALL): |
| 254 | + tag = match.group(0) |
| 255 | + # Fix /_next paths to local paths |
| 256 | + tag = re.sub(r'/_next/static/media/', '/', tag) |
| 257 | + lines.append(f" {tag}") |
| 258 | + |
| 259 | + return "\n".join(lines) |
| 260 | + |
| 261 | + |
| 262 | +def _ai_convert_nextjs_sources(info: dict) -> dict: |
| 263 | + """ |
| 264 | + Fallback: AI-powered conversion from TSX source files. |
| 265 | + Used when no running dev server is available. |
| 266 | + """ |
| 267 | + sources = _collect_nextjs_sources(info) |
| 268 | + if not sources: |
| 269 | + logging.warning("No source files collected from Next.js app") |
| 270 | + return _fallback_layouts() |
| 271 | + |
| 272 | + layouts = {} |
| 273 | + |
| 274 | + # Identify component vs structural files |
| 275 | + component_sources = {} |
| 276 | + layout_sources = {} |
| 277 | + for rel_path, content in sources.items(): |
| 278 | + if rel_path.endswith('.css'): |
| 279 | + continue |
| 280 | + elif 'layout.' in rel_path or 'page.' in rel_path: |
| 281 | + layout_sources[rel_path] = content |
| 282 | + else: |
| 283 | + component_sources[rel_path] = content |
| 284 | + |
| 285 | + # Convert each component individually |
| 286 | + for rel_path, content in component_sources.items(): |
| 287 | + basename = os.path.splitext(os.path.basename(rel_path))[0] |
| 288 | + partial_name = f"partials/{basename}.html" |
| 289 | + logging.info(f" Converting {rel_path} → {partial_name}") |
| 290 | + html = _convert_single_component(basename, content) |
| 291 | + if html: |
| 292 | + layouts[partial_name] = html |
| 293 | + |
| 294 | + # Build baseof and index |
| 295 | + partial_names = [os.path.splitext(os.path.basename(k))[0] for k in layouts.keys()] |
| 296 | + baseof, index_html = _convert_layout_and_page(layout_sources, component_sources, partial_names) |
| 297 | + layouts["_default/baseof.html"] = baseof |
| 298 | + layouts["index.html"] = index_html |
| 299 | + |
| 300 | + logging.info(f"Generated {len(layouts)} layout files via AI conversion") |
| 301 | + return layouts |
| 302 | + |
| 303 | + |
| 304 | +_COMPONENT_PROMPT = """Convert this React/Next.js component to static Hugo-compatible HTML. |
| 305 | + |
| 306 | +CRITICAL RULES: |
| 307 | +- Output ONLY the raw HTML. No markdown fences, no explanation, no JSON wrapping. |
| 308 | +- Convert ALL JSX `className` to HTML `class` |
| 309 | +- Unroll ALL `.map()` calls into full static HTML — every single item |
| 310 | +- Preserve EVERY Tailwind CSS class and inline style EXACTLY |
| 311 | +- Preserve ALL text content — do NOT summarize or shorten |
| 312 | +- Preserve ALL SVG content inline |
| 313 | +- Strip React hooks and event handlers, keep static HTML structure |
| 314 | + |
| 315 | +Component name: {name} |
| 316 | + |
| 317 | +Source code: |
| 318 | +{source}""" |
| 319 | + |
| 320 | + |
| 321 | +def _convert_single_component(name: str, source: str) -> str | None: |
| 322 | + """Convert a single React component to Hugo-compatible HTML via AI.""" |
| 323 | + prompt = _COMPONENT_PROMPT.format(name=name, source=source) |
| 324 | + try: |
| 325 | + response = call_ai(prompt, NEXTJS_SYSTEM, max_tokens=16384) |
| 326 | + html = re.sub(r'^```(?:html)?\s*', '', response.strip()) |
| 327 | + html = re.sub(r'```\s*$', '', html.strip()) |
| 328 | + return html |
| 329 | + except Exception as e: |
| 330 | + logging.warning(f"Failed to convert component {name}: {e}") |
| 331 | + return None |
| 332 | + |
| 333 | + |
| 334 | +def _convert_layout_and_page(layout_sources, component_sources, partial_names): |
| 335 | +    """Return the fallback baseof.html plus an index.html that includes each named partial.""" |
| 336 | + partial_includes = "\n".join( |
| 337 | + f' {{{{ partial "{name}.html" . }}}}' for name in partial_names |
| 338 | + ) |
| 339 | + baseof = _fallback_baseof() |
| 340 | + index_html = f'{{% define "main" %}}\n<div class="bg-[#121517] flex flex-col w-full">\n{partial_includes}\n</div>\n{{% end %}}' |
| 341 | + return baseof, index_html |
| 342 | + |
| 343 | + |
| 344 | +def _collect_nextjs_sources(info: dict) -> dict: |
| 345 | + """ |
| 346 | + Collect relevant source files from a Next.js app into a dict |
| 347 | + keyed by relative path. Applies priority-based context budgeting. |
| 348 | + """ |
| 349 | + app_dir = info['app_dir'] |
| 350 | + sources = {} |
| 351 | + budget = 80000 |
| 352 | + |
| 353 | + # Tier 1: Layout and page entry points (always include) |
| 354 | + tier1 = [] |
| 355 | + if info.get('layout_file'): |
| 356 | + tier1.append(info['layout_file']) |
| 357 | + if info.get('page_file'): |
| 358 | + tier1.append(info['page_file']) |
| 359 | + |
| 360 | + # Tier 2: Section-level components (most important for structure) |
| 361 | + tier2 = [] |
| 362 | + # Tier 3: Page components |
| 363 | + tier3 = [] |
| 364 | + # Tier 4: UI/marketing components |
| 365 | + tier4 = [] |
| 366 | + # Tier 5: CSS and config |
| 367 | + tier5 = list(info.get('css_files', [])) |
| 368 | + |
| 369 | + # Walk source directories looking for components |
| 370 | + for search_root in [os.path.join(app_dir, 'src'), os.path.join(app_dir, 'app'), app_dir]: |
| 371 | + if not os.path.isdir(search_root): |
| 372 | + continue |
| 373 | + for root, dirs, files in os.walk(search_root): |
| 374 | + # Skip junk |
| 375 | + dirs[:] = [d for d in dirs if d not in ('node_modules', '.next', '__MACOSX', '.git', '__tests__')] |
| 376 | + for f in files: |
| 377 | + if not f.endswith(('.tsx', '.jsx', '.ts', '.js')): |
| 378 | + continue |
| 379 | + full = os.path.join(root, f) |
| 380 | +                # Skip test/spec files and API routes |
| 381 | + if '.test.' in f or '.spec.' in f: |
| 382 | + continue |
| 383 | + if '/api/' in full: |
| 384 | + continue |
| 385 | + # Skip files already in tier 1 |
| 386 | + if full in tier1: |
| 387 | + continue |
| 388 | + |
| 389 | + rel = os.path.relpath(root, app_dir) |
| 390 | + basename = f.lower() |
| 391 | + |
| 392 | + if 'section' in basename or 'section' in rel.lower(): |
| 393 | + tier2.append(full) |
| 394 | + elif 'page' in basename and 'page' not in rel.lower().split('app')[-1:]: |
| 395 | + tier3.append(full) |
| 396 | + elif any(k in rel.lower() for k in ('components', 'marketing')): |
| 397 | + tier4.append(full) |
| 398 | + |
| 399 | + # Assemble by priority, tracking budget |
| 400 | + used = 0 |
| 401 | + for tier_files in [tier1, tier2, tier3, tier4, tier5]: |
| 402 | + for fpath in tier_files: |
| 403 | + if not os.path.isfile(fpath): |
| 404 | + continue |
| 405 | + try: |
| 406 | + with open(fpath, 'r', errors='replace') as fh: |
| 407 | + content = fh.read() |
| 408 | + except OSError: |
| 409 | + continue |
| 410 | + |
| 411 | + rel_path = os.path.relpath(fpath, app_dir) |
| 412 | + # Skip if already collected (dedup across tiers) |
| 413 | + if rel_path in sources: |
| 414 | + continue |
| 415 | + |
| 416 | + # Truncate individual large files |
| 417 | + if len(content) > 8000: |
| 418 | + content = content[:8000] + '\n// ... [truncated]' |
| 419 | + |
| 420 | + if used + len(content) > budget: |
| 421 | + remaining = budget - used |
| 422 | + if remaining > 500: |
| 423 | + content = content[:remaining] + '\n// ... [truncated - budget]' |
| 424 | + sources[rel_path] = content |
| 425 | + used += len(content) |
| 426 | + break |
| 427 | + sources[rel_path] = content |
| 428 | + used += len(content) |
| 429 | + |
| 430 | + logging.info(f"Collected {len(sources)} source files ({used} chars) from Next.js app") |
| 431 | + return sources |
| 70 | 432 | |
| 71 | 433 | |
| 72 | 434 | def hugoify_dir(theme_dir: str) -> str: |
| 73 | 435 | """ |
| 74 | 436 | Validate and optionally augment an existing Hugo theme directory. |
| | @@ -101,18 +463,24 @@ |
| 101 | 463 | # CLI entry point (used by cli.py) |
| 102 | 464 | def hugoify(path: str) -> str: |
| 103 | 465 | """ |
| 104 | 466 | Entry point for the CLI 'hugoify' command. |
| 105 | 467 | If path is a Hugo theme dir: validate it. |
| 468 | + If path is a Next.js app: convert React components to Hugo. |
| 106 | 469 | If path is an HTML file or raw HTML dir: convert it. |
| 107 | 470 | """ |
| 108 | | - from .theme_finder import find_hugo_theme, find_raw_html_files |
| 471 | + from .theme_finder import find_hugo_theme, find_nextjs_app, find_raw_html_files |
| 109 | 472 | |
| 110 | 473 | info = find_hugo_theme(path) |
| 111 | 474 | if info: |
| 112 | 475 | return hugoify_dir(info['theme_dir']) |
| 113 | 476 | |
| 477 | + nextjs_info = find_nextjs_app(path) |
| 478 | + if nextjs_info: |
| 479 | + layouts = hugoify_nextjs(nextjs_info) |
| 480 | + return f"Converted Next.js app to {len(layouts)} layout files: {list(layouts.keys())}" |
| 481 | + |
| 114 | 482 | if os.path.isfile(path) and path.endswith('.html'): |
| 115 | 483 | layouts = hugoify_html(path) |
| 116 | 484 | return f"Converted to {len(layouts)} layout files: {list(layouts.keys())}" |
| 117 | 485 | |
| 118 | 486 | html_files = find_raw_html_files(path) |
| | @@ -130,21 +498,72 @@ |
| 130 | 498 | # --------------------------------------------------------------------------- |
| 131 | 499 | # Helpers |
| 132 | 500 | # --------------------------------------------------------------------------- |
| 133 | 501 | |
| 134 | 502 | def _parse_layout_json(response: str) -> dict: |
| 135 | | - """Extract JSON from AI response, even if surrounded by prose.""" |
| 136 | | - # Try to find JSON block |
| 137 | | - match = re.search(r'\{.*\}', response, re.DOTALL) |
| 503 | + """Extract JSON from AI response, even if surrounded by prose or markdown fences.""" |
| 504 | + # Strip markdown fences if present |
| 505 | + stripped = re.sub(r'```(?:json)?\s*', '', response) |
| 506 | + stripped = re.sub(r'```\s*$', '', stripped.strip()) |
| 507 | + |
| 508 | + # Try the full stripped response as JSON first |
| 509 | + try: |
| 510 | + result = json.loads(stripped) |
| 511 | + if isinstance(result, dict): |
| 512 | + logging.info(f"Parsed {len(result)} layout files from AI response") |
| 513 | + return result |
| 514 | + except json.JSONDecodeError: |
| 515 | + pass |
| 516 | + |
| 517 | + # Try to find JSON block (outermost braces) |
| 518 | + match = re.search(r'\{.*\}', stripped, re.DOTALL) |
| 138 | 519 | if match: |
| 139 | 520 | try: |
| 140 | | - return json.loads(match.group(0)) |
| 521 | + result = json.loads(match.group(0)) |
| 522 | + if isinstance(result, dict): |
| 523 | + logging.info(f"Parsed {len(result)} layout files from AI response (extracted)") |
| 524 | + return result |
| 141 | 525 | except json.JSONDecodeError: |
| 142 | 526 | pass |
| 527 | + |
| 528 | + # AI sometimes uses backtick-delimited values instead of JSON strings. |
| 529 | + # Parse with a regex-based key-value extractor. |
| 530 | + backtick_result = _parse_backtick_json(match.group(0)) |
| 531 | + if backtick_result: |
| 532 | + logging.info(f"Parsed {len(backtick_result)} layout files from backtick-delimited response") |
| 533 | + return backtick_result |
| 143 | 534 | |
| 144 | 535 | # Fallback: return a minimal layout |
| 145 | 536 | logging.warning("Could not parse AI response as JSON, using fallback layouts") |
| 537 | + logging.debug(f"AI response was: {response[:500]!r}") |
| 538 | + return { |
| 539 | + "_default/baseof.html": _fallback_baseof(), |
| 540 | + "partials/header.html": "<header><!-- header --></header>", |
| 541 | + "partials/footer.html": "<footer>{{ .Site.Params.copyright }}</footer>", |
| 542 | + "index.html": '{{ define "main" }}<main>{{ .Content }}</main>{{ end }}', |
| 543 | + } |
| 544 | + |
| 545 | + |
| 546 | +def _parse_backtick_json(text: str) -> dict | None: |
| 547 | + """ |
| 548 | + Parse a JSON-like object where values are backtick-delimited template literals |
| 549 | + instead of proper JSON strings. This happens when the AI uses JS template syntax. |
| 550 | + e.g.: { "key": `<html>...</html>` } |
| 551 | + """ |
| 552 | + result = {} |
| 553 | + # Match "key": `value` pairs where value can span multiple lines |
| 554 | + pattern = re.compile(r'"([^"]+)"\s*:\s*`(.*?)`(?:\s*[,}])', re.DOTALL) |
| 555 | + for m in pattern.finditer(text): |
| 556 | + key = m.group(1) |
| 557 | + value = m.group(2).strip() |
| 558 | + result[key] = value |
| 559 | + |
| 560 | + return result if result else None |
| 561 | + |
| 562 | + |
| 563 | +def _fallback_layouts() -> dict: |
| 564 | + """Minimal fallback when source collection fails.""" |
| 146 | 565 | return { |
| 147 | 566 | "_default/baseof.html": _fallback_baseof(), |
| 148 | 567 | "partials/header.html": "<header><!-- header --></header>", |
| 149 | 568 | "partials/footer.html": "<footer>{{ .Site.Params.copyright }}</footer>", |
| 150 | 569 | "index.html": '{{ define "main" }}<main>{{ .Content }}</main>{{ end }}', |
| 151 | 570 | |