Hugoifier

Merge pull request #12 from ConflictHQ/feat/nextjs-support feat: add Next.js app conversion support

noreply 2026-03-17 15:35 trunk merge
Commit 77c5e9a6e72fe2cb7f5966a9a06377d08527f39e8ac5b931659c7a250d588bc7
--- hugoifier/cli.py
+++ hugoifier/cli.py
@@ -47,11 +47,11 @@
4747
# analyze
4848
analyze_parser = subparsers.add_parser("analyze", help="Analyze a theme and report structure")
4949
analyze_parser.add_argument("path", help="Path to the theme")
5050
5151
# hugoify
52
- hugoify_parser = subparsers.add_parser("hugoify", help="Convert HTML to Hugo theme (or validate existing Hugo theme)")
52
+ hugoify_parser = subparsers.add_parser("hugoify", help="Convert HTML or Next.js app to Hugo theme (or validate existing Hugo theme)")
5353
hugoify_parser.add_argument("path", help="Path to HTML file or theme directory")
5454
5555
# decapify
5656
decapify_parser = subparsers.add_parser("decapify", help="Add Decap CMS to an assembled Hugo site")
5757
decapify_parser.add_argument("path", help="Path to the Hugo site directory")
5858
--- hugoifier/cli.py
+++ hugoifier/cli.py
@@ -47,11 +47,11 @@
47 # analyze
48 analyze_parser = subparsers.add_parser("analyze", help="Analyze a theme and report structure")
49 analyze_parser.add_argument("path", help="Path to the theme")
50
51 # hugoify
52 hugoify_parser = subparsers.add_parser("hugoify", help="Convert HTML to Hugo theme (or validate existing Hugo theme)")
53 hugoify_parser.add_argument("path", help="Path to HTML file or theme directory")
54
55 # decapify
56 decapify_parser = subparsers.add_parser("decapify", help="Add Decap CMS to an assembled Hugo site")
57 decapify_parser.add_argument("path", help="Path to the Hugo site directory")
58
--- hugoifier/cli.py
+++ hugoifier/cli.py
@@ -47,11 +47,11 @@
47 # analyze
48 analyze_parser = subparsers.add_parser("analyze", help="Analyze a theme and report structure")
49 analyze_parser.add_argument("path", help="Path to the theme")
50
51 # hugoify
52 hugoify_parser = subparsers.add_parser("hugoify", help="Convert HTML or Next.js app to Hugo theme (or validate existing Hugo theme)")
53 hugoify_parser.add_argument("path", help="Path to HTML file or theme directory")
54
55 # decapify
56 decapify_parser = subparsers.add_parser("decapify", help="Add Decap CMS to an assembled Hugo site")
57 decapify_parser.add_argument("path", help="Path to the Hugo site directory")
58
--- hugoifier/config.py
+++ hugoifier/config.py
@@ -27,43 +27,44 @@
2727
GOOGLE_MODEL = os.getenv('GOOGLE_MODEL', 'gemini-1.5-pro')
2828
2929
MAX_TOKENS = int(os.getenv('HUGOIFIER_MAX_TOKENS', '4096'))
3030
3131
32
-def call_ai(prompt: str, system: str = "You are a helpful Hugo theme conversion assistant.") -> str:
32
+def call_ai(prompt: str, system: str = "You are a helpful Hugo theme conversion assistant.", max_tokens: int = None) -> str:
3333
"""
3434
Call the configured AI backend and return the response text.
3535
This is the single entry point for all AI calls in the codebase.
3636
"""
37
+ tokens = max_tokens or MAX_TOKENS
3738
if BACKEND == 'anthropic':
38
- return _call_anthropic(prompt, system)
39
+ return _call_anthropic(prompt, system, tokens)
3940
elif BACKEND == 'openai':
40
- return _call_openai(prompt, system)
41
+ return _call_openai(prompt, system, tokens)
4142
elif BACKEND == 'google':
4243
return _call_google(prompt, system)
4344
else:
4445
raise ValueError(
4546
f"Unknown backend: {BACKEND!r}. "
4647
"Set HUGOIFIER_BACKEND to 'anthropic', 'openai', or 'google'."
4748
)
4849
4950
50
-def _call_anthropic(prompt: str, system: str) -> str:
51
+def _call_anthropic(prompt: str, system: str, max_tokens: int = None) -> str:
5152
if not ANTHROPIC_API_KEY:
5253
raise EnvironmentError("ANTHROPIC_API_KEY is not set")
5354
import anthropic
5455
client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)
5556
message = client.messages.create(
5657
model=ANTHROPIC_MODEL,
57
- max_tokens=MAX_TOKENS,
58
+ max_tokens=max_tokens or MAX_TOKENS,
5859
system=system,
5960
messages=[{"role": "user", "content": prompt}],
6061
)
6162
return message.content[0].text
6263
6364
64
-def _call_openai(prompt: str, system: str) -> str:
65
+def _call_openai(prompt: str, system: str, max_tokens: int = None) -> str:
6566
if not OPENAI_API_KEY:
6667
raise EnvironmentError("OPENAI_API_KEY is not set")
6768
from openai import OpenAI
6869
client = OpenAI(api_key=OPENAI_API_KEY)
6970
response = client.chat.completions.create(
@@ -70,11 +71,11 @@
7071
model=OPENAI_MODEL,
7172
messages=[
7273
{"role": "system", "content": system},
7374
{"role": "user", "content": prompt},
7475
],
75
- max_tokens=MAX_TOKENS,
76
+ max_tokens=max_tokens or MAX_TOKENS,
7677
)
7778
return response.choices[0].message.content.strip()
7879
7980
8081
def _call_google(prompt: str, system: str) -> str:
8182
--- hugoifier/config.py
+++ hugoifier/config.py
@@ -27,43 +27,44 @@
27 GOOGLE_MODEL = os.getenv('GOOGLE_MODEL', 'gemini-1.5-pro')
28
29 MAX_TOKENS = int(os.getenv('HUGOIFIER_MAX_TOKENS', '4096'))
30
31
32 def call_ai(prompt: str, system: str = "You are a helpful Hugo theme conversion assistant.") -> str:
33 """
34 Call the configured AI backend and return the response text.
35 This is the single entry point for all AI calls in the codebase.
36 """
 
37 if BACKEND == 'anthropic':
38 return _call_anthropic(prompt, system)
39 elif BACKEND == 'openai':
40 return _call_openai(prompt, system)
41 elif BACKEND == 'google':
42 return _call_google(prompt, system)
43 else:
44 raise ValueError(
45 f"Unknown backend: {BACKEND!r}. "
46 "Set HUGOIFIER_BACKEND to 'anthropic', 'openai', or 'google'."
47 )
48
49
50 def _call_anthropic(prompt: str, system: str) -> str:
51 if not ANTHROPIC_API_KEY:
52 raise EnvironmentError("ANTHROPIC_API_KEY is not set")
53 import anthropic
54 client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)
55 message = client.messages.create(
56 model=ANTHROPIC_MODEL,
57 max_tokens=MAX_TOKENS,
58 system=system,
59 messages=[{"role": "user", "content": prompt}],
60 )
61 return message.content[0].text
62
63
64 def _call_openai(prompt: str, system: str) -> str:
65 if not OPENAI_API_KEY:
66 raise EnvironmentError("OPENAI_API_KEY is not set")
67 from openai import OpenAI
68 client = OpenAI(api_key=OPENAI_API_KEY)
69 response = client.chat.completions.create(
@@ -70,11 +71,11 @@
70 model=OPENAI_MODEL,
71 messages=[
72 {"role": "system", "content": system},
73 {"role": "user", "content": prompt},
74 ],
75 max_tokens=MAX_TOKENS,
76 )
77 return response.choices[0].message.content.strip()
78
79
80 def _call_google(prompt: str, system: str) -> str:
81
--- hugoifier/config.py
+++ hugoifier/config.py
@@ -27,43 +27,44 @@
27 GOOGLE_MODEL = os.getenv('GOOGLE_MODEL', 'gemini-1.5-pro')
28
29 MAX_TOKENS = int(os.getenv('HUGOIFIER_MAX_TOKENS', '4096'))
30
31
32 def call_ai(prompt: str, system: str = "You are a helpful Hugo theme conversion assistant.", max_tokens: int = None) -> str:
33 """
34 Call the configured AI backend and return the response text.
35 This is the single entry point for all AI calls in the codebase.
36 """
37 tokens = max_tokens or MAX_TOKENS
38 if BACKEND == 'anthropic':
39 return _call_anthropic(prompt, system, tokens)
40 elif BACKEND == 'openai':
41 return _call_openai(prompt, system, tokens)
42 elif BACKEND == 'google':
43 return _call_google(prompt, system)
44 else:
45 raise ValueError(
46 f"Unknown backend: {BACKEND!r}. "
47 "Set HUGOIFIER_BACKEND to 'anthropic', 'openai', or 'google'."
48 )
49
50
51 def _call_anthropic(prompt: str, system: str, max_tokens: int = None) -> str:
52 if not ANTHROPIC_API_KEY:
53 raise EnvironmentError("ANTHROPIC_API_KEY is not set")
54 import anthropic
55 client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)
56 message = client.messages.create(
57 model=ANTHROPIC_MODEL,
58 max_tokens=max_tokens or MAX_TOKENS,
59 system=system,
60 messages=[{"role": "user", "content": prompt}],
61 )
62 return message.content[0].text
63
64
65 def _call_openai(prompt: str, system: str, max_tokens: int = None) -> str:
66 if not OPENAI_API_KEY:
67 raise EnvironmentError("OPENAI_API_KEY is not set")
68 from openai import OpenAI
69 client = OpenAI(api_key=OPENAI_API_KEY)
70 response = client.chat.completions.create(
@@ -70,11 +71,11 @@
71 model=OPENAI_MODEL,
72 messages=[
73 {"role": "system", "content": system},
74 {"role": "user", "content": prompt},
75 ],
76 max_tokens=max_tokens or MAX_TOKENS,
77 )
78 return response.choices[0].message.content.strip()
79
80
81 def _call_google(prompt: str, system: str) -> str:
82
--- hugoifier/utils/complete.py
+++ hugoifier/utils/complete.py
@@ -9,12 +9,12 @@
99
import os
1010
import shutil
1111
from pathlib import Path
1212
1313
from .decapify import decapify
14
-from .hugoify import hugoify_html
15
-from .theme_finder import find_hugo_theme, find_raw_html_files
14
+from .hugoify import hugoify_html, hugoify_nextjs
15
+from .theme_finder import find_hugo_theme, find_nextjs_app, find_raw_html_files
1616
from .theme_patcher import patch_config, patch_theme
1717
1818
1919
def complete(
2020
input_path: str,
@@ -41,16 +41,21 @@
4141
branding = {'cms_name': cms_name, 'cms_logo': cms_logo, 'cms_color': cms_color}
4242
info = find_hugo_theme(input_path)
4343
4444
if info:
4545
return _assemble_hugo_site(info, output_dir, branding)
46
- else:
47
- # Raw HTML path
48
- html_files = find_raw_html_files(input_path)
49
- if not html_files:
50
- raise ValueError(f"No Hugo theme or HTML files found in {input_path}")
51
- return _convert_raw_html(input_path, html_files, output_dir, branding)
46
+
47
+ # Next.js path (check before raw HTML since Next.js projects may contain .html files)
48
+ nextjs_info = find_nextjs_app(input_path)
49
+ if nextjs_info:
50
+ return _convert_nextjs(input_path, nextjs_info, output_dir, branding)
51
+
52
+ # Raw HTML path
53
+ html_files = find_raw_html_files(input_path)
54
+ if not html_files:
55
+ raise ValueError(f"No Hugo theme, Next.js app, or HTML files found in {input_path}")
56
+ return _convert_raw_html(input_path, html_files, output_dir, branding)
5257
5358
5459
# ---------------------------------------------------------------------------
5560
# Hugo theme path
5661
# ---------------------------------------------------------------------------
@@ -111,10 +116,87 @@
111116
112117
logging.info(f"Done. Site ready at: {output_dir}")
113118
logging.info(f"Run: cd {output_dir} && hugo serve")
114119
return output_dir
115120
121
+
122
+# ---------------------------------------------------------------------------
123
+# Next.js path
124
+# ---------------------------------------------------------------------------
125
+
126
+def _convert_nextjs(
127
+ input_path: str, nextjs_info: dict, output_dir: str = None, branding: dict = None
128
+) -> str:
129
+ app_dir = nextjs_info['app_dir']
130
+ theme_name = nextjs_info.get('app_name', os.path.basename(os.path.abspath(input_path)))
131
+
132
+ if output_dir is None:
133
+ output_dir = str(Path(__file__).parents[2] / 'output' / theme_name)
134
+
135
+ logging.info(f"Converting Next.js app: {theme_name}")
136
+
137
+ # Convert: capture rendered HTML if dev server running, else AI fallback
138
+ hugo_layouts = hugoify_nextjs(nextjs_info)
139
+
140
+ os.makedirs(output_dir, exist_ok=True)
141
+
142
+ # Extract captured CSS if present (from rendered HTML capture)
143
+ captured_css = hugo_layouts.pop('_captured_css', {})
144
+
145
+ # Write converted layouts
146
+ theme_layouts_dir = os.path.join(output_dir, 'themes', theme_name, 'layouts')
147
+ os.makedirs(os.path.join(theme_layouts_dir, '_default'), exist_ok=True)
148
+ os.makedirs(os.path.join(theme_layouts_dir, 'partials'), exist_ok=True)
149
+
150
+ for filename, content in hugo_layouts.items():
151
+ # Fix common AI mistake: partial "partials/X.html" → partial "X.html"
152
+ if isinstance(content, str):
153
+ content = content.replace('partial "partials/', 'partial "')
154
+ dest = os.path.join(theme_layouts_dir, filename)
155
+ os.makedirs(os.path.dirname(dest), exist_ok=True)
156
+ with open(dest, 'w') as f:
157
+ f.write(content)
158
+
159
+ # Copy public/ assets to theme static/
160
+ public_dir = os.path.join(app_dir, 'public')
161
+ theme_static = os.path.join(output_dir, 'themes', theme_name, 'static')
162
+ if os.path.isdir(public_dir):
163
+ _copy_dir(public_dir, theme_static)
164
+ logging.info("Copied public/ assets to static/")
165
+
166
+ # Write captured CSS (from rendered HTML capture)
167
+ css_dest = os.path.join(theme_static, 'css')
168
+ os.makedirs(css_dest, exist_ok=True)
169
+ for css_name, css_content in captured_css.items():
170
+ with open(os.path.join(css_dest, css_name), 'w') as f:
171
+ f.write(css_content)
172
+ logging.info(f"Wrote captured CSS: {css_name}")
173
+
174
+ # Also copy source CSS files (globals.css etc.)
175
+ for css_file in nextjs_info.get('css_files', []):
176
+ if os.path.isfile(css_file):
177
+ shutil.copy2(css_file, os.path.join(css_dest, os.path.basename(css_file)))
178
+ logging.info("Copied CSS files")
179
+
180
+ _write_minimal_hugo_toml(output_dir, theme_name)
181
+
182
+ # Create minimal content
183
+ content_dir = os.path.join(output_dir, 'content')
184
+ os.makedirs(content_dir, exist_ok=True)
185
+ with open(os.path.join(content_dir, '_index.md'), 'w') as f:
186
+ f.write('---\ntitle: Home\n---\n')
187
+
188
+ b = branding or {}
189
+ decapify(
190
+ output_dir,
191
+ cms_name=b.get('cms_name'), cms_logo=b.get('cms_logo'), cms_color=b.get('cms_color'),
192
+ )
193
+
194
+ logging.info(f"Done. Site ready at: {output_dir}")
195
+ logging.info(f"Run: cd {output_dir} && hugo serve")
196
+ return output_dir
197
+
116198
117199
# ---------------------------------------------------------------------------
118200
# Raw HTML path
119201
# ---------------------------------------------------------------------------
120202
@@ -126,13 +208,13 @@
126208
if output_dir is None:
127209
output_dir = str(Path(__file__).parents[2] / 'output' / theme_name)
128210
129211
logging.info(f"Converting raw HTML theme: {theme_name}")
130212
131
- # Use AI to convert the main HTML file to Hugo layouts
213
+ # Direct HTML extraction — use the actual HTML as-is, no AI reinterpretation
132214
main_html = _pick_main_html(html_files)
133
- logging.info(f"Converting {main_html} ...")
215
+ logging.info(f"Extracting {main_html} ...")
134216
hugo_layouts = hugoify_html(main_html)
135217
136218
os.makedirs(output_dir, exist_ok=True)
137219
138220
# Write converted layouts
@@ -144,15 +226,20 @@
144226
dest = os.path.join(theme_layouts_dir, filename)
145227
os.makedirs(os.path.dirname(dest), exist_ok=True)
146228
with open(dest, 'w') as f:
147229
f.write(content)
148230
149
- # Copy CSS/JS/images
150
- for ext_dir in ('css', 'js', 'images', 'img', 'assets', 'fonts'):
151
- src = os.path.join(input_path, ext_dir)
152
- if os.path.isdir(src):
153
- _copy_dir(src, os.path.join(output_dir, 'themes', theme_name, 'static', ext_dir))
231
+ # Copy ALL static assets from the HTML theme directory
232
+ theme_static = os.path.join(output_dir, 'themes', theme_name, 'static')
233
+ for item in os.listdir(input_path):
234
+ src = os.path.join(input_path, item)
235
+ if os.path.isdir(src) and item not in ('__MACOSX', '.git', 'node_modules'):
236
+ _copy_dir(src, os.path.join(theme_static, item))
237
+ elif os.path.isfile(src) and not item.endswith('.html'):
238
+ # Copy non-HTML files (images, fonts, etc.) to static root
239
+ os.makedirs(theme_static, exist_ok=True)
240
+ shutil.copy2(src, os.path.join(theme_static, item))
154241
155242
_write_minimal_hugo_toml(output_dir, theme_name)
156243
157244
# Create minimal content
158245
content_dir = os.path.join(output_dir, 'content')
159246
--- hugoifier/utils/complete.py
+++ hugoifier/utils/complete.py
@@ -9,12 +9,12 @@
9 import os
10 import shutil
11 from pathlib import Path
12
13 from .decapify import decapify
14 from .hugoify import hugoify_html
15 from .theme_finder import find_hugo_theme, find_raw_html_files
16 from .theme_patcher import patch_config, patch_theme
17
18
19 def complete(
20 input_path: str,
@@ -41,16 +41,21 @@
41 branding = {'cms_name': cms_name, 'cms_logo': cms_logo, 'cms_color': cms_color}
42 info = find_hugo_theme(input_path)
43
44 if info:
45 return _assemble_hugo_site(info, output_dir, branding)
46 else:
47 # Raw HTML path
48 html_files = find_raw_html_files(input_path)
49 if not html_files:
50 raise ValueError(f"No Hugo theme or HTML files found in {input_path}")
51 return _convert_raw_html(input_path, html_files, output_dir, branding)
 
 
 
 
 
52
53
54 # ---------------------------------------------------------------------------
55 # Hugo theme path
56 # ---------------------------------------------------------------------------
@@ -111,10 +116,87 @@
111
112 logging.info(f"Done. Site ready at: {output_dir}")
113 logging.info(f"Run: cd {output_dir} && hugo serve")
114 return output_dir
115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
117 # ---------------------------------------------------------------------------
118 # Raw HTML path
119 # ---------------------------------------------------------------------------
120
@@ -126,13 +208,13 @@
126 if output_dir is None:
127 output_dir = str(Path(__file__).parents[2] / 'output' / theme_name)
128
129 logging.info(f"Converting raw HTML theme: {theme_name}")
130
131 # Use AI to convert the main HTML file to Hugo layouts
132 main_html = _pick_main_html(html_files)
133 logging.info(f"Converting {main_html} ...")
134 hugo_layouts = hugoify_html(main_html)
135
136 os.makedirs(output_dir, exist_ok=True)
137
138 # Write converted layouts
@@ -144,15 +226,20 @@
144 dest = os.path.join(theme_layouts_dir, filename)
145 os.makedirs(os.path.dirname(dest), exist_ok=True)
146 with open(dest, 'w') as f:
147 f.write(content)
148
149 # Copy CSS/JS/images
150 for ext_dir in ('css', 'js', 'images', 'img', 'assets', 'fonts'):
151 src = os.path.join(input_path, ext_dir)
152 if os.path.isdir(src):
153 _copy_dir(src, os.path.join(output_dir, 'themes', theme_name, 'static', ext_dir))
 
 
 
 
 
154
155 _write_minimal_hugo_toml(output_dir, theme_name)
156
157 # Create minimal content
158 content_dir = os.path.join(output_dir, 'content')
159
--- hugoifier/utils/complete.py
+++ hugoifier/utils/complete.py
@@ -9,12 +9,12 @@
9 import os
10 import shutil
11 from pathlib import Path
12
13 from .decapify import decapify
14 from .hugoify import hugoify_html, hugoify_nextjs
15 from .theme_finder import find_hugo_theme, find_nextjs_app, find_raw_html_files
16 from .theme_patcher import patch_config, patch_theme
17
18
19 def complete(
20 input_path: str,
@@ -41,16 +41,21 @@
41 branding = {'cms_name': cms_name, 'cms_logo': cms_logo, 'cms_color': cms_color}
42 info = find_hugo_theme(input_path)
43
44 if info:
45 return _assemble_hugo_site(info, output_dir, branding)
46
47 # Next.js path (check before raw HTML since Next.js projects may contain .html files)
48 nextjs_info = find_nextjs_app(input_path)
49 if nextjs_info:
50 return _convert_nextjs(input_path, nextjs_info, output_dir, branding)
51
52 # Raw HTML path
53 html_files = find_raw_html_files(input_path)
54 if not html_files:
55 raise ValueError(f"No Hugo theme, Next.js app, or HTML files found in {input_path}")
56 return _convert_raw_html(input_path, html_files, output_dir, branding)
57
58
59 # ---------------------------------------------------------------------------
60 # Hugo theme path
61 # ---------------------------------------------------------------------------
@@ -111,10 +116,87 @@
116
117 logging.info(f"Done. Site ready at: {output_dir}")
118 logging.info(f"Run: cd {output_dir} && hugo serve")
119 return output_dir
120
121
122 # ---------------------------------------------------------------------------
123 # Next.js path
124 # ---------------------------------------------------------------------------
125
126 def _convert_nextjs(
127 input_path: str, nextjs_info: dict, output_dir: str = None, branding: dict = None
128 ) -> str:
129 app_dir = nextjs_info['app_dir']
130 theme_name = nextjs_info.get('app_name', os.path.basename(os.path.abspath(input_path)))
131
132 if output_dir is None:
133 output_dir = str(Path(__file__).parents[2] / 'output' / theme_name)
134
135 logging.info(f"Converting Next.js app: {theme_name}")
136
137 # Convert: capture rendered HTML if dev server running, else AI fallback
138 hugo_layouts = hugoify_nextjs(nextjs_info)
139
140 os.makedirs(output_dir, exist_ok=True)
141
142 # Extract captured CSS if present (from rendered HTML capture)
143 captured_css = hugo_layouts.pop('_captured_css', {})
144
145 # Write converted layouts
146 theme_layouts_dir = os.path.join(output_dir, 'themes', theme_name, 'layouts')
147 os.makedirs(os.path.join(theme_layouts_dir, '_default'), exist_ok=True)
148 os.makedirs(os.path.join(theme_layouts_dir, 'partials'), exist_ok=True)
149
150 for filename, content in hugo_layouts.items():
151 # Fix common AI mistake: partial "partials/X.html" → partial "X.html"
152 if isinstance(content, str):
153 content = content.replace('partial "partials/', 'partial "')
154 dest = os.path.join(theme_layouts_dir, filename)
155 os.makedirs(os.path.dirname(dest), exist_ok=True)
156 with open(dest, 'w') as f:
157 f.write(content)
158
159 # Copy public/ assets to theme static/
160 public_dir = os.path.join(app_dir, 'public')
161 theme_static = os.path.join(output_dir, 'themes', theme_name, 'static')
162 if os.path.isdir(public_dir):
163 _copy_dir(public_dir, theme_static)
164 logging.info("Copied public/ assets to static/")
165
166 # Write captured CSS (from rendered HTML capture)
167 css_dest = os.path.join(theme_static, 'css')
168 os.makedirs(css_dest, exist_ok=True)
169 for css_name, css_content in captured_css.items():
170 with open(os.path.join(css_dest, css_name), 'w') as f:
171 f.write(css_content)
172 logging.info(f"Wrote captured CSS: {css_name}")
173
174 # Also copy source CSS files (globals.css etc.)
175 for css_file in nextjs_info.get('css_files', []):
176 if os.path.isfile(css_file):
177 shutil.copy2(css_file, os.path.join(css_dest, os.path.basename(css_file)))
178 logging.info("Copied CSS files")
179
180 _write_minimal_hugo_toml(output_dir, theme_name)
181
182 # Create minimal content
183 content_dir = os.path.join(output_dir, 'content')
184 os.makedirs(content_dir, exist_ok=True)
185 with open(os.path.join(content_dir, '_index.md'), 'w') as f:
186 f.write('---\ntitle: Home\n---\n')
187
188 b = branding or {}
189 decapify(
190 output_dir,
191 cms_name=b.get('cms_name'), cms_logo=b.get('cms_logo'), cms_color=b.get('cms_color'),
192 )
193
194 logging.info(f"Done. Site ready at: {output_dir}")
195 logging.info(f"Run: cd {output_dir} && hugo serve")
196 return output_dir
197
198
199 # ---------------------------------------------------------------------------
200 # Raw HTML path
201 # ---------------------------------------------------------------------------
202
@@ -126,13 +208,13 @@
208 if output_dir is None:
209 output_dir = str(Path(__file__).parents[2] / 'output' / theme_name)
210
211 logging.info(f"Converting raw HTML theme: {theme_name}")
212
213 # Direct HTML extraction — use the actual HTML as-is, no AI reinterpretation
214 main_html = _pick_main_html(html_files)
215 logging.info(f"Extracting {main_html} ...")
216 hugo_layouts = hugoify_html(main_html)
217
218 os.makedirs(output_dir, exist_ok=True)
219
220 # Write converted layouts
@@ -144,15 +226,20 @@
226 dest = os.path.join(theme_layouts_dir, filename)
227 os.makedirs(os.path.dirname(dest), exist_ok=True)
228 with open(dest, 'w') as f:
229 f.write(content)
230
231 # Copy ALL static assets from the HTML theme directory
232 theme_static = os.path.join(output_dir, 'themes', theme_name, 'static')
233 for item in os.listdir(input_path):
234 src = os.path.join(input_path, item)
235 if os.path.isdir(src) and item not in ('__MACOSX', '.git', 'node_modules'):
236 _copy_dir(src, os.path.join(theme_static, item))
237 elif os.path.isfile(src) and not item.endswith('.html'):
238 # Copy non-HTML files (images, fonts, etc.) to static root
239 os.makedirs(theme_static, exist_ok=True)
240 shutil.copy2(src, os.path.join(theme_static, item))
241
242 _write_minimal_hugo_toml(output_dir, theme_name)
243
244 # Create minimal content
245 content_dir = os.path.join(output_dir, 'content')
246
--- hugoifier/utils/hugoify.py
+++ hugoifier/utils/hugoify.py
@@ -1,10 +1,11 @@
11
"""
22
AI-powered HTML → Hugo template conversion.
33
44
For already-Hugo themes, use hugoify_dir() to validate/augment.
55
For raw HTML, use hugoify_html() to produce Hugo layout files.
6
+For Next.js apps, use hugoify_nextjs() to convert React components to Hugo layouts.
67
"""
78
89
import json
910
import logging
1011
import os
@@ -15,60 +16,421 @@
1516
SYSTEM = (
1617
"You are an expert Hugo theme developer. Convert HTML templates to valid Hugo Go template files. "
1718
"Output only valid Hugo template syntax — no explanations, no markdown fences."
1819
)
1920
21
+NEXTJS_SYSTEM = (
22
+ "You are an expert at converting React/Next.js components to Hugo Go template files. "
23
+ "You understand JSX, TSX, React component composition, and Hugo template syntax. "
24
+ "Convert React components to static Hugo HTML templates, preserving all CSS classes and visual structure. "
25
+ "Output only valid Hugo template syntax — no explanations, no markdown fences."
26
+)
27
+
2028
2129
def hugoify_html(html_path: str) -> dict:
2230
"""
2331
Convert a raw HTML file to a set of Hugo layout files.
2432
25
- Returns dict mapping relative layout paths to their content, e.g.:
26
- {
27
- "_default/baseof.html": "<!DOCTYPE html>...",
28
- "partials/header.html": "<header>...",
29
- "partials/footer.html": "<footer>...",
30
- "index.html": "{{ define \"main\" }}...",
31
- }
33
+ Uses direct HTML extraction (no AI) to preserve content exactly as-is.
34
+ Splits the HTML into Hugo's baseof.html (head/shell) and index.html (body content).
35
+
36
+ Returns dict mapping relative layout paths to their content.
3237
"""
3338
logging.info(f"Hugoifying {html_path} ...")
3439
3540
with open(html_path, 'r', errors='replace') as f:
3641
html = f.read()
3742
38
- # Truncate very large files to avoid token limits
39
- if len(html) > 30000:
40
- logging.warning(f"HTML is large ({len(html)} chars), truncating to 30000 for AI analysis")
41
- html = html[:30000]
42
-
43
- prompt = f"""Convert the following HTML file into Hugo layout files.
44
-
45
-Return a JSON object where keys are relative file paths under layouts/ and values are the Hugo template content.
46
-
47
-Required keys to produce:
48
-- "_default/baseof.html" — base template with blocks for head, header, main, footer
49
-- "partials/header.html" — site header/nav extracted as partial
50
-- "partials/footer.html" — footer extracted as partial
51
-- "index.html" — homepage using {{ define "main" }} ... {{ end }}
52
-
53
-Rules:
54
-- Replace hardcoded page titles with {{ .Title }}
55
-- Replace hardcoded site name with {{ .Site.Title }}
56
-- Replace hardcoded URLs with {{ .Site.BaseURL }} or {{ .Permalink }}
57
-- Replace nav links with {{ range .Site.Menus.main }}<a href="{{ .URL }}">{{ .Name }}</a>{{ end }}
58
-- Replace blog post lists with {{ range .Pages }} ... {{ end }}
59
-- Replace copyright year with {{ now.Year }}
60
-- Keep all CSS classes and HTML structure intact
61
-- Use {{ partial "header.html" . }} and {{ partial "footer.html" . }} in baseof.html
62
-
63
-HTML to convert:
64
-{html}
65
-
66
-Return ONLY a valid JSON object, no explanation."""
67
-
68
- response = call_ai(prompt, SYSTEM)
69
- return _parse_layout_json(response)
43
+ logging.info(f"Read {len(html)} chars from {html_path}")
44
+
45
+ # Extract <head> content (CSS links, meta, fonts, etc.)
46
+ head_extras = _extract_head_content(html)
47
+
48
+ # Extract and rewrite CSS/JS paths to be relative to Hugo static/
49
+ css_links = re.findall(r'<link[^>]+rel=["\']stylesheet["\'][^>]*/?>',
50
+ html, re.DOTALL | re.IGNORECASE)
51
+ js_links = re.findall(r'<script[^>]+src=["\'][^"\']+["\'][^>]*>.*?</script>',
52
+ html, re.DOTALL)
53
+
54
+ # Extract <body> content
55
+ body_match = re.search(r'<body[^>]*>(.*?)</body>', html, re.DOTALL)
56
+ body_content = body_match.group(1).strip() if body_match else html
57
+
58
+ # Extract body attributes (class, style, etc.)
59
+ body_attrs_match = re.search(r'<body([^>]*)>', html)
60
+ body_attrs = body_attrs_match.group(1).strip() if body_attrs_match else ''
61
+
62
+ # Build baseof.html preserving the original <head> structure
63
+ head_match = re.search(r'<head[^>]*>(.*?)</head>', html, re.DOTALL)
64
+ if head_match:
65
+ head_content = head_match.group(1).strip()
66
+ # Replace hardcoded <title> with Hugo template
67
+ head_content = re.sub(
68
+ r'<title>[^<]*</title>',
69
+ '<title>{{ if .IsHome }}{{ .Site.Title }}{{ else }}{{ .Title }} | {{ .Site.Title }}{{ end }}</title>',
70
+ head_content
71
+ )
72
+ baseof = f'''<!DOCTYPE html>
73
+<html lang="{{{{ with .Site.LanguageCode }}}}{{{{ . }}}}{{{{ else }}}}en{{{{ end }}}}">
74
+<head>
75
+{head_content}
76
+</head>
77
+<body{" " + body_attrs if body_attrs else ""}>
78
+ {{{{- block "main" . }}}}{{{{- end }}}}
79
+</body>
80
+</html>'''
81
+ else:
82
+ baseof = _fallback_baseof()
83
+
84
+ index_html = f'{{{{ define "main" }}}}\n{body_content}\n{{{{ end }}}}'
85
+
86
+ layouts = {
87
+ "_default/baseof.html": baseof,
88
+ "index.html": index_html,
89
+ }
90
+
91
+ logging.info(f"Extracted {len(layouts)} layout files directly from HTML (no AI)")
92
+ return layouts
93
+
94
+
95
+def hugoify_nextjs(info: dict, dev_url: str = None) -> dict:
96
+ """
97
+ Convert a Next.js app to a set of Hugo layout files.
98
+
99
+ If dev_url is provided (or auto-detected), captures the actual rendered HTML
100
+ from the running Next.js dev server for pixel-perfect conversion.
101
+ Otherwise falls back to AI-powered TSX source conversion.
102
+
103
+ Args:
104
+ info: dict from find_nextjs_app() with app_dir, router_type, etc.
105
+ dev_url: URL of a running Next.js dev server (e.g. http://localhost:3000)
106
+
107
+ Returns:
108
+ dict mapping relative layout paths to their content, plus
109
+ a '_captured_assets' key with any downloaded CSS/JS files.
110
+ """
111
+ app_dir = info['app_dir']
112
+ logging.info(f"Hugoifying Next.js app at {app_dir} ...")
113
+
114
+ # Try to auto-detect a running dev server
115
+ if not dev_url:
116
+ dev_url = _detect_nextjs_server(info)
117
+
118
+ if dev_url:
119
+ return _capture_rendered_html(dev_url, info)
120
+
121
+ # Fallback: AI-powered source conversion (less faithful)
122
+ return _ai_convert_nextjs_sources(info)
123
+
124
+
125
+def _detect_nextjs_server(info: dict) -> str | None:
126
+ """Check if a Next.js dev server is running on common ports."""
127
+ import urllib.request
128
+ for port in [3000, 3001, 3002]:
129
+ url = f"http://localhost:{port}"
130
+ try:
131
+ req = urllib.request.Request(url, method='HEAD')
132
+ resp = urllib.request.urlopen(req, timeout=2)
133
+ if resp.status == 200:
134
+ logging.info(f"Detected running Next.js server at {url}")
135
+ return url
136
+ except Exception:
137
+ continue
138
+ return None
139
+
140
+
141
+def _capture_rendered_html(dev_url: str, info: dict) -> dict:
142
+ """
143
+ Capture the actual server-rendered HTML from a running Next.js app
144
+ and convert it into Hugo layout files. This gives pixel-perfect results.
145
+ """
146
+ import urllib.request
147
+ import urllib.parse
148
+
149
+ logging.info(f"Capturing rendered HTML from {dev_url} ...")
150
+
151
+ # Fetch the full rendered page
152
+ resp = urllib.request.urlopen(dev_url)
153
+ html = resp.read().decode('utf-8')
154
+ logging.info(f"Captured {len(html)} chars of rendered HTML")
155
+
156
+ # Download compiled CSS
157
+ css_urls = re.findall(r'href="(/_next/static/[^"]+\.css)"', html)
158
+ captured_css = {}
159
+ for css_path in css_urls:
160
+ css_url = f"{dev_url}{css_path}"
161
+ try:
162
+ css_resp = urllib.request.urlopen(css_url)
163
+ css_content = css_resp.read().decode('utf-8')
164
+ captured_css['compiled.css'] = css_content
165
+ logging.info(f"Captured CSS: {len(css_content)} chars")
166
+ break # Usually just one CSS file
167
+ except Exception as e:
168
+ logging.warning(f"Failed to fetch CSS {css_url}: {e}")
169
+
170
+ # Strip Next.js scripts, dev tooling, and React hydration markers
171
+ body_html = _extract_and_clean_body(html)
172
+
173
+ # Extract <head> content we want to keep (fonts, meta, etc.)
174
+ head_extras = _extract_head_content(html)
175
+
176
+ # Build Hugo layouts
177
+ baseof = f'''<!DOCTYPE html>
178
+<html lang="en">
179
+<head>
180
+ <meta charset="utf-8">
181
+ <meta name="viewport" content="width=device-width, initial-scale=1">
182
+ <title>{{{{ if .IsHome }}}}{{{{ .Site.Title }}}}{{{{ else }}}}{{{{ .Title }}}} | {{{{ .Site.Title }}}}{{{{ end }}}}</title>
183
+{head_extras}
184
+ <link rel="stylesheet" href="/css/compiled.css">
185
+ <link rel="stylesheet" href="/css/globals.css">
186
+</head>
187
+<body class="antialiased">
188
+ {{{{- block "main" . }}}}{{{{- end }}}}
189
+</body>
190
+</html>'''
191
+
192
+ index_html = f'{{{{ define "main" }}}}\n{body_html}\n{{{{ end }}}}'
193
+
194
+ layouts = {
195
+ "_default/baseof.html": baseof,
196
+ "index.html": index_html,
197
+ }
198
+
199
+ # Attach captured CSS as metadata for the pipeline to handle
200
+ if captured_css:
201
+ layouts['_captured_css'] = captured_css
202
+
203
+ return layouts
204
+
205
+
206
+def _extract_and_clean_body(html: str) -> str:
207
+ """Extract <body> content and strip Next.js scripts/dev tooling."""
208
+ # Extract body content
209
+ body_match = re.search(r'<body[^>]*>(.*?)</body>', html, re.DOTALL)
210
+ if not body_match:
211
+ return html
212
+
213
+ body = body_match.group(1)
214
+
215
+ # Strip all <script> tags (Next.js runtime, React hydration, HMR, etc.)
216
+ body = re.sub(r'<script\b[^>]*>.*?</script>', '', body, flags=re.DOTALL)
217
+ body = re.sub(r'<script\b[^>]*/?>', '', body)
218
+
219
+ # Strip Next.js dev overlay and error boundary elements
220
+ body = re.sub(r'<next-route-announcer[^>]*>.*?</next-route-announcer>', '', body, flags=re.DOTALL)
221
+ body = re.sub(r'<nextjs-portal[^>]*>.*?</nextjs-portal>', '', body, flags=re.DOTALL)
222
+
223
+ # Strip data-reactroot, data-nextjs, and other React/Next.js attributes
224
+ body = re.sub(r'\s*data-(?:reactroot|nextjs[^=]*|rsc[^=]*)(?:="[^"]*")?', '', body)
225
+
226
+ # Fix FadeIn components: they render with opacity:0 and translateY(32px)
227
+ # because the IntersectionObserver JS isn't running. Force them visible.
228
+ body = re.sub(r'opacity:\s*0', 'opacity:1', body)
229
+ body = re.sub(r'translateY\(32px\)', 'translateY(0px)', body)
230
+
231
+ # Replace /_next/static/ asset references with /static/ for Hugo
232
+ body = re.sub(r'/_next/static/media/([^"]+)', r'/\1', body)
233
+
234
+ return body.strip()
235
+
236
+
237
+def _extract_head_content(html: str) -> str:
238
+ """Extract useful <head> elements (fonts, preloads) from rendered HTML."""
239
+ head_match = re.search(r'<head[^>]*>(.*?)</head>', html, re.DOTALL)
240
+ if not head_match:
241
+ return ""
242
+
243
+ head = head_match.group(1)
244
+ lines = []
245
+
246
+ # Keep font preload/stylesheet links
247
+ for match in re.finditer(r'<link[^>]+(?:fonts\.googleapis|fonts\.gstatic|preload[^>]+font)[^>]*/?>',
248
+ head, re.DOTALL):
249
+ lines.append(f" {match.group(0)}")
250
+
251
+ # Keep image preloads
252
+ for match in re.finditer(r'<link[^>]+rel="preload"[^>]+as="image"[^>]*/?>',
253
+ head, re.DOTALL):
254
+ tag = match.group(0)
255
+ # Fix /_next paths to local paths
256
+ tag = re.sub(r'/_next/static/media/', '/', tag)
257
+ lines.append(f" {tag}")
258
+
259
+ return "\n".join(lines)
260
+
261
+
262
+def _ai_convert_nextjs_sources(info: dict) -> dict:
263
+ """
264
+ Fallback: AI-powered conversion from TSX source files.
265
+ Used when no running dev server is available.
266
+ """
267
+ sources = _collect_nextjs_sources(info)
268
+ if not sources:
269
+ logging.warning("No source files collected from Next.js app")
270
+ return _fallback_layouts()
271
+
272
+ layouts = {}
273
+
274
+ # Identify component vs structural files
275
+ component_sources = {}
276
+ layout_sources = {}
277
+ for rel_path, content in sources.items():
278
+ if rel_path.endswith('.css'):
279
+ continue
280
+ elif 'layout.' in rel_path or 'page.' in rel_path:
281
+ layout_sources[rel_path] = content
282
+ else:
283
+ component_sources[rel_path] = content
284
+
285
+ # Convert each component individually
286
+ for rel_path, content in component_sources.items():
287
+ basename = os.path.splitext(os.path.basename(rel_path))[0]
288
+ partial_name = f"partials/{basename}.html"
289
+ logging.info(f" Converting {rel_path} → {partial_name}")
290
+ html = _convert_single_component(basename, content)
291
+ if html:
292
+ layouts[partial_name] = html
293
+
294
+ # Build baseof and index
295
+ partial_names = [os.path.splitext(os.path.basename(k))[0] for k in layouts.keys()]
296
+ baseof, index_html = _convert_layout_and_page(layout_sources, component_sources, partial_names)
297
+ layouts["_default/baseof.html"] = baseof
298
+ layouts["index.html"] = index_html
299
+
300
+ logging.info(f"Generated {len(layouts)} layout files via AI conversion")
301
+ return layouts
302
+
303
+
304
+_COMPONENT_PROMPT = """Convert this React/Next.js component to static Hugo-compatible HTML.
305
+
306
+CRITICAL RULES:
307
+- Output ONLY the raw HTML. No markdown fences, no explanation, no JSON wrapping.
308
+- Convert ALL JSX `className` to HTML `class`
309
+- Unroll ALL `.map()` calls into full static HTML — every single item
310
+- Preserve EVERY Tailwind CSS class and inline style EXACTLY
311
+- Preserve ALL text content — do NOT summarize or shorten
312
+- Preserve ALL SVG content inline
313
+- Strip React hooks and event handlers, keep static HTML structure
314
+
315
+Component name: {name}
316
+
317
+Source code:
318
+{source}"""
319
+
320
+
321
+def _convert_single_component(name: str, source: str) -> str | None:
322
+ """Convert a single React component to Hugo-compatible HTML via AI."""
323
+ prompt = _COMPONENT_PROMPT.format(name=name, source=source)
324
+ try:
325
+ response = call_ai(prompt, NEXTJS_SYSTEM, max_tokens=16384)
326
+ html = re.sub(r'^```(?:html)?\s*', '', response.strip())
327
+ html = re.sub(r'```\s*$', '', html.strip())
328
+ return html
329
+ except Exception as e:
330
+ logging.warning(f"Failed to convert component {name}: {e}")
331
+ return None
332
+
333
+
334
+def _convert_layout_and_page(layout_sources, component_sources, partial_names):
335
+ """Build baseof.html and index.html from layout files and partial list."""
336
+ partial_includes = "\n".join(
337
+ f' {{{{ partial "{name}.html" . }}}}' for name in partial_names
338
+ )
339
+ baseof = _fallback_baseof()
340
+ index_html = f'{{% define "main" %}}\n<div class="bg-[#121517] flex flex-col w-full">\n{partial_includes}\n</div>\n{{% end %}}'
341
+ return baseof, index_html
342
+
343
+
344
+def _collect_nextjs_sources(info: dict) -> dict:
345
+ """
346
+ Collect relevant source files from a Next.js app into a dict
347
+ keyed by relative path. Applies priority-based context budgeting.
348
+ """
349
+ app_dir = info['app_dir']
350
+ sources = {}
351
+ budget = 80000
352
+
353
+ # Tier 1: Layout and page entry points (always include)
354
+ tier1 = []
355
+ if info.get('layout_file'):
356
+ tier1.append(info['layout_file'])
357
+ if info.get('page_file'):
358
+ tier1.append(info['page_file'])
359
+
360
+ # Tier 2: Section-level components (most important for structure)
361
+ tier2 = []
362
+ # Tier 3: Page components
363
+ tier3 = []
364
+ # Tier 4: UI/marketing components
365
+ tier4 = []
366
+ # Tier 5: CSS and config
367
+ tier5 = list(info.get('css_files', []))
368
+
369
+ # Walk source directories looking for components
370
+ for search_root in [os.path.join(app_dir, 'src'), os.path.join(app_dir, 'app'), app_dir]:
371
+ if not os.path.isdir(search_root):
372
+ continue
373
+ for root, dirs, files in os.walk(search_root):
374
+ # Skip junk
375
+ dirs[:] = [d for d in dirs if d not in ('node_modules', '.next', '__MACOSX', '.git', '__tests__')]
376
+ for f in files:
377
+ if not f.endswith(('.tsx', '.jsx', '.ts', '.js')):
378
+ continue
379
+ full = os.path.join(root, f)
380
+ # Skip test files, config files, API routes
381
+ if '.test.' in f or '.spec.' in f:
382
+ continue
383
+ if '/api/' in full:
384
+ continue
385
+ # Skip files already in tier 1
386
+ if full in tier1:
387
+ continue
388
+
389
+ rel = os.path.relpath(root, app_dir)
390
+ basename = f.lower()
391
+
392
+ if 'section' in basename or 'section' in rel.lower():
393
+ tier2.append(full)
394
+ elif 'page' in basename and 'page' not in rel.lower().split('app')[-1:]:
395
+ tier3.append(full)
396
+ elif any(k in rel.lower() for k in ('components', 'marketing')):
397
+ tier4.append(full)
398
+
399
+ # Assemble by priority, tracking budget
400
+ used = 0
401
+ for tier_files in [tier1, tier2, tier3, tier4, tier5]:
402
+ for fpath in tier_files:
403
+ if not os.path.isfile(fpath):
404
+ continue
405
+ try:
406
+ with open(fpath, 'r', errors='replace') as fh:
407
+ content = fh.read()
408
+ except OSError:
409
+ continue
410
+
411
+ rel_path = os.path.relpath(fpath, app_dir)
412
+ # Skip if already collected (dedup across tiers)
413
+ if rel_path in sources:
414
+ continue
415
+
416
+ # Truncate individual large files
417
+ if len(content) > 8000:
418
+ content = content[:8000] + '\n// ... [truncated]'
419
+
420
+ if used + len(content) > budget:
421
+ remaining = budget - used
422
+ if remaining > 500:
423
+ content = content[:remaining] + '\n// ... [truncated - budget]'
424
+ sources[rel_path] = content
425
+ used += len(content)
426
+ break
427
+ sources[rel_path] = content
428
+ used += len(content)
429
+
430
+ logging.info(f"Collected {len(sources)} source files ({used} chars) from Next.js app")
431
+ return sources
70432
71433
72434
def hugoify_dir(theme_dir: str) -> str:
73435
"""
74436
Validate and optionally augment an existing Hugo theme directory.
@@ -101,18 +463,24 @@
101463
# CLI entry point (used by cli.py)
102464
def hugoify(path: str) -> str:
103465
"""
104466
Entry point for the CLI 'hugoify' command.
105467
If path is a Hugo theme dir: validate it.
468
+ If path is a Next.js app: convert React components to Hugo.
106469
If path is an HTML file or raw HTML dir: convert it.
107470
"""
108
- from .theme_finder import find_hugo_theme, find_raw_html_files
471
+ from .theme_finder import find_hugo_theme, find_nextjs_app, find_raw_html_files
109472
110473
info = find_hugo_theme(path)
111474
if info:
112475
return hugoify_dir(info['theme_dir'])
113476
477
+ nextjs_info = find_nextjs_app(path)
478
+ if nextjs_info:
479
+ layouts = hugoify_nextjs(nextjs_info)
480
+ return f"Converted Next.js app to {len(layouts)} layout files: {list(layouts.keys())}"
481
+
114482
if os.path.isfile(path) and path.endswith('.html'):
115483
layouts = hugoify_html(path)
116484
return f"Converted to {len(layouts)} layout files: {list(layouts.keys())}"
117485
118486
html_files = find_raw_html_files(path)
@@ -130,21 +498,72 @@
130498
# ---------------------------------------------------------------------------
131499
# Helpers
132500
# ---------------------------------------------------------------------------
133501
134502
def _parse_layout_json(response: str) -> dict:
135
- """Extract JSON from AI response, even if surrounded by prose."""
136
- # Try to find JSON block
137
- match = re.search(r'\{.*\}', response, re.DOTALL)
503
+ """Extract JSON from AI response, even if surrounded by prose or markdown fences."""
504
+ # Strip markdown fences if present
505
+ stripped = re.sub(r'```(?:json)?\s*', '', response)
506
+ stripped = re.sub(r'```\s*$', '', stripped.strip())
507
+
508
+ # Try the full stripped response as JSON first
509
+ try:
510
+ result = json.loads(stripped)
511
+ if isinstance(result, dict):
512
+ logging.info(f"Parsed {len(result)} layout files from AI response")
513
+ return result
514
+ except json.JSONDecodeError:
515
+ pass
516
+
517
+ # Try to find JSON block (outermost braces)
518
+ match = re.search(r'\{.*\}', stripped, re.DOTALL)
138519
if match:
139520
try:
140
- return json.loads(match.group(0))
521
+ result = json.loads(match.group(0))
522
+ if isinstance(result, dict):
523
+ logging.info(f"Parsed {len(result)} layout files from AI response (extracted)")
524
+ return result
141525
except json.JSONDecodeError:
142526
pass
527
+
528
+ # AI sometimes uses backtick-delimited values instead of JSON strings.
529
+ # Parse with a regex-based key-value extractor.
530
+ backtick_result = _parse_backtick_json(match.group(0))
531
+ if backtick_result:
532
+ logging.info(f"Parsed {len(backtick_result)} layout files from backtick-delimited response")
533
+ return backtick_result
143534
144535
# Fallback: return a minimal layout
145536
logging.warning("Could not parse AI response as JSON, using fallback layouts")
537
+ logging.debug(f"AI response was: {response[:500]!r}")
538
+ return {
539
+ "_default/baseof.html": _fallback_baseof(),
540
+ "partials/header.html": "<header><!-- header --></header>",
541
+ "partials/footer.html": "<footer>{{ .Site.Params.copyright }}</footer>",
542
+ "index.html": '{{ define "main" }}<main>{{ .Content }}</main>{{ end }}',
543
+ }
544
+
545
+
546
+def _parse_backtick_json(text: str) -> dict | None:
547
+ """
548
+ Parse a JSON-like object where values are backtick-delimited template literals
549
+ instead of proper JSON strings. This happens when the AI uses JS template syntax.
550
+ e.g.: { "key": `<html>...</html>` }
551
+ """
552
+ result = {}
553
+ # Match "key": `value` pairs where value can span multiple lines
554
+ pattern = re.compile(r'"([^"]+)"\s*:\s*`(.*?)`(?:\s*[,}])', re.DOTALL)
555
+ for m in pattern.finditer(text):
556
+ key = m.group(1)
557
+ value = m.group(2).strip()
558
+ result[key] = value
559
+
560
+ return result if result else None
561
+
562
+
563
+def _fallback_layouts() -> dict:
564
+ """Minimal fallback when source collection fails."""
146565
return {
147566
"_default/baseof.html": _fallback_baseof(),
148567
"partials/header.html": "<header><!-- header --></header>",
149568
"partials/footer.html": "<footer>{{ .Site.Params.copyright }}</footer>",
150569
"index.html": '{{ define "main" }}<main>{{ .Content }}</main>{{ end }}',
151570
--- hugoifier/utils/hugoify.py
+++ hugoifier/utils/hugoify.py
@@ -1,10 +1,11 @@
1 """
2 AI-powered HTML → Hugo template conversion.
3
4 For already-Hugo themes, use hugoify_dir() to validate/augment.
5 For raw HTML, use hugoify_html() to produce Hugo layout files.
 
6 """
7
8 import json
9 import logging
10 import os
@@ -15,60 +16,421 @@
15 SYSTEM = (
16 "You are an expert Hugo theme developer. Convert HTML templates to valid Hugo Go template files. "
17 "Output only valid Hugo template syntax — no explanations, no markdown fences."
18 )
19
 
 
 
 
 
 
 
20
21 def hugoify_html(html_path: str) -> dict:
22 """
23 Convert a raw HTML file to a set of Hugo layout files.
24
25 Returns dict mapping relative layout paths to their content, e.g.:
26 {
27 "_default/baseof.html": "<!DOCTYPE html>...",
28 "partials/header.html": "<header>...",
29 "partials/footer.html": "<footer>...",
30 "index.html": "{{ define \"main\" }}...",
31 }
32 """
33 logging.info(f"Hugoifying {html_path} ...")
34
35 with open(html_path, 'r', errors='replace') as f:
36 html = f.read()
37
38 # Truncate very large files to avoid token limits
39 if len(html) > 30000:
40 logging.warning(f"HTML is large ({len(html)} chars), truncating to 30000 for AI analysis")
41 html = html[:30000]
42
43 prompt = f"""Convert the following HTML file into Hugo layout files.
44
45 Return a JSON object where keys are relative file paths under layouts/ and values are the Hugo template content.
46
47 Required keys to produce:
48 - "_default/baseof.html" — base template with blocks for head, header, main, footer
49 - "partials/header.html" — site header/nav extracted as partial
50 - "partials/footer.html" — footer extracted as partial
51 - "index.html" — homepage using {{ define "main" }} ... {{ end }}
52
53 Rules:
54 - Replace hardcoded page titles with {{ .Title }}
55 - Replace hardcoded site name with {{ .Site.Title }}
56 - Replace hardcoded URLs with {{ .Site.BaseURL }} or {{ .Permalink }}
57 - Replace nav links with {{ range .Site.Menus.main }}<a href="{{ .URL }}">{{ .Name }}</a>{{ end }}
58 - Replace blog post lists with {{ range .Pages }} ... {{ end }}
59 - Replace copyright year with {{ now.Year }}
60 - Keep all CSS classes and HTML structure intact
61 - Use {{ partial "header.html" . }} and {{ partial "footer.html" . }} in baseof.html
62
63 HTML to convert:
64 {html}
65
66 Return ONLY a valid JSON object, no explanation."""
67
68 response = call_ai(prompt, SYSTEM)
69 return _parse_layout_json(response)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
71
72 def hugoify_dir(theme_dir: str) -> str:
73 """
74 Validate and optionally augment an existing Hugo theme directory.
@@ -101,18 +463,24 @@
101 # CLI entry point (used by cli.py)
102 def hugoify(path: str) -> str:
103 """
104 Entry point for the CLI 'hugoify' command.
105 If path is a Hugo theme dir: validate it.
 
106 If path is an HTML file or raw HTML dir: convert it.
107 """
108 from .theme_finder import find_hugo_theme, find_raw_html_files
109
110 info = find_hugo_theme(path)
111 if info:
112 return hugoify_dir(info['theme_dir'])
113
 
 
 
 
 
114 if os.path.isfile(path) and path.endswith('.html'):
115 layouts = hugoify_html(path)
116 return f"Converted to {len(layouts)} layout files: {list(layouts.keys())}"
117
118 html_files = find_raw_html_files(path)
@@ -130,21 +498,72 @@
130 # ---------------------------------------------------------------------------
131 # Helpers
132 # ---------------------------------------------------------------------------
133
134 def _parse_layout_json(response: str) -> dict:
135 """Extract JSON from AI response, even if surrounded by prose."""
136 # Try to find JSON block
137 match = re.search(r'\{.*\}', response, re.DOTALL)
 
 
 
 
 
 
 
 
 
 
 
 
 
138 if match:
139 try:
140 return json.loads(match.group(0))
 
 
 
141 except json.JSONDecodeError:
142 pass
 
 
 
 
 
 
 
143
144 # Fallback: return a minimal layout
145 logging.warning("Could not parse AI response as JSON, using fallback layouts")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146 return {
147 "_default/baseof.html": _fallback_baseof(),
148 "partials/header.html": "<header><!-- header --></header>",
149 "partials/footer.html": "<footer>{{ .Site.Params.copyright }}</footer>",
150 "index.html": '{{ define "main" }}<main>{{ .Content }}</main>{{ end }}',
151
--- hugoifier/utils/hugoify.py
+++ hugoifier/utils/hugoify.py
@@ -1,10 +1,11 @@
1 """
2 AI-powered HTML → Hugo template conversion.
3
4 For already-Hugo themes, use hugoify_dir() to validate/augment.
5 For raw HTML, use hugoify_html() to produce Hugo layout files.
6 For Next.js apps, use hugoify_nextjs() to convert React components to Hugo layouts.
7 """
8
9 import json
10 import logging
11 import os
@@ -15,60 +16,421 @@
# System prompt used for plain HTML -> Hugo template conversion requests.
SYSTEM = (
    "You are an expert Hugo theme developer. "
    "Convert HTML templates to valid Hugo Go template files. "
    "Output only valid Hugo template syntax — no explanations, no markdown fences."
)

# System prompt used when converting React/Next.js component sources.
NEXTJS_SYSTEM = (
    "You are an expert at converting React/Next.js components to Hugo Go template files. "
    "You understand JSX, TSX, React component composition, and Hugo template syntax. "
    "Convert React components to static Hugo HTML templates, "
    "preserving all CSS classes and visual structure. "
    "Output only valid Hugo template syntax — no explanations, no markdown fences."
)
27
28
def hugoify_html(html_path: str) -> dict:
    """
    Convert a raw HTML file to a set of Hugo layout files.

    Uses direct HTML extraction (no AI) to preserve content exactly as-is.
    The document is split into Hugo's ``_default/baseof.html`` (the original
    ``<head>`` plus an empty "main" block) and ``index.html`` (the original
    ``<body>`` content wrapped in ``define "main"``).

    Args:
        html_path: Path to the HTML file to convert.

    Returns:
        dict mapping relative layout paths to their content.
    """
    logging.info(f"Hugoifying {html_path} ...")

    # Read as UTF-8 explicitly so the result does not depend on the platform's
    # default encoding; undecodable bytes are replaced rather than raising.
    with open(html_path, 'r', encoding='utf-8', errors='replace') as f:
        html = f.read()

    logging.info(f"Read {len(html)} chars from {html_path}")

    # HTML tag names are case-insensitive, and tags may span several lines.
    tag_flags = re.DOTALL | re.IGNORECASE

    # Body content; without a <body> element the whole document is used.
    body_match = re.search(r'<body[^>]*>(.*?)</body>', html, tag_flags)
    body_content = body_match.group(1).strip() if body_match else html

    # Attributes on the <body> tag (class, style, ...) are carried over.
    body_attrs_match = re.search(r'<body([^>]*)>', html, re.IGNORECASE)
    body_attrs = body_attrs_match.group(1).strip() if body_attrs_match else ''
    body_open = f'<body {body_attrs}>' if body_attrs else '<body>'

    # \b keeps "<head" from matching the opening of a <header> element.
    head_match = re.search(r'<head\b[^>]*>(.*?)</head>', html, tag_flags)
    if head_match:
        head_content = head_match.group(1).strip()
        # Replace the hardcoded <title> with a Hugo-driven one.
        head_content = re.sub(
            r'<title>[^<]*</title>',
            '<title>{{ if .IsHome }}{{ .Site.Title }}{{ else }}{{ .Title }} | {{ .Site.Title }}{{ end }}</title>',
            head_content,
            flags=re.IGNORECASE,
        )
        baseof = f'''<!DOCTYPE html>
<html lang="{{{{ with .Site.LanguageCode }}}}{{{{ . }}}}{{{{ else }}}}en{{{{ end }}}}">
<head>
{head_content}
</head>
{body_open}
  {{{{- block "main" . }}}}{{{{- end }}}}
</body>
</html>'''
    else:
        baseof = _fallback_baseof()

    index_html = f'{{{{ define "main" }}}}\n{body_content}\n{{{{ end }}}}'

    layouts = {
        "_default/baseof.html": baseof,
        "index.html": index_html,
    }

    logging.info(f"Extracted {len(layouts)} layout files directly from HTML (no AI)")
    return layouts
93
94
def hugoify_nextjs(info: dict, dev_url: str = None) -> dict:
    """
    Convert a Next.js app to a set of Hugo layout files.

    When a dev server URL is given (or one is auto-detected), the rendered
    HTML is captured from that server. Otherwise the TSX/JSX sources are
    converted with the AI fallback, which is less faithful.

    Args:
        info: dict from find_nextjs_app(); 'app_dir' is read here, and the
            whole dict is passed on to the helpers.
        dev_url: URL of a running Next.js dev server
            (e.g. http://localhost:3000), or None to auto-detect.

    Returns:
        dict mapping relative layout paths to their content. On the capture
        path it may also hold a '_captured_css' key with downloaded CSS.
    """
    logging.info(f"Hugoifying Next.js app at {info['app_dir']} ...")

    # An explicit URL wins; otherwise probe for a running dev server.
    server_url = dev_url or _detect_nextjs_server(info)

    if not server_url:
        # Fallback: AI-powered source conversion (less faithful)
        return _ai_convert_nextjs_sources(info)

    return _capture_rendered_html(server_url, info)
123
124
def _detect_nextjs_server(info: dict) -> str | None:
    """
    Check whether an HTTP server answers on the common Next.js dev ports.

    Ports 3000-3002 on localhost are probed in order with a HEAD request and
    a 2 second timeout; the first one that answers 200 wins.

    Args:
        info: dict from find_nextjs_app(). Currently unused.

    Returns:
        The base URL of the first responding server, or None if none answered.

    NOTE(review): the probe only checks for a 200 response. It does not
    verify that the server is Next.js or that it serves this app.
    """
    import http.client
    import urllib.request

    for port in (3000, 3001, 3002):
        url = f"http://localhost:{port}"
        request = urllib.request.Request(url, method='HEAD')
        try:
            # The context manager closes the connection even on early exit.
            with urllib.request.urlopen(request, timeout=2) as resp:
                is_up = resp.status == 200
        except (OSError, http.client.HTTPException):
            # Connection refused, timeout, HTTP error status, malformed
            # reply: this port has nothing usable, so try the next one.
            continue
        if is_up:
            logging.info(f"Detected running Next.js server at {url}")
            return url
    return None
139
140
def _capture_rendered_html(dev_url: str, info: dict) -> dict:
    """
    Capture the server-rendered HTML from a running Next.js app and turn it
    into Hugo layout files.

    Args:
        dev_url: Base URL of the running server (e.g. http://localhost:3000).
        info: dict from find_nextjs_app(). Currently unused.

    Returns:
        dict with "_default/baseof.html" and "index.html". When at least one
        stylesheet was downloaded it also has a '_captured_css' entry mapping
        'compiled.css' to the CSS text, for the pipeline to write out.

    Raises:
        urllib.error.URLError / OSError: if the page itself cannot be fetched.
        Stylesheet download failures are logged and skipped instead.
    """
    import urllib.request

    logging.info(f"Capturing rendered HTML from {dev_url} ...")

    # Avoid "host//_next/..." when the caller passed a trailing slash.
    base_url = dev_url.rstrip('/')

    # Fetch the full rendered page
    with urllib.request.urlopen(dev_url, timeout=30) as resp:
        html = resp.read().decode('utf-8', errors='replace')
    logging.info(f"Captured {len(html)} chars of rendered HTML")

    # Download compiled CSS. The href may carry a query string, and a page
    # can reference several sheets; they are concatenated in document order
    # so the cascade is preserved.
    css_paths = re.findall(r'href="(/_next/static/[^"]+\.css(?:\?[^"]*)?)"', html)
    css_chunks = []
    for css_path in dict.fromkeys(css_paths):  # de-duplicate, keep order
        # The href sits inside HTML, so "&" arrives as "&amp;".
        css_url = base_url + css_path.replace('&amp;', '&')
        try:
            with urllib.request.urlopen(css_url, timeout=30) as css_resp:
                css_content = css_resp.read().decode('utf-8', errors='replace')
        except Exception as e:
            logging.warning(f"Failed to fetch CSS {css_url}: {e}")
            continue
        css_chunks.append(css_content)
        logging.info(f"Captured CSS: {len(css_content)} chars")

    # Strip Next.js scripts, dev tooling, and React hydration markers
    body_html = _extract_and_clean_body(html)

    # Extract <head> content we want to keep (fonts, preloads)
    head_extras = _extract_head_content(html)

    # Build Hugo layouts
    baseof = f'''<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>{{{{ if .IsHome }}}}{{{{ .Site.Title }}}}{{{{ else }}}}{{{{ .Title }}}} | {{{{ .Site.Title }}}}{{{{ end }}}}</title>
{head_extras}
  <link rel="stylesheet" href="/css/compiled.css">
  <link rel="stylesheet" href="/css/globals.css">
</head>
<body class="antialiased">
  {{{{- block "main" . }}}}{{{{- end }}}}
</body>
</html>'''

    index_html = f'{{{{ define "main" }}}}\n{body_html}\n{{{{ end }}}}'

    layouts = {
        "_default/baseof.html": baseof,
        "index.html": index_html,
    }

    # Attach captured CSS as metadata for the pipeline to handle
    if css_chunks:
        layouts['_captured_css'] = {'compiled.css': "\n".join(css_chunks)}

    return layouts
204
205
206 def _extract_and_clean_body(html: str) -> str:
207 """Extract <body> content and strip Next.js scripts/dev tooling."""
208 # Extract body content
209 body_match = re.search(r'<body[^>]*>(.*?)</body>', html, re.DOTALL)
210 if not body_match:
211 return html
212
213 body = body_match.group(1)
214
215 # Strip all <script> tags (Next.js runtime, React hydration, HMR, etc.)
216 body = re.sub(r'<script\b[^>]*>.*?</script>', '', body, flags=re.DOTALL)
217 body = re.sub(r'<script\b[^>]*/?>', '', body)
218
219 # Strip Next.js dev overlay and error boundary elements
220 body = re.sub(r'<next-route-announcer[^>]*>.*?</next-route-announcer>', '', body, flags=re.DOTALL)
221 body = re.sub(r'<nextjs-portal[^>]*>.*?</nextjs-portal>', '', body, flags=re.DOTALL)
222
223 # Strip data-reactroot, data-nextjs, and other React/Next.js attributes
224 body = re.sub(r'\s*data-(?:reactroot|nextjs[^=]*|rsc[^=]*)(?:="[^"]*")?', '', body)
225
226 # Fix FadeIn components: they render with opacity:0 and translateY(32px)
227 # because the IntersectionObserver JS isn't running. Force them visible.
228 body = re.sub(r'opacity:\s*0', 'opacity:1', body)
229 body = re.sub(r'translateY\(32px\)', 'translateY(0px)', body)
230
231 # Replace /_next/static/ asset references with /static/ for Hugo
232 body = re.sub(r'/_next/static/media/([^"]+)', r'/\1', body)
233
234 return body.strip()
235
236
237 def _extract_head_content(html: str) -> str:
238 """Extract useful <head> elements (fonts, preloads) from rendered HTML."""
239 head_match = re.search(r'<head[^>]*>(.*?)</head>', html, re.DOTALL)
240 if not head_match:
241 return ""
242
243 head = head_match.group(1)
244 lines = []
245
246 # Keep font preload/stylesheet links
247 for match in re.finditer(r'<link[^>]+(?:fonts\.googleapis|fonts\.gstatic|preload[^>]+font)[^>]*/?>',
248 head, re.DOTALL):
249 lines.append(f" {match.group(0)}")
250
251 # Keep image preloads
252 for match in re.finditer(r'<link[^>]+rel="preload"[^>]+as="image"[^>]*/?>',
253 head, re.DOTALL):
254 tag = match.group(0)
255 # Fix /_next paths to local paths
256 tag = re.sub(r'/_next/static/media/', '/', tag)
257 lines.append(f" {tag}")
258
259 return "\n".join(lines)
260
261
def _ai_convert_nextjs_sources(info: dict) -> dict:
    """
    Fallback: AI-powered conversion from TSX source files.
    Used when no running dev server is available.

    Each collected component source is converted to a Hugo partial, then a
    base layout and an index page that includes every partial are added.

    Args:
        info: dict from find_nextjs_app(), passed to the source collector.

    Returns:
        dict mapping relative layout paths to their content. If no sources
        could be collected, the minimal fallback layouts are returned.
    """
    sources = _collect_nextjs_sources(info)
    if not sources:
        logging.warning("No source files collected from Next.js app")
        return _fallback_layouts()

    layouts = {}

    # Identify component vs structural files. The file name itself is
    # checked, not the whole path, so that components such as
    # "landing-page.tsx" or "page-layout.tsx" are still converted.
    component_sources = {}
    layout_sources = {}
    for rel_path, content in sources.items():
        if rel_path.endswith('.css'):
            continue
        if os.path.basename(rel_path).startswith(('layout.', 'page.')):
            layout_sources[rel_path] = content
        else:
            component_sources[rel_path] = content

    # Convert each component individually.
    # NOTE(review): two components sharing a file name in different folders
    # map to the same partial; the later one overwrites the earlier.
    for rel_path, content in component_sources.items():
        basename = os.path.splitext(os.path.basename(rel_path))[0]
        partial_name = f"partials/{basename}.html"
        logging.info(f"  Converting {rel_path} → {partial_name}")
        html = _convert_single_component(basename, content)
        if html:
            layouts[partial_name] = html

    # Build baseof and index from the partials that converted successfully
    partial_names = [os.path.splitext(os.path.basename(k))[0] for k in layouts]
    baseof, index_html = _convert_layout_and_page(layout_sources, component_sources, partial_names)
    layouts["_default/baseof.html"] = baseof
    layouts["index.html"] = index_html

    logging.info(f"Generated {len(layouts)} layout files via AI conversion")
    return layouts
302
303
# Prompt template for a single component; filled in with str.format()
# using the {name} and {source} placeholders.
_COMPONENT_PROMPT = """Convert this React/Next.js component to static Hugo-compatible HTML.

CRITICAL RULES:
- Output ONLY the raw HTML. No markdown fences, no explanation, no JSON wrapping.
- Convert ALL JSX `className` to HTML `class`
- Unroll ALL `.map()` calls into full static HTML — every single item
- Preserve EVERY Tailwind CSS class and inline style EXACTLY
- Preserve ALL text content — do NOT summarize or shorten
- Preserve ALL SVG content inline
- Strip React hooks and event handlers, keep static HTML structure

Component name: {name}

Source code:
{source}"""


def _convert_single_component(name: str, source: str) -> str | None:
    """
    Convert a single React component to Hugo-compatible HTML via AI.

    Args:
        name: Component name, used in the prompt and in log messages.
        source: Component source code.

    Returns:
        The HTML from the AI response with any surrounding markdown fence
        removed, or None if the AI call raised.
    """
    prompt = _COMPONENT_PROMPT.format(name=name, source=source)
    try:
        response = call_ai(prompt, NEXTJS_SYSTEM, max_tokens=16384)
    except Exception as e:
        # Best effort: one failed component must not abort the conversion.
        logging.warning(f"Failed to convert component {name}: {e}")
        return None

    # Models sometimes fence the output despite the instructions. Strip an
    # opening fence with any language tag (```html, ```jsx, ...) so the tag
    # does not end up as stray text in the partial.
    html = re.sub(r'^```[\w+-]*\s*', '', response.strip())
    html = re.sub(r'```\s*$', '', html.strip())
    return html
332
333
def _convert_layout_and_page(layout_sources, component_sources, partial_names):
    """
    Build baseof.html and index.html from layout files and partial list.

    Args:
        layout_sources: Layout/page sources keyed by path. Currently unused.
        component_sources: Component sources keyed by path. Currently unused.
        partial_names: Partial names without extension, in include order.

    Returns:
        Tuple ``(baseof, index_html)``. ``baseof`` is the fallback base
        layout; ``index_html`` defines the "main" block and includes every
        partial inside a wrapper div.
    """
    partial_includes = "\n".join(
        f'  {{{{ partial "{name}.html" . }}}}' for name in partial_names
    )
    baseof = _fallback_baseof()
    # Hugo template actions use {{ ... }}. The previous f-string rendered
    # "{% define "main" %}", which Hugo treats as plain text, so the "main"
    # block was never defined and the page body came out empty.
    index_html = (
        '{{ define "main" }}\n'
        '<div class="bg-[#121517] flex flex-col w-full">\n'
        f'{partial_includes}\n'
        '</div>\n'
        '{{ end }}'
    )
    return baseof, index_html
342
343
344 def _collect_nextjs_sources(info: dict) -> dict:
345 """
346 Collect relevant source files from a Next.js app into a dict
347 keyed by relative path. Applies priority-based context budgeting.
348 """
349 app_dir = info['app_dir']
350 sources = {}
351 budget = 80000
352
353 # Tier 1: Layout and page entry points (always include)
354 tier1 = []
355 if info.get('layout_file'):
356 tier1.append(info['layout_file'])
357 if info.get('page_file'):
358 tier1.append(info['page_file'])
359
360 # Tier 2: Section-level components (most important for structure)
361 tier2 = []
362 # Tier 3: Page components
363 tier3 = []
364 # Tier 4: UI/marketing components
365 tier4 = []
366 # Tier 5: CSS and config
367 tier5 = list(info.get('css_files', []))
368
369 # Walk source directories looking for components
370 for search_root in [os.path.join(app_dir, 'src'), os.path.join(app_dir, 'app'), app_dir]:
371 if not os.path.isdir(search_root):
372 continue
373 for root, dirs, files in os.walk(search_root):
374 # Skip junk
375 dirs[:] = [d for d in dirs if d not in ('node_modules', '.next', '__MACOSX', '.git', '__tests__')]
376 for f in files:
377 if not f.endswith(('.tsx', '.jsx', '.ts', '.js')):
378 continue
379 full = os.path.join(root, f)
380 # Skip test files, config files, API routes
381 if '.test.' in f or '.spec.' in f:
382 continue
383 if '/api/' in full:
384 continue
385 # Skip files already in tier 1
386 if full in tier1:
387 continue
388
389 rel = os.path.relpath(root, app_dir)
390 basename = f.lower()
391
392 if 'section' in basename or 'section' in rel.lower():
393 tier2.append(full)
394 elif 'page' in basename and 'page' not in rel.lower().split('app')[-1:]:
395 tier3.append(full)
396 elif any(k in rel.lower() for k in ('components', 'marketing')):
397 tier4.append(full)
398
399 # Assemble by priority, tracking budget
400 used = 0
401 for tier_files in [tier1, tier2, tier3, tier4, tier5]:
402 for fpath in tier_files:
403 if not os.path.isfile(fpath):
404 continue
405 try:
406 with open(fpath, 'r', errors='replace') as fh:
407 content = fh.read()
408 except OSError:
409 continue
410
411 rel_path = os.path.relpath(fpath, app_dir)
412 # Skip if already collected (dedup across tiers)
413 if rel_path in sources:
414 continue
415
416 # Truncate individual large files
417 if len(content) > 8000:
418 content = content[:8000] + '\n// ... [truncated]'
419
420 if used + len(content) > budget:
421 remaining = budget - used
422 if remaining > 500:
423 content = content[:remaining] + '\n// ... [truncated - budget]'
424 sources[rel_path] = content
425 used += len(content)
426 break
427 sources[rel_path] = content
428 used += len(content)
429
430 logging.info(f"Collected {len(sources)} source files ({used} chars) from Next.js app")
431 return sources
432
433
434 def hugoify_dir(theme_dir: str) -> str:
435 """
436 Validate and optionally augment an existing Hugo theme directory.
@@ -101,18 +463,24 @@
463 # CLI entry point (used by cli.py)
464 def hugoify(path: str) -> str:
465 """
466 Entry point for the CLI 'hugoify' command.
467 If path is a Hugo theme dir: validate it.
468 If path is a Next.js app: convert React components to Hugo.
469 If path is an HTML file or raw HTML dir: convert it.
470 """
471 from .theme_finder import find_hugo_theme, find_nextjs_app, find_raw_html_files
472
473 info = find_hugo_theme(path)
474 if info:
475 return hugoify_dir(info['theme_dir'])
476
477 nextjs_info = find_nextjs_app(path)
478 if nextjs_info:
479 layouts = hugoify_nextjs(nextjs_info)
480 return f"Converted Next.js app to {len(layouts)} layout files: {list(layouts.keys())}"
481
482 if os.path.isfile(path) and path.endswith('.html'):
483 layouts = hugoify_html(path)
484 return f"Converted to {len(layouts)} layout files: {list(layouts.keys())}"
485
486 html_files = find_raw_html_files(path)
@@ -130,21 +498,72 @@
498 # ---------------------------------------------------------------------------
499 # Helpers
500 # ---------------------------------------------------------------------------
501
def _parse_layout_json(response: str) -> dict:
    """
    Extract the layout mapping from an AI response.

    The response is expected to contain a JSON object, possibly surrounded by
    prose or markdown code fences. Strategies are tried in order:

    1. Parse the fence-stripped response as JSON.
    2. Parse the outermost ``{...}`` span of the response as JSON.
    3. Parse that span with ``_parse_backtick_json`` (backtick-delimited values).
    4. Return the minimal layouts from ``_fallback_layouts()``.

    Only ``dict`` results are accepted; any other JSON value falls through to
    the next strategy. A non-string or empty response goes straight to the
    fallback instead of raising.
    """
    text = response if isinstance(response, str) else ""

    # Strip markdown fences if present
    stripped = re.sub(r'```(?:json)?\s*', '', text)
    stripped = re.sub(r'```\s*$', '', stripped.strip())

    # Outermost braces; None when the response contains no object at all.
    match = re.search(r'\{.*\}', stripped, re.DOTALL)

    candidates = [(stripped, "")]
    if match:
        candidates.append((match.group(0), " (extracted)"))

    for candidate, suffix in candidates:
        try:
            result = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(result, dict):
            logging.info("Parsed %d layout files from AI response%s", len(result), suffix)
            return result

    # AI sometimes uses backtick-delimited values instead of JSON strings.
    # Only attempted when a brace-delimited span exists.
    if match:
        backtick_result = _parse_backtick_json(match.group(0))
        if backtick_result:
            logging.info(
                "Parsed %d layout files from backtick-delimited response",
                len(backtick_result),
            )
            return backtick_result

    # Fallback: return a minimal layout
    logging.warning("Could not parse AI response as JSON, using fallback layouts")
    logging.debug("AI response was: %r", text[:500])
    return _fallback_layouts()
544
545
546 def _parse_backtick_json(text: str) -> dict | None:
547 """
548 Parse a JSON-like object where values are backtick-delimited template literals
549 instead of proper JSON strings. This happens when the AI uses JS template syntax.
550 e.g.: { "key": `<html>...</html>` }
551 """
552 result = {}
553 # Match "key": `value` pairs where value can span multiple lines
554 pattern = re.compile(r'"([^"]+)"\s*:\s*`(.*?)`(?:\s*[,}])', re.DOTALL)
555 for m in pattern.finditer(text):
556 key = m.group(1)
557 value = m.group(2).strip()
558 result[key] = value
559
560 return result if result else None
561
562
563 def _fallback_layouts() -> dict:
564 """Minimal fallback when source collection fails."""
565 return {
566 "_default/baseof.html": _fallback_baseof(),
567 "partials/header.html": "<header><!-- header --></header>",
568 "partials/footer.html": "<footer>{{ .Site.Params.copyright }}</footer>",
569 "index.html": '{{ define "main" }}<main>{{ .Content }}</main>{{ end }}',
570
--- hugoifier/utils/theme_finder.py
+++ hugoifier/utils/theme_finder.py
@@ -1,10 +1,12 @@
11
"""
22
Locates the actual Hugo theme and exampleSite within the messy zip-extracted structure.
3
+Also detects Next.js applications for conversion.
34
Themes in themes/ are structured as: {name}/{name}/themes/{theme-name}/
45
"""
56
7
+import json
68
import logging
79
import os
810
911
1012
def find_hugo_theme(input_path):
@@ -57,10 +59,116 @@
5759
'example_site': example_site,
5860
'theme_name': theme_name,
5961
'is_hugo_theme': True,
6062
}
6163
64
+
65
+def find_nextjs_app(input_path):
66
+ """
67
+ Detect a Next.js application in the given path.
68
+
69
+ Walks up to 2 levels deep to find package.json with "next" in dependencies,
70
+ similar to how find_hugo_theme handles zip-extracted double-folder structure.
71
+
72
+ Returns dict with:
73
+ app_dir: root of the Next.js project (where package.json lives)
74
+ app_name: name from package.json or directory name
75
+ router_type: 'app' or 'pages'
76
+ has_src_dir: whether components live under src/
77
+ layout_file: path to app/layout.tsx/jsx (App Router) or None
78
+ page_file: path to app/page.tsx/jsx or pages/index.tsx/jsx
79
+ css_files: list of global CSS files found
80
+ is_nextjs_app: True
81
+ """
82
+ input_path = os.path.abspath(input_path)
83
+
84
+ # Look for package.json at root or one level deep (zip-extracted pattern)
85
+ candidates = []
86
+ for pkg in _find_file_up_to_depth(input_path, 'package.json', max_depth=2):
87
+ try:
88
+ with open(pkg, 'r') as f:
89
+ data = json.load(f)
90
+ except (json.JSONDecodeError, OSError):
91
+ continue
92
+
93
+ deps = {**data.get('dependencies', {}), **data.get('devDependencies', {})}
94
+ if 'next' in deps:
95
+ candidates.append((os.path.dirname(pkg), data))
96
+
97
+ if not candidates:
98
+ return None
99
+
100
+ # Pick the deepest match (most specific, like find_hugo_theme)
101
+ app_dir, pkg_data = max(candidates, key=lambda x: x[0].count(os.sep))
102
+ app_name = pkg_data.get('name', os.path.basename(app_dir))
103
+
104
+ # Detect router type
105
+ app_router_dir = os.path.join(app_dir, 'app')
106
+ pages_dir = os.path.join(app_dir, 'pages')
107
+ if os.path.isdir(app_router_dir):
108
+ router_type = 'app'
109
+ elif os.path.isdir(pages_dir):
110
+ router_type = 'pages'
111
+ else:
112
+ return None # Has next dep but no recognizable router
113
+
114
+ # Detect src/ directory
115
+ src_dir = os.path.join(app_dir, 'src')
116
+ has_src_dir = os.path.isdir(src_dir)
117
+
118
+ # Find layout and page files
119
+ layout_file = _find_tsx_or_jsx(app_dir, 'app', 'layout')
120
+ if router_type == 'app':
121
+ page_file = _find_tsx_or_jsx(app_dir, 'app', 'page')
122
+ else:
123
+ page_file = _find_tsx_or_jsx(app_dir, 'pages', 'index')
124
+
125
+ # Find CSS files
126
+ css_files = []
127
+ for search_dir in [app_router_dir, os.path.join(app_dir, 'src'), app_dir]:
128
+ if not os.path.isdir(search_dir):
129
+ continue
130
+ for f in os.listdir(search_dir):
131
+ if f.endswith('.css'):
132
+ css_files.append(os.path.join(search_dir, f))
133
+
134
+ return {
135
+ 'app_dir': app_dir,
136
+ 'app_name': app_name,
137
+ 'router_type': router_type,
138
+ 'has_src_dir': has_src_dir,
139
+ 'layout_file': layout_file,
140
+ 'page_file': page_file,
141
+ 'css_files': css_files,
142
+ 'is_nextjs_app': True,
143
+ }
144
+
145
+
146
+def _find_file_up_to_depth(root, filename, max_depth=2):
147
+ """Yield paths to `filename` found up to max_depth levels under root."""
148
+ for depth_root, dirs, files in os.walk(root):
149
+ rel = os.path.relpath(depth_root, root)
150
+ depth = 0 if rel == '.' else rel.count(os.sep) + 1
151
+ if depth > max_depth:
152
+ dirs.clear()
153
+ continue
154
+ if '__MACOSX' in depth_root or 'node_modules' in depth_root:
155
+ dirs.clear()
156
+ continue
157
+ if filename in files:
158
+ yield os.path.join(depth_root, filename)
159
+
160
+
161
+def _find_tsx_or_jsx(base, subdir, name):
162
+ """Find {name}.tsx or {name}.jsx in base/subdir/."""
163
+ d = os.path.join(base, subdir)
164
+ for ext in ('.tsx', '.jsx', '.ts', '.js'):
165
+ p = os.path.join(d, name + ext)
166
+ if os.path.isfile(p):
167
+ return p
168
+ return None
169
+
62170
63171
def find_raw_html_files(input_path):
64172
"""Find HTML files in a raw HTML theme (not a Hugo theme)."""
65173
html_files = []
66174
for root, dirs, files in os.walk(input_path):
67175
--- hugoifier/utils/theme_finder.py
+++ hugoifier/utils/theme_finder.py
@@ -1,10 +1,12 @@
1 """
2 Locates the actual Hugo theme and exampleSite within the messy zip-extracted structure.
 
3 Themes in themes/ are structured as: {name}/{name}/themes/{theme-name}/
4 """
5
 
6 import logging
7 import os
8
9
10 def find_hugo_theme(input_path):
@@ -57,10 +59,116 @@
57 'example_site': example_site,
58 'theme_name': theme_name,
59 'is_hugo_theme': True,
60 }
61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
63 def find_raw_html_files(input_path):
64 """Find HTML files in a raw HTML theme (not a Hugo theme)."""
65 html_files = []
66 for root, dirs, files in os.walk(input_path):
67
--- hugoifier/utils/theme_finder.py
+++ hugoifier/utils/theme_finder.py
@@ -1,10 +1,12 @@
1 """
2 Locates the actual Hugo theme and exampleSite within the messy zip-extracted structure.
3 Also detects Next.js applications for conversion.
4 Themes in themes/ are structured as: {name}/{name}/themes/{theme-name}/
5 """
6
7 import json
8 import logging
9 import os
10
11
12 def find_hugo_theme(input_path):
@@ -57,10 +59,116 @@
59 'example_site': example_site,
60 'theme_name': theme_name,
61 'is_hugo_theme': True,
62 }
63
64
def find_nextjs_app(input_path):
    """
    Detect a Next.js application in the given path.

    Searches input_path and up to 2 directory levels below it for a
    package.json that lists "next" in dependencies or devDependencies
    (this covers the zip-extracted double-folder structure). Candidates are
    tried deepest first; the first one with a recognizable router directory
    wins. The router is looked up at the project root first and under src/
    only when the root has neither app/ nor pages/.

    Returns None when no such project is found, otherwise a dict with:
        app_dir: root of the Next.js project (where package.json lives)
        app_name: name from package.json or directory name
        router_type: 'app' or 'pages'
        has_src_dir: whether a src/ directory exists in the project
        layout_file: path to app/layout.{tsx,jsx,ts,js} or None
        page_file: path to app/page.* or pages/index.*, or None
        css_files: list of CSS files found directly inside the app router
            dir, src/, the project root, styles/ and src/styles/
        is_nextjs_app: True
    """
    input_path = os.path.abspath(input_path)

    # Look for package.json at the root or up to two levels below it.
    candidates = []
    for pkg in _find_file_up_to_depth(input_path, 'package.json', max_depth=2):
        try:
            with open(pkg, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (ValueError, OSError):
            # ValueError covers malformed JSON as well as undecodable bytes.
            continue
        if not isinstance(data, dict):
            continue

        # A section may be missing or null; only real mappings are inspected.
        sections = (data.get('dependencies'), data.get('devDependencies'))
        if any(isinstance(s, dict) and 'next' in s for s in sections):
            candidates.append((os.path.dirname(pkg), data))

    # Deepest match first (most specific); the sort is stable, so candidates
    # at equal depth keep their discovery order.
    candidates.sort(key=lambda c: c[0].count(os.sep), reverse=True)

    for app_dir, pkg_data in candidates:
        router = _resolve_next_router(app_dir)
        if router is None:
            continue  # Has next dep but no recognizable router
        router_type, code_root = router

        src_dir = os.path.join(app_dir, 'src')

        # Find layout and page files
        layout_file = _find_tsx_or_jsx(code_root, 'app', 'layout')
        if router_type == 'app':
            page_file = _find_tsx_or_jsx(code_root, 'app', 'page')
        else:
            page_file = _find_tsx_or_jsx(code_root, 'pages', 'index')

        # Find CSS files (non-recursive, sorted per directory for stable output)
        css_files = []
        seen_dirs = set()
        for search_dir in (
            os.path.join(code_root, 'app'),
            src_dir,
            app_dir,
            os.path.join(app_dir, 'styles'),
            os.path.join(src_dir, 'styles'),
        ):
            if search_dir in seen_dirs or not os.path.isdir(search_dir):
                continue
            seen_dirs.add(search_dir)
            for entry in sorted(os.listdir(search_dir)):
                css_path = os.path.join(search_dir, entry)
                if entry.endswith('.css') and os.path.isfile(css_path):
                    css_files.append(css_path)

        return {
            'app_dir': app_dir,
            # An empty or missing "name" falls back to the directory name.
            'app_name': pkg_data.get('name') or os.path.basename(app_dir),
            'router_type': router_type,
            'has_src_dir': os.path.isdir(src_dir),
            'layout_file': layout_file,
            'page_file': page_file,
            'css_files': css_files,
            'is_nextjs_app': True,
        }

    return None


def _resolve_next_router(app_dir):
    """
    Return (router_type, code_root) for the project at app_dir, or None.

    code_root is app_dir when it contains app/ or pages/, otherwise app_dir/src
    when that contains one of them. app/ is preferred over pages/.
    """
    for code_root in (app_dir, os.path.join(app_dir, 'src')):
        if os.path.isdir(os.path.join(code_root, 'app')):
            return 'app', code_root
        if os.path.isdir(os.path.join(code_root, 'pages')):
            return 'pages', code_root
    return None
144
145
def _find_file_up_to_depth(root, filename, max_depth=2):
    """
    Yield paths to `filename` found in root or up to max_depth levels under it.

    Directories named '__MACOSX' or 'node_modules' below root are never
    entered. The check is on directory names relative to root, so a root that
    itself lives under such a name is still searched. Sub-directories are
    visited in sorted order so results are deterministic.
    """
    if max_depth < 0:
        return

    skipped = ('__MACOSX', 'node_modules')
    for depth_root, dirs, files in os.walk(root):
        rel = os.path.relpath(depth_root, root)
        depth = 0 if rel == os.curdir else rel.count(os.sep) + 1

        # Prune in place so os.walk never descends past max_depth or into junk.
        if depth >= max_depth:
            dirs.clear()
        else:
            dirs[:] = sorted(d for d in dirs if d not in skipped)

        if filename in files:
            yield os.path.join(depth_root, filename)
159
160
def _find_tsx_or_jsx(base, subdir, name, extensions=('.tsx', '.jsx', '.ts', '.js')):
    """
    Return the first existing file base/subdir/{name}{ext}, or None.

    Extensions are probed in the order given, so with the default tuple a
    .tsx file wins over .jsx, then .ts, then .js.
    """
    directory = os.path.join(base, subdir)
    for ext in extensions:
        candidate = os.path.join(directory, name + ext)
        if os.path.isfile(candidate):
            return candidate
    return None
169
170
171 def find_raw_html_files(input_path):
172 """Find HTML files in a raw HTML theme (not a Hugo theme)."""
173 html_files = []
174 for root, dirs, files in os.walk(input_path):
175

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button