Hugoifier

Merge pull request #12 from ConflictHQ/feat/nextjs-support feat: add Next.js app conversion support

noreply 2026-03-17 15:35 trunk merge

Commit 77c5e9a6e72fe2cb7f5966a9a06377d08527f39e8ac5b931659c7a250d588bc7

Parent 91515c0ce303054…

5 files changed +1 -1 +8 -7 +102 -15 +463 -44 +108

~ hugoifier/cli.py ~ hugoifier/config.py ~ hugoifier/utils/complete.py ~ hugoifier/utils/hugoify.py ~ hugoifier/utils/theme_finder.py

M hugoifier/cli.py

+1 -1

		--- hugoifier/cli.py
		+++ hugoifier/cli.py
		@@ -47,11 +47,11 @@
47	47	# analyze
48	48	analyze_parser = subparsers.add_parser("analyze", help="Analyze a theme and report structure")
49	49	analyze_parser.add_argument("path", help="Path to the theme")
50	50
51	51	# hugoify
52		- hugoify_parser = subparsers.add_parser("hugoify", help="Convert HTML to Hugo theme (or validate existing Hugo theme)")
	52	+ hugoify_parser = subparsers.add_parser("hugoify", help="Convert HTML or Next.js app to Hugo theme (or validate existing Hugo theme)")
53	53	hugoify_parser.add_argument("path", help="Path to HTML file or theme directory")
54	54
55	55	# decapify
56	56	decapify_parser = subparsers.add_parser("decapify", help="Add Decap CMS to an assembled Hugo site")
57	57	decapify_parser.add_argument("path", help="Path to the Hugo site directory")
58	58

	--- hugoifier/cli.py
	+++ hugoifier/cli.py
	@@ -47,11 +47,11 @@
47	# analyze
48	analyze_parser = subparsers.add_parser("analyze", help="Analyze a theme and report structure")
49	analyze_parser.add_argument("path", help="Path to the theme")
50
51	# hugoify
52	hugoify_parser = subparsers.add_parser("hugoify", help="Convert HTML to Hugo theme (or validate existing Hugo theme)")
53	hugoify_parser.add_argument("path", help="Path to HTML file or theme directory")
54
55	# decapify
56	decapify_parser = subparsers.add_parser("decapify", help="Add Decap CMS to an assembled Hugo site")
57	decapify_parser.add_argument("path", help="Path to the Hugo site directory")
58

	--- hugoifier/cli.py
	+++ hugoifier/cli.py
	@@ -47,11 +47,11 @@
47	# analyze
48	analyze_parser = subparsers.add_parser("analyze", help="Analyze a theme and report structure")
49	analyze_parser.add_argument("path", help="Path to the theme")
50
51	# hugoify
52	hugoify_parser = subparsers.add_parser("hugoify", help="Convert HTML or Next.js app to Hugo theme (or validate existing Hugo theme)")
53	hugoify_parser.add_argument("path", help="Path to HTML file or theme directory")
54
55	# decapify
56	decapify_parser = subparsers.add_parser("decapify", help="Add Decap CMS to an assembled Hugo site")
57	decapify_parser.add_argument("path", help="Path to the Hugo site directory")
58

M hugoifier/config.py

+8 -7

		--- hugoifier/config.py
		+++ hugoifier/config.py
		@@ -27,43 +27,44 @@
27	27	GOOGLE_MODEL = os.getenv('GOOGLE_MODEL', 'gemini-1.5-pro')
28	28
29	29	MAX_TOKENS = int(os.getenv('HUGOIFIER_MAX_TOKENS', '4096'))
30	30
31	31
32		-def call_ai(prompt: str, system: str = "You are a helpful Hugo theme conversion assistant.") -> str:
	32	+def call_ai(prompt: str, system: str = "You are a helpful Hugo theme conversion assistant.", max_tokens: int = None) -> str:
33	33	"""
34	34	Call the configured AI backend and return the response text.
35	35	This is the single entry point for all AI calls in the codebase.
36	36	"""
	37	+ tokens = max_tokens or MAX_TOKENS
37	38	if BACKEND == 'anthropic':
38		- return _call_anthropic(prompt, system)
	39	+ return _call_anthropic(prompt, system, tokens)
39	40	elif BACKEND == 'openai':
40		- return _call_openai(prompt, system)
	41	+ return _call_openai(prompt, system, tokens)
41	42	elif BACKEND == 'google':
42	43	return _call_google(prompt, system)
43	44	else:
44	45	raise ValueError(
45	46	f"Unknown backend: {BACKEND!r}. "
46	47	"Set HUGOIFIER_BACKEND to 'anthropic', 'openai', or 'google'."
47	48	)
48	49
49	50
50		-def _call_anthropic(prompt: str, system: str) -> str:
	51	+def _call_anthropic(prompt: str, system: str, max_tokens: int = None) -> str:
51	52	if not ANTHROPIC_API_KEY:
52	53	raise EnvironmentError("ANTHROPIC_API_KEY is not set")
53	54	import anthropic
54	55	client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)
55	56	message = client.messages.create(
56	57	model=ANTHROPIC_MODEL,
57		- max_tokens=MAX_TOKENS,
	58	+ max_tokens=max_tokens or MAX_TOKENS,
58	59	system=system,
59	60	messages=[{"role": "user", "content": prompt}],
60	61	)
61	62	return message.content[0].text
62	63
63	64
64		-def _call_openai(prompt: str, system: str) -> str:
	65	+def _call_openai(prompt: str, system: str, max_tokens: int = None) -> str:
65	66	if not OPENAI_API_KEY:
66	67	raise EnvironmentError("OPENAI_API_KEY is not set")
67	68	from openai import OpenAI
68	69	client = OpenAI(api_key=OPENAI_API_KEY)
69	70	response = client.chat.completions.create(
		@@ -70,11 +71,11 @@
70	71	model=OPENAI_MODEL,
71	72	messages=[
72	73	{"role": "system", "content": system},
73	74	{"role": "user", "content": prompt},
74	75	],
75		- max_tokens=MAX_TOKENS,
	76	+ max_tokens=max_tokens or MAX_TOKENS,
76	77	)
77	78	return response.choices[0].message.content.strip()
78	79
79	80
80	81	def _call_google(prompt: str, system: str) -> str:
81	82

	--- hugoifier/config.py
	+++ hugoifier/config.py
	@@ -27,43 +27,44 @@
27	GOOGLE_MODEL = os.getenv('GOOGLE_MODEL', 'gemini-1.5-pro')
28
29	MAX_TOKENS = int(os.getenv('HUGOIFIER_MAX_TOKENS', '4096'))
30
31
32	def call_ai(prompt: str, system: str = "You are a helpful Hugo theme conversion assistant.") -> str:
33	"""
34	Call the configured AI backend and return the response text.
35	This is the single entry point for all AI calls in the codebase.
36	"""

37	if BACKEND == 'anthropic':
38	return _call_anthropic(prompt, system)
39	elif BACKEND == 'openai':
40	return _call_openai(prompt, system)
41	elif BACKEND == 'google':
42	return _call_google(prompt, system)
43	else:
44	raise ValueError(
45	f"Unknown backend: {BACKEND!r}. "
46	"Set HUGOIFIER_BACKEND to 'anthropic', 'openai', or 'google'."
47	)
48
49
50	def _call_anthropic(prompt: str, system: str) -> str:
51	if not ANTHROPIC_API_KEY:
52	raise EnvironmentError("ANTHROPIC_API_KEY is not set")
53	import anthropic
54	client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)
55	message = client.messages.create(
56	model=ANTHROPIC_MODEL,
57	max_tokens=MAX_TOKENS,
58	system=system,
59	messages=[{"role": "user", "content": prompt}],
60	)
61	return message.content[0].text
62
63
64	def _call_openai(prompt: str, system: str) -> str:
65	if not OPENAI_API_KEY:
66	raise EnvironmentError("OPENAI_API_KEY is not set")
67	from openai import OpenAI
68	client = OpenAI(api_key=OPENAI_API_KEY)
69	response = client.chat.completions.create(
	@@ -70,11 +71,11 @@
70	model=OPENAI_MODEL,
71	messages=[
72	{"role": "system", "content": system},
73	{"role": "user", "content": prompt},
74	],
75	max_tokens=MAX_TOKENS,
76	)
77	return response.choices[0].message.content.strip()
78
79
80	def _call_google(prompt: str, system: str) -> str:
81

	--- hugoifier/config.py
	+++ hugoifier/config.py
	@@ -27,43 +27,44 @@
27	GOOGLE_MODEL = os.getenv('GOOGLE_MODEL', 'gemini-1.5-pro')
28
29	MAX_TOKENS = int(os.getenv('HUGOIFIER_MAX_TOKENS', '4096'))
30
31
32	def call_ai(prompt: str, system: str = "You are a helpful Hugo theme conversion assistant.", max_tokens: int = None) -> str:
33	"""
34	Call the configured AI backend and return the response text.
35	This is the single entry point for all AI calls in the codebase.
36	"""
37	tokens = max_tokens or MAX_TOKENS
38	if BACKEND == 'anthropic':
39	return _call_anthropic(prompt, system, tokens)
40	elif BACKEND == 'openai':
41	return _call_openai(prompt, system, tokens)
42	elif BACKEND == 'google':
43	return _call_google(prompt, system)
44	else:
45	raise ValueError(
46	f"Unknown backend: {BACKEND!r}. "
47	"Set HUGOIFIER_BACKEND to 'anthropic', 'openai', or 'google'."
48	)
49
50
51	def _call_anthropic(prompt: str, system: str, max_tokens: int = None) -> str:
52	if not ANTHROPIC_API_KEY:
53	raise EnvironmentError("ANTHROPIC_API_KEY is not set")
54	import anthropic
55	client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)
56	message = client.messages.create(
57	model=ANTHROPIC_MODEL,
58	max_tokens=max_tokens or MAX_TOKENS,
59	system=system,
60	messages=[{"role": "user", "content": prompt}],
61	)
62	return message.content[0].text
63
64
65	def _call_openai(prompt: str, system: str, max_tokens: int = None) -> str:
66	if not OPENAI_API_KEY:
67	raise EnvironmentError("OPENAI_API_KEY is not set")
68	from openai import OpenAI
69	client = OpenAI(api_key=OPENAI_API_KEY)
70	response = client.chat.completions.create(
	@@ -70,11 +71,11 @@
71	model=OPENAI_MODEL,
72	messages=[
73	{"role": "system", "content": system},
74	{"role": "user", "content": prompt},
75	],
76	max_tokens=max_tokens or MAX_TOKENS,
77	)
78	return response.choices[0].message.content.strip()
79
80
81	def _call_google(prompt: str, system: str) -> str:
82

M hugoifier/utils/complete.py

+102 -15

		--- hugoifier/utils/complete.py
		+++ hugoifier/utils/complete.py
		@@ -9,12 +9,12 @@
9	9	import os
10	10	import shutil
11	11	from pathlib import Path
12	12
13	13	from .decapify import decapify
14		-from .hugoify import hugoify_html
15		-from .theme_finder import find_hugo_theme, find_raw_html_files
	14	+from .hugoify import hugoify_html, hugoify_nextjs
	15	+from .theme_finder import find_hugo_theme, find_nextjs_app, find_raw_html_files
16	16	from .theme_patcher import patch_config, patch_theme
17	17
18	18
19	19	def complete(
20	20	input_path: str,
		@@ -41,16 +41,21 @@
41	41	branding = {'cms_name': cms_name, 'cms_logo': cms_logo, 'cms_color': cms_color}
42	42	info = find_hugo_theme(input_path)
43	43
44	44	if info:
45	45	return _assemble_hugo_site(info, output_dir, branding)
46		- else:
47		- # Raw HTML path
48		- html_files = find_raw_html_files(input_path)
49		- if not html_files:
50		- raise ValueError(f"No Hugo theme or HTML files found in {input_path}")
51		- return _convert_raw_html(input_path, html_files, output_dir, branding)
	46	+
	47	+ # Next.js path (check before raw HTML since Next.js projects may contain .html files)
	48	+ nextjs_info = find_nextjs_app(input_path)
	49	+ if nextjs_info:
	50	+ return _convert_nextjs(input_path, nextjs_info, output_dir, branding)
	51	+
	52	+ # Raw HTML path
	53	+ html_files = find_raw_html_files(input_path)
	54	+ if not html_files:
	55	+ raise ValueError(f"No Hugo theme, Next.js app, or HTML files found in {input_path}")
	56	+ return _convert_raw_html(input_path, html_files, output_dir, branding)
52	57
53	58
54	59	# ---------------------------------------------------------------------------
55	60	# Hugo theme path
56	61	# ---------------------------------------------------------------------------
		@@ -111,10 +116,87 @@
111	116
112	117	logging.info(f"Done. Site ready at: {output_dir}")
113	118	logging.info(f"Run: cd {output_dir} && hugo serve")
114	119	return output_dir
115	120
	121	+
	122	+# ---------------------------------------------------------------------------
	123	+# Next.js path
	124	+# ---------------------------------------------------------------------------
	125	+
	126	+def _convert_nextjs(
	127	+ input_path: str, nextjs_info: dict, output_dir: str = None, branding: dict = None
	128	+) -> str:
	129	+ app_dir = nextjs_info['app_dir']
	130	+ theme_name = nextjs_info.get('app_name', os.path.basename(os.path.abspath(input_path)))
	131	+
	132	+ if output_dir is None:
	133	+ output_dir = str(Path(__file__).parents[2] / 'output' / theme_name)
	134	+
	135	+ logging.info(f"Converting Next.js app: {theme_name}")
	136	+
	137	+ # Convert: capture rendered HTML if dev server running, else AI fallback
	138	+ hugo_layouts = hugoify_nextjs(nextjs_info)
	139	+
	140	+ os.makedirs(output_dir, exist_ok=True)
	141	+
	142	+ # Extract captured CSS if present (from rendered HTML capture)
	143	+ captured_css = hugo_layouts.pop('_captured_css', {})
	144	+
	145	+ # Write converted layouts
	146	+ theme_layouts_dir = os.path.join(output_dir, 'themes', theme_name, 'layouts')
	147	+ os.makedirs(os.path.join(theme_layouts_dir, '_default'), exist_ok=True)
	148	+ os.makedirs(os.path.join(theme_layouts_dir, 'partials'), exist_ok=True)
	149	+
	150	+ for filename, content in hugo_layouts.items():
	151	+ # Fix common AI mistake: partial "partials/X.html" → partial "X.html"
	152	+ if isinstance(content, str):
	153	+ content = content.replace('partial "partials/', 'partial "')
	154	+ dest = os.path.join(theme_layouts_dir, filename)
	155	+ os.makedirs(os.path.dirname(dest), exist_ok=True)
	156	+ with open(dest, 'w') as f:
	157	+ f.write(content)
	158	+
	159	+ # Copy public/ assets to theme static/
	160	+ public_dir = os.path.join(app_dir, 'public')
	161	+ theme_static = os.path.join(output_dir, 'themes', theme_name, 'static')
	162	+ if os.path.isdir(public_dir):
	163	+ _copy_dir(public_dir, theme_static)
	164	+ logging.info("Copied public/ assets to static/")
	165	+
	166	+ # Write captured CSS (from rendered HTML capture)
	167	+ css_dest = os.path.join(theme_static, 'css')
	168	+ os.makedirs(css_dest, exist_ok=True)
	169	+ for css_name, css_content in captured_css.items():
	170	+ with open(os.path.join(css_dest, css_name), 'w') as f:
	171	+ f.write(css_content)
	172	+ logging.info(f"Wrote captured CSS: {css_name}")
	173	+
	174	+ # Also copy source CSS files (globals.css etc.)
	175	+ for css_file in nextjs_info.get('css_files', []):
	176	+ if os.path.isfile(css_file):
	177	+ shutil.copy2(css_file, os.path.join(css_dest, os.path.basename(css_file)))
	178	+ logging.info("Copied CSS files")
	179	+
	180	+ _write_minimal_hugo_toml(output_dir, theme_name)
	181	+
	182	+ # Create minimal content
	183	+ content_dir = os.path.join(output_dir, 'content')
	184	+ os.makedirs(content_dir, exist_ok=True)
	185	+ with open(os.path.join(content_dir, '_index.md'), 'w') as f:
	186	+ f.write('---\ntitle: Home\n---\n')
	187	+
	188	+ b = branding or {}
	189	+ decapify(
	190	+ output_dir,
	191	+ cms_name=b.get('cms_name'), cms_logo=b.get('cms_logo'), cms_color=b.get('cms_color'),
	192	+ )
	193	+
	194	+ logging.info(f"Done. Site ready at: {output_dir}")
	195	+ logging.info(f"Run: cd {output_dir} && hugo serve")
	196	+ return output_dir
	197	+
116	198
117	199	# ---------------------------------------------------------------------------
118	200	# Raw HTML path
119	201	# ---------------------------------------------------------------------------
120	202
		@@ -126,13 +208,13 @@
126	208	if output_dir is None:
127	209	output_dir = str(Path(__file__).parents[2] / 'output' / theme_name)
128	210
129	211	logging.info(f"Converting raw HTML theme: {theme_name}")
130	212
131		- # Use AI to convert the main HTML file to Hugo layouts
	213	+ # Direct HTML extraction — use the actual HTML as-is, no AI reinterpretation
132	214	main_html = _pick_main_html(html_files)
133		- logging.info(f"Converting {main_html} ...")
	215	+ logging.info(f"Extracting {main_html} ...")
134	216	hugo_layouts = hugoify_html(main_html)
135	217
136	218	os.makedirs(output_dir, exist_ok=True)
137	219
138	220	# Write converted layouts
		@@ -144,15 +226,20 @@
144	226	dest = os.path.join(theme_layouts_dir, filename)
145	227	os.makedirs(os.path.dirname(dest), exist_ok=True)
146	228	with open(dest, 'w') as f:
147	229	f.write(content)
148	230
149		- # Copy CSS/JS/images
150		- for ext_dir in ('css', 'js', 'images', 'img', 'assets', 'fonts'):
151		- src = os.path.join(input_path, ext_dir)
152		- if os.path.isdir(src):
153		- _copy_dir(src, os.path.join(output_dir, 'themes', theme_name, 'static', ext_dir))
	231	+ # Copy ALL static assets from the HTML theme directory
	232	+ theme_static = os.path.join(output_dir, 'themes', theme_name, 'static')
	233	+ for item in os.listdir(input_path):
	234	+ src = os.path.join(input_path, item)
	235	+ if os.path.isdir(src) and item not in ('__MACOSX', '.git', 'node_modules'):
	236	+ _copy_dir(src, os.path.join(theme_static, item))
	237	+ elif os.path.isfile(src) and not item.endswith('.html'):
	238	+ # Copy non-HTML files (images, fonts, etc.) to static root
	239	+ os.makedirs(theme_static, exist_ok=True)
	240	+ shutil.copy2(src, os.path.join(theme_static, item))
154	241
155	242	_write_minimal_hugo_toml(output_dir, theme_name)
156	243
157	244	# Create minimal content
158	245	content_dir = os.path.join(output_dir, 'content')
159	246

	--- hugoifier/utils/complete.py
	+++ hugoifier/utils/complete.py
	@@ -9,12 +9,12 @@
9	import os
10	import shutil
11	from pathlib import Path
12
13	from .decapify import decapify
14	from .hugoify import hugoify_html
15	from .theme_finder import find_hugo_theme, find_raw_html_files
16	from .theme_patcher import patch_config, patch_theme
17
18
19	def complete(
20	input_path: str,
	@@ -41,16 +41,21 @@
41	branding = {'cms_name': cms_name, 'cms_logo': cms_logo, 'cms_color': cms_color}
42	info = find_hugo_theme(input_path)
43
44	if info:
45	return _assemble_hugo_site(info, output_dir, branding)
46	else:
47	# Raw HTML path
48	html_files = find_raw_html_files(input_path)
49	if not html_files:
50	raise ValueError(f"No Hugo theme or HTML files found in {input_path}")
51	return _convert_raw_html(input_path, html_files, output_dir, branding)





52
53
54	# ---------------------------------------------------------------------------
55	# Hugo theme path
56	# ---------------------------------------------------------------------------
	@@ -111,10 +116,87 @@
111
112	logging.info(f"Done. Site ready at: {output_dir}")
113	logging.info(f"Run: cd {output_dir} && hugo serve")
114	return output_dir
115













































































116
117	# ---------------------------------------------------------------------------
118	# Raw HTML path
119	# ---------------------------------------------------------------------------
120
	@@ -126,13 +208,13 @@
126	if output_dir is None:
127	output_dir = str(Path(__file__).parents[2] / 'output' / theme_name)
128
129	logging.info(f"Converting raw HTML theme: {theme_name}")
130
131	# Use AI to convert the main HTML file to Hugo layouts
132	main_html = _pick_main_html(html_files)
133	logging.info(f"Converting {main_html} ...")
134	hugo_layouts = hugoify_html(main_html)
135
136	os.makedirs(output_dir, exist_ok=True)
137
138	# Write converted layouts
	@@ -144,15 +226,20 @@
144	dest = os.path.join(theme_layouts_dir, filename)
145	os.makedirs(os.path.dirname(dest), exist_ok=True)
146	with open(dest, 'w') as f:
147	f.write(content)
148
149	# Copy CSS/JS/images
150	for ext_dir in ('css', 'js', 'images', 'img', 'assets', 'fonts'):
151	src = os.path.join(input_path, ext_dir)
152	if os.path.isdir(src):
153	_copy_dir(src, os.path.join(output_dir, 'themes', theme_name, 'static', ext_dir))





154
155	_write_minimal_hugo_toml(output_dir, theme_name)
156
157	# Create minimal content
158	content_dir = os.path.join(output_dir, 'content')
159

	--- hugoifier/utils/complete.py
	+++ hugoifier/utils/complete.py
	@@ -9,12 +9,12 @@
9	import os
10	import shutil
11	from pathlib import Path
12
13	from .decapify import decapify
14	from .hugoify import hugoify_html, hugoify_nextjs
15	from .theme_finder import find_hugo_theme, find_nextjs_app, find_raw_html_files
16	from .theme_patcher import patch_config, patch_theme
17
18
19	def complete(
20	input_path: str,
	@@ -41,16 +41,21 @@
41	branding = {'cms_name': cms_name, 'cms_logo': cms_logo, 'cms_color': cms_color}
42	info = find_hugo_theme(input_path)
43
44	if info:
45	return _assemble_hugo_site(info, output_dir, branding)
46
47	# Next.js path (check before raw HTML since Next.js projects may contain .html files)
48	nextjs_info = find_nextjs_app(input_path)
49	if nextjs_info:
50	return _convert_nextjs(input_path, nextjs_info, output_dir, branding)
51
52	# Raw HTML path
53	html_files = find_raw_html_files(input_path)
54	if not html_files:
55	raise ValueError(f"No Hugo theme, Next.js app, or HTML files found in {input_path}")
56	return _convert_raw_html(input_path, html_files, output_dir, branding)
57
58
59	# ---------------------------------------------------------------------------
60	# Hugo theme path
61	# ---------------------------------------------------------------------------
	@@ -111,10 +116,87 @@
116
117	logging.info(f"Done. Site ready at: {output_dir}")
118	logging.info(f"Run: cd {output_dir} && hugo serve")
119	return output_dir
120
121
122	# ---------------------------------------------------------------------------
123	# Next.js path
124	# ---------------------------------------------------------------------------
125
126	def _convert_nextjs(
127	input_path: str, nextjs_info: dict, output_dir: str = None, branding: dict = None
128	) -> str:
129	app_dir = nextjs_info['app_dir']
130	theme_name = nextjs_info.get('app_name', os.path.basename(os.path.abspath(input_path)))
131
132	if output_dir is None:
133	output_dir = str(Path(__file__).parents[2] / 'output' / theme_name)
134
135	logging.info(f"Converting Next.js app: {theme_name}")
136
137	# Convert: capture rendered HTML if dev server running, else AI fallback
138	hugo_layouts = hugoify_nextjs(nextjs_info)
139
140	os.makedirs(output_dir, exist_ok=True)
141
142	# Extract captured CSS if present (from rendered HTML capture)
143	captured_css = hugo_layouts.pop('_captured_css', {})
144
145	# Write converted layouts
146	theme_layouts_dir = os.path.join(output_dir, 'themes', theme_name, 'layouts')
147	os.makedirs(os.path.join(theme_layouts_dir, '_default'), exist_ok=True)
148	os.makedirs(os.path.join(theme_layouts_dir, 'partials'), exist_ok=True)
149
150	for filename, content in hugo_layouts.items():
151	# Fix common AI mistake: partial "partials/X.html" → partial "X.html"
152	if isinstance(content, str):
153	content = content.replace('partial "partials/', 'partial "')
154	dest = os.path.join(theme_layouts_dir, filename)
155	os.makedirs(os.path.dirname(dest), exist_ok=True)
156	with open(dest, 'w') as f:
157	f.write(content)
158
159	# Copy public/ assets to theme static/
160	public_dir = os.path.join(app_dir, 'public')
161	theme_static = os.path.join(output_dir, 'themes', theme_name, 'static')
162	if os.path.isdir(public_dir):
163	_copy_dir(public_dir, theme_static)
164	logging.info("Copied public/ assets to static/")
165
166	# Write captured CSS (from rendered HTML capture)
167	css_dest = os.path.join(theme_static, 'css')
168	os.makedirs(css_dest, exist_ok=True)
169	for css_name, css_content in captured_css.items():
170	with open(os.path.join(css_dest, css_name), 'w') as f:
171	f.write(css_content)
172	logging.info(f"Wrote captured CSS: {css_name}")
173
174	# Also copy source CSS files (globals.css etc.)
175	for css_file in nextjs_info.get('css_files', []):
176	if os.path.isfile(css_file):
177	shutil.copy2(css_file, os.path.join(css_dest, os.path.basename(css_file)))
178	logging.info("Copied CSS files")
179
180	_write_minimal_hugo_toml(output_dir, theme_name)
181
182	# Create minimal content
183	content_dir = os.path.join(output_dir, 'content')
184	os.makedirs(content_dir, exist_ok=True)
185	with open(os.path.join(content_dir, '_index.md'), 'w') as f:
186	f.write('---\ntitle: Home\n---\n')
187
188	b = branding or {}
189	decapify(
190	output_dir,
191	cms_name=b.get('cms_name'), cms_logo=b.get('cms_logo'), cms_color=b.get('cms_color'),
192	)
193
194	logging.info(f"Done. Site ready at: {output_dir}")
195	logging.info(f"Run: cd {output_dir} && hugo serve")
196	return output_dir
197
198
199	# ---------------------------------------------------------------------------
200	# Raw HTML path
201	# ---------------------------------------------------------------------------
202
	@@ -126,13 +208,13 @@
208	if output_dir is None:
209	output_dir = str(Path(__file__).parents[2] / 'output' / theme_name)
210
211	logging.info(f"Converting raw HTML theme: {theme_name}")
212
213	# Direct HTML extraction — use the actual HTML as-is, no AI reinterpretation
214	main_html = _pick_main_html(html_files)
215	logging.info(f"Extracting {main_html} ...")
216	hugo_layouts = hugoify_html(main_html)
217
218	os.makedirs(output_dir, exist_ok=True)
219
220	# Write converted layouts
	@@ -144,15 +226,20 @@
226	dest = os.path.join(theme_layouts_dir, filename)
227	os.makedirs(os.path.dirname(dest), exist_ok=True)
228	with open(dest, 'w') as f:
229	f.write(content)
230
231	# Copy ALL static assets from the HTML theme directory
232	theme_static = os.path.join(output_dir, 'themes', theme_name, 'static')
233	for item in os.listdir(input_path):
234	src = os.path.join(input_path, item)
235	if os.path.isdir(src) and item not in ('__MACOSX', '.git', 'node_modules'):
236	_copy_dir(src, os.path.join(theme_static, item))
237	elif os.path.isfile(src) and not item.endswith('.html'):
238	# Copy non-HTML files (images, fonts, etc.) to static root
239	os.makedirs(theme_static, exist_ok=True)
240	shutil.copy2(src, os.path.join(theme_static, item))
241
242	_write_minimal_hugo_toml(output_dir, theme_name)
243
244	# Create minimal content
245	content_dir = os.path.join(output_dir, 'content')
246

M hugoifier/utils/hugoify.py

+463 -44

		--- hugoifier/utils/hugoify.py
		+++ hugoifier/utils/hugoify.py
		@@ -1,10 +1,11 @@
1	1	"""
2	2	AI-powered HTML → Hugo template conversion.
3	3
4	4	For already-Hugo themes, use hugoify_dir() to validate/augment.
5	5	For raw HTML, use hugoify_html() to produce Hugo layout files.
	6	+For Next.js apps, use hugoify_nextjs() to convert React components to Hugo layouts.
6	7	"""
7	8
8	9	import json
9	10	import logging
10	11	import os
		@@ -15,60 +16,421 @@
15	16	SYSTEM = (
16	17	"You are an expert Hugo theme developer. Convert HTML templates to valid Hugo Go template files. "
17	18	"Output only valid Hugo template syntax — no explanations, no markdown fences."
18	19	)
19	20
	21	+NEXTJS_SYSTEM = (
	22	+ "You are an expert at converting React/Next.js components to Hugo Go template files. "
	23	+ "You understand JSX, TSX, React component composition, and Hugo template syntax. "
	24	+ "Convert React components to static Hugo HTML templates, preserving all CSS classes and visual structure. "
	25	+ "Output only valid Hugo template syntax — no explanations, no markdown fences."
	26	+)
	27	+
20	28
21	29	def hugoify_html(html_path: str) -> dict:
22	30	"""
23	31	Convert a raw HTML file to a set of Hugo layout files.
24	32
25		- Returns dict mapping relative layout paths to their content, e.g.:
26		- {
27		- "_default/baseof.html": "<!DOCTYPE html>...",
28		- "partials/header.html": "<header>...",
29		- "partials/footer.html": "<footer>...",
30		- "index.html": "{{ define \"main\" }}...",
31		- }
	33	+ Uses direct HTML extraction (no AI) to preserve content exactly as-is.
	34	+ Splits the HTML into Hugo's baseof.html (head/shell) and index.html (body content).
	35	+
	36	+ Returns dict mapping relative layout paths to their content.
32	37	"""
33	38	logging.info(f"Hugoifying {html_path} ...")
34	39
35	40	with open(html_path, 'r', errors='replace') as f:
36	41	html = f.read()
37	42
38		- # Truncate very large files to avoid token limits
39		- if len(html) > 30000:
40		- logging.warning(f"HTML is large ({len(html)} chars), truncating to 30000 for AI analysis")
41		- html = html[:30000]
42		-
43		- prompt = f"""Convert the following HTML file into Hugo layout files.
44		-
45		-Return a JSON object where keys are relative file paths under layouts/ and values are the Hugo template content.
46		-
47		-Required keys to produce:
48		-- "_default/baseof.html" — base template with blocks for head, header, main, footer
49		-- "partials/header.html" — site header/nav extracted as partial
50		-- "partials/footer.html" — footer extracted as partial
51		-- "index.html" — homepage using {{ define "main" }} ... {{ end }}
52		-
53		-Rules:
54		-- Replace hardcoded page titles with {{ .Title }}
55		-- Replace hardcoded site name with {{ .Site.Title }}
56		-- Replace hardcoded URLs with {{ .Site.BaseURL }} or {{ .Permalink }}
57		-- Replace nav links with {{ range .Site.Menus.main }}<a href="{{ .URL }}">{{ .Name }}</a>{{ end }}
58		-- Replace blog post lists with {{ range .Pages }} ... {{ end }}
59		-- Replace copyright year with {{ now.Year }}
60		-- Keep all CSS classes and HTML structure intact
61		-- Use {{ partial "header.html" . }} and {{ partial "footer.html" . }} in baseof.html
62		-
63		-HTML to convert:
64		-{html}
65		-
66		-Return ONLY a valid JSON object, no explanation."""
67		-
68		- response = call_ai(prompt, SYSTEM)
69		- return _parse_layout_json(response)
	43	+ logging.info(f"Read {len(html)} chars from {html_path}")
	44	+
	45	+ # Extract <head> content (CSS links, meta, fonts, etc.)
	46	+ head_extras = _extract_head_content(html)
	47	+
	48	+ # Extract and rewrite CSS/JS paths to be relative to Hugo static/
	49	+ css_links = re.findall(r'<link[^>]+rel=["\']stylesheet["\'][^>]*/?>',
	50	+ html, re.DOTALL \| re.IGNORECASE)
	51	+ js_links = re.findall(r'<script[^>]+src=["\'][^"\']+["\'][^>]>.?</script>',
	52	+ html, re.DOTALL)
	53	+
	54	+ # Extract <body> content
	55	+ body_match = re.search(r'<body[^>]>(.?)</body>', html, re.DOTALL)
	56	+ body_content = body_match.group(1).strip() if body_match else html
	57	+
	58	+ # Extract body attributes (class, style, etc.)
	59	+ body_attrs_match = re.search(r'<body([^>]*)>', html)
	60	+ body_attrs = body_attrs_match.group(1).strip() if body_attrs_match else ''
	61	+
	62	+ # Build baseof.html preserving the original <head> structure
	63	+ head_match = re.search(r'<head[^>]>(.?)</head>', html, re.DOTALL)
	64	+ if head_match:
	65	+ head_content = head_match.group(1).strip()
	66	+ # Replace hardcoded <title> with Hugo template
	67	+ head_content = re.sub(
	68	+ r'<title>[^<]*</title>',
	69	+ '<title>{{ if .IsHome }}{{ .Site.Title }}{{ else }}{{ .Title }} \| {{ .Site.Title }}{{ end }}</title>',
	70	+ head_content
	71	+ )
	72	+ baseof = f'''<!DOCTYPE html>
	73	+<html lang="{{{{ with .Site.LanguageCode }}}}{{{{ . }}}}{{{{ else }}}}en{{{{ end }}}}">
	74	+<head>
	75	+{head_content}
	76	+</head>
	77	+<body{" " + body_attrs if body_attrs else ""}>
	78	+ {{{{- block "main" . }}}}{{{{- end }}}}
	79	+</body>
	80	+</html>'''
	81	+ else:
	82	+ baseof = _fallback_baseof()
	83	+
	84	+ index_html = f'{{{{ define "main" }}}}\n{body_content}\n{{{{ end }}}}'
	85	+
	86	+ layouts = {
	87	+ "_default/baseof.html": baseof,
	88	+ "index.html": index_html,
	89	+ }
	90	+
	91	+ logging.info(f"Extracted {len(layouts)} layout files directly from HTML (no AI)")
	92	+ return layouts
	93	+
	94	+
	95	+def hugoify_nextjs(info: dict, dev_url: str = None) -> dict:
	96	+ """
	97	+ Convert a Next.js app to a set of Hugo layout files.
	98	+
	99	+ If dev_url is provided (or auto-detected), captures the actual rendered HTML
	100	+ from the running Next.js dev server for pixel-perfect conversion.
	101	+ Otherwise falls back to AI-powered TSX source conversion.
	102	+
	103	+ Args:
	104	+ info: dict from find_nextjs_app() with app_dir, router_type, etc.
	105	+ dev_url: URL of a running Next.js dev server (e.g. http://localhost:3000)
	106	+
	107	+ Returns:
	108	+ dict mapping relative layout paths to their content, plus
	109	+ a '_captured_assets' key with any downloaded CSS/JS files.
	110	+ """
	111	+ app_dir = info['app_dir']
	112	+ logging.info(f"Hugoifying Next.js app at {app_dir} ...")
	113	+
	114	+ # Try to auto-detect a running dev server
	115	+ if not dev_url:
	116	+ dev_url = _detect_nextjs_server(info)
	117	+
	118	+ if dev_url:
	119	+ return _capture_rendered_html(dev_url, info)
	120	+
	121	+ # Fallback: AI-powered source conversion (less faithful)
	122	+ return _ai_convert_nextjs_sources(info)
	123	+
	124	+
	125	+def _detect_nextjs_server(info: dict) -> str \| None:
	126	+ """Check if a Next.js dev server is running on common ports."""
	127	+ import urllib.request
	128	+ for port in [3000, 3001, 3002]:
	129	+ url = f"http://localhost:{port}"
	130	+ try:
	131	+ req = urllib.request.Request(url, method='HEAD')
	132	+ resp = urllib.request.urlopen(req, timeout=2)
	133	+ if resp.status == 200:
	134	+ logging.info(f"Detected running Next.js server at {url}")
	135	+ return url
	136	+ except Exception:
	137	+ continue
	138	+ return None
	139	+
	140	+
	141	+def _capture_rendered_html(dev_url: str, info: dict) -> dict:
	142	+ """
	143	+ Capture the actual server-rendered HTML from a running Next.js app
	144	+ and convert it into Hugo layout files. This gives pixel-perfect results.
	145	+ """
	146	+ import urllib.request
	147	+ import urllib.parse
	148	+
	149	+ logging.info(f"Capturing rendered HTML from {dev_url} ...")
	150	+
	151	+ # Fetch the full rendered page
	152	+ resp = urllib.request.urlopen(dev_url)
	153	+ html = resp.read().decode('utf-8')
	154	+ logging.info(f"Captured {len(html)} chars of rendered HTML")
	155	+
	156	+ # Download compiled CSS
	157	+ css_urls = re.findall(r'href="(/_next/static/[^"]+\.css)"', html)
	158	+ captured_css = {}
	159	+ for css_path in css_urls:
	160	+ css_url = f"{dev_url}{css_path}"
	161	+ try:
	162	+ css_resp = urllib.request.urlopen(css_url)
	163	+ css_content = css_resp.read().decode('utf-8')
	164	+ captured_css['compiled.css'] = css_content
	165	+ logging.info(f"Captured CSS: {len(css_content)} chars")
	166	+ break # Usually just one CSS file
	167	+ except Exception as e:
	168	+ logging.warning(f"Failed to fetch CSS {css_url}: {e}")
	169	+
	170	+ # Strip Next.js scripts, dev tooling, and React hydration markers
	171	+ body_html = _extract_and_clean_body(html)
	172	+
	173	+ # Extract <head> content we want to keep (fonts, meta, etc.)
	174	+ head_extras = _extract_head_content(html)
	175	+
	176	+ # Build Hugo layouts
	177	+ baseof = f'''<!DOCTYPE html>
	178	+<html lang="en">
	179	+<head>
	180	+ <meta charset="utf-8">
	181	+ <meta name="viewport" content="width=device-width, initial-scale=1">
	182	+ <title>{{{{ if .IsHome }}}}{{{{ .Site.Title }}}}{{{{ else }}}}{{{{ .Title }}}} \| {{{{ .Site.Title }}}}{{{{ end }}}}</title>
	183	+{head_extras}
	184	+ <link rel="stylesheet" href="/css/compiled.css">
	185	+ <link rel="stylesheet" href="/css/globals.css">
	186	+</head>
	187	+<body class="antialiased">
	188	+ {{{{- block "main" . }}}}{{{{- end }}}}
	189	+</body>
	190	+</html>'''
	191	+
	192	+ index_html = f'{{{{ define "main" }}}}\n{body_html}\n{{{{ end }}}}'
	193	+
	194	+ layouts = {
	195	+ "_default/baseof.html": baseof,
	196	+ "index.html": index_html,
	197	+ }
	198	+
	199	+ # Attach captured CSS as metadata for the pipeline to handle
	200	+ if captured_css:
	201	+ layouts['_captured_css'] = captured_css
	202	+
	203	+ return layouts
	204	+
	205	+
	206	+def _extract_and_clean_body(html: str) -> str:
	207	+ """Extract <body> content and strip Next.js scripts/dev tooling."""
	208	+ # Extract body content
	209	+ body_match = re.search(r'<body[^>]>(.?)</body>', html, re.DOTALL)
	210	+ if not body_match:
	211	+ return html
	212	+
	213	+ body = body_match.group(1)
	214	+
	215	+ # Strip all <script> tags (Next.js runtime, React hydration, HMR, etc.)
	216	+ body = re.sub(r'<script\b[^>]>.?</script>', '', body, flags=re.DOTALL)
	217	+ body = re.sub(r'<script\b[^>]*/?>', '', body)
	218	+
	219	+ # Strip Next.js dev overlay and error boundary elements
	220	+ body = re.sub(r'<next-route-announcer[^>]>.?</next-route-announcer>', '', body, flags=re.DOTALL)
	221	+ body = re.sub(r'<nextjs-portal[^>]>.?</nextjs-portal>', '', body, flags=re.DOTALL)
	222	+
	223	+ # Strip data-reactroot, data-nextjs, and other React/Next.js attributes
	224	+ body = re.sub(r'\sdata-(?:reactroot\|nextjs[^=]\|rsc[^=])(?:="[^"]")?', '', body)
	225	+
	226	+ # Fix FadeIn components: they render with opacity:0 and translateY(32px)
	227	+ # because the IntersectionObserver JS isn't running. Force them visible.
	228	+ body = re.sub(r'opacity:\s*0', 'opacity:1', body)
	229	+ body = re.sub(r'translateY$32px$', 'translateY(0px)', body)
	230	+
	231	+ # Replace /_next/static/ asset references with /static/ for Hugo
	232	+ body = re.sub(r'/_next/static/media/([^"]+)', r'/\1', body)
	233	+
	234	+ return body.strip()
	235	+
	236	+
	237	+def _extract_head_content(html: str) -> str:
	238	+ """Extract useful <head> elements (fonts, preloads) from rendered HTML."""
	239	+ head_match = re.search(r'<head[^>]>(.?)</head>', html, re.DOTALL)
	240	+ if not head_match:
	241	+ return ""
	242	+
	243	+ head = head_match.group(1)
	244	+ lines = []
	245	+
	246	+ # Keep font preload/stylesheet links
	247	+ for match in re.finditer(r'<link[^>]+(?:fonts\.googleapis\|fonts\.gstatic\|preload[^>]+font)[^>]*/?>',
	248	+ head, re.DOTALL):
	249	+ lines.append(f" {match.group(0)}")
	250	+
	251	+ # Keep image preloads
	252	+ for match in re.finditer(r'<link[^>]+rel="preload"[^>]+as="image"[^>]*/?>',
	253	+ head, re.DOTALL):
	254	+ tag = match.group(0)
	255	+ # Fix /_next paths to local paths
	256	+ tag = re.sub(r'/_next/static/media/', '/', tag)
	257	+ lines.append(f" {tag}")
	258	+
	259	+ return "\n".join(lines)
	260	+
	261	+
	262	+def _ai_convert_nextjs_sources(info: dict) -> dict:
	263	+ """
	264	+ Fallback: AI-powered conversion from TSX source files.
	265	+ Used when no running dev server is available.
	266	+ """
	267	+ sources = _collect_nextjs_sources(info)
	268	+ if not sources:
	269	+ logging.warning("No source files collected from Next.js app")
	270	+ return _fallback_layouts()
	271	+
	272	+ layouts = {}
	273	+
	274	+ # Identify component vs structural files
	275	+ component_sources = {}
	276	+ layout_sources = {}
	277	+ for rel_path, content in sources.items():
	278	+ if rel_path.endswith('.css'):
	279	+ continue
	280	+ elif 'layout.' in rel_path or 'page.' in rel_path:
	281	+ layout_sources[rel_path] = content
	282	+ else:
	283	+ component_sources[rel_path] = content
	284	+
	285	+ # Convert each component individually
	286	+ for rel_path, content in component_sources.items():
	287	+ basename = os.path.splitext(os.path.basename(rel_path))[0]
	288	+ partial_name = f"partials/{basename}.html"
	289	+ logging.info(f" Converting {rel_path} → {partial_name}")
	290	+ html = _convert_single_component(basename, content)
	291	+ if html:
	292	+ layouts[partial_name] = html
	293	+
	294	+ # Build baseof and index
	295	+ partial_names = [os.path.splitext(os.path.basename(k))[0] for k in layouts.keys()]
	296	+ baseof, index_html = _convert_layout_and_page(layout_sources, component_sources, partial_names)
	297	+ layouts["_default/baseof.html"] = baseof
	298	+ layouts["index.html"] = index_html
	299	+
	300	+ logging.info(f"Generated {len(layouts)} layout files via AI conversion")
	301	+ return layouts
	302	+
	303	+
	304	+_COMPONENT_PROMPT = """Convert this React/Next.js component to static Hugo-compatible HTML.
	305	+
	306	+CRITICAL RULES:
	307	+- Output ONLY the raw HTML. No markdown fences, no explanation, no JSON wrapping.
	308	+- Convert ALL JSX `className` to HTML `class`
	309	+- Unroll ALL `.map()` calls into full static HTML — every single item
	310	+- Preserve EVERY Tailwind CSS class and inline style EXACTLY
	311	+- Preserve ALL text content — do NOT summarize or shorten
	312	+- Preserve ALL SVG content inline
	313	+- Strip React hooks and event handlers, keep static HTML structure
	314	+
	315	+Component name: {name}
	316	+
	317	+Source code:
	318	+{source}"""
	319	+
	320	+
	321	+def _convert_single_component(name: str, source: str) -> str \| None:
	322	+ """Convert a single React component to Hugo-compatible HTML via AI."""
	323	+ prompt = _COMPONENT_PROMPT.format(name=name, source=source)
	324	+ try:
	325	+ response = call_ai(prompt, NEXTJS_SYSTEM, max_tokens=16384)
	326	+ html = re.sub(r'^```(?:html)?\s*', '', response.strip())
	327	+ html = re.sub(r'```\s*$', '', html.strip())
	328	+ return html
	329	+ except Exception as e:
	330	+ logging.warning(f"Failed to convert component {name}: {e}")
	331	+ return None
	332	+
	333	+
	334	+def _convert_layout_and_page(layout_sources, component_sources, partial_names):
	335	+ """Build baseof.html and index.html from layout files and partial list."""
	336	+ partial_includes = "\n".join(
	337	+ f' {{{{ partial "{name}.html" . }}}}' for name in partial_names
	338	+ )
	339	+ baseof = _fallback_baseof()
	340	+ index_html = f'{{% define "main" %}}\n<div class="bg-[#121517] flex flex-col w-full">\n{partial_includes}\n</div>\n{{% end %}}'
	341	+ return baseof, index_html
	342	+
	343	+
	344	+def _collect_nextjs_sources(info: dict) -> dict:
	345	+ """
	346	+ Collect relevant source files from a Next.js app into a dict
	347	+ keyed by relative path. Applies priority-based context budgeting.
	348	+ """
	349	+ app_dir = info['app_dir']
	350	+ sources = {}
	351	+ budget = 80000
	352	+
	353	+ # Tier 1: Layout and page entry points (always include)
	354	+ tier1 = []
	355	+ if info.get('layout_file'):
	356	+ tier1.append(info['layout_file'])
	357	+ if info.get('page_file'):
	358	+ tier1.append(info['page_file'])
	359	+
	360	+ # Tier 2: Section-level components (most important for structure)
	361	+ tier2 = []
	362	+ # Tier 3: Page components
	363	+ tier3 = []
	364	+ # Tier 4: UI/marketing components
	365	+ tier4 = []
	366	+ # Tier 5: CSS and config
	367	+ tier5 = list(info.get('css_files', []))
	368	+
	369	+ # Walk source directories looking for components
	370	+ for search_root in [os.path.join(app_dir, 'src'), os.path.join(app_dir, 'app'), app_dir]:
	371	+ if not os.path.isdir(search_root):
	372	+ continue
	373	+ for root, dirs, files in os.walk(search_root):
	374	+ # Skip junk
	375	+ dirs[:] = [d for d in dirs if d not in ('node_modules', '.next', '__MACOSX', '.git', '__tests__')]
	376	+ for f in files:
	377	+ if not f.endswith(('.tsx', '.jsx', '.ts', '.js')):
	378	+ continue
	379	+ full = os.path.join(root, f)
	380	+ # Skip test files, config files, API routes
	381	+ if '.test.' in f or '.spec.' in f:
	382	+ continue
	383	+ if '/api/' in full:
	384	+ continue
	385	+ # Skip files already in tier 1
	386	+ if full in tier1:
	387	+ continue
	388	+
	389	+ rel = os.path.relpath(root, app_dir)
	390	+ basename = f.lower()
	391	+
	392	+ if 'section' in basename or 'section' in rel.lower():
	393	+ tier2.append(full)
	394	+ elif 'page' in basename and 'page' not in rel.lower().split('app')[-1:]:
	395	+ tier3.append(full)
	396	+ elif any(k in rel.lower() for k in ('components', 'marketing')):
	397	+ tier4.append(full)
	398	+
	399	+ # Assemble by priority, tracking budget
	400	+ used = 0
	401	+ for tier_files in [tier1, tier2, tier3, tier4, tier5]:
	402	+ for fpath in tier_files:
	403	+ if not os.path.isfile(fpath):
	404	+ continue
	405	+ try:
	406	+ with open(fpath, 'r', errors='replace') as fh:
	407	+ content = fh.read()
	408	+ except OSError:
	409	+ continue
	410	+
	411	+ rel_path = os.path.relpath(fpath, app_dir)
	412	+ # Skip if already collected (dedup across tiers)
	413	+ if rel_path in sources:
	414	+ continue
	415	+
	416	+ # Truncate individual large files
	417	+ if len(content) > 8000:
	418	+ content = content[:8000] + '\n// ... [truncated]'
	419	+
	420	+ if used + len(content) > budget:
	421	+ remaining = budget - used
	422	+ if remaining > 500:
	423	+ content = content[:remaining] + '\n// ... [truncated - budget]'
	424	+ sources[rel_path] = content
	425	+ used += len(content)
	426	+ break
	427	+ sources[rel_path] = content
	428	+ used += len(content)
	429	+
	430	+ logging.info(f"Collected {len(sources)} source files ({used} chars) from Next.js app")
	431	+ return sources
70	432
71	433
72	434	def hugoify_dir(theme_dir: str) -> str:
73	435	"""
74	436	Validate and optionally augment an existing Hugo theme directory.
		@@ -101,18 +463,24 @@
101	463	# CLI entry point (used by cli.py)
102	464	def hugoify(path: str) -> str:
103	465	"""
104	466	Entry point for the CLI 'hugoify' command.
105	467	If path is a Hugo theme dir: validate it.
	468	+ If path is a Next.js app: convert React components to Hugo.
106	469	If path is an HTML file or raw HTML dir: convert it.
107	470	"""
108		- from .theme_finder import find_hugo_theme, find_raw_html_files
	471	+ from .theme_finder import find_hugo_theme, find_nextjs_app, find_raw_html_files
109	472
110	473	info = find_hugo_theme(path)
111	474	if info:
112	475	return hugoify_dir(info['theme_dir'])
113	476
	477	+ nextjs_info = find_nextjs_app(path)
	478	+ if nextjs_info:
	479	+ layouts = hugoify_nextjs(nextjs_info)
	480	+ return f"Converted Next.js app to {len(layouts)} layout files: {list(layouts.keys())}"
	481	+
114	482	if os.path.isfile(path) and path.endswith('.html'):
115	483	layouts = hugoify_html(path)
116	484	return f"Converted to {len(layouts)} layout files: {list(layouts.keys())}"
117	485
118	486	html_files = find_raw_html_files(path)
		@@ -130,21 +498,72 @@
130	498	# ---------------------------------------------------------------------------
131	499	# Helpers
132	500	# ---------------------------------------------------------------------------
133	501
134	502	def _parse_layout_json(response: str) -> dict:
135		- """Extract JSON from AI response, even if surrounded by prose."""
136		- # Try to find JSON block
137		- match = re.search(r'\{.*\}', response, re.DOTALL)
	503	+ """Extract JSON from AI response, even if surrounded by prose or markdown fences."""
	504	+ # Strip markdown fences if present
	505	+ stripped = re.sub(r'```(?:json)?\s*', '', response)
	506	+ stripped = re.sub(r'```\s*$', '', stripped.strip())
	507	+
	508	+ # Try the full stripped response as JSON first
	509	+ try:
	510	+ result = json.loads(stripped)
	511	+ if isinstance(result, dict):
	512	+ logging.info(f"Parsed {len(result)} layout files from AI response")
	513	+ return result
	514	+ except json.JSONDecodeError:
	515	+ pass
	516	+
	517	+ # Try to find JSON block (outermost braces)
	518	+ match = re.search(r'\{.*\}', stripped, re.DOTALL)
138	519	if match:
139	520	try:
140		- return json.loads(match.group(0))
	521	+ result = json.loads(match.group(0))
	522	+ if isinstance(result, dict):
	523	+ logging.info(f"Parsed {len(result)} layout files from AI response (extracted)")
	524	+ return result
141	525	except json.JSONDecodeError:
142	526	pass
	527	+
	528	+ # AI sometimes uses backtick-delimited values instead of JSON strings.
	529	+ # Parse with a regex-based key-value extractor.
	530	+ backtick_result = _parse_backtick_json(match.group(0))
	531	+ if backtick_result:
	532	+ logging.info(f"Parsed {len(backtick_result)} layout files from backtick-delimited response")
	533	+ return backtick_result
143	534
144	535	# Fallback: return a minimal layout
145	536	logging.warning("Could not parse AI response as JSON, using fallback layouts")
	537	+ logging.debug(f"AI response was: {response[:500]!r}")
	538	+ return {
	539	+ "_default/baseof.html": _fallback_baseof(),
	540	+ "partials/header.html": "<header><!-- header --></header>",
	541	+ "partials/footer.html": "<footer>{{ .Site.Params.copyright }}</footer>",
	542	+ "index.html": '{{ define "main" }}<main>{{ .Content }}</main>{{ end }}',
	543	+ }
	544	+
	545	+
	546	+def _parse_backtick_json(text: str) -> dict \| None:
	547	+ """
	548	+ Parse a JSON-like object where values are backtick-delimited template literals
	549	+ instead of proper JSON strings. This happens when the AI uses JS template syntax.
	550	+ e.g.: { "key": `<html>...</html>` }
	551	+ """
	552	+ result = {}
	553	+ # Match "key": `value` pairs where value can span multiple lines
	554	+ pattern = re.compile(r'"([^"]+)"\s:\s`(.?)`(?:\s[,}])', re.DOTALL)
	555	+ for m in pattern.finditer(text):
	556	+ key = m.group(1)
	557	+ value = m.group(2).strip()
	558	+ result[key] = value
	559	+
	560	+ return result if result else None
	561	+
	562	+
	563	+def _fallback_layouts() -> dict:
	564	+ """Minimal fallback when source collection fails."""
146	565	return {
147	566	"_default/baseof.html": _fallback_baseof(),
148	567	"partials/header.html": "<header><!-- header --></header>",
149	568	"partials/footer.html": "<footer>{{ .Site.Params.copyright }}</footer>",
150	569	"index.html": '{{ define "main" }}<main>{{ .Content }}</main>{{ end }}',
151	570

	--- hugoifier/utils/hugoify.py
	+++ hugoifier/utils/hugoify.py
	@@ -1,10 +1,11 @@
1	"""
2	AI-powered HTML → Hugo template conversion.
3
4	For already-Hugo themes, use hugoify_dir() to validate/augment.
5	For raw HTML, use hugoify_html() to produce Hugo layout files.

6	"""
7
8	import json
9	import logging
10	import os
	@@ -15,60 +16,421 @@
15	SYSTEM = (
16	"You are an expert Hugo theme developer. Convert HTML templates to valid Hugo Go template files. "
17	"Output only valid Hugo template syntax — no explanations, no markdown fences."
18	)
19







20
21	def hugoify_html(html_path: str) -> dict:
22	"""
23	Convert a raw HTML file to a set of Hugo layout files.
24
25	Returns dict mapping relative layout paths to their content, e.g.:
26	{
27	"_default/baseof.html": "<!DOCTYPE html>...",
28	"partials/header.html": "<header>...",
29	"partials/footer.html": "<footer>...",
30	"index.html": "{{ define \"main\" }}...",
31	}
32	"""
33	logging.info(f"Hugoifying {html_path} ...")
34
35	with open(html_path, 'r', errors='replace') as f:
36	html = f.read()
37
38	# Truncate very large files to avoid token limits
39	if len(html) > 30000:
40	logging.warning(f"HTML is large ({len(html)} chars), truncating to 30000 for AI analysis")
41	html = html[:30000]
42
43	prompt = f"""Convert the following HTML file into Hugo layout files.
44
45	Return a JSON object where keys are relative file paths under layouts/ and values are the Hugo template content.
46
47	Required keys to produce:
48	- "_default/baseof.html" — base template with blocks for head, header, main, footer
49	- "partials/header.html" — site header/nav extracted as partial
50	- "partials/footer.html" — footer extracted as partial
51	- "index.html" — homepage using {{ define "main" }} ... {{ end }}
52
53	Rules:
54	- Replace hardcoded page titles with {{ .Title }}
55	- Replace hardcoded site name with {{ .Site.Title }}
56	- Replace hardcoded URLs with {{ .Site.BaseURL }} or {{ .Permalink }}
57	- Replace nav links with {{ range .Site.Menus.main }}<a href="{{ .URL }}">{{ .Name }}</a>{{ end }}
58	- Replace blog post lists with {{ range .Pages }} ... {{ end }}
59	- Replace copyright year with {{ now.Year }}
60	- Keep all CSS classes and HTML structure intact
61	- Use {{ partial "header.html" . }} and {{ partial "footer.html" . }} in baseof.html
62
63	HTML to convert:
64	{html}
65
66	Return ONLY a valid JSON object, no explanation."""
67
68	response = call_ai(prompt, SYSTEM)
69	return _parse_layout_json(response)





































































































































































































































































































































































70
71
72	def hugoify_dir(theme_dir: str) -> str:
73	"""
74	Validate and optionally augment an existing Hugo theme directory.
	@@ -101,18 +463,24 @@
101	# CLI entry point (used by cli.py)
102	def hugoify(path: str) -> str:
103	"""
104	Entry point for the CLI 'hugoify' command.
105	If path is a Hugo theme dir: validate it.

106	If path is an HTML file or raw HTML dir: convert it.
107	"""
108	from .theme_finder import find_hugo_theme, find_raw_html_files
109
110	info = find_hugo_theme(path)
111	if info:
112	return hugoify_dir(info['theme_dir'])
113





114	if os.path.isfile(path) and path.endswith('.html'):
115	layouts = hugoify_html(path)
116	return f"Converted to {len(layouts)} layout files: {list(layouts.keys())}"
117
118	html_files = find_raw_html_files(path)
	@@ -130,21 +498,72 @@
130	# ---------------------------------------------------------------------------
131	# Helpers
132	# ---------------------------------------------------------------------------
133
134	def _parse_layout_json(response: str) -> dict:
135	"""Extract JSON from AI response, even if surrounded by prose."""
136	# Try to find JSON block
137	match = re.search(r'\{.*\}', response, re.DOTALL)













138	if match:
139	try:
140	return json.loads(match.group(0))



141	except json.JSONDecodeError:
142	pass







143
144	# Fallback: return a minimal layout
145	logging.warning("Could not parse AI response as JSON, using fallback layouts")




























146	return {
147	"_default/baseof.html": _fallback_baseof(),
148	"partials/header.html": "<header><!-- header --></header>",
149	"partials/footer.html": "<footer>{{ .Site.Params.copyright }}</footer>",
150	"index.html": '{{ define "main" }}<main>{{ .Content }}</main>{{ end }}',
151

	--- hugoifier/utils/hugoify.py
	+++ hugoifier/utils/hugoify.py
	@@ -1,10 +1,11 @@
1	"""
2	AI-powered HTML → Hugo template conversion.
3
4	For already-Hugo themes, use hugoify_dir() to validate/augment.
5	For raw HTML, use hugoify_html() to produce Hugo layout files.
6	For Next.js apps, use hugoify_nextjs() to convert React components to Hugo layouts.
7	"""
8
9	import json
10	import logging
11	import os
	@@ -15,60 +16,421 @@
16	SYSTEM = (
17	"You are an expert Hugo theme developer. Convert HTML templates to valid Hugo Go template files. "
18	"Output only valid Hugo template syntax — no explanations, no markdown fences."
19	)
20
21	NEXTJS_SYSTEM = (
22	"You are an expert at converting React/Next.js components to Hugo Go template files. "
23	"You understand JSX, TSX, React component composition, and Hugo template syntax. "
24	"Convert React components to static Hugo HTML templates, preserving all CSS classes and visual structure. "
25	"Output only valid Hugo template syntax — no explanations, no markdown fences."
26	)
27
28
def hugoify_html(html_path: str) -> dict:
    """
    Convert a raw HTML file to a set of Hugo layout files.

    Uses direct HTML extraction (no AI) to preserve content as-is: the
    document's <head> and <body> attributes become _default/baseof.html and
    the <body> content becomes index.html.

    Returns:
        dict mapping relative layout paths to their content.
    """
    logging.info(f"Hugoifying {html_path} ...")

    with open(html_path, 'r', encoding='utf-8', errors='replace') as f:
        html = f.read()

    logging.info(f"Read {len(html)} chars from {html_path}")

    tag_flags = re.DOTALL | re.IGNORECASE

    # Extract <body> content; \b keeps "<body"/"<head" from matching longer
    # tag names such as <header>.
    body_match = re.search(r'<body\b[^>]*>(.*?)</body>', html, tag_flags)
    body_content = body_match.group(1).strip() if body_match else html

    # Extract body attributes (class, style, etc.)
    body_attrs_match = re.search(r'<body\b([^>]*)>', html, re.IGNORECASE)
    body_attrs = body_attrs_match.group(1).strip() if body_attrs_match else ''

    # Build baseof.html preserving the original <head> structure
    head_match = re.search(r'<head\b[^>]*>(.*?)</head>', html, tag_flags)
    if head_match:
        head_content = head_match.group(1).strip()
        hugo_title = ('<title>{{ if .IsHome }}{{ .Site.Title }}{{ else }}'
                      '{{ .Title }} | {{ .Site.Title }}{{ end }}</title>')
        # Replace the hardcoded <title> with the Hugo template. A callable
        # replacement keeps re.sub from interpreting the template text.
        head_content = re.sub(r'<title\b[^>]*>.*?</title>', lambda _m: hugo_title,
                              head_content, count=1, flags=tag_flags)
        baseof = f'''<!DOCTYPE html>
<html lang="{{{{ with .Site.LanguageCode }}}}{{{{ . }}}}{{{{ else }}}}en{{{{ end }}}}">
<head>
{head_content}
</head>
<body{" " + body_attrs if body_attrs else ""}>
  {{{{- block "main" . }}}}{{{{- end }}}}
</body>
</html>'''
    else:
        baseof = _fallback_baseof()

    index_html = f'{{{{ define "main" }}}}\n{body_content}\n{{{{ end }}}}'

    layouts = {
        "_default/baseof.html": baseof,
        "index.html": index_html,
    }

    logging.info(f"Extracted {len(layouts)} layout files directly from HTML (no AI)")
    return layouts
93
94
95	def hugoify_nextjs(info: dict, dev_url: str = None) -> dict:
96	"""
97	Convert a Next.js app to a set of Hugo layout files.
98
99	If dev_url is provided (or auto-detected), captures the actual rendered HTML
100	from the running Next.js dev server for pixel-perfect conversion.
101	Otherwise falls back to AI-powered TSX source conversion.
102
103	Args:
104	info: dict from find_nextjs_app() with app_dir, router_type, etc.
105	dev_url: URL of a running Next.js dev server (e.g. http://localhost:3000)
106
107	Returns:
108	dict mapping relative layout paths to their content, plus
109	a '_captured_assets' key with any downloaded CSS/JS files.
110	"""
111	app_dir = info['app_dir']
112	logging.info(f"Hugoifying Next.js app at {app_dir} ...")
113
114	# Try to auto-detect a running dev server
115	if not dev_url:
116	dev_url = _detect_nextjs_server(info)
117
118	if dev_url:
119	return _capture_rendered_html(dev_url, info)
120
121	# Fallback: AI-powered source conversion (less faithful)
122	return _ai_convert_nextjs_sources(info)
123
124
125	def _detect_nextjs_server(info: dict) -> str \| None:
126	"""Check if a Next.js dev server is running on common ports."""
127	import urllib.request
128	for port in [3000, 3001, 3002]:
129	url = f"http://localhost:{port}"
130	try:
131	req = urllib.request.Request(url, method='HEAD')
132	resp = urllib.request.urlopen(req, timeout=2)
133	if resp.status == 200:
134	logging.info(f"Detected running Next.js server at {url}")
135	return url
136	except Exception:
137	continue
138	return None
139
140
141	def _capture_rendered_html(dev_url: str, info: dict) -> dict:
142	"""
143	Capture the actual server-rendered HTML from a running Next.js app
144	and convert it into Hugo layout files. This gives pixel-perfect results.
145	"""
146	import urllib.request
147	import urllib.parse
148
149	logging.info(f"Capturing rendered HTML from {dev_url} ...")
150
151	# Fetch the full rendered page
152	resp = urllib.request.urlopen(dev_url)
153	html = resp.read().decode('utf-8')
154	logging.info(f"Captured {len(html)} chars of rendered HTML")
155
156	# Download compiled CSS
157	css_urls = re.findall(r'href="(/_next/static/[^"]+\.css)"', html)
158	captured_css = {}
159	for css_path in css_urls:
160	css_url = f"{dev_url}{css_path}"
161	try:
162	css_resp = urllib.request.urlopen(css_url)
163	css_content = css_resp.read().decode('utf-8')
164	captured_css['compiled.css'] = css_content
165	logging.info(f"Captured CSS: {len(css_content)} chars")
166	break # Usually just one CSS file
167	except Exception as e:
168	logging.warning(f"Failed to fetch CSS {css_url}: {e}")
169
170	# Strip Next.js scripts, dev tooling, and React hydration markers
171	body_html = _extract_and_clean_body(html)
172
173	# Extract <head> content we want to keep (fonts, meta, etc.)
174	head_extras = _extract_head_content(html)
175
176	# Build Hugo layouts
177	baseof = f'''<!DOCTYPE html>
178	<html lang="en">
179	<head>
180	<meta charset="utf-8">
181	<meta name="viewport" content="width=device-width, initial-scale=1">
182	<title>{{{{ if .IsHome }}}}{{{{ .Site.Title }}}}{{{{ else }}}}{{{{ .Title }}}} \| {{{{ .Site.Title }}}}{{{{ end }}}}</title>
183	{head_extras}
184	<link rel="stylesheet" href="/css/compiled.css">
185	<link rel="stylesheet" href="/css/globals.css">
186	</head>
187	<body class="antialiased">
188	{{{{- block "main" . }}}}{{{{- end }}}}
189	</body>
190	</html>'''
191
192	index_html = f'{{{{ define "main" }}}}\n{body_html}\n{{{{ end }}}}'
193
194	layouts = {
195	"_default/baseof.html": baseof,
196	"index.html": index_html,
197	}
198
199	# Attach captured CSS as metadata for the pipeline to handle
200	if captured_css:
201	layouts['_captured_css'] = captured_css
202
203	return layouts
204
205
206	def _extract_and_clean_body(html: str) -> str:
207	"""Extract <body> content and strip Next.js scripts/dev tooling."""
208	# Extract body content
209	body_match = re.search(r'<body[^>]>(.?)</body>', html, re.DOTALL)
210	if not body_match:
211	return html
212
213	body = body_match.group(1)
214
215	# Strip all <script> tags (Next.js runtime, React hydration, HMR, etc.)
216	body = re.sub(r'<script\b[^>]>.?</script>', '', body, flags=re.DOTALL)
217	body = re.sub(r'<script\b[^>]*/?>', '', body)
218
219	# Strip Next.js dev overlay and error boundary elements
220	body = re.sub(r'<next-route-announcer[^>]>.?</next-route-announcer>', '', body, flags=re.DOTALL)
221	body = re.sub(r'<nextjs-portal[^>]>.?</nextjs-portal>', '', body, flags=re.DOTALL)
222
223	# Strip data-reactroot, data-nextjs, and other React/Next.js attributes
224	body = re.sub(r'\sdata-(?:reactroot\|nextjs[^=]\|rsc[^=])(?:="[^"]")?', '', body)
225
226	# Fix FadeIn components: they render with opacity:0 and translateY(32px)
227	# because the IntersectionObserver JS isn't running. Force them visible.
228	body = re.sub(r'opacity:\s*0', 'opacity:1', body)
229	body = re.sub(r'translateY$32px$', 'translateY(0px)', body)
230
231	# Replace /_next/static/ asset references with /static/ for Hugo
232	body = re.sub(r'/_next/static/media/([^"]+)', r'/\1', body)
233
234	return body.strip()
235
236
237	def _extract_head_content(html: str) -> str:
238	"""Extract useful <head> elements (fonts, preloads) from rendered HTML."""
239	head_match = re.search(r'<head[^>]>(.?)</head>', html, re.DOTALL)
240	if not head_match:
241	return ""
242
243	head = head_match.group(1)
244	lines = []
245
246	# Keep font preload/stylesheet links
247	for match in re.finditer(r'<link[^>]+(?:fonts\.googleapis\|fonts\.gstatic\|preload[^>]+font)[^>]*/?>',
248	head, re.DOTALL):
249	lines.append(f" {match.group(0)}")
250
251	# Keep image preloads
252	for match in re.finditer(r'<link[^>]+rel="preload"[^>]+as="image"[^>]*/?>',
253	head, re.DOTALL):
254	tag = match.group(0)
255	# Fix /_next paths to local paths
256	tag = re.sub(r'/_next/static/media/', '/', tag)
257	lines.append(f" {tag}")
258
259	return "\n".join(lines)
260
261
262	def _ai_convert_nextjs_sources(info: dict) -> dict:
263	"""
264	Fallback: AI-powered conversion from TSX source files.
265	Used when no running dev server is available.
266	"""
267	sources = _collect_nextjs_sources(info)
268	if not sources:
269	logging.warning("No source files collected from Next.js app")
270	return _fallback_layouts()
271
272	layouts = {}
273
274	# Identify component vs structural files
275	component_sources = {}
276	layout_sources = {}
277	for rel_path, content in sources.items():
278	if rel_path.endswith('.css'):
279	continue
280	elif 'layout.' in rel_path or 'page.' in rel_path:
281	layout_sources[rel_path] = content
282	else:
283	component_sources[rel_path] = content
284
285	# Convert each component individually
286	for rel_path, content in component_sources.items():
287	basename = os.path.splitext(os.path.basename(rel_path))[0]
288	partial_name = f"partials/{basename}.html"
289	logging.info(f" Converting {rel_path} → {partial_name}")
290	html = _convert_single_component(basename, content)
291	if html:
292	layouts[partial_name] = html
293
294	# Build baseof and index
295	partial_names = [os.path.splitext(os.path.basename(k))[0] for k in layouts.keys()]
296	baseof, index_html = _convert_layout_and_page(layout_sources, component_sources, partial_names)
297	layouts["_default/baseof.html"] = baseof
298	layouts["index.html"] = index_html
299
300	logging.info(f"Generated {len(layouts)} layout files via AI conversion")
301	return layouts
302
303
304	_COMPONENT_PROMPT = """Convert this React/Next.js component to static Hugo-compatible HTML.
305
306	CRITICAL RULES:
307	- Output ONLY the raw HTML. No markdown fences, no explanation, no JSON wrapping.
308	- Convert ALL JSX `className` to HTML `class`
309	- Unroll ALL `.map()` calls into full static HTML — every single item
310	- Preserve EVERY Tailwind CSS class and inline style EXACTLY
311	- Preserve ALL text content — do NOT summarize or shorten
312	- Preserve ALL SVG content inline
313	- Strip React hooks and event handlers, keep static HTML structure
314
315	Component name: {name}
316
317	Source code:
318	{source}"""
319
320
def _convert_single_component(name: str, source: str) -> str | None:
    """
    Ask the AI to turn one React component into Hugo-compatible HTML.

    The component name and source are substituted into _COMPONENT_PROMPT and
    sent through call_ai. A leading ``` / ```html fence and a trailing ```
    fence are removed from the reply before it is returned.

    Returns the cleaned HTML string, or None when the AI call (or the
    clean-up) raises; the failure is logged as a warning, not propagated.
    """
    request = _COMPONENT_PROMPT.format(name=name, source=source)
    try:
        raw = call_ai(request, NEXTJS_SYSTEM, max_tokens=16384)
        # Drop an opening markdown fence, then a closing one.
        unfenced = re.sub(r'^```(?:html)?\s*', '', raw.strip())
        return re.sub(r'```\s*$', '', unfenced.strip())
    except Exception as e:
        # Best effort: one failing component must not abort the whole run.
        logging.warning(f"Failed to convert component {name}: {e}")
        return None
332
333
def _convert_layout_and_page(layout_sources, component_sources, partial_names):
    """
    Build baseof.html and index.html from layout files and partial list.

    Args:
        layout_sources: layout/page sources keyed by relative path. Accepted
            for interface compatibility; not read here.
        component_sources: component sources keyed by relative path. Accepted
            for interface compatibility; not read here.
        partial_names: base names of the generated partials, in the order
            they should be rendered on the index page.

    Returns:
        (baseof, index_html): the base template from _fallback_baseof() and
        an index template that defines the "main" block and includes every
        partial in order.
    """
    partial_includes = "\n".join(
        f' {{{{ partial "{name}.html" . }}}}' for name in partial_names
    )
    baseof = _fallback_baseof()
    # Hugo templates use Go template delimiters: {{ define "main" }} ... {{ end }}.
    # The previous f-string rendered `{% define "main" %}`, which Hugo does not
    # treat as a template action, so the "main" block was never defined.
    index_html = (
        '{{ define "main" }}\n'
        '<div class="bg-[#121517] flex flex-col w-full">\n'
        f'{partial_includes}\n'
        '</div>\n'
        '{{ end }}'
    )
    return baseof, index_html
342
343
def _collect_nextjs_sources(info: dict) -> dict:
    """
    Collect relevant source files from a Next.js app into a dict
    keyed by relative path. Applies priority-based context budgeting.

    Args:
        info: dict describing the app. Reads 'app_dir' (required) and the
            optional 'layout_file', 'page_file' and 'css_files' entries.

    Returns:
        dict mapping each collected file's path (relative to app_dir) to its
        text. Files are added in priority order: layout/page entry points,
        section components, page components, components/marketing files,
        then CSS. A single file is cut to 8000 characters and collection is
        limited by an overall budget of 80000 characters.
    """
    app_dir = info['app_dir']
    sources = {}
    budget = 80000        # total characters across all collected files
    per_file_cap = 8000   # characters kept from any single file

    # Tier 1: Layout and page entry points (always include)
    tier1 = [info[key] for key in ('layout_file', 'page_file') if info.get(key)]
    # Tier 2: Section-level components (most important for structure)
    tier2 = []
    # Tier 3: Page components
    tier3 = []
    # Tier 4: UI/marketing components
    tier4 = []
    # Tier 5: CSS and config
    tier5 = list(info.get('css_files', []))

    skip_dirs = ('node_modules', '.next', '__MACOSX', '.git', '__tests__')
    source_exts = ('.tsx', '.jsx', '.ts', '.js')
    # The three search roots overlap (app_dir contains src/ and app/), so
    # remember what was already classified instead of queueing it again.
    seen = set(tier1)

    # Walk source directories looking for components
    for search_root in [os.path.join(app_dir, 'src'), os.path.join(app_dir, 'app'), app_dir]:
        if not os.path.isdir(search_root):
            continue
        for root, dirs, files in os.walk(search_root):
            # Skip junk (in-place edit stops os.walk from descending)
            dirs[:] = [d for d in dirs if d not in skip_dirs]

            rel = os.path.relpath(root, app_dir)
            rel_lower = rel.lower()
            # API routes: test the directory components *relative to the app*
            # so a parent directory that happens to be called "api" does not
            # exclude the whole project, and so it works with any separator.
            if 'api' in rel.split(os.sep):
                continue

            for f in files:
                if not f.endswith(source_exts):
                    continue
                # Skip test files
                if '.test.' in f or '.spec.' in f:
                    continue
                full = os.path.join(root, f)
                # Skip files already in tier 1 or already classified
                if full in seen:
                    continue
                seen.add(full)

                basename = f.lower()
                if 'section' in basename or 'section' in rel_lower:
                    tier2.append(full)
                # Rule kept as written: a "page" file counts unless the text
                # after the last "app" in the relative directory is exactly
                # "page".
                elif 'page' in basename and 'page' not in rel_lower.split('app')[-1:]:
                    tier3.append(full)
                elif any(k in rel_lower for k in ('components', 'marketing')):
                    tier4.append(full)

    # Assemble by priority, tracking budget
    used = 0
    for tier_files in [tier1, tier2, tier3, tier4, tier5]:
        for fpath in tier_files:
            if not os.path.isfile(fpath):
                continue
            rel_path = os.path.relpath(fpath, app_dir)
            # Skip if already collected (dedup across tiers)
            if rel_path in sources:
                continue
            try:
                # Explicit UTF-8 so the result does not depend on the
                # platform's default encoding; bad bytes are replaced.
                with open(fpath, 'r', encoding='utf-8', errors='replace') as fh:
                    content = fh.read()
            except OSError:
                continue

            # Truncate individual large files
            if len(content) > per_file_cap:
                content = content[:per_file_cap] + '\n// ... [truncated]'

            if used + len(content) > budget:
                remaining = budget - used
                # Only keep a partial file when a useful amount still fits.
                if remaining > 500:
                    content = content[:remaining] + '\n// ... [truncated - budget]'
                    sources[rel_path] = content
                    used += len(content)
                break
            sources[rel_path] = content
            used += len(content)

    logging.info(f"Collected {len(sources)} source files ({used} chars) from Next.js app")
    return sources
432
433
434	def hugoify_dir(theme_dir: str) -> str:
435	"""
436	Validate and optionally augment an existing Hugo theme directory.
	@@ -101,18 +463,24 @@
463	# CLI entry point (used by cli.py)
464	def hugoify(path: str) -> str:
465	"""
466	Entry point for the CLI 'hugoify' command.
467	If path is a Hugo theme dir: validate it.
468	If path is a Next.js app: convert React components to Hugo.
469	If path is an HTML file or raw HTML dir: convert it.
470	"""
471	from .theme_finder import find_hugo_theme, find_nextjs_app, find_raw_html_files
472
473	info = find_hugo_theme(path)
474	if info:
475	return hugoify_dir(info['theme_dir'])
476
477	nextjs_info = find_nextjs_app(path)
478	if nextjs_info:
479	layouts = hugoify_nextjs(nextjs_info)
480	return f"Converted Next.js app to {len(layouts)} layout files: {list(layouts.keys())}"
481
482	if os.path.isfile(path) and path.endswith('.html'):
483	layouts = hugoify_html(path)
484	return f"Converted to {len(layouts)} layout files: {list(layouts.keys())}"
485
486	html_files = find_raw_html_files(path)
	@@ -130,21 +498,72 @@
498	# ---------------------------------------------------------------------------
499	# Helpers
500	# ---------------------------------------------------------------------------
501
def _parse_layout_json(response: str) -> dict:
    """
    Extract JSON from AI response, even if surrounded by prose or markdown fences.

    Tries, in order:
      1. the whole response (markdown fences removed) as JSON;
      2. the span from the first '{' to the last '}' as JSON;
      3. that same span parsed by _parse_backtick_json, for replies that use
         backtick-delimited values instead of JSON strings.

    Returns the first result that is a non-empty dict of layouts; otherwise
    logs a warning and returns a minimal set of fallback layouts.
    """
    # Strip markdown fences if present
    stripped = re.sub(r'```(?:json)?\s*', '', response)
    stripped = re.sub(r'```\s*$', '', stripped.strip())

    # Outermost braces, used when the JSON is wrapped in prose.
    match = re.search(r'\{.*\}', stripped, re.DOTALL)

    attempts = [(stripped, "")]
    if match is not None:
        attempts.append((match.group(0), " (extracted)"))

    for candidate, note in attempts:
        try:
            result = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        # Valid JSON that is not an object (e.g. a list) is not a layout map.
        if isinstance(result, dict):
            logging.info(f"Parsed {len(result)} layout files from AI response{note}")
            return result

    # AI sometimes uses backtick-delimited values instead of JSON strings.
    # Only attempt this when a brace-delimited block was actually found;
    # otherwise there is nothing to hand to the extractor.
    if match is not None:
        backtick_result = _parse_backtick_json(match.group(0))
        if backtick_result:
            logging.info(f"Parsed {len(backtick_result)} layout files from backtick-delimited response")
            return backtick_result

    # Fallback: return a minimal layout
    logging.warning("Could not parse AI response as JSON, using fallback layouts")
    logging.debug(f"AI response was: {response[:500]!r}")
    return {
        "_default/baseof.html": _fallback_baseof(),
        "partials/header.html": "<header><!-- header --></header>",
        "partials/footer.html": "<footer>{{ .Site.Params.copyright }}</footer>",
        "index.html": '{{ define "main" }}<main>{{ .Content }}</main>{{ end }}',
    }
544
545
546	def _parse_backtick_json(text: str) -> dict \| None:
547	"""
548	Parse a JSON-like object where values are backtick-delimited template literals
549	instead of proper JSON strings. This happens when the AI uses JS template syntax.
550	e.g.: { "key": `<html>...</html>` }
551	"""
552	result = {}
553	# Match "key": `value` pairs where value can span multiple lines
554	pattern = re.compile(r'"([^"]+)"\s:\s`(.?)`(?:\s[,}])', re.DOTALL)
555	for m in pattern.finditer(text):
556	key = m.group(1)
557	value = m.group(2).strip()
558	result[key] = value
559
560	return result if result else None
561
562
563	def _fallback_layouts() -> dict:
564	"""Minimal fallback when source collection fails."""
565	return {
566	"_default/baseof.html": _fallback_baseof(),
567	"partials/header.html": "<header><!-- header --></header>",
568	"partials/footer.html": "<footer>{{ .Site.Params.copyright }}</footer>",
569	"index.html": '{{ define "main" }}<main>{{ .Content }}</main>{{ end }}',
570

M hugoifier/utils/theme_finder.py

+108

		--- hugoifier/utils/theme_finder.py
		+++ hugoifier/utils/theme_finder.py
		@@ -1,10 +1,12 @@
1	1	"""
2	2	Locates the actual Hugo theme and exampleSite within the messy zip-extracted structure.
	3	+Also detects Next.js applications for conversion.
3	4	Themes in themes/ are structured as: {name}/{name}/themes/{theme-name}/
4	5	"""
5	6
	7	+import json
6	8	import logging
7	9	import os
8	10
9	11
10	12	def find_hugo_theme(input_path):
		@@ -57,10 +59,116 @@
57	59	'example_site': example_site,
58	60	'theme_name': theme_name,
59	61	'is_hugo_theme': True,
60	62	}
61	63
	64	+
	65	+def find_nextjs_app(input_path):
	66	+ """
	67	+ Detect a Next.js application in the given path.
	68	+
	69	+ Walks up to 2 levels deep to find package.json with "next" in dependencies,
	70	+ similar to how find_hugo_theme handles zip-extracted double-folder structure.
	71	+
	72	+ Returns dict with:
	73	+ app_dir: root of the Next.js project (where package.json lives)
	74	+ app_name: name from package.json or directory name
	75	+ router_type: 'app' or 'pages'
	76	+ has_src_dir: whether components live under src/
	77	+ layout_file: path to app/layout.tsx/jsx (App Router) or None
	78	+ page_file: path to app/page.tsx/jsx or pages/index.tsx/jsx
	79	+ css_files: list of global CSS files found
	80	+ is_nextjs_app: True
	81	+ """
	82	+ input_path = os.path.abspath(input_path)
	83	+
	84	+ # Look for package.json at root or one level deep (zip-extracted pattern)
	85	+ candidates = []
	86	+ for pkg in _find_file_up_to_depth(input_path, 'package.json', max_depth=2):
	87	+ try:
	88	+ with open(pkg, 'r') as f:
	89	+ data = json.load(f)
	90	+ except (json.JSONDecodeError, OSError):
	91	+ continue
	92	+
	93	+ deps = {data.get('dependencies', {}), data.get('devDependencies', {})}
	94	+ if 'next' in deps:
	95	+ candidates.append((os.path.dirname(pkg), data))
	96	+
	97	+ if not candidates:
	98	+ return None
	99	+
	100	+ # Pick the deepest match (most specific, like find_hugo_theme)
	101	+ app_dir, pkg_data = max(candidates, key=lambda x: x[0].count(os.sep))
	102	+ app_name = pkg_data.get('name', os.path.basename(app_dir))
	103	+
	104	+ # Detect router type
	105	+ app_router_dir = os.path.join(app_dir, 'app')
	106	+ pages_dir = os.path.join(app_dir, 'pages')
	107	+ if os.path.isdir(app_router_dir):
	108	+ router_type = 'app'
	109	+ elif os.path.isdir(pages_dir):
	110	+ router_type = 'pages'
	111	+ else:
	112	+ return None # Has next dep but no recognizable router
	113	+
	114	+ # Detect src/ directory
	115	+ src_dir = os.path.join(app_dir, 'src')
	116	+ has_src_dir = os.path.isdir(src_dir)
	117	+
	118	+ # Find layout and page files
	119	+ layout_file = _find_tsx_or_jsx(app_dir, 'app', 'layout')
	120	+ if router_type == 'app':
	121	+ page_file = _find_tsx_or_jsx(app_dir, 'app', 'page')
	122	+ else:
	123	+ page_file = _find_tsx_or_jsx(app_dir, 'pages', 'index')
	124	+
	125	+ # Find CSS files
	126	+ css_files = []
	127	+ for search_dir in [app_router_dir, os.path.join(app_dir, 'src'), app_dir]:
	128	+ if not os.path.isdir(search_dir):
	129	+ continue
	130	+ for f in os.listdir(search_dir):
	131	+ if f.endswith('.css'):
	132	+ css_files.append(os.path.join(search_dir, f))
	133	+
	134	+ return {
	135	+ 'app_dir': app_dir,
	136	+ 'app_name': app_name,
	137	+ 'router_type': router_type,
	138	+ 'has_src_dir': has_src_dir,
	139	+ 'layout_file': layout_file,
	140	+ 'page_file': page_file,
	141	+ 'css_files': css_files,
	142	+ 'is_nextjs_app': True,
	143	+ }
	144	+
	145	+
	146	+def _find_file_up_to_depth(root, filename, max_depth=2):
	147	+ """Yield paths to `filename` found up to max_depth levels under root."""
	148	+ for depth_root, dirs, files in os.walk(root):
	149	+ rel = os.path.relpath(depth_root, root)
	150	+ depth = 0 if rel == '.' else rel.count(os.sep) + 1
	151	+ if depth > max_depth:
	152	+ dirs.clear()
	153	+ continue
	154	+ if '__MACOSX' in depth_root or 'node_modules' in depth_root:
	155	+ dirs.clear()
	156	+ continue
	157	+ if filename in files:
	158	+ yield os.path.join(depth_root, filename)
	159	+
	160	+
	161	+def _find_tsx_or_jsx(base, subdir, name):
	162	+ """Find {name}.tsx or {name}.jsx in base/subdir/."""
	163	+ d = os.path.join(base, subdir)
	164	+ for ext in ('.tsx', '.jsx', '.ts', '.js'):
	165	+ p = os.path.join(d, name + ext)
	166	+ if os.path.isfile(p):
	167	+ return p
	168	+ return None
	169	+
62	170
63	171	def find_raw_html_files(input_path):
64	172	"""Find HTML files in a raw HTML theme (not a Hugo theme)."""
65	173	html_files = []
66	174	for root, dirs, files in os.walk(input_path):
67	175

	--- hugoifier/utils/theme_finder.py
	+++ hugoifier/utils/theme_finder.py
	@@ -1,10 +1,12 @@
1	"""
2	Locates the actual Hugo theme and exampleSite within the messy zip-extracted structure.

3	Themes in themes/ are structured as: {name}/{name}/themes/{theme-name}/
4	"""
5

6	import logging
7	import os
8
9
10	def find_hugo_theme(input_path):
	@@ -57,10 +59,116 @@
57	'example_site': example_site,
58	'theme_name': theme_name,
59	'is_hugo_theme': True,
60	}
61










































































































62
63	def find_raw_html_files(input_path):
64	"""Find HTML files in a raw HTML theme (not a Hugo theme)."""
65	html_files = []
66	for root, dirs, files in os.walk(input_path):
67

	--- hugoifier/utils/theme_finder.py
	+++ hugoifier/utils/theme_finder.py
	@@ -1,10 +1,12 @@
1	"""
2	Locates the actual Hugo theme and exampleSite within the messy zip-extracted structure.
3	Also detects Next.js applications for conversion.
4	Themes in themes/ are structured as: {name}/{name}/themes/{theme-name}/
5	"""
6
7	import json
8	import logging
9	import os
10
11
12	def find_hugo_theme(input_path):
	@@ -57,10 +59,116 @@
59	'example_site': example_site,
60	'theme_name': theme_name,
61	'is_hugo_theme': True,
62	}
63
64
def find_nextjs_app(input_path):
    """
    Detect a Next.js application in the given path.

    Looks for a package.json at the root or up to 2 levels below it that
    lists "next" in dependencies or devDependencies, similar to how
    find_hugo_theme handles zip-extracted double-folder structure.

    Returns None when no such package.json exists or when the project has no
    app/ or pages/ directory (at the project root or under src/). Otherwise
    returns dict with:
        app_dir: root of the Next.js project (where package.json lives)
        app_name: name from package.json or directory name
        router_type: 'app' or 'pages'
        has_src_dir: whether the project has a src/ directory
        layout_file: path to the app/ layout file, or None
        page_file: path to the app/ page file or the pages/ index file, or None
        css_files: list of CSS files found directly in the searched directories
        is_nextjs_app: True
    """
    input_path = os.path.abspath(input_path)

    # Look for package.json at the root or up to two levels deep
    # (zip-extracted pattern).
    candidates = []
    for pkg in _find_file_up_to_depth(input_path, 'package.json', max_depth=2):
        try:
            with open(pkg, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (ValueError, OSError):
            # ValueError covers json.JSONDecodeError and undecodable bytes.
            continue
        if not isinstance(data, dict):
            continue

        # Union of the declared package names. A set literal of the two dicts
        # would raise TypeError (dicts are unhashable), so collect the keys.
        declared = set()
        for section in ('dependencies', 'devDependencies'):
            entries = data.get(section)
            if isinstance(entries, dict):
                declared.update(entries)
        if 'next' in declared:
            candidates.append((os.path.dirname(pkg), data))

    if not candidates:
        return None

    # Pick the deepest match (most specific, like find_hugo_theme)
    app_dir, pkg_data = max(candidates, key=lambda x: x[0].count(os.sep))
    app_name = pkg_data.get('name') or os.path.basename(app_dir)

    # Detect src/ directory
    src_dir = os.path.join(app_dir, 'src')
    has_src_dir = os.path.isdir(src_dir)

    # Detect router type. Root-level app/ and pages/ win; src/app and
    # src/pages are only considered when neither exists at the root.
    router_type = None
    router_base = None
    for base in (app_dir, src_dir):
        if os.path.isdir(os.path.join(base, 'app')):
            router_type, router_base = 'app', base
        elif os.path.isdir(os.path.join(base, 'pages')):
            router_type, router_base = 'pages', base
        if router_type:
            break
    if router_type is None:
        return None  # Has next dep but no recognizable router

    # Find layout and page files
    layout_file = _find_tsx_or_jsx(router_base, 'app', 'layout')
    if router_type == 'app':
        page_file = _find_tsx_or_jsx(router_base, 'app', 'page')
    else:
        page_file = _find_tsx_or_jsx(router_base, 'pages', 'index')

    # Find CSS files (top level of each directory only, no recursion)
    css_files = []
    for search_dir in [os.path.join(router_base, 'app'), src_dir, app_dir]:
        if not os.path.isdir(search_dir):
            continue
        for f in os.listdir(search_dir):
            if f.endswith('.css'):
                css_files.append(os.path.join(search_dir, f))

    return {
        'app_dir': app_dir,
        'app_name': app_name,
        'router_type': router_type,
        'has_src_dir': has_src_dir,
        'layout_file': layout_file,
        'page_file': page_file,
        'css_files': css_files,
        'is_nextjs_app': True,
    }
144
145
def _find_file_up_to_depth(root, filename, max_depth=2):
    """
    Yield paths to `filename` found up to max_depth levels under root.

    Depth 0 is `root` itself. Directories named __MACOSX or node_modules
    below root are never entered.
    """
    junk = ('__MACOSX', 'node_modules')
    for depth_root, dirs, files in os.walk(root):
        rel = os.path.relpath(depth_root, root)
        depth = 0 if rel == '.' else rel.count(os.sep) + 1
        if depth > max_depth:
            dirs.clear()
            continue
        if depth == max_depth:
            # Deepest allowed level: do not descend any further.
            dirs.clear()
        else:
            # Prune junk by exact directory name *below root* before os.walk
            # descends. Matching on the absolute path would skip everything
            # when root itself sits under e.g. some node_modules directory.
            dirs[:] = [d for d in dirs if d not in junk]
        if filename in files:
            yield os.path.join(depth_root, filename)
159
160
def _find_tsx_or_jsx(base, subdir, name):
    """
    Return the path of the first existing base/subdir/{name}{ext}.

    Extensions are tried in the order .tsx, .jsx, .ts, .js; None is returned
    when none of those files exists.
    """
    folder = os.path.join(base, subdir)
    possible = (os.path.join(folder, name + suffix) for suffix in ('.tsx', '.jsx', '.ts', '.js'))
    return next((path for path in possible if os.path.isfile(path)), None)
169
170
171	def find_raw_html_files(input_path):
172	"""Find HTML files in a raw HTML theme (not a Hugo theme)."""
173	html_files = []
174	for root, dirs, files in os.walk(input_path):
175

Hugoifier

Keyboard Shortcuts