Fossil SCM

A more precise implementation of "slugify".

drh 2026-01-13 19:50 markdown-header-ids

Commit 8c9810e6930c66780d7b7294935b651a5f46dea8926c61a1775f36917203d2c5

Parent 21cbb8c467a1a36…

1 file changed +20 -2

M src/markdown_html.c

+20 -2

		--- src/markdown_html.c
		+++ src/markdown_html.c
		@@ -229,17 +229,35 @@
229	229	INTER_BLOCK(ob);
230	230	z = fossil_strdup(blob_buffer(text));
231	231	if( z==0 ){
232	232	j = 0;
233	233	}else{
	234	+ /*
	235	+ ** The GitHub "slugify" algorithm converts the text of a markdown header
	236	+ ** into a ID for that header. The algorithm is:
	237	+ **
	238	+ ** 1. ASCII alphanumerics -> convert to lower case
	239	+ ** 2. Spaces, hyphens, underscores -> convert to '-'
	240	+ ** 3. Non-ASCII -> preserve as-is
	241	+ ** 4. Other punctuation -> remove
	242	+ ** 5. Multiple consecutive dashes -> collapse to one
	243	+ ** 6. Leading and trailing dashes -> remove
	244	+ ** 7. Markup <...> and &...; -> remove
	245	+ **
	246	+ ** This implementation does the conversion in-place.
	247	+ */
234	248	for(i=j=0; z[i]; i++){
235	249	if( fossil_isalnum(z[i]) ){
236	250	z[j++] = fossil_tolower(z[i]);
237		- }else if( fossil_isspace(z[i]) && j>0 && fossil_isalnum(z[j-1]) ){
238		- z[j++] = '-';
	251	+ }else if( fossil_isspace(z[i]) || z[i]=='-' || z[i]=='_' ){
	252	+ if( j>0 && z[j-1]!='-' ) z[j++] = '-';
239	253	}else if( z[i]=='<' ){
240	254	do{ i++; }while( z[i]!=0 && z[i]!='>' );
	255	+ }else if( z[i]=='&' ){
	256	+ do{ i++; }while( z[i]!=0 && z[i]!=';' );
	257	+ }else if( (z[i]&0x80)!=0 ){
	258	+ z[j++] = z[i];
241	259	}
242	260	}
243	261	if( j>0 && z[j-1]=='-' ) j--;
244	262	z[j] = 0;
245	263	}
246	264

	--- src/markdown_html.c
	+++ src/markdown_html.c
	@@ -229,17 +229,35 @@
229	INTER_BLOCK(ob);
230	z = fossil_strdup(blob_buffer(text));
231	if( z==0 ){
232	j = 0;
233	}else{














234	for(i=j=0; z[i]; i++){
235	if( fossil_isalnum(z[i]) ){
236	z[j++] = fossil_tolower(z[i]);
237	}else if( fossil_isspace(z[i]) && j>0 && fossil_isalnum(z[j-1]) ){
238	z[j++] = '-';
239	}else if( z[i]=='<' ){
240	do{ i++; }while( z[i]!=0 && z[i]!='>' );




241	}
242	}
243	if( j>0 && z[j-1]=='-' ) j--;
244	z[j] = 0;
245	}
246

	--- src/markdown_html.c
	+++ src/markdown_html.c
	@@ -229,17 +229,35 @@
229	INTER_BLOCK(ob);
230	z = fossil_strdup(blob_buffer(text));
231	if( z==0 ){
232	j = 0;
233	}else{
234	/*
235	** The GitHub "slugify" algorithm converts the text of a markdown header
236	** into a ID for that header. The algorithm is:
237	**
238	** 1. ASCII alphanumerics -> convert to lower case
239	** 2. Spaces, hyphens, underscores -> convert to '-'
240	** 3. Non-ASCII -> preserve as-is
241	** 4. Other punctuation -> remove
242	** 5. Multiple consecutive dashes -> collapse to one
243	** 6. Leading and trailing dashes -> remove
244	** 7. Markup <...> and &...; -> remove
245	**
246	** This implementation does the conversion in-place.
247	*/
248	for(i=j=0; z[i]; i++){
249	if( fossil_isalnum(z[i]) ){
250	z[j++] = fossil_tolower(z[i]);
251	}else if( fossil_isspace(z[i]) || z[i]=='-' || z[i]=='_' ){
252	if( j>0 && z[j-1]!='-' ) z[j++] = '-';
253	}else if( z[i]=='<' ){
254	do{ i++; }while( z[i]!=0 && z[i]!='>' );
255	}else if( z[i]=='&' ){
256	do{ i++; }while( z[i]!=0 && z[i]!=';' );
257	}else if( (z[i]&0x80)!=0 ){
258	z[j++] = z[i];
259	}
260	}
261	if( j>0 && z[j-1]=='-' ) j--;
262	z[j] = 0;
263	}
264