Fossil SCM

A more precise implementation of "slugify".

drh 2026-01-13 19:50 markdown-header-ids
Commit 8c9810e6930c66780d7b7294935b651a5f46dea8926c61a1775f36917203d2c5
1 file changed +20 -2
--- src/markdown_html.c
+++ src/markdown_html.c
@@ -229,17 +229,35 @@
229229
INTER_BLOCK(ob);
230230
z = fossil_strdup(blob_buffer(text));
231231
if( z==0 ){
232232
j = 0;
233233
}else{
234
+ /*
235
+ ** The GitHub "slugify" algorithm converts the text of a markdown header
236
+ ** into an ID for that header. The algorithm is:
237
+ **
238
+ ** 1. ASCII alphanumerics -> convert to lower case
239
+ ** 2. Spaces, hyphens, underscores -> convert to '-'
240
+ ** 3. Non-ASCII -> preserve as-is
241
+ ** 4. Other punctuation -> remove
242
+ ** 5. Multiple consecutive dashes -> collapse to one
243
+ ** 6. Leading and trailing dashes -> remove
244
+ ** 7. Markup <...> and &...; -> remove
245
+ **
246
+ ** This implementation does the conversion in-place.
247
+ */
234248
for(i=j=0; z[i]; i++){
235249
if( fossil_isalnum(z[i]) ){
236250
z[j++] = fossil_tolower(z[i]);
237
- }else if( fossil_isspace(z[i]) && j>0 && fossil_isalnum(z[j-1]) ){
238
- z[j++] = '-';
251
+ }else if( fossil_isspace(z[i]) || z[i]=='-' || z[i]=='_' ){
252
+ if( j>0 && z[j-1]!='-' ) z[j++] = '-';
239253
}else if( z[i]=='<' ){
240254
do{ i++; }while( z[i]!=0 && z[i]!='>' );
255
+ }else if( z[i]=='&' ){
256
+ do{ i++; }while( z[i]!=0 && z[i]!=';' );
257
+ }else if( (z[i]&0x80)!=0 ){
258
+ z[j++] = z[i];
241259
}
242260
}
243261
if( j>0 && z[j-1]=='-' ) j--;
244262
z[j] = 0;
245263
}
246264
--- src/markdown_html.c
+++ src/markdown_html.c
@@ -229,17 +229,35 @@
229 INTER_BLOCK(ob);
230 z = fossil_strdup(blob_buffer(text));
231 if( z==0 ){
232 j = 0;
233 }else{
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234 for(i=j=0; z[i]; i++){
235 if( fossil_isalnum(z[i]) ){
236 z[j++] = fossil_tolower(z[i]);
237 }else if( fossil_isspace(z[i]) && j>0 && fossil_isalnum(z[j-1]) ){
238 z[j++] = '-';
239 }else if( z[i]=='<' ){
240 do{ i++; }while( z[i]!=0 && z[i]!='>' );
 
 
 
 
241 }
242 }
243 if( j>0 && z[j-1]=='-' ) j--;
244 z[j] = 0;
245 }
246
--- src/markdown_html.c
+++ src/markdown_html.c
@@ -229,17 +229,35 @@
229 INTER_BLOCK(ob);
230 z = fossil_strdup(blob_buffer(text));
231 if( z==0 ){
232 j = 0;
233 }else{
234 /*
235 ** The GitHub "slugify" algorithm converts the text of a markdown header
235 ** into an ID for that header. The algorithm is:
237 **
238 ** 1. ASCII alphanumerics -> convert to lower case
239 ** 2. Spaces, hyphens, underscores -> convert to '-'
240 ** 3. Non-ASCII -> preserve as-is
241 ** 4. Other punctuation -> remove
242 ** 5. Multiple consecutive dashes -> collapse to one
243 ** 6. Leading and trailing dashes -> remove
244 ** 7. Markup <...> and &...; -> remove
245 **
246 ** This implementation does the conversion in-place.
247 */
248 for(i=j=0; z[i]; i++){
249 if( fossil_isalnum(z[i]) ){
250 z[j++] = fossil_tolower(z[i]);
251 }else if( fossil_isspace(z[i]) || z[i]=='-' || z[i]=='_' ){
252 if( j>0 && z[j-1]!='-' ) z[j++] = '-';
253 }else if( z[i]=='<' ){
254 do{ i++; }while( z[i]!=0 && z[i]!='>' );
255 }else if( z[i]=='&' ){
256 do{ i++; }while( z[i]!=0 && z[i]!=';' );
257 }else if( (z[i]&0x80)!=0 ){
258 z[j++] = z[i];
259 }
260 }
261 if( j>0 && z[j-1]=='-' ) j--;
262 z[j] = 0;
263 }
264

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button