Fossil SCM
A more precise implementation of "slugify".
Commit
8c9810e6930c66780d7b7294935b651a5f46dea8926c61a1775f36917203d2c5
Parent
21cbb8c467a1a36…
1 file changed
+20
-2
+20
-2
| --- src/markdown_html.c | ||
| +++ src/markdown_html.c | ||
| @@ -229,17 +229,35 @@ | ||
| 229 | 229 | INTER_BLOCK(ob); |
| 230 | 230 | z = fossil_strdup(blob_buffer(text)); |
| 231 | 231 | if( z==0 ){ |
| 232 | 232 | j = 0; |
| 233 | 233 | }else{ |
| 234 | + /* | |
| 235 | + ** The GitHub "slugify" algorithm converts the text of a markdown header | |
| 236 | + ** into an ID for that header. The algorithm is: | |
| 237 | + ** | |
| 238 | + ** 1. ASCII alphanumerics -> convert to lower case | |
| 239 | + ** 2. Spaces, hyphens, underscores -> convert to '-' | |
| 240 | + ** 3. Non-ASCII -> preserve as-is | |
| 241 | + ** 4. Other punctuation -> remove | |
| 242 | + ** 5. Multiple consecutive dashes -> collapse to one | |
| 243 | + ** 6. Leading and trailing dashes -> remove | |
| 244 | + ** 7. Markup <...> and &...; -> remove | |
| 245 | + ** | |
| 246 | + ** This implementation does the conversion in-place. | |
| 247 | + */ | |
| 234 | 248 | for(i=j=0; z[i]; i++){ |
| 235 | 249 | if( fossil_isalnum(z[i]) ){ |
| 236 | 250 | z[j++] = fossil_tolower(z[i]); |
| 237 | - }else if( fossil_isspace(z[i]) && j>0 && fossil_isalnum(z[j-1]) ){ | |
| 238 | - z[j++] = '-'; | |
| 251 | + }else if( fossil_isspace(z[i]) || z[i]=='-' || z[i]=='_' ){ | |
| 252 | + if( j>0 && z[j-1]!='-' ) z[j++] = '-'; | |
| 239 | 253 | }else if( z[i]=='<' ){ |
| 240 | 254 | do{ i++; }while( z[i]!=0 && z[i]!='>' ); |
| 255 | + }else if( z[i]=='&' ){ | |
| 256 | + do{ i++; }while( z[i]!=0 && z[i]!=';' ); | |
| 257 | + }else if( (z[i]&0x80)!=0 ){ | |
| 258 | + z[j++] = z[i]; | |
| 241 | 259 | } |
| 242 | 260 | } |
| 243 | 261 | if( j>0 && z[j-1]=='-' ) j--; |
| 244 | 262 | z[j] = 0; |
| 245 | 263 | } |
| 246 | 264 |
| --- src/markdown_html.c | |
| +++ src/markdown_html.c | |
| @@ -229,17 +229,35 @@ | |
| 229 | INTER_BLOCK(ob); |
| 230 | z = fossil_strdup(blob_buffer(text)); |
| 231 | if( z==0 ){ |
| 232 | j = 0; |
| 233 | }else{ |
| 234 | for(i=j=0; z[i]; i++){ |
| 235 | if( fossil_isalnum(z[i]) ){ |
| 236 | z[j++] = fossil_tolower(z[i]); |
| 237 | }else if( fossil_isspace(z[i]) && j>0 && fossil_isalnum(z[j-1]) ){ |
| 238 | z[j++] = '-'; |
| 239 | }else if( z[i]=='<' ){ |
| 240 | do{ i++; }while( z[i]!=0 && z[i]!='>' ); |
| 241 | } |
| 242 | } |
| 243 | if( j>0 && z[j-1]=='-' ) j--; |
| 244 | z[j] = 0; |
| 245 | } |
| 246 |
| --- src/markdown_html.c | |
| +++ src/markdown_html.c | |
| @@ -229,17 +229,35 @@ | |
| 229 | INTER_BLOCK(ob); |
| 230 | z = fossil_strdup(blob_buffer(text)); |
| 231 | if( z==0 ){ |
| 232 | j = 0; |
| 233 | }else{ |
| 234 | /* |
| 235 | ** The GitHub "slugify" algorithm converts the text of a markdown header |
| 236 | ** into an ID for that header. The algorithm is: |
| 237 | ** |
| 238 | ** 1. ASCII alphanumerics -> convert to lower case |
| 239 | ** 2. Spaces, hyphens, underscores -> convert to '-' |
| 240 | ** 3. Non-ASCII -> preserve as-is |
| 241 | ** 4. Other punctuation -> remove |
| 242 | ** 5. Multiple consecutive dashes -> collapse to one |
| 243 | ** 6. Leading and trailing dashes -> remove |
| 244 | ** 7. Markup <...> and &...; -> remove |
| 245 | ** |
| 246 | ** This implementation does the conversion in-place. |
| 247 | */ |
| 248 | for(i=j=0; z[i]; i++){ |
| 249 | if( fossil_isalnum(z[i]) ){ |
| 250 | z[j++] = fossil_tolower(z[i]); |
| 251 | }else if( fossil_isspace(z[i]) || z[i]=='-' || z[i]=='_' ){ |
| 252 | if( j>0 && z[j-1]!='-' ) z[j++] = '-'; |
| 253 | }else if( z[i]=='<' ){ |
| 254 | do{ i++; }while( z[i]!=0 && z[i]!='>' ); |
| 255 | }else if( z[i]=='&' ){ |
| 256 | do{ i++; }while( z[i]!=0 && z[i]!=';' ); |
| 257 | }else if( (z[i]&0x80)!=0 ){ |
| 258 | z[j++] = z[i]; |
| 259 | } |
| 260 | } |
| 261 | if( j>0 && z[j-1]=='-' ) j--; |
| 262 | z[j] = 0; |
| 263 | } |
| 264 |