Fossil SCM
Try two optimizations (to be reverted after further measurements if they don't pay off): (0) Abort the forward scan for space characters as soon as the scanned characters no longer fit on the current line. (1) Add a shortcut for ASCII characters in the UTF-8 string analysis.
Commit
4e8dd7df46cf1acb50c14ab66d68d2b7328241c87eb200d78c216acce7a5de6a
Parent
1cc31c309d4e7d1…
1 file changed
+11
-7
+11
-7
| --- src/comformat.c | ||
| +++ src/comformat.c | ||
| @@ -241,19 +241,21 @@ | ||
| 241 | 241 | ** algorithm, the NUL character is treated the same as a spacing character. |
| 242 | 242 | */ |
| 243 | 243 | static int comment_next_space( |
| 244 | 244 | const char *zLine, /* [in] The comment line being printed. */ |
| 245 | 245 | int index, /* [in] The current character index being handled. */ |
| 246 | + int maxChars, /* [in] Optimization hint to abort before space found. */ | |
| 246 | 247 | int *sumWidth /* [out] Summated width of all characters to next space. */ |
| 247 | 248 | ){ |
| 248 | 249 | int cchUTF8, utf32, wcwidth = 0; |
| 249 | 250 | int nextIndex = index; |
| 250 | 251 | for(;;){ |
| 251 | 252 | char_info_utf8(&zLine[nextIndex],&cchUTF8,&utf32); |
| 252 | 253 | nextIndex += cchUTF8; |
| 253 | 254 | wcwidth += cli_wcwidth(utf32); |
| 254 | - if( zLine[nextIndex]==0 || fossil_isspace(zLine[nextIndex]) ){ | |
| 255 | + if( zLine[nextIndex]==0 || fossil_isspace(zLine[nextIndex]) || | |
| 256 | + wcwidth>maxChars ){ | |
| 255 | 257 | *sumWidth = wcwidth; |
| 256 | 258 | return nextIndex; |
| 257 | 259 | } |
| 258 | 260 | } |
| 259 | 261 | return 0; /* NOT REACHED */ |
| @@ -277,11 +279,16 @@ | ||
| 277 | 279 | ){ |
| 278 | 280 | int i = 0; /* Counted bytes. */ |
| 279 | 281 | int cchUTF8 = 1; /* Code units consumed. */ |
| 280 | 282 | int maxUTF8 = 1; /* Expected sequence length. */ |
| 281 | 283 | char c = z[i++]; |
| 282 | - if( (c&0xe0)==0xc0 ) maxUTF8 = 2; /* UTF-8 lead byte 110vvvvv */ | |
| 284 | + if( (c&0x80)==0x00 ){ /* 7-bit ASCII character. */ | |
| 285 | + *pCchUTF8 = 1; | |
| 286 | + *pUtf32 = (int)z[0]; | |
| 287 | + return; | |
| 288 | + } | |
| 289 | + else if( (c&0xe0)==0xc0 ) maxUTF8 = 2; /* UTF-8 lead byte 110vvvvv */ | |
| 283 | 290 | else if( (c&0xf0)==0xe0 ) maxUTF8 = 3; /* UTF-8 lead byte 1110vvvv */ |
| 284 | 291 | else if( (c&0xf8)==0xf0 ) maxUTF8 = 4; /* UTF-8 lead byte 11110vvv */ |
| 285 | 292 | while( cchUTF8<maxUTF8 && |
| 286 | 293 | (z[i]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */ |
| 287 | 294 | cchUTF8++; |
| @@ -313,13 +320,10 @@ | ||
| 313 | 320 | case 2: |
| 314 | 321 | *pUtf32 = |
| 315 | 322 | ( (z[0] & 0x1f)<< 6 ) | |
| 316 | 323 | ( (z[1] & 0x3f)<< 0 ) ; |
| 317 | 324 | break; |
| 318 | - case 1: | |
| 319 | - *pUtf32 = (int)z[0]; | |
| 320 | - break; | |
| 321 | 325 | } |
| 322 | 326 | #ifdef FOSSIL_DEBUG |
| 323 | 327 | assert( |
| 324 | 328 | *pUtf32>=0 && *pUtf32<=0x10ffff && /* Valid range U+0000 to U+10FFFF. */ |
| 325 | 329 | *pUtf32<0xd800 && *pUtf32>0xdfff /* Non-scalar (UTF-16 surrogates). */ |
| @@ -424,11 +428,11 @@ | ||
| 424 | 428 | lineCnt++; |
| 425 | 429 | charCnt = 0; |
| 426 | 430 | useChars = 0; |
| 427 | 431 | }else if( c=='\t' ){ |
| 428 | 432 | int sumWidth; |
| 429 | - int nextIndex = comment_next_space(zLine, index, &sumWidth); | |
| 433 | + int nextIndex = comment_next_space(zLine, index, maxChars, &sumWidth); | |
| 430 | 434 | if( nextIndex<=0 || sumWidth>maxChars ){ |
| 431 | 435 | break; |
| 432 | 436 | } |
| 433 | 437 | charCnt++; |
| 434 | 438 | useChars = COMMENT_TAB_WIDTH; |
| @@ -436,11 +440,11 @@ | ||
| 436 | 440 | zBuf[iBuf++] = ' '; |
| 437 | 441 | break; |
| 438 | 442 | } |
| 439 | 443 | }else if( wordBreak && fossil_isspace(c) ){ |
| 440 | 444 | int sumWidth; |
| 441 | - int nextIndex = comment_next_space(zLine, index, &sumWidth); | |
| 445 | + int nextIndex = comment_next_space(zLine, index, maxChars, &sumWidth); | |
| 442 | 446 | if( nextIndex<=0 || sumWidth>=maxChars ){ |
| 443 | 447 | break; |
| 444 | 448 | } |
| 445 | 449 | charCnt++; |
| 446 | 450 | }else{ |
| 447 | 451 |
| --- src/comformat.c | |
| +++ src/comformat.c | |
| @@ -241,19 +241,21 @@ | |
| 241 | ** algorithm, the NUL character is treated the same as a spacing character. |
| 242 | */ |
| 243 | static int comment_next_space( |
| 244 | const char *zLine, /* [in] The comment line being printed. */ |
| 245 | int index, /* [in] The current character index being handled. */ |
| 246 | int *sumWidth /* [out] Summated width of all characters to next space. */ |
| 247 | ){ |
| 248 | int cchUTF8, utf32, wcwidth = 0; |
| 249 | int nextIndex = index; |
| 250 | for(;;){ |
| 251 | char_info_utf8(&zLine[nextIndex],&cchUTF8,&utf32); |
| 252 | nextIndex += cchUTF8; |
| 253 | wcwidth += cli_wcwidth(utf32); |
| 254 | if( zLine[nextIndex]==0 || fossil_isspace(zLine[nextIndex]) ){ |
| 255 | *sumWidth = wcwidth; |
| 256 | return nextIndex; |
| 257 | } |
| 258 | } |
| 259 | return 0; /* NOT REACHED */ |
| @@ -277,11 +279,16 @@ | |
| 277 | ){ |
| 278 | int i = 0; /* Counted bytes. */ |
| 279 | int cchUTF8 = 1; /* Code units consumed. */ |
| 280 | int maxUTF8 = 1; /* Expected sequence length. */ |
| 281 | char c = z[i++]; |
| 282 | if( (c&0xe0)==0xc0 ) maxUTF8 = 2; /* UTF-8 lead byte 110vvvvv */ |
| 283 | else if( (c&0xf0)==0xe0 ) maxUTF8 = 3; /* UTF-8 lead byte 1110vvvv */ |
| 284 | else if( (c&0xf8)==0xf0 ) maxUTF8 = 4; /* UTF-8 lead byte 11110vvv */ |
| 285 | while( cchUTF8<maxUTF8 && |
| 286 | (z[i]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */ |
| 287 | cchUTF8++; |
| @@ -313,13 +320,10 @@ | |
| 313 | case 2: |
| 314 | *pUtf32 = |
| 315 | ( (z[0] & 0x1f)<< 6 ) | |
| 316 | ( (z[1] & 0x3f)<< 0 ) ; |
| 317 | break; |
| 318 | case 1: |
| 319 | *pUtf32 = (int)z[0]; |
| 320 | break; |
| 321 | } |
| 322 | #ifdef FOSSIL_DEBUG |
| 323 | assert( |
| 324 | *pUtf32>=0 && *pUtf32<=0x10ffff && /* Valid range U+0000 to U+10FFFF. */ |
| 325 | *pUtf32<0xd800 && *pUtf32>0xdfff /* Non-scalar (UTF-16 surrogates). */ |
| @@ -424,11 +428,11 @@ | |
| 424 | lineCnt++; |
| 425 | charCnt = 0; |
| 426 | useChars = 0; |
| 427 | }else if( c=='\t' ){ |
| 428 | int sumWidth; |
| 429 | int nextIndex = comment_next_space(zLine, index, &sumWidth); |
| 430 | if( nextIndex<=0 || sumWidth>maxChars ){ |
| 431 | break; |
| 432 | } |
| 433 | charCnt++; |
| 434 | useChars = COMMENT_TAB_WIDTH; |
| @@ -436,11 +440,11 @@ | |
| 436 | zBuf[iBuf++] = ' '; |
| 437 | break; |
| 438 | } |
| 439 | }else if( wordBreak && fossil_isspace(c) ){ |
| 440 | int sumWidth; |
| 441 | int nextIndex = comment_next_space(zLine, index, &sumWidth); |
| 442 | if( nextIndex<=0 || sumWidth>=maxChars ){ |
| 443 | break; |
| 444 | } |
| 445 | charCnt++; |
| 446 | }else{ |
| 447 |
| --- src/comformat.c | |
| +++ src/comformat.c | |
| @@ -241,19 +241,21 @@ | |
| 241 | ** algorithm, the NUL character is treated the same as a spacing character. |
| 242 | */ |
| 243 | static int comment_next_space( |
| 244 | const char *zLine, /* [in] The comment line being printed. */ |
| 245 | int index, /* [in] The current character index being handled. */ |
| 246 | int maxChars, /* [in] Optimization hint to abort before space found. */ |
| 247 | int *sumWidth /* [out] Summated width of all characters to next space. */ |
| 248 | ){ |
| 249 | int cchUTF8, utf32, wcwidth = 0; |
| 250 | int nextIndex = index; |
| 251 | for(;;){ |
| 252 | char_info_utf8(&zLine[nextIndex],&cchUTF8,&utf32); |
| 253 | nextIndex += cchUTF8; |
| 254 | wcwidth += cli_wcwidth(utf32); |
| 255 | if( zLine[nextIndex]==0 || fossil_isspace(zLine[nextIndex]) || |
| 256 | wcwidth>maxChars ){ |
| 257 | *sumWidth = wcwidth; |
| 258 | return nextIndex; |
| 259 | } |
| 260 | } |
| 261 | return 0; /* NOT REACHED */ |
| @@ -277,11 +279,16 @@ | |
| 279 | ){ |
| 280 | int i = 0; /* Counted bytes. */ |
| 281 | int cchUTF8 = 1; /* Code units consumed. */ |
| 282 | int maxUTF8 = 1; /* Expected sequence length. */ |
| 283 | char c = z[i++]; |
| 284 | if( (c&0x80)==0x00 ){ /* 7-bit ASCII character. */ |
| 285 | *pCchUTF8 = 1; |
| 286 | *pUtf32 = (int)z[0]; |
| 287 | return; |
| 288 | } |
| 289 | else if( (c&0xe0)==0xc0 ) maxUTF8 = 2; /* UTF-8 lead byte 110vvvvv */ |
| 290 | else if( (c&0xf0)==0xe0 ) maxUTF8 = 3; /* UTF-8 lead byte 1110vvvv */ |
| 291 | else if( (c&0xf8)==0xf0 ) maxUTF8 = 4; /* UTF-8 lead byte 11110vvv */ |
| 292 | while( cchUTF8<maxUTF8 && |
| 293 | (z[i]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */ |
| 294 | cchUTF8++; |
| @@ -313,13 +320,10 @@ | |
| 320 | case 2: |
| 321 | *pUtf32 = |
| 322 | ( (z[0] & 0x1f)<< 6 ) | |
| 323 | ( (z[1] & 0x3f)<< 0 ) ; |
| 324 | break; |
| 325 | } |
| 326 | #ifdef FOSSIL_DEBUG |
| 327 | assert( |
| 328 | *pUtf32>=0 && *pUtf32<=0x10ffff && /* Valid range U+0000 to U+10FFFF. */ |
| 329 | *pUtf32<0xd800 && *pUtf32>0xdfff /* Non-scalar (UTF-16 surrogates). */ |
| @@ -424,11 +428,11 @@ | |
| 428 | lineCnt++; |
| 429 | charCnt = 0; |
| 430 | useChars = 0; |
| 431 | }else if( c=='\t' ){ |
| 432 | int sumWidth; |
| 433 | int nextIndex = comment_next_space(zLine, index, maxChars, &sumWidth); |
| 434 | if( nextIndex<=0 || sumWidth>maxChars ){ |
| 435 | break; |
| 436 | } |
| 437 | charCnt++; |
| 438 | useChars = COMMENT_TAB_WIDTH; |
| @@ -436,11 +440,11 @@ | |
| 440 | zBuf[iBuf++] = ' '; |
| 441 | break; |
| 442 | } |
| 443 | }else if( wordBreak && fossil_isspace(c) ){ |
| 444 | int sumWidth; |
| 445 | int nextIndex = comment_next_space(zLine, index, maxChars, &sumWidth); |
| 446 | if( nextIndex<=0 || sumWidth>=maxChars ){ |
| 447 | break; |
| 448 | } |
| 449 | charCnt++; |
| 450 | }else{ |
| 451 |