Fossil SCM
Minor optimizations: drop a few redundant comparisons and calculations, and take advantage of the logical AND short-circuit by testing the least expensive and most unlikely condition first. Also fold the repeated comments into cross-references.
Commit
490d38ff2e0079e76054bf207cb94de8ab1ecdb8
Parent
b86a2fc7eb20968…
1 file changed
+36
-55
+36
-55
| --- src/comformat.c | ||
| +++ src/comformat.c | ||
| @@ -157,27 +157,24 @@ | ||
| 157 | 157 | int strlen_utf8(const char *zString, int lengthBytes) |
| 158 | 158 | { |
| 159 | 159 | #if 0 |
| 160 | 160 | assert( lengthBytes>=0 ); |
| 161 | 161 | #endif |
| 162 | - int lengthUTF8=0; /* Counted UTF-8 sequences. */ | |
| 163 | - int i; | |
| 164 | - for( i=0; i<lengthBytes; i++ ){ | |
| 162 | + int i; /* Counted bytes. */ | |
| 163 | + int lengthUTF8; /* Counted UTF-8 sequences. */ | |
| 164 | + for( i=0, lengthUTF8=0; i<lengthBytes; i++, lengthUTF8++ ){ | |
| 165 | 165 | char c = zString[i]; |
| 166 | - lengthUTF8++; | |
| 167 | - if( (c&0xc0)==0xc0 ){ /* Any UTF-8 lead byte 11xxxxxx */ | |
| 168 | - int cchUTF8=1; /* Code units consumed. */ | |
| 169 | - int maxUTF8=1; /* Expected sequence length. */ | |
| 170 | - if( (c&0xe0)==0xc0 )maxUTF8=2; /* UTF-8 lead byte 110vvvvv */ | |
| 171 | - else if( (c&0xf0)==0xe0 )maxUTF8=3; /* UTF-8 lead byte 1110vvvv */ | |
| 172 | - else if( (c&0xf8)==0xf0 )maxUTF8=4; /* UTF-8 lead byte 11110vvv */ | |
| 173 | - while( i<lengthBytes-1 && | |
| 174 | - cchUTF8<maxUTF8 && | |
| 175 | - (zString[i+1]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */ | |
| 176 | - cchUTF8++; | |
| 177 | - i++; | |
| 178 | - } | |
| 166 | + int cchUTF8=1; /* Code units consumed. */ | |
| 167 | + int maxUTF8=1; /* Expected sequence length. */ | |
| 168 | + if( (c&0xe0)==0xc0 )maxUTF8=2; /* UTF-8 lead byte 110vvvvv */ | |
| 169 | + else if( (c&0xf0)==0xe0 )maxUTF8=3; /* UTF-8 lead byte 1110vvvv */ | |
| 170 | + else if( (c&0xf8)==0xf0 )maxUTF8=4; /* UTF-8 lead byte 11110vvv */ | |
| 171 | + while( cchUTF8<maxUTF8 && | |
| 172 | + i<lengthBytes-1 && | |
| 173 | + (zString[i+1]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */ | |
| 174 | + cchUTF8++; | |
| 175 | + i++; | |
| 179 | 176 | } |
| 180 | 177 | } |
| 181 | 178 | return lengthUTF8; |
| 182 | 179 | } |
| 183 | 180 | |
| @@ -223,10 +220,11 @@ | ||
| 223 | 220 | int *pLineCnt, /* [in/out] Pointer to the total line count. */ |
| 224 | 221 | const char **pzLine /* [out] Pointer to the end of the logical line. */ |
| 225 | 222 | ){ |
| 226 | 223 | int index = 0, charCnt = 0, lineCnt = 0, maxChars, i; |
| 227 | 224 | char zBuf[400]; int iBuf=0; /* Output buffer and counter. */ |
| 225 | + int cchUTF8, maxUTF8; /* Helper variables to count UTF-8 sequences. */ | |
| 228 | 226 | if( !zLine ) return; |
| 229 | 227 | if( lineChars<=0 ) return; |
| 230 | 228 | #if 0 |
| 231 | 229 | assert( indent<sizeof(zBuf)-5 ); /* See following comments to explain */ |
| 232 | 230 | assert( origIndent<sizeof(zBuf)-5 ); /* these limits. */ |
| @@ -294,35 +292,23 @@ | ||
| 294 | 292 | charCnt++; |
| 295 | 293 | }else{ |
| 296 | 294 | charCnt++; |
| 297 | 295 | } |
| 298 | 296 | assert( c!='\n' || charCnt==0 ); |
| 299 | - /* | |
| 300 | - ** Avoid output of incomplete UTF-8 sequences, and also avoid line breaks | |
| 301 | - ** inside UTF-8 sequences. Incomplete, ill-formed and overlong sequences are | |
| 302 | - ** kept together. The invalid lead bytes 0xC0 to 0xC1 and 0xF5 to 0xF7 are | |
| 303 | - ** allowed to initiate (ill-formed) 2- and 4-byte sequences, respectively, | |
| 304 | - ** the other invalid lead bytes 0xF8 to 0xFF are treated as invalid 1-byte | |
| 305 | - ** sequences (as lone trail bytes). | |
| 306 | - */ | |
| 307 | - if( (c&0xc0)==0xc0 && zLine[index]!=0 ){ /* Any UTF-8 lead byte 11xxxxxx */ | |
| 308 | - int cchUTF8=1; /* Code units consumed. */ | |
| 309 | - int maxUTF8=1; /* Expected sequence length. */ | |
| 310 | - zBuf[iBuf++]=c; | |
| 311 | - if( (c&0xe0)==0xc0 )maxUTF8=2; /* UTF-8 lead byte 110vvvvv */ | |
| 312 | - else if( (c&0xf0)==0xe0 )maxUTF8=3; /* UTF-8 lead byte 1110vvvv */ | |
| 313 | - else if( (c&0xf8)==0xf0 )maxUTF8=4; /* UTF-8 lead byte 11110vvv */ | |
| 314 | - while( cchUTF8<maxUTF8 && | |
| 315 | - (zLine[index]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */ | |
| 316 | - cchUTF8++; | |
| 317 | - zBuf[iBuf++] = zLine[index++]; | |
| 318 | - } | |
| 319 | - maxChars--; | |
| 320 | - }else{ | |
| 321 | - zBuf[iBuf++] = c; | |
| 322 | - maxChars -= useChars; | |
| 323 | - } | |
| 297 | + zBuf[iBuf++] = c; | |
| 298 | + /* Skip over UTF-8 sequences, see comment on strlen_utf8() for details. */ | |
| 299 | + cchUTF8=1; /* Code units consumed. */ | |
| 300 | + maxUTF8=1; /* Expected sequence length. */ | |
| 301 | + if( (c&0xe0)==0xc0 )maxUTF8=2; /* UTF-8 lead byte 110vvvvv */ | |
| 302 | + else if( (c&0xf0)==0xe0 )maxUTF8=3; /* UTF-8 lead byte 1110vvvv */ | |
| 303 | + else if( (c&0xf8)==0xf0 )maxUTF8=4; /* UTF-8 lead byte 11110vvv */ | |
| 304 | + while( cchUTF8<maxUTF8 && | |
| 305 | + (zLine[index]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */ | |
| 306 | + cchUTF8++; | |
| 307 | + zBuf[iBuf++] = zLine[index++]; | |
| 308 | + } | |
| 309 | + maxChars -= useChars; | |
| 324 | 310 | if( maxChars<=0 ) break; |
| 325 | 311 | if( c=='\n' ) break; |
| 326 | 312 | } |
| 327 | 313 | if( charCnt>0 ){ |
| 328 | 314 | zBuf[iBuf++] = '\n'; |
| @@ -362,10 +348,11 @@ | ||
| 362 | 348 | int si, sk, i, k, kc; |
| 363 | 349 | int doIndent = 0; |
| 364 | 350 | char *zBuf; |
| 365 | 351 | char zBuffer[400]; |
| 366 | 352 | int lineCnt = 0; |
| 353 | + int cchUTF8, maxUTF8; /* Helper variables to count UTF-8 sequences. */ | |
| 367 | 354 | |
| 368 | 355 | if( width<0 ){ |
| 369 | 356 | comment_set_maxchars(indent, &maxChars); |
| 370 | 357 | } |
| 371 | 358 | if( zText==0 ) zText = "(NULL)"; |
| @@ -389,26 +376,20 @@ | ||
| 389 | 376 | return lineCnt; |
| 390 | 377 | } |
| 391 | 378 | for(sk=si=i=k=kc=0; zText[i] && kc<maxChars; i++){ |
| 392 | 379 | char c = zText[i]; |
| 393 | 380 | kc++; /* Count complete UTF-8 sequences. */ |
| 394 | - /* | |
| 395 | - ** Avoid line breaks inside UTF-8 sequences. Incomplete, ill-formed and | |
| 396 | - ** overlong sequences are kept together. The invalid lead bytes 0xC0 to | |
| 397 | - ** 0xC1 and 0xF5 to 0xF7 are allowed to initiate (ill-formed) 2- and | |
| 398 | - ** 4-byte sequences, respectively, the other invalid lead bytes 0xF8 to | |
| 399 | - ** 0xFF are treated as invalid 1-byte sequences (as lone trail bytes). | |
| 400 | - */ | |
| 401 | - if( (c&0xc0)==0xc0 && zText[i+1]!=0 ){ /* Any UTF-8 lead byte 11xxxxxx */ | |
| 402 | - int cchUTF8=1; /* Code units consumed. */ | |
| 403 | - int maxUTF8=1; /* Expected sequence length. */ | |
| 404 | - if( (c&0xe0)==0xc0 )maxUTF8=2; /* UTF-8 lead byte 110vvvvv */ | |
| 405 | - else if( (c&0xf0)==0xe0 )maxUTF8=3; /* UTF-8 lead byte 1110vvvv */ | |
| 406 | - else if( (c&0xf8)==0xf0 )maxUTF8=4; /* UTF-8 lead byte 11110vvv */ | |
| 381 | + /* Skip over UTF-8 sequences, see comment on strlen_utf8() for details. */ | |
| 382 | + cchUTF8=1; /* Code units consumed. */ | |
| 383 | + maxUTF8=1; /* Expected sequence length. */ | |
| 384 | + if( (c&0xe0)==0xc0 )maxUTF8=2; /* UTF-8 lead byte 110vvvvv */ | |
| 385 | + else if( (c&0xf0)==0xe0 )maxUTF8=3; /* UTF-8 lead byte 1110vvvv */ | |
| 386 | + else if( (c&0xf8)==0xf0 )maxUTF8=4; /* UTF-8 lead byte 11110vvv */ | |
| 387 | + if( maxUTF8>1 ){ | |
| 407 | 388 | zBuf[k++] = c; |
| 408 | 389 | while( cchUTF8<maxUTF8 && |
| 409 | - (zText[i+1]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */ | |
| 390 | + (zText[i+1]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */ | |
| 410 | 391 | cchUTF8++; |
| 411 | 392 | zBuf[k++] = zText[++i]; |
| 412 | 393 | } |
| 413 | 394 | } |
| 414 | 395 | else if( fossil_isspace(c) ){ |
| 415 | 396 |
| --- src/comformat.c | |
| +++ src/comformat.c | |
| @@ -157,27 +157,24 @@ | |
| 157 | int strlen_utf8(const char *zString, int lengthBytes) |
| 158 | { |
| 159 | #if 0 |
| 160 | assert( lengthBytes>=0 ); |
| 161 | #endif |
| 162 | int lengthUTF8=0; /* Counted UTF-8 sequences. */ |
| 163 | int i; |
| 164 | for( i=0; i<lengthBytes; i++ ){ |
| 165 | char c = zString[i]; |
| 166 | lengthUTF8++; |
| 167 | if( (c&0xc0)==0xc0 ){ /* Any UTF-8 lead byte 11xxxxxx */ |
| 168 | int cchUTF8=1; /* Code units consumed. */ |
| 169 | int maxUTF8=1; /* Expected sequence length. */ |
| 170 | if( (c&0xe0)==0xc0 )maxUTF8=2; /* UTF-8 lead byte 110vvvvv */ |
| 171 | else if( (c&0xf0)==0xe0 )maxUTF8=3; /* UTF-8 lead byte 1110vvvv */ |
| 172 | else if( (c&0xf8)==0xf0 )maxUTF8=4; /* UTF-8 lead byte 11110vvv */ |
| 173 | while( i<lengthBytes-1 && |
| 174 | cchUTF8<maxUTF8 && |
| 175 | (zString[i+1]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */ |
| 176 | cchUTF8++; |
| 177 | i++; |
| 178 | } |
| 179 | } |
| 180 | } |
| 181 | return lengthUTF8; |
| 182 | } |
| 183 | |
| @@ -223,10 +220,11 @@ | |
| 223 | int *pLineCnt, /* [in/out] Pointer to the total line count. */ |
| 224 | const char **pzLine /* [out] Pointer to the end of the logical line. */ |
| 225 | ){ |
| 226 | int index = 0, charCnt = 0, lineCnt = 0, maxChars, i; |
| 227 | char zBuf[400]; int iBuf=0; /* Output buffer and counter. */ |
| 228 | if( !zLine ) return; |
| 229 | if( lineChars<=0 ) return; |
| 230 | #if 0 |
| 231 | assert( indent<sizeof(zBuf)-5 ); /* See following comments to explain */ |
| 232 | assert( origIndent<sizeof(zBuf)-5 ); /* these limits. */ |
| @@ -294,35 +292,23 @@ | |
| 294 | charCnt++; |
| 295 | }else{ |
| 296 | charCnt++; |
| 297 | } |
| 298 | assert( c!='\n' || charCnt==0 ); |
| 299 | /* |
| 300 | ** Avoid output of incomplete UTF-8 sequences, and also avoid line breaks |
| 301 | ** inside UTF-8 sequences. Incomplete, ill-formed and overlong sequences are |
| 302 | ** kept together. The invalid lead bytes 0xC0 to 0xC1 and 0xF5 to 0xF7 are |
| 303 | ** allowed to initiate (ill-formed) 2- and 4-byte sequences, respectively, |
| 304 | ** the other invalid lead bytes 0xF8 to 0xFF are treated as invalid 1-byte |
| 305 | ** sequences (as lone trail bytes). |
| 306 | */ |
| 307 | if( (c&0xc0)==0xc0 && zLine[index]!=0 ){ /* Any UTF-8 lead byte 11xxxxxx */ |
| 308 | int cchUTF8=1; /* Code units consumed. */ |
| 309 | int maxUTF8=1; /* Expected sequence length. */ |
| 310 | zBuf[iBuf++]=c; |
| 311 | if( (c&0xe0)==0xc0 )maxUTF8=2; /* UTF-8 lead byte 110vvvvv */ |
| 312 | else if( (c&0xf0)==0xe0 )maxUTF8=3; /* UTF-8 lead byte 1110vvvv */ |
| 313 | else if( (c&0xf8)==0xf0 )maxUTF8=4; /* UTF-8 lead byte 11110vvv */ |
| 314 | while( cchUTF8<maxUTF8 && |
| 315 | (zLine[index]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */ |
| 316 | cchUTF8++; |
| 317 | zBuf[iBuf++] = zLine[index++]; |
| 318 | } |
| 319 | maxChars--; |
| 320 | }else{ |
| 321 | zBuf[iBuf++] = c; |
| 322 | maxChars -= useChars; |
| 323 | } |
| 324 | if( maxChars<=0 ) break; |
| 325 | if( c=='\n' ) break; |
| 326 | } |
| 327 | if( charCnt>0 ){ |
| 328 | zBuf[iBuf++] = '\n'; |
| @@ -362,10 +348,11 @@ | |
| 362 | int si, sk, i, k, kc; |
| 363 | int doIndent = 0; |
| 364 | char *zBuf; |
| 365 | char zBuffer[400]; |
| 366 | int lineCnt = 0; |
| 367 | |
| 368 | if( width<0 ){ |
| 369 | comment_set_maxchars(indent, &maxChars); |
| 370 | } |
| 371 | if( zText==0 ) zText = "(NULL)"; |
| @@ -389,26 +376,20 @@ | |
| 389 | return lineCnt; |
| 390 | } |
| 391 | for(sk=si=i=k=kc=0; zText[i] && kc<maxChars; i++){ |
| 392 | char c = zText[i]; |
| 393 | kc++; /* Count complete UTF-8 sequences. */ |
| 394 | /* |
| 395 | ** Avoid line breaks inside UTF-8 sequences. Incomplete, ill-formed and |
| 396 | ** overlong sequences are kept together. The invalid lead bytes 0xC0 to |
| 397 | ** 0xC1 and 0xF5 to 0xF7 are allowed to initiate (ill-formed) 2- and |
| 398 | ** 4-byte sequences, respectively, the other invalid lead bytes 0xF8 to |
| 399 | ** 0xFF are treated as invalid 1-byte sequences (as lone trail bytes). |
| 400 | */ |
| 401 | if( (c&0xc0)==0xc0 && zText[i+1]!=0 ){ /* Any UTF-8 lead byte 11xxxxxx */ |
| 402 | int cchUTF8=1; /* Code units consumed. */ |
| 403 | int maxUTF8=1; /* Expected sequence length. */ |
| 404 | if( (c&0xe0)==0xc0 )maxUTF8=2; /* UTF-8 lead byte 110vvvvv */ |
| 405 | else if( (c&0xf0)==0xe0 )maxUTF8=3; /* UTF-8 lead byte 1110vvvv */ |
| 406 | else if( (c&0xf8)==0xf0 )maxUTF8=4; /* UTF-8 lead byte 11110vvv */ |
| 407 | zBuf[k++] = c; |
| 408 | while( cchUTF8<maxUTF8 && |
| 409 | (zText[i+1]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */ |
| 410 | cchUTF8++; |
| 411 | zBuf[k++] = zText[++i]; |
| 412 | } |
| 413 | } |
| 414 | else if( fossil_isspace(c) ){ |
| 415 |
| --- src/comformat.c | |
| +++ src/comformat.c | |
| @@ -157,27 +157,24 @@ | |
| 157 | int strlen_utf8(const char *zString, int lengthBytes) |
| 158 | { |
| 159 | #if 0 |
| 160 | assert( lengthBytes>=0 ); |
| 161 | #endif |
| 162 | int i; /* Counted bytes. */ |
| 163 | int lengthUTF8; /* Counted UTF-8 sequences. */ |
| 164 | for( i=0, lengthUTF8=0; i<lengthBytes; i++, lengthUTF8++ ){ |
| 165 | char c = zString[i]; |
| 166 | int cchUTF8=1; /* Code units consumed. */ |
| 167 | int maxUTF8=1; /* Expected sequence length. */ |
| 168 | if( (c&0xe0)==0xc0 )maxUTF8=2; /* UTF-8 lead byte 110vvvvv */ |
| 169 | else if( (c&0xf0)==0xe0 )maxUTF8=3; /* UTF-8 lead byte 1110vvvv */ |
| 170 | else if( (c&0xf8)==0xf0 )maxUTF8=4; /* UTF-8 lead byte 11110vvv */ |
| 171 | while( cchUTF8<maxUTF8 && |
| 172 | i<lengthBytes-1 && |
| 173 | (zString[i+1]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */ |
| 174 | cchUTF8++; |
| 175 | i++; |
| 176 | } |
| 177 | } |
| 178 | return lengthUTF8; |
| 179 | } |
| 180 | |
| @@ -223,10 +220,11 @@ | |
| 220 | int *pLineCnt, /* [in/out] Pointer to the total line count. */ |
| 221 | const char **pzLine /* [out] Pointer to the end of the logical line. */ |
| 222 | ){ |
| 223 | int index = 0, charCnt = 0, lineCnt = 0, maxChars, i; |
| 224 | char zBuf[400]; int iBuf=0; /* Output buffer and counter. */ |
| 225 | int cchUTF8, maxUTF8; /* Helper variables to count UTF-8 sequences. */ |
| 226 | if( !zLine ) return; |
| 227 | if( lineChars<=0 ) return; |
| 228 | #if 0 |
| 229 | assert( indent<sizeof(zBuf)-5 ); /* See following comments to explain */ |
| 230 | assert( origIndent<sizeof(zBuf)-5 ); /* these limits. */ |
| @@ -294,35 +292,23 @@ | |
| 292 | charCnt++; |
| 293 | }else{ |
| 294 | charCnt++; |
| 295 | } |
| 296 | assert( c!='\n' || charCnt==0 ); |
| 297 | zBuf[iBuf++] = c; |
| 298 | /* Skip over UTF-8 sequences, see comment on strlen_utf8() for details. */ |
| 299 | cchUTF8=1; /* Code units consumed. */ |
| 300 | maxUTF8=1; /* Expected sequence length. */ |
| 301 | if( (c&0xe0)==0xc0 )maxUTF8=2; /* UTF-8 lead byte 110vvvvv */ |
| 302 | else if( (c&0xf0)==0xe0 )maxUTF8=3; /* UTF-8 lead byte 1110vvvv */ |
| 303 | else if( (c&0xf8)==0xf0 )maxUTF8=4; /* UTF-8 lead byte 11110vvv */ |
| 304 | while( cchUTF8<maxUTF8 && |
| 305 | (zLine[index]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */ |
| 306 | cchUTF8++; |
| 307 | zBuf[iBuf++] = zLine[index++]; |
| 308 | } |
| 309 | maxChars -= useChars; |
| 310 | if( maxChars<=0 ) break; |
| 311 | if( c=='\n' ) break; |
| 312 | } |
| 313 | if( charCnt>0 ){ |
| 314 | zBuf[iBuf++] = '\n'; |
| @@ -362,10 +348,11 @@ | |
| 348 | int si, sk, i, k, kc; |
| 349 | int doIndent = 0; |
| 350 | char *zBuf; |
| 351 | char zBuffer[400]; |
| 352 | int lineCnt = 0; |
| 353 | int cchUTF8, maxUTF8; /* Helper variables to count UTF-8 sequences. */ |
| 354 | |
| 355 | if( width<0 ){ |
| 356 | comment_set_maxchars(indent, &maxChars); |
| 357 | } |
| 358 | if( zText==0 ) zText = "(NULL)"; |
| @@ -389,26 +376,20 @@ | |
| 376 | return lineCnt; |
| 377 | } |
| 378 | for(sk=si=i=k=kc=0; zText[i] && kc<maxChars; i++){ |
| 379 | char c = zText[i]; |
| 380 | kc++; /* Count complete UTF-8 sequences. */ |
| 381 | /* Skip over UTF-8 sequences, see comment on strlen_utf8() for details. */ |
| 382 | cchUTF8=1; /* Code units consumed. */ |
| 383 | maxUTF8=1; /* Expected sequence length. */ |
| 384 | if( (c&0xe0)==0xc0 )maxUTF8=2; /* UTF-8 lead byte 110vvvvv */ |
| 385 | else if( (c&0xf0)==0xe0 )maxUTF8=3; /* UTF-8 lead byte 1110vvvv */ |
| 386 | else if( (c&0xf8)==0xf0 )maxUTF8=4; /* UTF-8 lead byte 11110vvv */ |
| 387 | if( maxUTF8>1 ){ |
| 388 | zBuf[k++] = c; |
| 389 | while( cchUTF8<maxUTF8 && |
| 390 | (zText[i+1]&0xc0)==0x80 ){ /* UTF-8 trail byte 10vvvvvv */ |
| 391 | cchUTF8++; |
| 392 | zBuf[k++] = zText[++i]; |
| 393 | } |
| 394 | } |
| 395 | else if( fossil_isspace(c) ){ |
| 396 |