Fossil SCM
Fix overly eager byte swapping when checking for UTF-16 text.
Commit
3f2f1e62fa1570174572b30e86932916414003b8
Parent
8af1541ac536d91…
1 file changed: +6 -5
| --- src/diff.c | ||
| +++ src/diff.c | ||
| @@ -295,18 +295,19 @@ | ||
| 295 | 295 | /* |
| 296 | 296 | ** Maximum length of a line in a text file, in UTF-16 characters. (4096) |
| 297 | 297 | ** The number of bytes represented by this value cannot exceed LENGTH_MASK |
| 298 | 298 | ** bytes, because that is the line buffer size used by the diff engine. |
| 299 | 299 | */ |
| 300 | -#define UTF16_LENGTH_MASK_SZ (LENGTH_MASK_SZ-(sizeof(WCHAR_T)-sizeof(char))) | |
| 301 | -#define UTF16_LENGTH_MASK ((1<<UTF16_LENGTH_MASK_SZ)-1) | |
| 300 | +#define UTF16_LENGTH_MASK_SZ (LENGTH_MASK_SZ-(sizeof(WCHAR_T)-sizeof(char))) | |
| 301 | +#define UTF16_LENGTH_MASK ((1<<UTF16_LENGTH_MASK_SZ)-1) | |
| 302 | 302 | |
| 303 | 303 | /* |
| 304 | 304 | ** This macro is used to swap the byte order of a UTF-16 character in the |
| 305 | 305 | ** looks_like_utf16() function. |
| 306 | 306 | */ |
| 307 | -#define UTF16_SWAP(ch) ((((ch) << 8) & 0xFF00) | (((ch) >> 8) & 0xFF)) | |
| 307 | +#define UTF16_SWAP(ch) ((((ch) << 8) & 0xFF00) | (((ch) >> 8) & 0xFF)) | |
| 308 | +#define UTF16_SWAP_IF(expr,ch) ((expr) ? UTF16_SWAP((ch)) : (ch)) | |
| 308 | 309 | |
| 309 | 310 | /* |
| 310 | 311 | ** This function attempts to scan each logical line within the blob to |
| 311 | 312 | ** determine the type of content it appears to contain. The return value |
| 312 | 313 | ** is a combination of one or more of the LOOK_XXX flags (see above): |
| @@ -354,11 +355,11 @@ | ||
| 354 | 355 | } |
| 355 | 356 | if( c==0 ){ |
| 356 | 357 | flags |= LOOK_NUL; /* NUL character in a file -> binary */ |
| 357 | 358 | }else if( c=='\r' ){ |
| 358 | 359 | flags |= LOOK_CR; |
| 359 | - if( n<=sizeof(WCHAR_T) || UTF16_SWAP(z[1])!='\n' ){ | |
| 360 | + if( n<=sizeof(WCHAR_T) || UTF16_SWAP_IF(bReverse, z[1])!='\n' ){ | |
| 360 | 361 | flags |= LOOK_LONE_CR; /* More chars, next char is not LF */ |
| 361 | 362 | } |
| 362 | 363 | } |
| 363 | 364 | j = (c!='\n'); |
| 364 | 365 | if( !j ) flags |= (LOOK_LF | LOOK_LONE_LF); /* Found LF as first char */ |
| @@ -384,11 +385,11 @@ | ||
| 384 | 385 | flags |= LOOK_LONG; /* Very long line -> binary */ |
| 385 | 386 | } |
| 386 | 387 | j = 0; |
| 387 | 388 | }else if( c=='\r' ){ |
| 388 | 389 | flags |= LOOK_CR; |
| 389 | - if( n<=sizeof(WCHAR_T) || UTF16_SWAP(z[1])!='\n' ){ | |
| 390 | + if( n<=sizeof(WCHAR_T) || UTF16_SWAP_IF(bReverse, z[1])!='\n' ){ | |
| 390 | 391 | flags |= LOOK_LONE_CR; /* More chars, next char is not LF */ |
| 391 | 392 | } |
| 392 | 393 | } |
| 393 | 394 | } |
| 394 | 395 | if( j>UTF16_LENGTH_MASK ){ |
| 395 | 396 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -295,18 +295,19 @@ | |
| 295 | /* |
| 296 | ** Maximum length of a line in a text file, in UTF-16 characters. (4096) |
| 297 | ** The number of bytes represented by this value cannot exceed LENGTH_MASK |
| 298 | ** bytes, because that is the line buffer size used by the diff engine. |
| 299 | */ |
| 300 | #define UTF16_LENGTH_MASK_SZ (LENGTH_MASK_SZ-(sizeof(WCHAR_T)-sizeof(char))) |
| 301 | #define UTF16_LENGTH_MASK ((1<<UTF16_LENGTH_MASK_SZ)-1) |
| 302 | |
| 303 | /* |
| 304 | ** This macro is used to swap the byte order of a UTF-16 character in the |
| 305 | ** looks_like_utf16() function. |
| 306 | */ |
| 307 | #define UTF16_SWAP(ch) ((((ch) << 8) & 0xFF00) | (((ch) >> 8) & 0xFF)) |
| 308 | |
| 309 | /* |
| 310 | ** This function attempts to scan each logical line within the blob to |
| 311 | ** determine the type of content it appears to contain. The return value |
| 312 | ** is a combination of one or more of the LOOK_XXX flags (see above): |
| @@ -354,11 +355,11 @@ | |
| 354 | } |
| 355 | if( c==0 ){ |
| 356 | flags |= LOOK_NUL; /* NUL character in a file -> binary */ |
| 357 | }else if( c=='\r' ){ |
| 358 | flags |= LOOK_CR; |
| 359 | if( n<=sizeof(WCHAR_T) || UTF16_SWAP(z[1])!='\n' ){ |
| 360 | flags |= LOOK_LONE_CR; /* More chars, next char is not LF */ |
| 361 | } |
| 362 | } |
| 363 | j = (c!='\n'); |
| 364 | if( !j ) flags |= (LOOK_LF | LOOK_LONE_LF); /* Found LF as first char */ |
| @@ -384,11 +385,11 @@ | |
| 384 | flags |= LOOK_LONG; /* Very long line -> binary */ |
| 385 | } |
| 386 | j = 0; |
| 387 | }else if( c=='\r' ){ |
| 388 | flags |= LOOK_CR; |
| 389 | if( n<=sizeof(WCHAR_T) || UTF16_SWAP(z[1])!='\n' ){ |
| 390 | flags |= LOOK_LONE_CR; /* More chars, next char is not LF */ |
| 391 | } |
| 392 | } |
| 393 | } |
| 394 | if( j>UTF16_LENGTH_MASK ){ |
| 395 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -295,18 +295,19 @@ | |
| 295 | /* |
| 296 | ** Maximum length of a line in a text file, in UTF-16 characters. (4096) |
| 297 | ** The number of bytes represented by this value cannot exceed LENGTH_MASK |
| 298 | ** bytes, because that is the line buffer size used by the diff engine. |
| 299 | */ |
| 300 | #define UTF16_LENGTH_MASK_SZ (LENGTH_MASK_SZ-(sizeof(WCHAR_T)-sizeof(char))) |
| 301 | #define UTF16_LENGTH_MASK ((1<<UTF16_LENGTH_MASK_SZ)-1) |
| 302 | |
| 303 | /* |
| 304 | ** This macro is used to swap the byte order of a UTF-16 character in the |
| 305 | ** looks_like_utf16() function. |
| 306 | */ |
| 307 | #define UTF16_SWAP(ch) ((((ch) << 8) & 0xFF00) | (((ch) >> 8) & 0xFF)) |
| 308 | #define UTF16_SWAP_IF(expr,ch) ((expr) ? UTF16_SWAP((ch)) : (ch)) |
| 309 | |
| 310 | /* |
| 311 | ** This function attempts to scan each logical line within the blob to |
| 312 | ** determine the type of content it appears to contain. The return value |
| 313 | ** is a combination of one or more of the LOOK_XXX flags (see above): |
| @@ -354,11 +355,11 @@ | |
| 355 | } |
| 356 | if( c==0 ){ |
| 357 | flags |= LOOK_NUL; /* NUL character in a file -> binary */ |
| 358 | }else if( c=='\r' ){ |
| 359 | flags |= LOOK_CR; |
| 360 | if( n<=sizeof(WCHAR_T) || UTF16_SWAP_IF(bReverse, z[1])!='\n' ){ |
| 361 | flags |= LOOK_LONE_CR; /* More chars, next char is not LF */ |
| 362 | } |
| 363 | } |
| 364 | j = (c!='\n'); |
| 365 | if( !j ) flags |= (LOOK_LF | LOOK_LONE_LF); /* Found LF as first char */ |
| @@ -384,11 +385,11 @@ | |
| 385 | flags |= LOOK_LONG; /* Very long line -> binary */ |
| 386 | } |
| 387 | j = 0; |
| 388 | }else if( c=='\r' ){ |
| 389 | flags |= LOOK_CR; |
| 390 | if( n<=sizeof(WCHAR_T) || UTF16_SWAP_IF(bReverse, z[1])!='\n' ){ |
| 391 | flags |= LOOK_LONE_CR; /* More chars, next char is not LF */ |
| 392 | } |
| 393 | } |
| 394 | } |
| 395 | if( j>UTF16_LENGTH_MASK ){ |
| 396 |