Fossil SCM
Re-apply off-by-one fix. Otherwise blobs like {0x00, 0x0a, 0x00} will result in an access of the 4th byte of the BLOB. In test-looks-like-utf, could_be_utf16() should always be called; otherwise the behavior on UTF-16 without a BOM will be endian-dependent.
Commit
e545d3514e29b964a014e76d166ad55e6f0849ea
Parent
4ffaf2ee08a4bc5…
1 file changed
+5
-6
+5
-6
| --- src/diff.c | ||
| +++ src/diff.c | ||
| @@ -361,11 +361,11 @@ | ||
| 361 | 361 | } |
| 362 | 362 | if( c==0 ){ |
| 363 | 363 | flags |= LOOK_NUL; /* NUL character in a file -> binary */ |
| 364 | 364 | }else if( c=='\r' ){ |
| 365 | 365 | flags |= LOOK_CR; |
| 366 | - if( n<=sizeof(WCHAR_T) || UTF16_SWAP_IF(bReverse, z[1])!='\n' ){ | |
| 366 | + if( n<2*sizeof(WCHAR_T) || UTF16_SWAP_IF(bReverse, z[1])!='\n' ){ | |
| 367 | 367 | flags |= LOOK_LONE_CR; /* More chars, next char is not LF */ |
| 368 | 368 | } |
| 369 | 369 | } |
| 370 | 370 | j = (c!='\n'); |
| 371 | 371 | if( !j ) flags |= (LOOK_LF | LOOK_LONE_LF); /* Found LF as first char */ |
| @@ -2600,20 +2600,19 @@ | ||
| 2600 | 2600 | int fUtf16; /* return value of starts_with_utf16_bom() */ |
| 2601 | 2601 | int fUnicode; /* return value of could_be_utf16() */ |
| 2602 | 2602 | int lookFlags; /* output flags from looks_like_utf8/utf16() */ |
| 2603 | 2603 | int bRevUtf16 = 0; /* non-zero -> UTF-16 byte order reversed */ |
| 2604 | 2604 | int bRevUnicode = 0; /* non-zero -> UTF-16 byte order reversed */ |
| 2605 | - int fForceUtf8 = find_option("utf8",0,0)!=0; | |
| 2606 | - int fForceUtf16 = find_option("utf16",0,0)!=0; | |
| 2607 | 2605 | if( g.argc!=3 ) usage("FILENAME"); |
| 2608 | 2606 | blob_read_from_file(&blob, g.argv[2]); |
| 2609 | 2607 | fUtf8 = starts_with_utf8_bom(&blob, 0); |
| 2610 | 2608 | fUtf16 = starts_with_utf16_bom(&blob, 0, &bRevUtf16); |
| 2611 | - if( fForceUtf8 ){ | |
| 2609 | + fUnicode = could_be_utf16(&blob, &bRevUnicode); | |
| 2610 | + if( find_option("utf8",0,0)!=0 ){ | |
| 2612 | 2611 | fUnicode = 0; |
| 2613 | - }else{ | |
| 2614 | - fUnicode = fForceUtf16 || could_be_utf16(&blob, &bRevUnicode); | |
| 2612 | + }else if( find_option("utf16",0,0)!=0 ){ | |
| 2613 | + fUnicode = 1; | |
| 2615 | 2614 | } |
| 2616 | 2615 | lookFlags = fUnicode ? looks_like_utf16(&blob, bRevUnicode, 0) : |
| 2617 | 2616 | looks_like_utf8(&blob, 0); |
| 2618 | 2617 | fossil_print("File \"%s\" has %d bytes.\n",g.argv[2],blob_size(&blob)); |
| 2619 | 2618 | fossil_print("Starts with UTF-8 BOM: %s\n",fUtf8?"yes":"no"); |
| 2620 | 2619 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -361,11 +361,11 @@ | |
| 361 | } |
| 362 | if( c==0 ){ |
| 363 | flags |= LOOK_NUL; /* NUL character in a file -> binary */ |
| 364 | }else if( c=='\r' ){ |
| 365 | flags |= LOOK_CR; |
| 366 | if( n<=sizeof(WCHAR_T) || UTF16_SWAP_IF(bReverse, z[1])!='\n' ){ |
| 367 | flags |= LOOK_LONE_CR; /* More chars, next char is not LF */ |
| 368 | } |
| 369 | } |
| 370 | j = (c!='\n'); |
| 371 | if( !j ) flags |= (LOOK_LF | LOOK_LONE_LF); /* Found LF as first char */ |
| @@ -2600,20 +2600,19 @@ | |
| 2600 | int fUtf16; /* return value of starts_with_utf16_bom() */ |
| 2601 | int fUnicode; /* return value of could_be_utf16() */ |
| 2602 | int lookFlags; /* output flags from looks_like_utf8/utf16() */ |
| 2603 | int bRevUtf16 = 0; /* non-zero -> UTF-16 byte order reversed */ |
| 2604 | int bRevUnicode = 0; /* non-zero -> UTF-16 byte order reversed */ |
| 2605 | int fForceUtf8 = find_option("utf8",0,0)!=0; |
| 2606 | int fForceUtf16 = find_option("utf16",0,0)!=0; |
| 2607 | if( g.argc!=3 ) usage("FILENAME"); |
| 2608 | blob_read_from_file(&blob, g.argv[2]); |
| 2609 | fUtf8 = starts_with_utf8_bom(&blob, 0); |
| 2610 | fUtf16 = starts_with_utf16_bom(&blob, 0, &bRevUtf16); |
| 2611 | if( fForceUtf8 ){ |
| 2612 | fUnicode = 0; |
| 2613 | }else{ |
| 2614 | fUnicode = fForceUtf16 || could_be_utf16(&blob, &bRevUnicode); |
| 2615 | } |
| 2616 | lookFlags = fUnicode ? looks_like_utf16(&blob, bRevUnicode, 0) : |
| 2617 | looks_like_utf8(&blob, 0); |
| 2618 | fossil_print("File \"%s\" has %d bytes.\n",g.argv[2],blob_size(&blob)); |
| 2619 | fossil_print("Starts with UTF-8 BOM: %s\n",fUtf8?"yes":"no"); |
| 2620 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -361,11 +361,11 @@ | |
| 361 | } |
| 362 | if( c==0 ){ |
| 363 | flags |= LOOK_NUL; /* NUL character in a file -> binary */ |
| 364 | }else if( c=='\r' ){ |
| 365 | flags |= LOOK_CR; |
| 366 | if( n<2*sizeof(WCHAR_T) || UTF16_SWAP_IF(bReverse, z[1])!='\n' ){ |
| 367 | flags |= LOOK_LONE_CR; /* More chars, next char is not LF */ |
| 368 | } |
| 369 | } |
| 370 | j = (c!='\n'); |
| 371 | if( !j ) flags |= (LOOK_LF | LOOK_LONE_LF); /* Found LF as first char */ |
| @@ -2600,20 +2600,19 @@ | |
| 2600 | int fUtf16; /* return value of starts_with_utf16_bom() */ |
| 2601 | int fUnicode; /* return value of could_be_utf16() */ |
| 2602 | int lookFlags; /* output flags from looks_like_utf8/utf16() */ |
| 2603 | int bRevUtf16 = 0; /* non-zero -> UTF-16 byte order reversed */ |
| 2604 | int bRevUnicode = 0; /* non-zero -> UTF-16 byte order reversed */ |
| 2605 | if( g.argc!=3 ) usage("FILENAME"); |
| 2606 | blob_read_from_file(&blob, g.argv[2]); |
| 2607 | fUtf8 = starts_with_utf8_bom(&blob, 0); |
| 2608 | fUtf16 = starts_with_utf16_bom(&blob, 0, &bRevUtf16); |
| 2609 | fUnicode = could_be_utf16(&blob, &bRevUnicode); |
| 2610 | if( find_option("utf8",0,0)!=0 ){ |
| 2611 | fUnicode = 0; |
| 2612 | }else if( find_option("utf16",0,0)!=0 ){ |
| 2613 | fUnicode = 1; |
| 2614 | } |
| 2615 | lookFlags = fUnicode ? looks_like_utf16(&blob, bRevUnicode, 0) : |
| 2616 | looks_like_utf8(&blob, 0); |
| 2617 | fossil_print("File \"%s\" has %d bytes.\n",g.argv[2],blob_size(&blob)); |
| 2618 | fossil_print("Starts with UTF-8 BOM: %s\n",fUtf8?"yes":"no"); |
| 2619 |