Fossil SCM
Move the starts_with_utf16_bom() call out of looks_like_utf16(), so that the BOM check is performed only once.
Commit
b58800e5ebc3b191debd2a9d0615653a6c6d0932
Parent
d4d66d133a24722…
2 files changed
+2
-2
+8
-6
+2
-2
| --- src/checkin.c | ||
| +++ src/checkin.c | ||
| @@ -915,13 +915,13 @@ | ||
| 915 | 915 | char *zMsg; /* Warning message */ |
| 916 | 916 | Blob fname; /* Relative pathname of the file */ |
| 917 | 917 | static int allOk = 0; /* Set to true to disable this routine */ |
| 918 | 918 | |
| 919 | 919 | if( allOk ) return 0; |
| 920 | - fUnicode = starts_with_utf16_bom(p, 0, 0); | |
| 920 | + fUnicode = starts_with_utf16_bom(p, 0, &lookFlags); | |
| 921 | 921 | if( fUnicode ){ |
| 922 | - lookFlags = looks_like_utf16(p); | |
| 922 | + lookFlags = looks_like_utf16(p, lookFlags); | |
| 923 | 923 | if( lookFlags&LOOK_ODD ){ |
| 924 | 924 | /* Content with an odd number of bytes cannot be UTF-16. */ |
| 925 | 925 | fUnicode = 0; |
| 926 | 926 | /* Therefore, check if the content appears to be UTF-8. */ |
| 927 | 927 | lookFlags = looks_like_utf8(p); |
| 928 | 928 |
| --- src/checkin.c | |
| +++ src/checkin.c | |
| @@ -915,13 +915,13 @@ | |
| 915 | char *zMsg; /* Warning message */ |
| 916 | Blob fname; /* Relative pathname of the file */ |
| 917 | static int allOk = 0; /* Set to true to disable this routine */ |
| 918 | |
| 919 | if( allOk ) return 0; |
| 920 | fUnicode = starts_with_utf16_bom(p, 0, 0); |
| 921 | if( fUnicode ){ |
| 922 | lookFlags = looks_like_utf16(p); |
| 923 | if( lookFlags&LOOK_ODD ){ |
| 924 | /* Content with an odd number of bytes cannot be UTF-16. */ |
| 925 | fUnicode = 0; |
| 926 | /* Therefore, check if the content appears to be UTF-8. */ |
| 927 | lookFlags = looks_like_utf8(p); |
| 928 |
| --- src/checkin.c | |
| +++ src/checkin.c | |
| @@ -915,13 +915,13 @@ | |
| 915 | char *zMsg; /* Warning message */ |
| 916 | Blob fname; /* Relative pathname of the file */ |
| 917 | static int allOk = 0; /* Set to true to disable this routine */ |
| 918 | |
| 919 | if( allOk ) return 0; |
| 920 | fUnicode = starts_with_utf16_bom(p, 0, &lookFlags); |
| 921 | if( fUnicode ){ |
| 922 | lookFlags = looks_like_utf16(p, lookFlags); |
| 923 | if( lookFlags&LOOK_ODD ){ |
| 924 | /* Content with an odd number of bytes cannot be UTF-16. */ |
| 925 | fUnicode = 0; |
| 926 | /* Therefore, check if the content appears to be UTF-8. */ |
| 927 | lookFlags = looks_like_utf8(p); |
| 928 |
+8
-6
| --- src/diff.c | ||
| +++ src/diff.c | ||
| @@ -68,19 +68,22 @@ | ||
| 68 | 68 | /* |
| 69 | 69 | ** Output flags for the looks_like_utf8() and looks_like_utf16() routines used |
| 70 | 70 | ** to convey status information about the blob content. |
| 71 | 71 | */ |
| 72 | 72 | #define LOOK_NONE ((int)0x00000000) /* Nothing special was found. */ |
| 73 | -#define LOOK_REVERSE ((int)0x00000001) /* Reversed UTF-16 BOM is found. */ | |
| 74 | 73 | #define LOOK_NUL ((int)0x00000002) /* One or more NUL chars were found. */ |
| 75 | 74 | #define LOOK_LONE_CR ((int)0x00000004) /* An unpaired CR char was found. */ |
| 76 | 75 | #define LOOK_LONE_LF ((int)0x00000008) /* An unpaired LF char was found. */ |
| 77 | 76 | #define LOOK_CRLF ((int)0x00000010) /* One or more CR/LF pairs were found. */ |
| 78 | 77 | #define LOOK_LENGTH ((int)0x00000020) /* An over length line was found. */ |
| 79 | 78 | #define LOOK_ODD ((int)0x00000040) /* An odd number of bytes was found. */ |
| 80 | 79 | #define LOOK_CR (LOOK_LONE_CR|LOOK_CRLF) /* One or more CR chars were found. */ |
| 81 | 80 | #define LOOK_LF (LOOK_LONE_LF|LOOK_CRLF) /* One or more LF chars were found. */ |
| 81 | + | |
| 82 | +/* Only used in starts_with_utf16_bom() and looks_like_utf16() */ | |
| 83 | +#define LOOK_REVERSE ((int)0x00000001) /* Reversed UTF-16 BOM is found. */ | |
| 84 | + | |
| 82 | 85 | #endif /* INTERFACE */ |
| 83 | 86 | |
| 84 | 87 | /* |
| 85 | 88 | ** Maximum length of a line in a text file, in bytes. (2**13 = 8192 bytes) |
| 86 | 89 | */ |
| @@ -296,16 +299,15 @@ | ||
| 296 | 299 | ** Whether or not this function examines the entire contents of the blob is |
| 297 | 300 | ** officially unspecified. |
| 298 | 301 | ** |
| 299 | 302 | ************************************ WARNING ********************************** |
| 300 | 303 | */ |
| 301 | -int looks_like_utf16(const Blob *pContent){ | |
| 304 | +int looks_like_utf16(const Blob *pContent, int flags){ | |
| 302 | 305 | const WCHAR_T *z = (WCHAR_T *)blob_buffer(pContent); |
| 303 | 306 | unsigned int n = blob_size(pContent); |
| 304 | - int j = 1, c, flags = LOOK_NONE; | |
| 307 | + int j = 1, c; | |
| 305 | 308 | |
| 306 | - if( !starts_with_utf16_bom(pContent, 0, &flags) ) return flags; | |
| 307 | 309 | if( n%sizeof(WCHAR_T) ){ |
| 308 | 310 | flags |= LOOK_ODD; |
| 309 | 311 | } |
| 310 | 312 | c = *z; |
| 311 | 313 | while( n>=sizeof(WCHAR_T) ){ |
| @@ -2466,12 +2468,12 @@ | ||
| 2466 | 2468 | int fUtf16; /* return value of starts_with_utf16_bom() */ |
| 2467 | 2469 | int lookFlags; /* output flags from looks_like_utf8/utf16() */ |
| 2468 | 2470 | if( g.argc<3 ) usage("FILENAME"); |
| 2469 | 2471 | blob_read_from_file(&blob, g.argv[2]); |
| 2470 | 2472 | fUtf8 = starts_with_utf8_bom(&blob, 0); |
| 2471 | - fUtf16 = starts_with_utf16_bom(&blob, 0, 0); | |
| 2472 | - lookFlags = fUtf16 ? looks_like_utf16(&blob) : | |
| 2473 | + fUtf16 = starts_with_utf16_bom(&blob, 0, &lookFlags); | |
| 2474 | + lookFlags = fUtf16 ? looks_like_utf16(&blob, lookFlags) : | |
| 2473 | 2475 | looks_like_utf8(&blob); |
| 2474 | 2476 | eType = !(lookFlags&(LOOK_NUL|LOOK_LENGTH|LOOK_ODD)); |
| 2475 | 2477 | fossil_print("File \"%s\" has %d bytes.\n",g.argv[2],blob_size(&blob)); |
| 2476 | 2478 | fossil_print("Starts with UTF-8 BOM: %s\n",fUtf8?"yes":"no"); |
| 2477 | 2479 | fossil_print("Starts with UTF-16 BOM: %s\n",fUtf16?"yes":"no"); |
| 2478 | 2480 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -68,19 +68,22 @@ | |
| 68 | /* |
| 69 | ** Output flags for the looks_like_utf8() and looks_like_utf16() routines used |
| 70 | ** to convey status information about the blob content. |
| 71 | */ |
| 72 | #define LOOK_NONE ((int)0x00000000) /* Nothing special was found. */ |
| 73 | #define LOOK_REVERSE ((int)0x00000001) /* Reversed UTF-16 BOM is found. */ |
| 74 | #define LOOK_NUL ((int)0x00000002) /* One or more NUL chars were found. */ |
| 75 | #define LOOK_LONE_CR ((int)0x00000004) /* An unpaired CR char was found. */ |
| 76 | #define LOOK_LONE_LF ((int)0x00000008) /* An unpaired LF char was found. */ |
| 77 | #define LOOK_CRLF ((int)0x00000010) /* One or more CR/LF pairs were found. */ |
| 78 | #define LOOK_LENGTH ((int)0x00000020) /* An over length line was found. */ |
| 79 | #define LOOK_ODD ((int)0x00000040) /* An odd number of bytes was found. */ |
| 80 | #define LOOK_CR (LOOK_LONE_CR|LOOK_CRLF) /* One or more CR chars were found. */ |
| 81 | #define LOOK_LF (LOOK_LONE_LF|LOOK_CRLF) /* One or more LF chars were found. */ |
| 82 | #endif /* INTERFACE */ |
| 83 | |
| 84 | /* |
| 85 | ** Maximum length of a line in a text file, in bytes. (2**13 = 8192 bytes) |
| 86 | */ |
| @@ -296,16 +299,15 @@ | |
| 296 | ** Whether or not this function examines the entire contents of the blob is |
| 297 | ** officially unspecified. |
| 298 | ** |
| 299 | ************************************ WARNING ********************************** |
| 300 | */ |
| 301 | int looks_like_utf16(const Blob *pContent){ |
| 302 | const WCHAR_T *z = (WCHAR_T *)blob_buffer(pContent); |
| 303 | unsigned int n = blob_size(pContent); |
| 304 | int j = 1, c, flags = LOOK_NONE; |
| 305 | |
| 306 | if( !starts_with_utf16_bom(pContent, 0, &flags) ) return flags; |
| 307 | if( n%sizeof(WCHAR_T) ){ |
| 308 | flags |= LOOK_ODD; |
| 309 | } |
| 310 | c = *z; |
| 311 | while( n>=sizeof(WCHAR_T) ){ |
| @@ -2466,12 +2468,12 @@ | |
| 2466 | int fUtf16; /* return value of starts_with_utf16_bom() */ |
| 2467 | int lookFlags; /* output flags from looks_like_utf8/utf16() */ |
| 2468 | if( g.argc<3 ) usage("FILENAME"); |
| 2469 | blob_read_from_file(&blob, g.argv[2]); |
| 2470 | fUtf8 = starts_with_utf8_bom(&blob, 0); |
| 2471 | fUtf16 = starts_with_utf16_bom(&blob, 0, 0); |
| 2472 | lookFlags = fUtf16 ? looks_like_utf16(&blob) : |
| 2473 | looks_like_utf8(&blob); |
| 2474 | eType = !(lookFlags&(LOOK_NUL|LOOK_LENGTH|LOOK_ODD)); |
| 2475 | fossil_print("File \"%s\" has %d bytes.\n",g.argv[2],blob_size(&blob)); |
| 2476 | fossil_print("Starts with UTF-8 BOM: %s\n",fUtf8?"yes":"no"); |
| 2477 | fossil_print("Starts with UTF-16 BOM: %s\n",fUtf16?"yes":"no"); |
| 2478 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -68,19 +68,22 @@ | |
| 68 | /* |
| 69 | ** Output flags for the looks_like_utf8() and looks_like_utf16() routines used |
| 70 | ** to convey status information about the blob content. |
| 71 | */ |
| 72 | #define LOOK_NONE ((int)0x00000000) /* Nothing special was found. */ |
| 73 | #define LOOK_NUL ((int)0x00000002) /* One or more NUL chars were found. */ |
| 74 | #define LOOK_LONE_CR ((int)0x00000004) /* An unpaired CR char was found. */ |
| 75 | #define LOOK_LONE_LF ((int)0x00000008) /* An unpaired LF char was found. */ |
| 76 | #define LOOK_CRLF ((int)0x00000010) /* One or more CR/LF pairs were found. */ |
| 77 | #define LOOK_LENGTH ((int)0x00000020) /* An over length line was found. */ |
| 78 | #define LOOK_ODD ((int)0x00000040) /* An odd number of bytes was found. */ |
| 79 | #define LOOK_CR (LOOK_LONE_CR|LOOK_CRLF) /* One or more CR chars were found. */ |
| 80 | #define LOOK_LF (LOOK_LONE_LF|LOOK_CRLF) /* One or more LF chars were found. */ |
| 81 | |
| 82 | /* Only used in starts_with_utf16_bom() and looks_like_utf16() */ |
| 83 | #define LOOK_REVERSE ((int)0x00000001) /* Reversed UTF-16 BOM is found. */ |
| 84 | |
| 85 | #endif /* INTERFACE */ |
| 86 | |
| 87 | /* |
| 88 | ** Maximum length of a line in a text file, in bytes. (2**13 = 8192 bytes) |
| 89 | */ |
| @@ -296,16 +299,15 @@ | |
| 299 | ** Whether or not this function examines the entire contents of the blob is |
| 300 | ** officially unspecified. |
| 301 | ** |
| 302 | ************************************ WARNING ********************************** |
| 303 | */ |
| 304 | int looks_like_utf16(const Blob *pContent, int flags){ |
| 305 | const WCHAR_T *z = (WCHAR_T *)blob_buffer(pContent); |
| 306 | unsigned int n = blob_size(pContent); |
| 307 | int j = 1, c; |
| 308 | |
| 309 | if( n%sizeof(WCHAR_T) ){ |
| 310 | flags |= LOOK_ODD; |
| 311 | } |
| 312 | c = *z; |
| 313 | while( n>=sizeof(WCHAR_T) ){ |
| @@ -2466,12 +2468,12 @@ | |
| 2468 | int fUtf16; /* return value of starts_with_utf16_bom() */ |
| 2469 | int lookFlags; /* output flags from looks_like_utf8/utf16() */ |
| 2470 | if( g.argc<3 ) usage("FILENAME"); |
| 2471 | blob_read_from_file(&blob, g.argv[2]); |
| 2472 | fUtf8 = starts_with_utf8_bom(&blob, 0); |
| 2473 | fUtf16 = starts_with_utf16_bom(&blob, 0, &lookFlags); |
| 2474 | lookFlags = fUtf16 ? looks_like_utf16(&blob, lookFlags) : |
| 2475 | looks_like_utf8(&blob); |
| 2476 | eType = !(lookFlags&(LOOK_NUL|LOOK_LENGTH|LOOK_ODD)); |
| 2477 | fossil_print("File \"%s\" has %d bytes.\n",g.argv[2],blob_size(&blob)); |
| 2478 | fossil_print("Starts with UTF-8 BOM: %s\n",fUtf8?"yes":"no"); |
| 2479 | fossil_print("Starts with UTF-16 BOM: %s\n",fUtf16?"yes":"no"); |
| 2480 |