Fossil SCM

Move the starts_with_utf16_bom() call out of looks_like_utf16(), so that it is done only once.

jan.nijtmans 2013-03-16 06:46 trunk
Commit b58800e5ebc3b191debd2a9d0615653a6c6d0932
2 files changed: src/checkin.c (+2 -2), src/diff.c (+8 -6)
+2 -2
--- src/checkin.c
+++ src/checkin.c
@@ -915,13 +915,13 @@
915915
char *zMsg; /* Warning message */
916916
Blob fname; /* Relative pathname of the file */
917917
static int allOk = 0; /* Set to true to disable this routine */
918918
919919
if( allOk ) return 0;
920
- fUnicode = starts_with_utf16_bom(p, 0, 0);
920
+ fUnicode = starts_with_utf16_bom(p, 0, &lookFlags);
921921
if( fUnicode ){
922
- lookFlags = looks_like_utf16(p);
922
+ lookFlags = looks_like_utf16(p, lookFlags);
923923
if( lookFlags&LOOK_ODD ){
924924
/* Content with an odd number of bytes cannot be UTF-16. */
925925
fUnicode = 0;
926926
/* Therefore, check if the content appears to be UTF-8. */
927927
lookFlags = looks_like_utf8(p);
928928
--- src/checkin.c
+++ src/checkin.c
@@ -915,13 +915,13 @@
915 char *zMsg; /* Warning message */
916 Blob fname; /* Relative pathname of the file */
917 static int allOk = 0; /* Set to true to disable this routine */
918
919 if( allOk ) return 0;
920 fUnicode = starts_with_utf16_bom(p, 0, 0);
921 if( fUnicode ){
922 lookFlags = looks_like_utf16(p);
923 if( lookFlags&LOOK_ODD ){
924 /* Content with an odd number of bytes cannot be UTF-16. */
925 fUnicode = 0;
926 /* Therefore, check if the content appears to be UTF-8. */
927 lookFlags = looks_like_utf8(p);
928
--- src/checkin.c
+++ src/checkin.c
@@ -915,13 +915,13 @@
915 char *zMsg; /* Warning message */
916 Blob fname; /* Relative pathname of the file */
917 static int allOk = 0; /* Set to true to disable this routine */
918
919 if( allOk ) return 0;
920 fUnicode = starts_with_utf16_bom(p, 0, &lookFlags);
921 if( fUnicode ){
922 lookFlags = looks_like_utf16(p, lookFlags);
923 if( lookFlags&LOOK_ODD ){
924 /* Content with an odd number of bytes cannot be UTF-16. */
925 fUnicode = 0;
926 /* Therefore, check if the content appears to be UTF-8. */
927 lookFlags = looks_like_utf8(p);
928
+8 -6
--- src/diff.c
+++ src/diff.c
@@ -68,19 +68,22 @@
6868
/*
6969
** Output flags for the looks_like_utf8() and looks_like_utf16() routines used
7070
** to convey status information about the blob content.
7171
*/
7272
#define LOOK_NONE ((int)0x00000000) /* Nothing special was found. */
73
-#define LOOK_REVERSE ((int)0x00000001) /* Reversed UTF-16 BOM is found. */
7473
#define LOOK_NUL ((int)0x00000002) /* One or more NUL chars were found. */
7574
#define LOOK_LONE_CR ((int)0x00000004) /* An unpaired CR char was found. */
7675
#define LOOK_LONE_LF ((int)0x00000008) /* An unpaired LF char was found. */
7776
#define LOOK_CRLF ((int)0x00000010) /* One or more CR/LF pairs were found. */
7877
#define LOOK_LENGTH ((int)0x00000020) /* An over length line was found. */
7978
#define LOOK_ODD ((int)0x00000040) /* An odd number of bytes was found. */
8079
#define LOOK_CR (LOOK_LONE_CR|LOOK_CRLF) /* One or more CR chars were found. */
8180
#define LOOK_LF (LOOK_LONE_LF|LOOK_CRLF) /* One or more LF chars were found. */
81
+
82
+/* Only used in starts_with_utf16_bom() and looks_like_utf16() */
83
+#define LOOK_REVERSE ((int)0x00000001) /* Reversed UTF-16 BOM is found. */
84
+
8285
#endif /* INTERFACE */
8386
8487
/*
8588
** Maximum length of a line in a text file, in bytes. (2**13 = 8192 bytes)
8689
*/
@@ -296,16 +299,15 @@
296299
** Whether or not this function examines the entire contents of the blob is
297300
** officially unspecified.
298301
**
299302
************************************ WARNING **********************************
300303
*/
301
-int looks_like_utf16(const Blob *pContent){
304
+int looks_like_utf16(const Blob *pContent, int flags){
302305
const WCHAR_T *z = (WCHAR_T *)blob_buffer(pContent);
303306
unsigned int n = blob_size(pContent);
304
- int j = 1, c, flags = LOOK_NONE;
307
+ int j = 1, c;
305308
306
- if( !starts_with_utf16_bom(pContent, 0, &flags) ) return flags;
307309
if( n%sizeof(WCHAR_T) ){
308310
flags |= LOOK_ODD;
309311
}
310312
c = *z;
311313
while( n>=sizeof(WCHAR_T) ){
@@ -2466,12 +2468,12 @@
24662468
int fUtf16; /* return value of starts_with_utf16_bom() */
24672469
int lookFlags; /* output flags from looks_like_utf8/utf16() */
24682470
if( g.argc<3 ) usage("FILENAME");
24692471
blob_read_from_file(&blob, g.argv[2]);
24702472
fUtf8 = starts_with_utf8_bom(&blob, 0);
2471
- fUtf16 = starts_with_utf16_bom(&blob, 0, 0);
2472
- lookFlags = fUtf16 ? looks_like_utf16(&blob) :
2473
+ fUtf16 = starts_with_utf16_bom(&blob, 0, &lookFlags);
2474
+ lookFlags = fUtf16 ? looks_like_utf16(&blob, lookFlags) :
24732475
looks_like_utf8(&blob);
24742476
eType = !(lookFlags&(LOOK_NUL|LOOK_LENGTH|LOOK_ODD));
24752477
fossil_print("File \"%s\" has %d bytes.\n",g.argv[2],blob_size(&blob));
24762478
fossil_print("Starts with UTF-8 BOM: %s\n",fUtf8?"yes":"no");
24772479
fossil_print("Starts with UTF-16 BOM: %s\n",fUtf16?"yes":"no");
24782480
--- src/diff.c
+++ src/diff.c
@@ -68,19 +68,22 @@
68 /*
69 ** Output flags for the looks_like_utf8() and looks_like_utf16() routines used
70 ** to convey status information about the blob content.
71 */
72 #define LOOK_NONE ((int)0x00000000) /* Nothing special was found. */
73 #define LOOK_REVERSE ((int)0x00000001) /* Reversed UTF-16 BOM is found. */
74 #define LOOK_NUL ((int)0x00000002) /* One or more NUL chars were found. */
75 #define LOOK_LONE_CR ((int)0x00000004) /* An unpaired CR char was found. */
76 #define LOOK_LONE_LF ((int)0x00000008) /* An unpaired LF char was found. */
77 #define LOOK_CRLF ((int)0x00000010) /* One or more CR/LF pairs were found. */
78 #define LOOK_LENGTH ((int)0x00000020) /* An over length line was found. */
79 #define LOOK_ODD ((int)0x00000040) /* An odd number of bytes was found. */
80 #define LOOK_CR (LOOK_LONE_CR|LOOK_CRLF) /* One or more CR chars were found. */
81 #define LOOK_LF (LOOK_LONE_LF|LOOK_CRLF) /* One or more LF chars were found. */
 
 
 
 
82 #endif /* INTERFACE */
83
84 /*
85 ** Maximum length of a line in a text file, in bytes. (2**13 = 8192 bytes)
86 */
@@ -296,16 +299,15 @@
296 ** Whether or not this function examines the entire contents of the blob is
297 ** officially unspecified.
298 **
299 ************************************ WARNING **********************************
300 */
301 int looks_like_utf16(const Blob *pContent){
302 const WCHAR_T *z = (WCHAR_T *)blob_buffer(pContent);
303 unsigned int n = blob_size(pContent);
304 int j = 1, c, flags = LOOK_NONE;
305
306 if( !starts_with_utf16_bom(pContent, 0, &flags) ) return flags;
307 if( n%sizeof(WCHAR_T) ){
308 flags |= LOOK_ODD;
309 }
310 c = *z;
311 while( n>=sizeof(WCHAR_T) ){
@@ -2466,12 +2468,12 @@
2466 int fUtf16; /* return value of starts_with_utf16_bom() */
2467 int lookFlags; /* output flags from looks_like_utf8/utf16() */
2468 if( g.argc<3 ) usage("FILENAME");
2469 blob_read_from_file(&blob, g.argv[2]);
2470 fUtf8 = starts_with_utf8_bom(&blob, 0);
2471 fUtf16 = starts_with_utf16_bom(&blob, 0, 0);
2472 lookFlags = fUtf16 ? looks_like_utf16(&blob) :
2473 looks_like_utf8(&blob);
2474 eType = !(lookFlags&(LOOK_NUL|LOOK_LENGTH|LOOK_ODD));
2475 fossil_print("File \"%s\" has %d bytes.\n",g.argv[2],blob_size(&blob));
2476 fossil_print("Starts with UTF-8 BOM: %s\n",fUtf8?"yes":"no");
2477 fossil_print("Starts with UTF-16 BOM: %s\n",fUtf16?"yes":"no");
2478
--- src/diff.c
+++ src/diff.c
@@ -68,19 +68,22 @@
68 /*
69 ** Output flags for the looks_like_utf8() and looks_like_utf16() routines used
70 ** to convey status information about the blob content.
71 */
72 #define LOOK_NONE ((int)0x00000000) /* Nothing special was found. */
 
73 #define LOOK_NUL ((int)0x00000002) /* One or more NUL chars were found. */
74 #define LOOK_LONE_CR ((int)0x00000004) /* An unpaired CR char was found. */
75 #define LOOK_LONE_LF ((int)0x00000008) /* An unpaired LF char was found. */
76 #define LOOK_CRLF ((int)0x00000010) /* One or more CR/LF pairs were found. */
77 #define LOOK_LENGTH ((int)0x00000020) /* An over length line was found. */
78 #define LOOK_ODD ((int)0x00000040) /* An odd number of bytes was found. */
79 #define LOOK_CR (LOOK_LONE_CR|LOOK_CRLF) /* One or more CR chars were found. */
80 #define LOOK_LF (LOOK_LONE_LF|LOOK_CRLF) /* One or more LF chars were found. */
81
82 /* Only used in starts_with_utf16_bom() and looks_like_utf16() */
83 #define LOOK_REVERSE ((int)0x00000001) /* Reversed UTF-16 BOM is found. */
84
85 #endif /* INTERFACE */
86
87 /*
88 ** Maximum length of a line in a text file, in bytes. (2**13 = 8192 bytes)
89 */
@@ -296,16 +299,15 @@
299 ** Whether or not this function examines the entire contents of the blob is
300 ** officially unspecified.
301 **
302 ************************************ WARNING **********************************
303 */
304 int looks_like_utf16(const Blob *pContent, int flags){
305 const WCHAR_T *z = (WCHAR_T *)blob_buffer(pContent);
306 unsigned int n = blob_size(pContent);
307 int j = 1, c;
308
 
309 if( n%sizeof(WCHAR_T) ){
310 flags |= LOOK_ODD;
311 }
312 c = *z;
313 while( n>=sizeof(WCHAR_T) ){
@@ -2466,12 +2468,12 @@
2468 int fUtf16; /* return value of starts_with_utf16_bom() */
2469 int lookFlags; /* output flags from looks_like_utf8/utf16() */
2470 if( g.argc<3 ) usage("FILENAME");
2471 blob_read_from_file(&blob, g.argv[2]);
2472 fUtf8 = starts_with_utf8_bom(&blob, 0);
2473 fUtf16 = starts_with_utf16_bom(&blob, 0, &lookFlags);
2474 lookFlags = fUtf16 ? looks_like_utf16(&blob, lookFlags) :
2475 looks_like_utf8(&blob);
2476 eType = !(lookFlags&(LOOK_NUL|LOOK_LENGTH|LOOK_ODD));
2477 fossil_print("File \"%s\" has %d bytes.\n",g.argv[2],blob_size(&blob));
2478 fossil_print("Starts with UTF-8 BOM: %s\n",fUtf8?"yes":"no");
2479 fossil_print("Starts with UTF-16 BOM: %s\n",fUtf16?"yes":"no");
2480

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button