Fossil SCM
Merge trunk. Let looks_like_text() give different values for UTF-16 BE/LE. Not used yet.
Commit
348637dedfdef286ad80d1cb9002dec242fef3b1
Parent
b5123d51542a179…
4 files changed
+1
-1
+1
-1
+10
-6
+10
-6
+1
-1
| --- src/checkin.c | ||
| +++ src/checkin.c | ||
| @@ -898,11 +898,11 @@ | ||
| 898 | 898 | if( eType<0 ){ |
| 899 | 899 | const char *zWarning ; |
| 900 | 900 | Blob ans; |
| 901 | 901 | char cReply; |
| 902 | 902 | |
| 903 | - if( eType&1 ){ | |
| 903 | + if( eType==-3 ){ | |
| 904 | 904 | if( crnlOk ){ |
| 905 | 905 | return; /* We don't want CR/NL warnings for this file. */ |
| 906 | 906 | } |
| 907 | 907 | zWarning = "CR/NL line endings"; |
| 908 | 908 | }else{ |
| 909 | 909 |
| --- src/checkin.c | |
| +++ src/checkin.c | |
| @@ -898,11 +898,11 @@ | |
| 898 | if( eType<0 ){ |
| 899 | const char *zWarning ; |
| 900 | Blob ans; |
| 901 | char cReply; |
| 902 | |
| 903 | if( eType&1 ){ |
| 904 | if( crnlOk ){ |
| 905 | return; /* We don't want CR/NL warnings for this file. */ |
| 906 | } |
| 907 | zWarning = "CR/NL line endings"; |
| 908 | }else{ |
| 909 |
| --- src/checkin.c | |
| +++ src/checkin.c | |
| @@ -898,11 +898,11 @@ | |
| 898 | if( eType<0 ){ |
| 899 | const char *zWarning ; |
| 900 | Blob ans; |
| 901 | char cReply; |
| 902 | |
| 903 | if( eType==-3 ){ |
| 904 | if( crnlOk ){ |
| 905 | return; /* We don't want CR/NL warnings for this file. */ |
| 906 | } |
| 907 | zWarning = "CR/NL line endings"; |
| 908 | }else{ |
| 909 |
+1
-1
| --- src/checkin.c | ||
| +++ src/checkin.c | ||
| @@ -898,11 +898,11 @@ | ||
| 898 | 898 | if( eType<0 ){ |
| 899 | 899 | const char *zWarning ; |
| 900 | 900 | Blob ans; |
| 901 | 901 | char cReply; |
| 902 | 902 | |
| 903 | - if( eType&1 ){ | |
| 903 | + if( eType==-3 ){ | |
| 904 | 904 | if( crnlOk ){ |
| 905 | 905 | return; /* We don't want CR/NL warnings for this file. */ |
| 906 | 906 | } |
| 907 | 907 | zWarning = "CR/NL line endings"; |
| 908 | 908 | }else{ |
| 909 | 909 |
| --- src/checkin.c | |
| +++ src/checkin.c | |
| @@ -898,11 +898,11 @@ | |
| 898 | if( eType<0 ){ |
| 899 | const char *zWarning ; |
| 900 | Blob ans; |
| 901 | char cReply; |
| 902 | |
| 903 | if( eType&1 ){ |
| 904 | if( crnlOk ){ |
| 905 | return; /* We don't want CR/NL warnings for this file. */ |
| 906 | } |
| 907 | zWarning = "CR/NL line endings"; |
| 908 | }else{ |
| 909 |
| --- src/checkin.c | |
| +++ src/checkin.c | |
| @@ -898,11 +898,11 @@ | |
| 898 | if( eType<0 ){ |
| 899 | const char *zWarning ; |
| 900 | Blob ans; |
| 901 | char cReply; |
| 902 | |
| 903 | if( eType==-3 ){ |
| 904 | if( crnlOk ){ |
| 905 | return; /* We don't want CR/NL warnings for this file. */ |
| 906 | } |
| 907 | zWarning = "CR/NL line endings"; |
| 908 | }else{ |
| 909 |
+10
-6
| --- src/diff.c | ||
| +++ src/diff.c | ||
| @@ -48,11 +48,11 @@ | ||
| 48 | 48 | "cannot compute difference between binary files\n" |
| 49 | 49 | |
| 50 | 50 | #define DIFF_CANNOT_COMPUTE_SYMLINK \ |
| 51 | 51 | "cannot compute difference between symlink and regular file\n" |
| 52 | 52 | |
| 53 | -#define looks_like_binary(blob) ((looks_like_text(blob)&1) == 0) | |
| 53 | +#define looks_like_binary(blob) ((looks_like_text(blob)&3) == 1) | |
| 54 | 54 | #endif /* INTERFACE */ |
| 55 | 55 | |
| 56 | 56 | /* |
| 57 | 57 | ** Maximum length of a line in a text file. (8192) |
| 58 | 58 | */ |
| @@ -181,16 +181,20 @@ | ||
| 181 | 181 | ** not be UTF-8. |
| 182 | 182 | ** |
| 183 | 183 | ** (0) -- The content appears to be binary because it contains embedded |
| 184 | 184 | ** NUL (\000) characters or an extremely long line. |
| 185 | 185 | ** |
| 186 | -** (-1) -- The content appears to consist entirely of text, with lines | |
| 186 | +** (-1) -- The content appears to consist entirely of text, in the | |
| 187 | +** UTF-16 (LE) encoding. | |
| 188 | +** | |
| 189 | +** (-2) -- The content appears to consist entirely of text, in the | |
| 190 | +** UTF-16 (BE) encoding. | |
| 191 | +** | |
| 192 | +** (-3) -- The content appears to consist entirely of text, with lines | |
| 187 | 193 | ** delimited by carriage-return, line-feed pairs; however, the |
| 188 | 194 | ** encoding may not be UTF-8. |
| 189 | 195 | ** |
| 190 | -** (-2) -- The content appears to consist entirely of text, in the | |
| 191 | -** UTF-16 (BE or LE) encoding. | |
| 192 | 196 | */ |
| 193 | 197 | int looks_like_text(const Blob *pContent){ |
| 194 | 198 | unsigned char *z = (unsigned char *) blob_buffer(pContent); |
| 195 | 199 | unsigned int n = blob_size(pContent); |
| 196 | 200 | int j; |
| @@ -202,11 +206,11 @@ | ||
| 202 | 206 | if( n==0 ) return result; /* Empty file -> text */ |
| 203 | 207 | c = *z; |
| 204 | 208 | if( c==0 ) return 0; /* \000 byte in a file -> binary */ |
| 205 | 209 | if ( (n&1)==0 ){ /* UTF-16 must have an even blob length */ |
| 206 | 210 | if ( (c==0xff) && (z[1]==0xfe) ){ /* UTF-16 LE BOM */ |
| 207 | - result = -2; | |
| 211 | + result = -1; | |
| 208 | 212 | j = LENGTH_MASK/3; |
| 209 | 213 | while( (n-=2)>0 ){ |
| 210 | 214 | c = *(z+=2); |
| 211 | 215 | if( z[1]==0 ){ /* High-byte must be 0 for further checks */ |
| 212 | 216 | if( c==0 ) return 0; /* \000 char in a file -> binary */ |
| @@ -241,11 +245,11 @@ | ||
| 241 | 245 | while( --n>0 ){ |
| 242 | 246 | c = *++z; |
| 243 | 247 | if( c==0 ) return 0; /* \000 byte in a file -> binary */ |
| 244 | 248 | if( c=='\n' ){ |
| 245 | 249 | if( z[-1]=='\r' ){ |
| 246 | - result = -1; /* Contains CR/NL, continue */ | |
| 250 | + result = -3; /* Contains CR/NL, continue */ | |
| 247 | 251 | } |
| 248 | 252 | j = LENGTH_MASK; |
| 249 | 253 | } |
| 250 | 254 | if( --j==0 ){ |
| 251 | 255 | return 0; /* Very long line -> binary */ |
| 252 | 256 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -48,11 +48,11 @@ | |
| 48 | "cannot compute difference between binary files\n" |
| 49 | |
| 50 | #define DIFF_CANNOT_COMPUTE_SYMLINK \ |
| 51 | "cannot compute difference between symlink and regular file\n" |
| 52 | |
| 53 | #define looks_like_binary(blob) ((looks_like_text(blob)&1) == 0) |
| 54 | #endif /* INTERFACE */ |
| 55 | |
| 56 | /* |
| 57 | ** Maximum length of a line in a text file. (8192) |
| 58 | */ |
| @@ -181,16 +181,20 @@ | |
| 181 | ** not be UTF-8. |
| 182 | ** |
| 183 | ** (0) -- The content appears to be binary because it contains embedded |
| 184 | ** NUL (\000) characters or an extremely long line. |
| 185 | ** |
| 186 | ** (-1) -- The content appears to consist entirely of text, with lines |
| 187 | ** delimited by carriage-return, line-feed pairs; however, the |
| 188 | ** encoding may not be UTF-8. |
| 189 | ** |
| 190 | ** (-2) -- The content appears to consist entirely of text, in the |
| 191 | ** UTF-16 (BE or LE) encoding. |
| 192 | */ |
| 193 | int looks_like_text(const Blob *pContent){ |
| 194 | unsigned char *z = (unsigned char *) blob_buffer(pContent); |
| 195 | unsigned int n = blob_size(pContent); |
| 196 | int j; |
| @@ -202,11 +206,11 @@ | |
| 202 | if( n==0 ) return result; /* Empty file -> text */ |
| 203 | c = *z; |
| 204 | if( c==0 ) return 0; /* \000 byte in a file -> binary */ |
| 205 | if ( (n&1)==0 ){ /* UTF-16 must have an even blob length */ |
| 206 | if ( (c==0xff) && (z[1]==0xfe) ){ /* UTF-16 LE BOM */ |
| 207 | result = -2; |
| 208 | j = LENGTH_MASK/3; |
| 209 | while( (n-=2)>0 ){ |
| 210 | c = *(z+=2); |
| 211 | if( z[1]==0 ){ /* High-byte must be 0 for further checks */ |
| 212 | if( c==0 ) return 0; /* \000 char in a file -> binary */ |
| @@ -241,11 +245,11 @@ | |
| 241 | while( --n>0 ){ |
| 242 | c = *++z; |
| 243 | if( c==0 ) return 0; /* \000 byte in a file -> binary */ |
| 244 | if( c=='\n' ){ |
| 245 | if( z[-1]=='\r' ){ |
| 246 | result = -1; /* Contains CR/NL, continue */ |
| 247 | } |
| 248 | j = LENGTH_MASK; |
| 249 | } |
| 250 | if( --j==0 ){ |
| 251 | return 0; /* Very long line -> binary */ |
| 252 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -48,11 +48,11 @@ | |
| 48 | "cannot compute difference between binary files\n" |
| 49 | |
| 50 | #define DIFF_CANNOT_COMPUTE_SYMLINK \ |
| 51 | "cannot compute difference between symlink and regular file\n" |
| 52 | |
| 53 | #define looks_like_binary(blob) ((looks_like_text(blob)&3) == 1) |
| 54 | #endif /* INTERFACE */ |
| 55 | |
| 56 | /* |
| 57 | ** Maximum length of a line in a text file. (8192) |
| 58 | */ |
| @@ -181,16 +181,20 @@ | |
| 181 | ** not be UTF-8. |
| 182 | ** |
| 183 | ** (0) -- The content appears to be binary because it contains embedded |
| 184 | ** NUL (\000) characters or an extremely long line. |
| 185 | ** |
| 186 | ** (-1) -- The content appears to consist entirely of text, in the |
| 187 | ** UTF-16 (LE) encoding. |
| 188 | ** |
| 189 | ** (-2) -- The content appears to consist entirely of text, in the |
| 190 | ** UTF-16 (BE) encoding. |
| 191 | ** |
| 192 | ** (-3) -- The content appears to consist entirely of text, with lines |
| 193 | ** delimited by carriage-return, line-feed pairs; however, the |
| 194 | ** encoding may not be UTF-8. |
| 195 | ** |
| 196 | */ |
| 197 | int looks_like_text(const Blob *pContent){ |
| 198 | unsigned char *z = (unsigned char *) blob_buffer(pContent); |
| 199 | unsigned int n = blob_size(pContent); |
| 200 | int j; |
| @@ -202,11 +206,11 @@ | |
| 206 | if( n==0 ) return result; /* Empty file -> text */ |
| 207 | c = *z; |
| 208 | if( c==0 ) return 0; /* \000 byte in a file -> binary */ |
| 209 | if ( (n&1)==0 ){ /* UTF-16 must have an even blob length */ |
| 210 | if ( (c==0xff) && (z[1]==0xfe) ){ /* UTF-16 LE BOM */ |
| 211 | result = -1; |
| 212 | j = LENGTH_MASK/3; |
| 213 | while( (n-=2)>0 ){ |
| 214 | c = *(z+=2); |
| 215 | if( z[1]==0 ){ /* High-byte must be 0 for further checks */ |
| 216 | if( c==0 ) return 0; /* \000 char in a file -> binary */ |
| @@ -241,11 +245,11 @@ | |
| 245 | while( --n>0 ){ |
| 246 | c = *++z; |
| 247 | if( c==0 ) return 0; /* \000 byte in a file -> binary */ |
| 248 | if( c=='\n' ){ |
| 249 | if( z[-1]=='\r' ){ |
| 250 | result = -3; /* Contains CR/NL, continue */ |
| 251 | } |
| 252 | j = LENGTH_MASK; |
| 253 | } |
| 254 | if( --j==0 ){ |
| 255 | return 0; /* Very long line -> binary */ |
| 256 |
+10
-6
| --- src/diff.c | ||
| +++ src/diff.c | ||
| @@ -48,11 +48,11 @@ | ||
| 48 | 48 | "cannot compute difference between binary files\n" |
| 49 | 49 | |
| 50 | 50 | #define DIFF_CANNOT_COMPUTE_SYMLINK \ |
| 51 | 51 | "cannot compute difference between symlink and regular file\n" |
| 52 | 52 | |
| 53 | -#define looks_like_binary(blob) ((looks_like_text(blob)&1) == 0) | |
| 53 | +#define looks_like_binary(blob) ((looks_like_text(blob)&3) == 1) | |
| 54 | 54 | #endif /* INTERFACE */ |
| 55 | 55 | |
| 56 | 56 | /* |
| 57 | 57 | ** Maximum length of a line in a text file. (8192) |
| 58 | 58 | */ |
| @@ -181,16 +181,20 @@ | ||
| 181 | 181 | ** not be UTF-8. |
| 182 | 182 | ** |
| 183 | 183 | ** (0) -- The content appears to be binary because it contains embedded |
| 184 | 184 | ** NUL (\000) characters or an extremely long line. |
| 185 | 185 | ** |
| 186 | -** (-1) -- The content appears to consist entirely of text, with lines | |
| 186 | +** (-1) -- The content appears to consist entirely of text, in the | |
| 187 | +** UTF-16 (LE) encoding. | |
| 188 | +** | |
| 189 | +** (-2) -- The content appears to consist entirely of text, in the | |
| 190 | +** UTF-16 (BE) encoding. | |
| 191 | +** | |
| 192 | +** (-3) -- The content appears to consist entirely of text, with lines | |
| 187 | 193 | ** delimited by carriage-return, line-feed pairs; however, the |
| 188 | 194 | ** encoding may not be UTF-8. |
| 189 | 195 | ** |
| 190 | -** (-2) -- The content appears to consist entirely of text, in the | |
| 191 | -** UTF-16 (BE or LE) encoding. | |
| 192 | 196 | */ |
| 193 | 197 | int looks_like_text(const Blob *pContent){ |
| 194 | 198 | unsigned char *z = (unsigned char *) blob_buffer(pContent); |
| 195 | 199 | unsigned int n = blob_size(pContent); |
| 196 | 200 | int j; |
| @@ -202,11 +206,11 @@ | ||
| 202 | 206 | if( n==0 ) return result; /* Empty file -> text */ |
| 203 | 207 | c = *z; |
| 204 | 208 | if( c==0 ) return 0; /* \000 byte in a file -> binary */ |
| 205 | 209 | if ( (n&1)==0 ){ /* UTF-16 must have an even blob length */ |
| 206 | 210 | if ( (c==0xff) && (z[1]==0xfe) ){ /* UTF-16 LE BOM */ |
| 207 | - result = -2; | |
| 211 | + result = -1; | |
| 208 | 212 | j = LENGTH_MASK/3; |
| 209 | 213 | while( (n-=2)>0 ){ |
| 210 | 214 | c = *(z+=2); |
| 211 | 215 | if( z[1]==0 ){ /* High-byte must be 0 for further checks */ |
| 212 | 216 | if( c==0 ) return 0; /* \000 char in a file -> binary */ |
| @@ -241,11 +245,11 @@ | ||
| 241 | 245 | while( --n>0 ){ |
| 242 | 246 | c = *++z; |
| 243 | 247 | if( c==0 ) return 0; /* \000 byte in a file -> binary */ |
| 244 | 248 | if( c=='\n' ){ |
| 245 | 249 | if( z[-1]=='\r' ){ |
| 246 | - result = -1; /* Contains CR/NL, continue */ | |
| 250 | + result = -3; /* Contains CR/NL, continue */ | |
| 247 | 251 | } |
| 248 | 252 | j = LENGTH_MASK; |
| 249 | 253 | } |
| 250 | 254 | if( --j==0 ){ |
| 251 | 255 | return 0; /* Very long line -> binary */ |
| 252 | 256 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -48,11 +48,11 @@ | |
| 48 | "cannot compute difference between binary files\n" |
| 49 | |
| 50 | #define DIFF_CANNOT_COMPUTE_SYMLINK \ |
| 51 | "cannot compute difference between symlink and regular file\n" |
| 52 | |
| 53 | #define looks_like_binary(blob) ((looks_like_text(blob)&1) == 0) |
| 54 | #endif /* INTERFACE */ |
| 55 | |
| 56 | /* |
| 57 | ** Maximum length of a line in a text file. (8192) |
| 58 | */ |
| @@ -181,16 +181,20 @@ | |
| 181 | ** not be UTF-8. |
| 182 | ** |
| 183 | ** (0) -- The content appears to be binary because it contains embedded |
| 184 | ** NUL (\000) characters or an extremely long line. |
| 185 | ** |
| 186 | ** (-1) -- The content appears to consist entirely of text, with lines |
| 187 | ** delimited by carriage-return, line-feed pairs; however, the |
| 188 | ** encoding may not be UTF-8. |
| 189 | ** |
| 190 | ** (-2) -- The content appears to consist entirely of text, in the |
| 191 | ** UTF-16 (BE or LE) encoding. |
| 192 | */ |
| 193 | int looks_like_text(const Blob *pContent){ |
| 194 | unsigned char *z = (unsigned char *) blob_buffer(pContent); |
| 195 | unsigned int n = blob_size(pContent); |
| 196 | int j; |
| @@ -202,11 +206,11 @@ | |
| 202 | if( n==0 ) return result; /* Empty file -> text */ |
| 203 | c = *z; |
| 204 | if( c==0 ) return 0; /* \000 byte in a file -> binary */ |
| 205 | if ( (n&1)==0 ){ /* UTF-16 must have an even blob length */ |
| 206 | if ( (c==0xff) && (z[1]==0xfe) ){ /* UTF-16 LE BOM */ |
| 207 | result = -2; |
| 208 | j = LENGTH_MASK/3; |
| 209 | while( (n-=2)>0 ){ |
| 210 | c = *(z+=2); |
| 211 | if( z[1]==0 ){ /* High-byte must be 0 for further checks */ |
| 212 | if( c==0 ) return 0; /* \000 char in a file -> binary */ |
| @@ -241,11 +245,11 @@ | |
| 241 | while( --n>0 ){ |
| 242 | c = *++z; |
| 243 | if( c==0 ) return 0; /* \000 byte in a file -> binary */ |
| 244 | if( c=='\n' ){ |
| 245 | if( z[-1]=='\r' ){ |
| 246 | result = -1; /* Contains CR/NL, continue */ |
| 247 | } |
| 248 | j = LENGTH_MASK; |
| 249 | } |
| 250 | if( --j==0 ){ |
| 251 | return 0; /* Very long line -> binary */ |
| 252 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -48,11 +48,11 @@ | |
| 48 | "cannot compute difference between binary files\n" |
| 49 | |
| 50 | #define DIFF_CANNOT_COMPUTE_SYMLINK \ |
| 51 | "cannot compute difference between symlink and regular file\n" |
| 52 | |
| 53 | #define looks_like_binary(blob) ((looks_like_text(blob)&3) == 1) |
| 54 | #endif /* INTERFACE */ |
| 55 | |
| 56 | /* |
| 57 | ** Maximum length of a line in a text file. (8192) |
| 58 | */ |
| @@ -181,16 +181,20 @@ | |
| 181 | ** not be UTF-8. |
| 182 | ** |
| 183 | ** (0) -- The content appears to be binary because it contains embedded |
| 184 | ** NUL (\000) characters or an extremely long line. |
| 185 | ** |
| 186 | ** (-1) -- The content appears to consist entirely of text, in the |
| 187 | ** UTF-16 (LE) encoding. |
| 188 | ** |
| 189 | ** (-2) -- The content appears to consist entirely of text, in the |
| 190 | ** UTF-16 (BE) encoding. |
| 191 | ** |
| 192 | ** (-3) -- The content appears to consist entirely of text, with lines |
| 193 | ** delimited by carriage-return, line-feed pairs; however, the |
| 194 | ** encoding may not be UTF-8. |
| 195 | ** |
| 196 | */ |
| 197 | int looks_like_text(const Blob *pContent){ |
| 198 | unsigned char *z = (unsigned char *) blob_buffer(pContent); |
| 199 | unsigned int n = blob_size(pContent); |
| 200 | int j; |
| @@ -202,11 +206,11 @@ | |
| 206 | if( n==0 ) return result; /* Empty file -> text */ |
| 207 | c = *z; |
| 208 | if( c==0 ) return 0; /* \000 byte in a file -> binary */ |
| 209 | if ( (n&1)==0 ){ /* UTF-16 must have an even blob length */ |
| 210 | if ( (c==0xff) && (z[1]==0xfe) ){ /* UTF-16 LE BOM */ |
| 211 | result = -1; |
| 212 | j = LENGTH_MASK/3; |
| 213 | while( (n-=2)>0 ){ |
| 214 | c = *(z+=2); |
| 215 | if( z[1]==0 ){ /* High-byte must be 0 for further checks */ |
| 216 | if( c==0 ) return 0; /* \000 char in a file -> binary */ |
| @@ -241,11 +245,11 @@ | |
| 245 | while( --n>0 ){ |
| 246 | c = *++z; |
| 247 | if( c==0 ) return 0; /* \000 byte in a file -> binary */ |
| 248 | if( c=='\n' ){ |
| 249 | if( z[-1]=='\r' ){ |
| 250 | result = -3; /* Contains CR/NL, continue */ |
| 251 | } |
| 252 | j = LENGTH_MASK; |
| 253 | } |
| 254 | if( --j==0 ){ |
| 255 | return 0; /* Very long line -> binary */ |
| 256 |