Fossil SCM
Two more enhancements. <br>- DOS text files sometimes use Control-Z (0x1a) as an EOF marker, so this byte should be considered text. <br>- FEFF, FFFE and FFFF are invalid UTF-16 code points (when not used as a BOM), so files containing them should be considered binary.
Commit
e3f3c390f1c3461d5a5f1c061316ca7ae90c73e5
Parent
44c6be2ab6d0471…
1 file changed
+7
-1
+7
-1
| --- src/diff.c | ||
| +++ src/diff.c | ||
| @@ -197,11 +197,11 @@ | ||
| 197 | 197 | int j; |
| 198 | 198 | unsigned char c; |
| 199 | 199 | int result = 1; /* Assume text with no CR/NL */ |
| 200 | 200 | static const char isBinary[256] = { |
| 201 | 201 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, |
| 202 | - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1 | |
| 202 | + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1 | |
| 203 | 203 | }; |
| 204 | 204 | |
| 205 | 205 | |
| 206 | 206 | /* Check individual lines. |
| 207 | 207 | */ |
| @@ -217,10 +217,13 @@ | ||
| 217 | 217 | if( z[1]==0 ){ /* High-byte must be 0 for further checks */ |
| 218 | 218 | if( isBinary[c] ) return 0; /* non-text char in a file -> binary */ |
| 219 | 219 | if( c=='\n' ){ |
| 220 | 220 | j = LENGTH_MASK/3; |
| 221 | 221 | } |
| 222 | + }else if( (c+z[1])>0x1fc ){ | |
| 223 | + /* FEFF, FFFE and FFFF are invalid UTF-16 here. */ | |
| 224 | + return 0; | |
| 222 | 225 | } |
| 223 | 226 | if( --j==0 ){ |
| 224 | 227 | return 0; /* Very long line -> binary */ |
| 225 | 228 | } |
| 226 | 229 | } |
| @@ -233,10 +236,13 @@ | ||
| 233 | 236 | if ( z[-1]==0 ){ /* High-byte must be 0 for further checks */ |
| 234 | 237 | if( isBinary[c] ) return 0; /* non-text char in a file -> binary */ |
| 235 | 238 | if( c=='\n' ){ |
| 236 | 239 | j = LENGTH_MASK/3; |
| 237 | 240 | } |
| 241 | + }else if( (c+z[-1])>0x1fc ){ | |
| 242 | + /* FEFF, FFFE and FFFF are invalid UTF-16 here. */ | |
| 243 | + return 0; | |
| 238 | 244 | } |
| 239 | 245 | if( --j==0 ){ |
| 240 | 246 | return 0; /* Very long line -> binary */ |
| 241 | 247 | } |
| 242 | 248 | } |
| 243 | 249 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -197,11 +197,11 @@ | |
| 197 | int j; |
| 198 | unsigned char c; |
| 199 | int result = 1; /* Assume text with no CR/NL */ |
| 200 | static const char isBinary[256] = { |
| 201 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, |
| 202 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1 |
| 203 | }; |
| 204 | |
| 205 | |
| 206 | /* Check individual lines. |
| 207 | */ |
| @@ -217,10 +217,13 @@ | |
| 217 | if( z[1]==0 ){ /* High-byte must be 0 for further checks */ |
| 218 | if( isBinary[c] ) return 0; /* non-text char in a file -> binary */ |
| 219 | if( c=='\n' ){ |
| 220 | j = LENGTH_MASK/3; |
| 221 | } |
| 222 | } |
| 223 | if( --j==0 ){ |
| 224 | return 0; /* Very long line -> binary */ |
| 225 | } |
| 226 | } |
| @@ -233,10 +236,13 @@ | |
| 233 | if ( z[-1]==0 ){ /* High-byte must be 0 for further checks */ |
| 234 | if( isBinary[c] ) return 0; /* non-text char in a file -> binary */ |
| 235 | if( c=='\n' ){ |
| 236 | j = LENGTH_MASK/3; |
| 237 | } |
| 238 | } |
| 239 | if( --j==0 ){ |
| 240 | return 0; /* Very long line -> binary */ |
| 241 | } |
| 242 | } |
| 243 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -197,11 +197,11 @@ | |
| 197 | int j; |
| 198 | unsigned char c; |
| 199 | int result = 1; /* Assume text with no CR/NL */ |
| 200 | static const char isBinary[256] = { |
| 201 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, |
| 202 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1 |
| 203 | }; |
| 204 | |
| 205 | |
| 206 | /* Check individual lines. |
| 207 | */ |
| @@ -217,10 +217,13 @@ | |
| 217 | if( z[1]==0 ){ /* High-byte must be 0 for further checks */ |
| 218 | if( isBinary[c] ) return 0; /* non-text char in a file -> binary */ |
| 219 | if( c=='\n' ){ |
| 220 | j = LENGTH_MASK/3; |
| 221 | } |
| 222 | }else if( (c+z[1])>0x1fc ){ |
| 223 | /* FEFF, FFFE and FFFF are invalid UTF-16 here. */ |
| 224 | return 0; |
| 225 | } |
| 226 | if( --j==0 ){ |
| 227 | return 0; /* Very long line -> binary */ |
| 228 | } |
| 229 | } |
| @@ -233,10 +236,13 @@ | |
| 236 | if ( z[-1]==0 ){ /* High-byte must be 0 for further checks */ |
| 237 | if( isBinary[c] ) return 0; /* non-text char in a file -> binary */ |
| 238 | if( c=='\n' ){ |
| 239 | j = LENGTH_MASK/3; |
| 240 | } |
| 241 | }else if( (c+z[-1])>0x1fc ){ |
| 242 | /* FEFF, FFFE and FFFF are invalid UTF-16 here. */ |
| 243 | return 0; |
| 244 | } |
| 245 | if( --j==0 ){ |
| 246 | return 0; /* Very long line -> binary */ |
| 247 | } |
| 248 | } |
| 249 |