Fossil SCM

Two more enhancements:
- DOS text files sometimes use Control-Z (0x1a) as an EOF marker, so this byte should be considered text.
- FEFF, FFFE and FFFF are invalid UTF-16 code points (when not used as BOM), so files containing those should be considered binary.

jan.nijtmans 2012-10-31 12:58 UTC improve_looks_like_binary
Commit e3f3c390f1c3461d5a5f1c061316ca7ae90c73e5
1 file changed +7 -1
+7 -1
--- src/diff.c
+++ src/diff.c
@@ -197,11 +197,11 @@
197197
int j;
198198
unsigned char c;
199199
int result = 1; /* Assume text with no CR/NL */
200200
static const char isBinary[256] = {
201201
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1,
202
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1
202
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1
203203
};
204204
205205
206206
/* Check individual lines.
207207
*/
@@ -217,10 +217,13 @@
217217
if( z[1]==0 ){ /* High-byte must be 0 for further checks */
218218
if( isBinary[c] ) return 0; /* non-text char in a file -> binary */
219219
if( c=='\n' ){
220220
j = LENGTH_MASK/3;
221221
}
222
+ }else if( (c+z[1])>0x1fc ){
223
+ /* FEFF, FFFE and FFFF are invalid UTF-16 here. */
224
+ return 0;
222225
}
223226
if( --j==0 ){
224227
return 0; /* Very long line -> binary */
225228
}
226229
}
@@ -233,10 +236,13 @@
233236
if ( z[-1]==0 ){ /* High-byte must be 0 for further checks */
234237
if( isBinary[c] ) return 0; /* non-text char in a file -> binary */
235238
if( c=='\n' ){
236239
j = LENGTH_MASK/3;
237240
}
241
+ }else if( (c+z[-1])>0x1fc ){
242
+ /* FEFF, FFFE and FFFF are invalid UTF-16 here. */
243
+ return 0;
238244
}
239245
if( --j==0 ){
240246
return 0; /* Very long line -> binary */
241247
}
242248
}
243249
--- src/diff.c
+++ src/diff.c
@@ -197,11 +197,11 @@
197 int j;
198 unsigned char c;
199 int result = 1; /* Assume text with no CR/NL */
200 static const char isBinary[256] = {
201 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1,
202 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1
203 };
204
205
206 /* Check individual lines.
207 */
@@ -217,10 +217,13 @@
217 if( z[1]==0 ){ /* High-byte must be 0 for further checks */
218 if( isBinary[c] ) return 0; /* non-text char in a file -> binary */
219 if( c=='\n' ){
220 j = LENGTH_MASK/3;
221 }
 
 
 
222 }
223 if( --j==0 ){
224 return 0; /* Very long line -> binary */
225 }
226 }
@@ -233,10 +236,13 @@
233 if ( z[-1]==0 ){ /* High-byte must be 0 for further checks */
234 if( isBinary[c] ) return 0; /* non-text char in a file -> binary */
235 if( c=='\n' ){
236 j = LENGTH_MASK/3;
237 }
 
 
 
238 }
239 if( --j==0 ){
240 return 0; /* Very long line -> binary */
241 }
242 }
243
--- src/diff.c
+++ src/diff.c
@@ -197,11 +197,11 @@
197 int j;
198 unsigned char c;
199 int result = 1; /* Assume text with no CR/NL */
200 static const char isBinary[256] = {
201 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1,
202 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1
203 };
204
205
206 /* Check individual lines.
207 */
@@ -217,10 +217,13 @@
217 if( z[1]==0 ){ /* High-byte must be 0 for further checks */
218 if( isBinary[c] ) return 0; /* non-text char in a file -> binary */
219 if( c=='\n' ){
220 j = LENGTH_MASK/3;
221 }
222 }else if( (c+z[1])>0x1fc ){
223 /* FEFF, FFFE and FFFF are invalid UTF-16 here. */
224 return 0;
225 }
226 if( --j==0 ){
227 return 0; /* Very long line -> binary */
228 }
229 }
@@ -233,10 +236,13 @@
236 if ( z[-1]==0 ){ /* High-byte must be 0 for further checks */
237 if( isBinary[c] ) return 0; /* non-text char in a file -> binary */
238 if( c=='\n' ){
239 j = LENGTH_MASK/3;
240 }
241 }else if( (c+z[-1])>0x1fc ){
242 /* FEFF, FFFE and FFFF are invalid UTF-16 here. */
243 return 0;
244 }
245 if( --j==0 ){
246 return 0; /* Very long line -> binary */
247 }
248 }
249

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button