Fossil SCM

Fix detection of LOOK_LONE_CR/LOOK_LONE_LF for the first byte/character of the file. Fix off-by-one error in utf8 LOOK_LONE_CR detection, which could access one byte past the blob. Simplify the code, making use of the known relation between some LOOK_XXX flags.

jan.nijtmans 2013-03-14 10:33 trunk
Commit ce9d6abea2010dbbd631065ed020d0624621bbb5
1 file changed +21 -31
+21 -31
--- src/diff.c
+++ src/diff.c
@@ -69,17 +69,17 @@
6969
** Output flags for the looks_like_utf8() and looks_like_utf16() routines used
7070
** to convey status information about the blob content.
7171
*/
7272
#define LOOK_NONE ((int)0x00000000) /* Nothing special was found. */
7373
#define LOOK_NUL ((int)0x00000001) /* One or more NUL chars were found. */
74
-#define LOOK_CR ((int)0x00000002) /* One or more CR chars were found. */
75
-#define LOOK_LONE_CR ((int)0x00000004) /* An unpaired CR char was found. */
76
-#define LOOK_LF ((int)0x00000008) /* One or more LF chars were found. */
77
-#define LOOK_LONE_LF ((int)0x00000010) /* An unpaired CR char was found. */
78
-#define LOOK_CRLF ((int)0x00000020) /* One or more CR/LF pairs were found. */
79
-#define LOOK_LENGTH ((int)0x00000040) /* An over length line was found. */
80
-#define LOOK_ODD ((int)0x00000080) /* An odd number of bytes was found. */
74
+#define LOOK_LONE_CR ((int)0x00000002) /* An unpaired CR char was found. */
75
+#define LOOK_LONE_LF ((int)0x00000004) /* An unpaired CR char was found. */
76
+#define LOOK_CRLF ((int)0x00000008) /* One or more CR/LF pairs were found. */
77
+#define LOOK_LENGTH ((int)0x00000010) /* An over length line was found. */
78
+#define LOOK_ODD ((int)0x00000020) /* An odd number of bytes was found. */
79
+#define LOOK_CR (LOOK_LONE_CR|LOOK_CRLF) /* One or more CR chars were found. */
80
+#define LOOK_LF (LOOK_LONE_LF|LOOK_CRLF) /* One or more LF chars were found. */
8181
#endif /* INTERFACE */
8282
8383
/*
8484
** Maximum length of a line in a text file, in bytes. (2**13 = 8192 bytes)
8585
*/
@@ -238,24 +238,23 @@
238238
if( n==0 ) return result; /* Empty file -> text */
239239
c = *z;
240240
if( c==0 ){
241241
if( pFlags ) *pFlags |= LOOK_NUL;
242242
result = 0; /* NUL character in a file -> binary */
243
- }else if( (c=='\r') && pFlags ){
244
- *pFlags |= LOOK_CR;
243
+ }else if( c=='\r' && pFlags && (n<2 || z[1]!='\n') ){
244
+ *pFlags |= LOOK_LONE_CR;
245245
}
246246
j = (c!='\n');
247
- if( !j && pFlags ) *pFlags |= LOOK_LF;
247
+ if( !j && pFlags ) *pFlags |= LOOK_LONE_LF;
248248
while( --n>0 ){
249
+ int c2 = c;
249250
c = *++z; ++j;
250251
if( c==0 ){
251252
if( pFlags ) *pFlags |= LOOK_NUL;
252253
result = 0; /* NUL character in a file -> binary */
253254
}else if( c=='\n' ){
254
- int c2 = z[-1];
255255
if( pFlags ){
256
- *pFlags |= LOOK_LF;
257256
if( c2=='\r' ){
258257
*pFlags |= LOOK_CRLF;
259258
}else{
260259
*pFlags |= LOOK_LONE_LF;
261260
}
@@ -263,17 +262,12 @@
263262
if( j>LENGTH_MASK ){
264263
if( pFlags ) *pFlags |= LOOK_LENGTH;
265264
result = 0; /* Very long line -> binary */
266265
}
267266
j = 0;
268
- }else if( c=='\r' ){
269
- if( pFlags ){
270
- *pFlags |= LOOK_CR;
271
- if( n<=1 || z[1]!='\n' ){
272
- *pFlags |= LOOK_LONE_CR;
273
- }
274
- }
267
+ }else if( c=='\r' && pFlags && (n<1 || z[1]!='\n') ){
268
+ *pFlags |= LOOK_LONE_CR;
275269
}
276270
}
277271
if( j>LENGTH_MASK ){
278272
if( pFlags ) *pFlags |= LOOK_LENGTH;
279273
result = 0; /* Very long line -> binary */
@@ -351,26 +345,26 @@
351345
}
352346
c = *z;
353347
if( c==0 ){
354348
if( pFlags ) *pFlags |= LOOK_NUL;
355349
result = 0; /* NUL character in a file -> binary */
356
- }else if( (c==UTF16BE_CR || c==UTF16LE_CR) && pFlags ){
357
- *pFlags |= LOOK_CR;
350
+ }else if( (c==UTF16BE_CR || c==UTF16LE_CR) && pFlags
351
+ && (n<(2*sizeof(WCHAR_T)) || (z[1]!=UTF16BE_LF && z[1]!=UTF16LE_LF)) ){
352
+ *pFlags |= LOOK_LONE_CR;
358353
}
359354
j = ((c!=UTF16BE_LF) && (c!=UTF16LE_LF));
360
- if( !j && pFlags ) *pFlags |= LOOK_LF;
355
+ if( !j && pFlags ) *pFlags |= LOOK_LONE_LF;
361356
while( 1 ){
357
+ int c2 = c;
362358
if ( n<sizeof(WCHAR_T) ) break;
363359
n -= sizeof(WCHAR_T);
364360
c = *++z; ++j;
365361
if( c==0 ){
366362
if( pFlags ) *pFlags |= LOOK_NUL;
367363
result = 0; /* NUL character in a file -> binary */
368364
}else if( c==UTF16BE_LF || c==UTF16LE_LF ){
369
- int c2 = z[-1];
370365
if( pFlags ){
371
- *pFlags |= LOOK_LF;
372366
if( c2==UTF16BE_CR || c2==UTF16LE_CR ){
373367
*pFlags |= LOOK_CRLF;
374368
}else{
375369
*pFlags |= LOOK_LONE_LF;
376370
}
@@ -378,17 +372,13 @@
378372
if( j>UTF16_LENGTH_MASK ){
379373
if( pFlags ) *pFlags |= LOOK_LENGTH;
380374
result = 0; /* Very long line -> binary */
381375
}
382376
j = 0;
383
- }else if( c==UTF16BE_CR || c==UTF16LE_CR ){
384
- if( pFlags ){
385
- *pFlags |= LOOK_CR;
386
- if( n<=1 || (z[1]!=UTF16BE_LF && z[1]!=UTF16LE_LF) ){
387
- *pFlags |= LOOK_LONE_CR;
388
- }
389
- }
377
+ }else if( (c==UTF16BE_CR || c==UTF16LE_CR) && pFlags
378
+ && (n<sizeof(WCHAR_T) || (z[1]!=UTF16BE_LF && z[1]!=UTF16LE_LF)) ){
379
+ *pFlags |= LOOK_LONE_CR;
390380
}
391381
}
392382
if( j>UTF16_LENGTH_MASK ){
393383
if( pFlags ) *pFlags |= LOOK_LENGTH;
394384
result = 0; /* Very long line -> binary */
395385
--- src/diff.c
+++ src/diff.c
@@ -69,17 +69,17 @@
69 ** Output flags for the looks_like_utf8() and looks_like_utf16() routines used
70 ** to convey status information about the blob content.
71 */
72 #define LOOK_NONE ((int)0x00000000) /* Nothing special was found. */
73 #define LOOK_NUL ((int)0x00000001) /* One or more NUL chars were found. */
74 #define LOOK_CR ((int)0x00000002) /* One or more CR chars were found. */
75 #define LOOK_LONE_CR ((int)0x00000004) /* An unpaired CR char was found. */
76 #define LOOK_LF ((int)0x00000008) /* One or more LF chars were found. */
77 #define LOOK_LONE_LF ((int)0x00000010) /* An unpaired CR char was found. */
78 #define LOOK_CRLF ((int)0x00000020) /* One or more CR/LF pairs were found. */
79 #define LOOK_LENGTH ((int)0x00000040) /* An over length line was found. */
80 #define LOOK_ODD ((int)0x00000080) /* An odd number of bytes was found. */
81 #endif /* INTERFACE */
82
83 /*
84 ** Maximum length of a line in a text file, in bytes. (2**13 = 8192 bytes)
85 */
@@ -238,24 +238,23 @@
238 if( n==0 ) return result; /* Empty file -> text */
239 c = *z;
240 if( c==0 ){
241 if( pFlags ) *pFlags |= LOOK_NUL;
242 result = 0; /* NUL character in a file -> binary */
243 }else if( (c=='\r') && pFlags ){
244 *pFlags |= LOOK_CR;
245 }
246 j = (c!='\n');
247 if( !j && pFlags ) *pFlags |= LOOK_LF;
248 while( --n>0 ){
 
249 c = *++z; ++j;
250 if( c==0 ){
251 if( pFlags ) *pFlags |= LOOK_NUL;
252 result = 0; /* NUL character in a file -> binary */
253 }else if( c=='\n' ){
254 int c2 = z[-1];
255 if( pFlags ){
256 *pFlags |= LOOK_LF;
257 if( c2=='\r' ){
258 *pFlags |= LOOK_CRLF;
259 }else{
260 *pFlags |= LOOK_LONE_LF;
261 }
@@ -263,17 +262,12 @@
263 if( j>LENGTH_MASK ){
264 if( pFlags ) *pFlags |= LOOK_LENGTH;
265 result = 0; /* Very long line -> binary */
266 }
267 j = 0;
268 }else if( c=='\r' ){
269 if( pFlags ){
270 *pFlags |= LOOK_CR;
271 if( n<=1 || z[1]!='\n' ){
272 *pFlags |= LOOK_LONE_CR;
273 }
274 }
275 }
276 }
277 if( j>LENGTH_MASK ){
278 if( pFlags ) *pFlags |= LOOK_LENGTH;
279 result = 0; /* Very long line -> binary */
@@ -351,26 +345,26 @@
351 }
352 c = *z;
353 if( c==0 ){
354 if( pFlags ) *pFlags |= LOOK_NUL;
355 result = 0; /* NUL character in a file -> binary */
356 }else if( (c==UTF16BE_CR || c==UTF16LE_CR) && pFlags ){
357 *pFlags |= LOOK_CR;
 
358 }
359 j = ((c!=UTF16BE_LF) && (c!=UTF16LE_LF));
360 if( !j && pFlags ) *pFlags |= LOOK_LF;
361 while( 1 ){
 
362 if ( n<sizeof(WCHAR_T) ) break;
363 n -= sizeof(WCHAR_T);
364 c = *++z; ++j;
365 if( c==0 ){
366 if( pFlags ) *pFlags |= LOOK_NUL;
367 result = 0; /* NUL character in a file -> binary */
368 }else if( c==UTF16BE_LF || c==UTF16LE_LF ){
369 int c2 = z[-1];
370 if( pFlags ){
371 *pFlags |= LOOK_LF;
372 if( c2==UTF16BE_CR || c2==UTF16LE_CR ){
373 *pFlags |= LOOK_CRLF;
374 }else{
375 *pFlags |= LOOK_LONE_LF;
376 }
@@ -378,17 +372,13 @@
378 if( j>UTF16_LENGTH_MASK ){
379 if( pFlags ) *pFlags |= LOOK_LENGTH;
380 result = 0; /* Very long line -> binary */
381 }
382 j = 0;
383 }else if( c==UTF16BE_CR || c==UTF16LE_CR ){
384 if( pFlags ){
385 *pFlags |= LOOK_CR;
386 if( n<=1 || (z[1]!=UTF16BE_LF && z[1]!=UTF16LE_LF) ){
387 *pFlags |= LOOK_LONE_CR;
388 }
389 }
390 }
391 }
392 if( j>UTF16_LENGTH_MASK ){
393 if( pFlags ) *pFlags |= LOOK_LENGTH;
394 result = 0; /* Very long line -> binary */
395
--- src/diff.c
+++ src/diff.c
@@ -69,17 +69,17 @@
69 ** Output flags for the looks_like_utf8() and looks_like_utf16() routines used
70 ** to convey status information about the blob content.
71 */
72 #define LOOK_NONE ((int)0x00000000) /* Nothing special was found. */
73 #define LOOK_NUL ((int)0x00000001) /* One or more NUL chars were found. */
74 #define LOOK_LONE_CR ((int)0x00000002) /* An unpaired CR char was found. */
75 #define LOOK_LONE_LF ((int)0x00000004) /* An unpaired CR char was found. */
76 #define LOOK_CRLF ((int)0x00000008) /* One or more CR/LF pairs were found. */
77 #define LOOK_LENGTH ((int)0x00000010) /* An over length line was found. */
78 #define LOOK_ODD ((int)0x00000020) /* An odd number of bytes was found. */
79 #define LOOK_CR (LOOK_LONE_CR|LOOK_CRLF) /* One or more CR chars were found. */
80 #define LOOK_LF (LOOK_LONE_LF|LOOK_CRLF) /* One or more LF chars were found. */
81 #endif /* INTERFACE */
82
83 /*
84 ** Maximum length of a line in a text file, in bytes. (2**13 = 8192 bytes)
85 */
@@ -238,24 +238,23 @@
238 if( n==0 ) return result; /* Empty file -> text */
239 c = *z;
240 if( c==0 ){
241 if( pFlags ) *pFlags |= LOOK_NUL;
242 result = 0; /* NUL character in a file -> binary */
243 }else if( c=='\r' && pFlags && (n<2 || z[1]!='\n') ){
244 *pFlags |= LOOK_LONE_CR;
245 }
246 j = (c!='\n');
247 if( !j && pFlags ) *pFlags |= LOOK_LONE_LF;
248 while( --n>0 ){
249 int c2 = c;
250 c = *++z; ++j;
251 if( c==0 ){
252 if( pFlags ) *pFlags |= LOOK_NUL;
253 result = 0; /* NUL character in a file -> binary */
254 }else if( c=='\n' ){
 
255 if( pFlags ){
 
256 if( c2=='\r' ){
257 *pFlags |= LOOK_CRLF;
258 }else{
259 *pFlags |= LOOK_LONE_LF;
260 }
@@ -263,17 +262,12 @@
262 if( j>LENGTH_MASK ){
263 if( pFlags ) *pFlags |= LOOK_LENGTH;
264 result = 0; /* Very long line -> binary */
265 }
266 j = 0;
267 }else if( c=='\r' && pFlags && (n<1 || z[1]!='\n') ){
268 *pFlags |= LOOK_LONE_CR;
 
 
 
 
 
269 }
270 }
271 if( j>LENGTH_MASK ){
272 if( pFlags ) *pFlags |= LOOK_LENGTH;
273 result = 0; /* Very long line -> binary */
@@ -351,26 +345,26 @@
345 }
346 c = *z;
347 if( c==0 ){
348 if( pFlags ) *pFlags |= LOOK_NUL;
349 result = 0; /* NUL character in a file -> binary */
350 }else if( (c==UTF16BE_CR || c==UTF16LE_CR) && pFlags
351 && (n<(2*sizeof(WCHAR_T)) || (z[1]!=UTF16BE_LF && z[1]!=UTF16LE_LF)) ){
352 *pFlags |= LOOK_LONE_CR;
353 }
354 j = ((c!=UTF16BE_LF) && (c!=UTF16LE_LF));
355 if( !j && pFlags ) *pFlags |= LOOK_LONE_LF;
356 while( 1 ){
357 int c2 = c;
358 if ( n<sizeof(WCHAR_T) ) break;
359 n -= sizeof(WCHAR_T);
360 c = *++z; ++j;
361 if( c==0 ){
362 if( pFlags ) *pFlags |= LOOK_NUL;
363 result = 0; /* NUL character in a file -> binary */
364 }else if( c==UTF16BE_LF || c==UTF16LE_LF ){
 
365 if( pFlags ){
 
366 if( c2==UTF16BE_CR || c2==UTF16LE_CR ){
367 *pFlags |= LOOK_CRLF;
368 }else{
369 *pFlags |= LOOK_LONE_LF;
370 }
@@ -378,17 +372,13 @@
372 if( j>UTF16_LENGTH_MASK ){
373 if( pFlags ) *pFlags |= LOOK_LENGTH;
374 result = 0; /* Very long line -> binary */
375 }
376 j = 0;
377 }else if( (c==UTF16BE_CR || c==UTF16LE_CR) && pFlags
378 && (n<sizeof(WCHAR_T) || (z[1]!=UTF16BE_LF && z[1]!=UTF16LE_LF)) ){
379 *pFlags |= LOOK_LONE_CR;
 
 
 
 
380 }
381 }
382 if( j>UTF16_LENGTH_MASK ){
383 if( pFlags ) *pFlags |= LOOK_LENGTH;
384 result = 0; /* Very long line -> binary */
385

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button