Fossil SCM
Fix detection of LOOK_LONE_CR/LOOK_LONE_LF for the first byte/character of the file. Fix off-by-one error in UTF-8 LOOK_LONE_CR detection, which could access one byte past the blob. Simplify the code, making use of the known relation between some LOOK_XXX flags.
Commit
ce9d6abea2010dbbd631065ed020d0624621bbb5
Parent
5a9f855810b49d7…
1 file changed
+21
-31
+21
-31
| --- src/diff.c | ||
| +++ src/diff.c | ||
| @@ -69,17 +69,17 @@ | ||
| 69 | 69 | ** Output flags for the looks_like_utf8() and looks_like_utf16() routines used |
| 70 | 70 | ** to convey status information about the blob content. |
| 71 | 71 | */ |
| 72 | 72 | #define LOOK_NONE ((int)0x00000000) /* Nothing special was found. */ |
| 73 | 73 | #define LOOK_NUL ((int)0x00000001) /* One or more NUL chars were found. */ |
| 74 | -#define LOOK_CR ((int)0x00000002) /* One or more CR chars were found. */ | |
| 75 | -#define LOOK_LONE_CR ((int)0x00000004) /* An unpaired CR char was found. */ | |
| 76 | -#define LOOK_LF ((int)0x00000008) /* One or more LF chars were found. */ | |
| 77 | -#define LOOK_LONE_LF ((int)0x00000010) /* An unpaired CR char was found. */ | |
| 78 | -#define LOOK_CRLF ((int)0x00000020) /* One or more CR/LF pairs were found. */ | |
| 79 | -#define LOOK_LENGTH ((int)0x00000040) /* An over length line was found. */ | |
| 80 | -#define LOOK_ODD ((int)0x00000080) /* An odd number of bytes was found. */ | |
| 74 | +#define LOOK_LONE_CR ((int)0x00000002) /* An unpaired CR char was found. */ | |
| 75 | +#define LOOK_LONE_LF ((int)0x00000004) /* An unpaired CR char was found. */ | |
| 76 | +#define LOOK_CRLF ((int)0x00000008) /* One or more CR/LF pairs were found. */ | |
| 77 | +#define LOOK_LENGTH ((int)0x00000010) /* An over length line was found. */ | |
| 78 | +#define LOOK_ODD ((int)0x00000020) /* An odd number of bytes was found. */ | |
| 79 | +#define LOOK_CR (LOOK_LONE_CR|LOOK_CRLF) /* One or more CR chars were found. */ | |
| 80 | +#define LOOK_LF (LOOK_LONE_LF|LOOK_CRLF) /* One or more LF chars were found. */ | |
| 81 | 81 | #endif /* INTERFACE */ |
| 82 | 82 | |
| 83 | 83 | /* |
| 84 | 84 | ** Maximum length of a line in a text file, in bytes. (2**13 = 8192 bytes) |
| 85 | 85 | */ |
| @@ -238,24 +238,23 @@ | ||
| 238 | 238 | if( n==0 ) return result; /* Empty file -> text */ |
| 239 | 239 | c = *z; |
| 240 | 240 | if( c==0 ){ |
| 241 | 241 | if( pFlags ) *pFlags |= LOOK_NUL; |
| 242 | 242 | result = 0; /* NUL character in a file -> binary */ |
| 243 | - }else if( (c=='\r') && pFlags ){ | |
| 244 | - *pFlags |= LOOK_CR; | |
| 243 | + }else if( c=='\r' && pFlags && (n<2 || z[1]!='\n') ){ | |
| 244 | + *pFlags |= LOOK_LONE_CR; | |
| 245 | 245 | } |
| 246 | 246 | j = (c!='\n'); |
| 247 | - if( !j && pFlags ) *pFlags |= LOOK_LF; | |
| 247 | + if( !j && pFlags ) *pFlags |= LOOK_LONE_LF; | |
| 248 | 248 | while( --n>0 ){ |
| 249 | + int c2 = c; | |
| 249 | 250 | c = *++z; ++j; |
| 250 | 251 | if( c==0 ){ |
| 251 | 252 | if( pFlags ) *pFlags |= LOOK_NUL; |
| 252 | 253 | result = 0; /* NUL character in a file -> binary */ |
| 253 | 254 | }else if( c=='\n' ){ |
| 254 | - int c2 = z[-1]; | |
| 255 | 255 | if( pFlags ){ |
| 256 | - *pFlags |= LOOK_LF; | |
| 257 | 256 | if( c2=='\r' ){ |
| 258 | 257 | *pFlags |= LOOK_CRLF; |
| 259 | 258 | }else{ |
| 260 | 259 | *pFlags |= LOOK_LONE_LF; |
| 261 | 260 | } |
| @@ -263,17 +262,12 @@ | ||
| 263 | 262 | if( j>LENGTH_MASK ){ |
| 264 | 263 | if( pFlags ) *pFlags |= LOOK_LENGTH; |
| 265 | 264 | result = 0; /* Very long line -> binary */ |
| 266 | 265 | } |
| 267 | 266 | j = 0; |
| 268 | - }else if( c=='\r' ){ | |
| 269 | - if( pFlags ){ | |
| 270 | - *pFlags |= LOOK_CR; | |
| 271 | - if( n<=1 || z[1]!='\n' ){ | |
| 272 | - *pFlags |= LOOK_LONE_CR; | |
| 273 | - } | |
| 274 | - } | |
| 267 | + }else if( c=='\r' && pFlags && (n<1 || z[1]!='\n') ){ | |
| 268 | + *pFlags |= LOOK_LONE_CR; | |
| 275 | 269 | } |
| 276 | 270 | } |
| 277 | 271 | if( j>LENGTH_MASK ){ |
| 278 | 272 | if( pFlags ) *pFlags |= LOOK_LENGTH; |
| 279 | 273 | result = 0; /* Very long line -> binary */ |
| @@ -351,26 +345,26 @@ | ||
| 351 | 345 | } |
| 352 | 346 | c = *z; |
| 353 | 347 | if( c==0 ){ |
| 354 | 348 | if( pFlags ) *pFlags |= LOOK_NUL; |
| 355 | 349 | result = 0; /* NUL character in a file -> binary */ |
| 356 | - }else if( (c==UTF16BE_CR || c==UTF16LE_CR) && pFlags ){ | |
| 357 | - *pFlags |= LOOK_CR; | |
| 350 | + }else if( (c==UTF16BE_CR || c==UTF16LE_CR) && pFlags | |
| 351 | + && (n<(2*sizeof(WCHAR_T)) || (z[1]!=UTF16BE_LF && z[1]!=UTF16LE_LF)) ){ | |
| 352 | + *pFlags |= LOOK_LONE_CR; | |
| 358 | 353 | } |
| 359 | 354 | j = ((c!=UTF16BE_LF) && (c!=UTF16LE_LF)); |
| 360 | - if( !j && pFlags ) *pFlags |= LOOK_LF; | |
| 355 | + if( !j && pFlags ) *pFlags |= LOOK_LONE_LF; | |
| 361 | 356 | while( 1 ){ |
| 357 | + int c2 = c; | |
| 362 | 358 | if ( n<sizeof(WCHAR_T) ) break; |
| 363 | 359 | n -= sizeof(WCHAR_T); |
| 364 | 360 | c = *++z; ++j; |
| 365 | 361 | if( c==0 ){ |
| 366 | 362 | if( pFlags ) *pFlags |= LOOK_NUL; |
| 367 | 363 | result = 0; /* NUL character in a file -> binary */ |
| 368 | 364 | }else if( c==UTF16BE_LF || c==UTF16LE_LF ){ |
| 369 | - int c2 = z[-1]; | |
| 370 | 365 | if( pFlags ){ |
| 371 | - *pFlags |= LOOK_LF; | |
| 372 | 366 | if( c2==UTF16BE_CR || c2==UTF16LE_CR ){ |
| 373 | 367 | *pFlags |= LOOK_CRLF; |
| 374 | 368 | }else{ |
| 375 | 369 | *pFlags |= LOOK_LONE_LF; |
| 376 | 370 | } |
| @@ -378,17 +372,13 @@ | ||
| 378 | 372 | if( j>UTF16_LENGTH_MASK ){ |
| 379 | 373 | if( pFlags ) *pFlags |= LOOK_LENGTH; |
| 380 | 374 | result = 0; /* Very long line -> binary */ |
| 381 | 375 | } |
| 382 | 376 | j = 0; |
| 383 | - }else if( c==UTF16BE_CR || c==UTF16LE_CR ){ | |
| 384 | - if( pFlags ){ | |
| 385 | - *pFlags |= LOOK_CR; | |
| 386 | - if( n<=1 || (z[1]!=UTF16BE_LF && z[1]!=UTF16LE_LF) ){ | |
| 387 | - *pFlags |= LOOK_LONE_CR; | |
| 388 | - } | |
| 389 | - } | |
| 377 | + }else if( (c==UTF16BE_CR || c==UTF16LE_CR) && pFlags | |
| 378 | + && (n<sizeof(WCHAR_T) || (z[1]!=UTF16BE_LF && z[1]!=UTF16LE_LF)) ){ | |
| 379 | + *pFlags |= LOOK_LONE_CR; | |
| 390 | 380 | } |
| 391 | 381 | } |
| 392 | 382 | if( j>UTF16_LENGTH_MASK ){ |
| 393 | 383 | if( pFlags ) *pFlags |= LOOK_LENGTH; |
| 394 | 384 | result = 0; /* Very long line -> binary */ |
| 395 | 385 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -69,17 +69,17 @@ | |
| 69 | ** Output flags for the looks_like_utf8() and looks_like_utf16() routines used |
| 70 | ** to convey status information about the blob content. |
| 71 | */ |
| 72 | #define LOOK_NONE ((int)0x00000000) /* Nothing special was found. */ |
| 73 | #define LOOK_NUL ((int)0x00000001) /* One or more NUL chars were found. */ |
| 74 | #define LOOK_CR ((int)0x00000002) /* One or more CR chars were found. */ |
| 75 | #define LOOK_LONE_CR ((int)0x00000004) /* An unpaired CR char was found. */ |
| 76 | #define LOOK_LF ((int)0x00000008) /* One or more LF chars were found. */ |
| 77 | #define LOOK_LONE_LF ((int)0x00000010) /* An unpaired CR char was found. */ |
| 78 | #define LOOK_CRLF ((int)0x00000020) /* One or more CR/LF pairs were found. */ |
| 79 | #define LOOK_LENGTH ((int)0x00000040) /* An over length line was found. */ |
| 80 | #define LOOK_ODD ((int)0x00000080) /* An odd number of bytes was found. */ |
| 81 | #endif /* INTERFACE */ |
| 82 | |
| 83 | /* |
| 84 | ** Maximum length of a line in a text file, in bytes. (2**13 = 8192 bytes) |
| 85 | */ |
| @@ -238,24 +238,23 @@ | |
| 238 | if( n==0 ) return result; /* Empty file -> text */ |
| 239 | c = *z; |
| 240 | if( c==0 ){ |
| 241 | if( pFlags ) *pFlags |= LOOK_NUL; |
| 242 | result = 0; /* NUL character in a file -> binary */ |
| 243 | }else if( (c=='\r') && pFlags ){ |
| 244 | *pFlags |= LOOK_CR; |
| 245 | } |
| 246 | j = (c!='\n'); |
| 247 | if( !j && pFlags ) *pFlags |= LOOK_LF; |
| 248 | while( --n>0 ){ |
| 249 | c = *++z; ++j; |
| 250 | if( c==0 ){ |
| 251 | if( pFlags ) *pFlags |= LOOK_NUL; |
| 252 | result = 0; /* NUL character in a file -> binary */ |
| 253 | }else if( c=='\n' ){ |
| 254 | int c2 = z[-1]; |
| 255 | if( pFlags ){ |
| 256 | *pFlags |= LOOK_LF; |
| 257 | if( c2=='\r' ){ |
| 258 | *pFlags |= LOOK_CRLF; |
| 259 | }else{ |
| 260 | *pFlags |= LOOK_LONE_LF; |
| 261 | } |
| @@ -263,17 +262,12 @@ | |
| 263 | if( j>LENGTH_MASK ){ |
| 264 | if( pFlags ) *pFlags |= LOOK_LENGTH; |
| 265 | result = 0; /* Very long line -> binary */ |
| 266 | } |
| 267 | j = 0; |
| 268 | }else if( c=='\r' ){ |
| 269 | if( pFlags ){ |
| 270 | *pFlags |= LOOK_CR; |
| 271 | if( n<=1 || z[1]!='\n' ){ |
| 272 | *pFlags |= LOOK_LONE_CR; |
| 273 | } |
| 274 | } |
| 275 | } |
| 276 | } |
| 277 | if( j>LENGTH_MASK ){ |
| 278 | if( pFlags ) *pFlags |= LOOK_LENGTH; |
| 279 | result = 0; /* Very long line -> binary */ |
| @@ -351,26 +345,26 @@ | |
| 351 | } |
| 352 | c = *z; |
| 353 | if( c==0 ){ |
| 354 | if( pFlags ) *pFlags |= LOOK_NUL; |
| 355 | result = 0; /* NUL character in a file -> binary */ |
| 356 | }else if( (c==UTF16BE_CR || c==UTF16LE_CR) && pFlags ){ |
| 357 | *pFlags |= LOOK_CR; |
| 358 | } |
| 359 | j = ((c!=UTF16BE_LF) && (c!=UTF16LE_LF)); |
| 360 | if( !j && pFlags ) *pFlags |= LOOK_LF; |
| 361 | while( 1 ){ |
| 362 | if ( n<sizeof(WCHAR_T) ) break; |
| 363 | n -= sizeof(WCHAR_T); |
| 364 | c = *++z; ++j; |
| 365 | if( c==0 ){ |
| 366 | if( pFlags ) *pFlags |= LOOK_NUL; |
| 367 | result = 0; /* NUL character in a file -> binary */ |
| 368 | }else if( c==UTF16BE_LF || c==UTF16LE_LF ){ |
| 369 | int c2 = z[-1]; |
| 370 | if( pFlags ){ |
| 371 | *pFlags |= LOOK_LF; |
| 372 | if( c2==UTF16BE_CR || c2==UTF16LE_CR ){ |
| 373 | *pFlags |= LOOK_CRLF; |
| 374 | }else{ |
| 375 | *pFlags |= LOOK_LONE_LF; |
| 376 | } |
| @@ -378,17 +372,13 @@ | |
| 378 | if( j>UTF16_LENGTH_MASK ){ |
| 379 | if( pFlags ) *pFlags |= LOOK_LENGTH; |
| 380 | result = 0; /* Very long line -> binary */ |
| 381 | } |
| 382 | j = 0; |
| 383 | }else if( c==UTF16BE_CR || c==UTF16LE_CR ){ |
| 384 | if( pFlags ){ |
| 385 | *pFlags |= LOOK_CR; |
| 386 | if( n<=1 || (z[1]!=UTF16BE_LF && z[1]!=UTF16LE_LF) ){ |
| 387 | *pFlags |= LOOK_LONE_CR; |
| 388 | } |
| 389 | } |
| 390 | } |
| 391 | } |
| 392 | if( j>UTF16_LENGTH_MASK ){ |
| 393 | if( pFlags ) *pFlags |= LOOK_LENGTH; |
| 394 | result = 0; /* Very long line -> binary */ |
| 395 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -69,17 +69,17 @@ | |
| 69 | ** Output flags for the looks_like_utf8() and looks_like_utf16() routines used |
| 70 | ** to convey status information about the blob content. |
| 71 | */ |
| 72 | #define LOOK_NONE ((int)0x00000000) /* Nothing special was found. */ |
| 73 | #define LOOK_NUL ((int)0x00000001) /* One or more NUL chars were found. */ |
| 74 | #define LOOK_LONE_CR ((int)0x00000002) /* An unpaired CR char was found. */ |
| 75 | #define LOOK_LONE_LF ((int)0x00000004) /* An unpaired CR char was found. */ |
| 76 | #define LOOK_CRLF ((int)0x00000008) /* One or more CR/LF pairs were found. */ |
| 77 | #define LOOK_LENGTH ((int)0x00000010) /* An over length line was found. */ |
| 78 | #define LOOK_ODD ((int)0x00000020) /* An odd number of bytes was found. */ |
| 79 | #define LOOK_CR (LOOK_LONE_CR|LOOK_CRLF) /* One or more CR chars were found. */ |
| 80 | #define LOOK_LF (LOOK_LONE_LF|LOOK_CRLF) /* One or more LF chars were found. */ |
| 81 | #endif /* INTERFACE */ |
| 82 | |
| 83 | /* |
| 84 | ** Maximum length of a line in a text file, in bytes. (2**13 = 8192 bytes) |
| 85 | */ |
| @@ -238,24 +238,23 @@ | |
| 238 | if( n==0 ) return result; /* Empty file -> text */ |
| 239 | c = *z; |
| 240 | if( c==0 ){ |
| 241 | if( pFlags ) *pFlags |= LOOK_NUL; |
| 242 | result = 0; /* NUL character in a file -> binary */ |
| 243 | }else if( c=='\r' && pFlags && (n<2 || z[1]!='\n') ){ |
| 244 | *pFlags |= LOOK_LONE_CR; |
| 245 | } |
| 246 | j = (c!='\n'); |
| 247 | if( !j && pFlags ) *pFlags |= LOOK_LONE_LF; |
| 248 | while( --n>0 ){ |
| 249 | int c2 = c; |
| 250 | c = *++z; ++j; |
| 251 | if( c==0 ){ |
| 252 | if( pFlags ) *pFlags |= LOOK_NUL; |
| 253 | result = 0; /* NUL character in a file -> binary */ |
| 254 | }else if( c=='\n' ){ |
| 255 | if( pFlags ){ |
| 256 | if( c2=='\r' ){ |
| 257 | *pFlags |= LOOK_CRLF; |
| 258 | }else{ |
| 259 | *pFlags |= LOOK_LONE_LF; |
| 260 | } |
| @@ -263,17 +262,12 @@ | |
| 262 | if( j>LENGTH_MASK ){ |
| 263 | if( pFlags ) *pFlags |= LOOK_LENGTH; |
| 264 | result = 0; /* Very long line -> binary */ |
| 265 | } |
| 266 | j = 0; |
| 267 | }else if( c=='\r' && pFlags && (n<1 || z[1]!='\n') ){ |
| 268 | *pFlags |= LOOK_LONE_CR; |
| 269 | } |
| 270 | } |
| 271 | if( j>LENGTH_MASK ){ |
| 272 | if( pFlags ) *pFlags |= LOOK_LENGTH; |
| 273 | result = 0; /* Very long line -> binary */ |
| @@ -351,26 +345,26 @@ | |
| 345 | } |
| 346 | c = *z; |
| 347 | if( c==0 ){ |
| 348 | if( pFlags ) *pFlags |= LOOK_NUL; |
| 349 | result = 0; /* NUL character in a file -> binary */ |
| 350 | }else if( (c==UTF16BE_CR || c==UTF16LE_CR) && pFlags |
| 351 | && (n<(2*sizeof(WCHAR_T)) || (z[1]!=UTF16BE_LF && z[1]!=UTF16LE_LF)) ){ |
| 352 | *pFlags |= LOOK_LONE_CR; |
| 353 | } |
| 354 | j = ((c!=UTF16BE_LF) && (c!=UTF16LE_LF)); |
| 355 | if( !j && pFlags ) *pFlags |= LOOK_LONE_LF; |
| 356 | while( 1 ){ |
| 357 | int c2 = c; |
| 358 | if ( n<sizeof(WCHAR_T) ) break; |
| 359 | n -= sizeof(WCHAR_T); |
| 360 | c = *++z; ++j; |
| 361 | if( c==0 ){ |
| 362 | if( pFlags ) *pFlags |= LOOK_NUL; |
| 363 | result = 0; /* NUL character in a file -> binary */ |
| 364 | }else if( c==UTF16BE_LF || c==UTF16LE_LF ){ |
| 365 | if( pFlags ){ |
| 366 | if( c2==UTF16BE_CR || c2==UTF16LE_CR ){ |
| 367 | *pFlags |= LOOK_CRLF; |
| 368 | }else{ |
| 369 | *pFlags |= LOOK_LONE_LF; |
| 370 | } |
| @@ -378,17 +372,13 @@ | |
| 372 | if( j>UTF16_LENGTH_MASK ){ |
| 373 | if( pFlags ) *pFlags |= LOOK_LENGTH; |
| 374 | result = 0; /* Very long line -> binary */ |
| 375 | } |
| 376 | j = 0; |
| 377 | }else if( (c==UTF16BE_CR || c==UTF16LE_CR) && pFlags |
| 378 | && (n<sizeof(WCHAR_T) || (z[1]!=UTF16BE_LF && z[1]!=UTF16LE_LF)) ){ |
| 379 | *pFlags |= LOOK_LONE_CR; |
| 380 | } |
| 381 | } |
| 382 | if( j>UTF16_LENGTH_MASK ){ |
| 383 | if( pFlags ) *pFlags |= LOOK_LENGTH; |
| 384 | result = 0; /* Very long line -> binary */ |
| 385 |