Fossil SCM

Fix overly eager byte swapping when checking for UTF-16 text.

mistachkin 2013-03-19 17:37 UTC bomRefactor
Commit 3f2f1e62fa1570174572b30e86932916414003b8
1 file changed +6 -5
+6 -5
--- src/diff.c
+++ src/diff.c
@@ -295,18 +295,19 @@
295 295   /*
296 296   ** Maximum length of a line in a text file, in UTF-16 characters. (4096)
297 297   ** The number of bytes represented by this value cannot exceed LENGTH_MASK
298 298   ** bytes, because that is the line buffer size used by the diff engine.
299 299   */
300     - #define UTF16_LENGTH_MASK_SZ (LENGTH_MASK_SZ-(sizeof(WCHAR_T)-sizeof(char)))
301     - #define UTF16_LENGTH_MASK ((1<<UTF16_LENGTH_MASK_SZ)-1)
    300 + #define UTF16_LENGTH_MASK_SZ (LENGTH_MASK_SZ-(sizeof(WCHAR_T)-sizeof(char)))
    301 + #define UTF16_LENGTH_MASK ((1<<UTF16_LENGTH_MASK_SZ)-1)
302 302
303 303   /*
304 304   ** This macro is used to swap the byte order of a UTF-16 character in the
305 305   ** looks_like_utf16() function.
306 306   */
307     - #define UTF16_SWAP(ch) ((((ch) << 8) & 0xFF00) | (((ch) >> 8) & 0xFF))
    307 + #define UTF16_SWAP(ch) ((((ch) << 8) & 0xFF00) | (((ch) >> 8) & 0xFF))
    308 + #define UTF16_SWAP_IF(expr,ch) ((expr) ? UTF16_SWAP((ch)) : (ch))
308 309
309 310   /*
310 311   ** This function attempts to scan each logical line within the blob to
311 312   ** determine the type of content it appears to contain. The return value
312 313   ** is a combination of one or more of the LOOK_XXX flags (see above):
@@ -354,11 +355,11 @@
354 355   }
355 356   if( c==0 ){
356 357   flags |= LOOK_NUL; /* NUL character in a file -> binary */
357 358   }else if( c=='\r' ){
358 359   flags |= LOOK_CR;
359     - if( n<=sizeof(WCHAR_T) || UTF16_SWAP(z[1])!='\n' ){
    360 + if( n<=sizeof(WCHAR_T) || UTF16_SWAP_IF(bReverse, z[1])!='\n' ){
360 361   flags |= LOOK_LONE_CR; /* More chars, next char is not LF */
361 362   }
362 363   }
363 364   j = (c!='\n');
364 365   if( !j ) flags |= (LOOK_LF | LOOK_LONE_LF); /* Found LF as first char */
@@ -384,11 +385,11 @@
384 385   flags |= LOOK_LONG; /* Very long line -> binary */
385 386   }
386 387   j = 0;
387 388   }else if( c=='\r' ){
388 389   flags |= LOOK_CR;
389     - if( n<=sizeof(WCHAR_T) || UTF16_SWAP(z[1])!='\n' ){
    390 + if( n<=sizeof(WCHAR_T) || UTF16_SWAP_IF(bReverse, z[1])!='\n' ){
390 391   flags |= LOOK_LONE_CR; /* More chars, next char is not LF */
391 392   }
392 393   }
393 394   }
394 395   if( j>UTF16_LENGTH_MASK ){
395 396
--- src/diff.c
+++ src/diff.c
@@ -295,18 +295,19 @@
295 /*
296 ** Maximum length of a line in a text file, in UTF-16 characters. (4096)
297 ** The number of bytes represented by this value cannot exceed LENGTH_MASK
298 ** bytes, because that is the line buffer size used by the diff engine.
299 */
300 #define UTF16_LENGTH_MASK_SZ (LENGTH_MASK_SZ-(sizeof(WCHAR_T)-sizeof(char)))
301 #define UTF16_LENGTH_MASK ((1<<UTF16_LENGTH_MASK_SZ)-1)
302
303 /*
304 ** This macro is used to swap the byte order of a UTF-16 character in the
305 ** looks_like_utf16() function.
306 */
307 #define UTF16_SWAP(ch) ((((ch) << 8) & 0xFF00) | (((ch) >> 8) & 0xFF))
 
308
309 /*
310 ** This function attempts to scan each logical line within the blob to
311 ** determine the type of content it appears to contain. The return value
312 ** is a combination of one or more of the LOOK_XXX flags (see above):
@@ -354,11 +355,11 @@
354 }
355 if( c==0 ){
356 flags |= LOOK_NUL; /* NUL character in a file -> binary */
357 }else if( c=='\r' ){
358 flags |= LOOK_CR;
359 if( n<=sizeof(WCHAR_T) || UTF16_SWAP(z[1])!='\n' ){
360 flags |= LOOK_LONE_CR; /* More chars, next char is not LF */
361 }
362 }
363 j = (c!='\n');
364 if( !j ) flags |= (LOOK_LF | LOOK_LONE_LF); /* Found LF as first char */
@@ -384,11 +385,11 @@
384 flags |= LOOK_LONG; /* Very long line -> binary */
385 }
386 j = 0;
387 }else if( c=='\r' ){
388 flags |= LOOK_CR;
389 if( n<=sizeof(WCHAR_T) || UTF16_SWAP(z[1])!='\n' ){
390 flags |= LOOK_LONE_CR; /* More chars, next char is not LF */
391 }
392 }
393 }
394 if( j>UTF16_LENGTH_MASK ){
395
--- src/diff.c
+++ src/diff.c
@@ -295,18 +295,19 @@
295 /*
296 ** Maximum length of a line in a text file, in UTF-16 characters. (4096)
297 ** The number of bytes represented by this value cannot exceed LENGTH_MASK
298 ** bytes, because that is the line buffer size used by the diff engine.
299 */
300 #define UTF16_LENGTH_MASK_SZ (LENGTH_MASK_SZ-(sizeof(WCHAR_T)-sizeof(char)))
301 #define UTF16_LENGTH_MASK ((1<<UTF16_LENGTH_MASK_SZ)-1)
302
303 /*
304 ** This macro is used to swap the byte order of a UTF-16 character in the
305 ** looks_like_utf16() function.
306 */
307 #define UTF16_SWAP(ch) ((((ch) << 8) & 0xFF00) | (((ch) >> 8) & 0xFF))
308 #define UTF16_SWAP_IF(expr,ch) ((expr) ? UTF16_SWAP((ch)) : (ch))
309
310 /*
311 ** This function attempts to scan each logical line within the blob to
312 ** determine the type of content it appears to contain. The return value
313 ** is a combination of one or more of the LOOK_XXX flags (see above):
@@ -354,11 +355,11 @@
355 }
356 if( c==0 ){
357 flags |= LOOK_NUL; /* NUL character in a file -> binary */
358 }else if( c=='\r' ){
359 flags |= LOOK_CR;
360 if( n<=sizeof(WCHAR_T) || UTF16_SWAP_IF(bReverse, z[1])!='\n' ){
361 flags |= LOOK_LONE_CR; /* More chars, next char is not LF */
362 }
363 }
364 j = (c!='\n');
365 if( !j ) flags |= (LOOK_LF | LOOK_LONE_LF); /* Found LF as first char */
@@ -384,11 +385,11 @@
385 flags |= LOOK_LONG; /* Very long line -> binary */
386 }
387 j = 0;
388 }else if( c=='\r' ){
389 flags |= LOOK_CR;
390 if( n<=sizeof(WCHAR_T) || UTF16_SWAP_IF(bReverse, z[1])!='\n' ){
391 flags |= LOOK_LONE_CR; /* More chars, next char is not LF */
392 }
393 }
394 }
395 if( j>UTF16_LENGTH_MASK ){
396

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button