Fossil SCM

Re-apply off-by-one fix. Otherwise blobs like {0x00, 0x0a, 0x00} will result in an access of the 4th byte of the BLOB. In test-looks-like-utf, could_be_utf16() should always be called; otherwise the behavior on UTF-16 without a BOM will be endian-dependent.

jan.nijtmans 2013-05-14 07:45 trunk
Commit e545d3514e29b964a014e76d166ad55e6f0849ea
1 file changed +5 -6
+5 -6
--- src/diff.c
+++ src/diff.c
@@ -361,11 +361,11 @@
361361
}
362362
if( c==0 ){
363363
flags |= LOOK_NUL; /* NUL character in a file -> binary */
364364
}else if( c=='\r' ){
365365
flags |= LOOK_CR;
366
- if( n<=sizeof(WCHAR_T) || UTF16_SWAP_IF(bReverse, z[1])!='\n' ){
366
+ if( n<2*sizeof(WCHAR_T) || UTF16_SWAP_IF(bReverse, z[1])!='\n' ){
367367
flags |= LOOK_LONE_CR; /* More chars, next char is not LF */
368368
}
369369
}
370370
j = (c!='\n');
371371
if( !j ) flags |= (LOOK_LF | LOOK_LONE_LF); /* Found LF as first char */
@@ -2600,20 +2600,19 @@
26002600
int fUtf16; /* return value of starts_with_utf16_bom() */
26012601
int fUnicode; /* return value of could_be_utf16() */
26022602
int lookFlags; /* output flags from looks_like_utf8/utf16() */
26032603
int bRevUtf16 = 0; /* non-zero -> UTF-16 byte order reversed */
26042604
int bRevUnicode = 0; /* non-zero -> UTF-16 byte order reversed */
2605
- int fForceUtf8 = find_option("utf8",0,0)!=0;
2606
- int fForceUtf16 = find_option("utf16",0,0)!=0;
26072605
if( g.argc!=3 ) usage("FILENAME");
26082606
blob_read_from_file(&blob, g.argv[2]);
26092607
fUtf8 = starts_with_utf8_bom(&blob, 0);
26102608
fUtf16 = starts_with_utf16_bom(&blob, 0, &bRevUtf16);
2611
- if( fForceUtf8 ){
2609
+ fUnicode = could_be_utf16(&blob, &bRevUnicode);
2610
+ if( find_option("utf8",0,0)!=0 ){
26122611
fUnicode = 0;
2613
- }else{
2614
- fUnicode = fForceUtf16 || could_be_utf16(&blob, &bRevUnicode);
2612
+ }else if( find_option("utf16",0,0)!=0 ){
2613
+ fUnicode = 1;
26152614
}
26162615
lookFlags = fUnicode ? looks_like_utf16(&blob, bRevUnicode, 0) :
26172616
looks_like_utf8(&blob, 0);
26182617
fossil_print("File \"%s\" has %d bytes.\n",g.argv[2],blob_size(&blob));
26192618
fossil_print("Starts with UTF-8 BOM: %s\n",fUtf8?"yes":"no");
26202619
--- src/diff.c
+++ src/diff.c
@@ -361,11 +361,11 @@
361 }
362 if( c==0 ){
363 flags |= LOOK_NUL; /* NUL character in a file -> binary */
364 }else if( c=='\r' ){
365 flags |= LOOK_CR;
366 if( n<=sizeof(WCHAR_T) || UTF16_SWAP_IF(bReverse, z[1])!='\n' ){
367 flags |= LOOK_LONE_CR; /* More chars, next char is not LF */
368 }
369 }
370 j = (c!='\n');
371 if( !j ) flags |= (LOOK_LF | LOOK_LONE_LF); /* Found LF as first char */
@@ -2600,20 +2600,19 @@
2600 int fUtf16; /* return value of starts_with_utf16_bom() */
2601 int fUnicode; /* return value of could_be_utf16() */
2602 int lookFlags; /* output flags from looks_like_utf8/utf16() */
2603 int bRevUtf16 = 0; /* non-zero -> UTF-16 byte order reversed */
2604 int bRevUnicode = 0; /* non-zero -> UTF-16 byte order reversed */
2605 int fForceUtf8 = find_option("utf8",0,0)!=0;
2606 int fForceUtf16 = find_option("utf16",0,0)!=0;
2607 if( g.argc!=3 ) usage("FILENAME");
2608 blob_read_from_file(&blob, g.argv[2]);
2609 fUtf8 = starts_with_utf8_bom(&blob, 0);
2610 fUtf16 = starts_with_utf16_bom(&blob, 0, &bRevUtf16);
2611 if( fForceUtf8 ){
 
2612 fUnicode = 0;
2613 }else{
2614 fUnicode = fForceUtf16 || could_be_utf16(&blob, &bRevUnicode);
2615 }
2616 lookFlags = fUnicode ? looks_like_utf16(&blob, bRevUnicode, 0) :
2617 looks_like_utf8(&blob, 0);
2618 fossil_print("File \"%s\" has %d bytes.\n",g.argv[2],blob_size(&blob));
2619 fossil_print("Starts with UTF-8 BOM: %s\n",fUtf8?"yes":"no");
2620
--- src/diff.c
+++ src/diff.c
@@ -361,11 +361,11 @@
361 }
362 if( c==0 ){
363 flags |= LOOK_NUL; /* NUL character in a file -> binary */
364 }else if( c=='\r' ){
365 flags |= LOOK_CR;
366 if( n<2*sizeof(WCHAR_T) || UTF16_SWAP_IF(bReverse, z[1])!='\n' ){
367 flags |= LOOK_LONE_CR; /* More chars, next char is not LF */
368 }
369 }
370 j = (c!='\n');
371 if( !j ) flags |= (LOOK_LF | LOOK_LONE_LF); /* Found LF as first char */
@@ -2600,20 +2600,19 @@
2600 int fUtf16; /* return value of starts_with_utf16_bom() */
2601 int fUnicode; /* return value of could_be_utf16() */
2602 int lookFlags; /* output flags from looks_like_utf8/utf16() */
2603 int bRevUtf16 = 0; /* non-zero -> UTF-16 byte order reversed */
2604 int bRevUnicode = 0; /* non-zero -> UTF-16 byte order reversed */
 
 
2605 if( g.argc!=3 ) usage("FILENAME");
2606 blob_read_from_file(&blob, g.argv[2]);
2607 fUtf8 = starts_with_utf8_bom(&blob, 0);
2608 fUtf16 = starts_with_utf16_bom(&blob, 0, &bRevUtf16);
2609 fUnicode = could_be_utf16(&blob, &bRevUnicode);
2610 if( find_option("utf8",0,0)!=0 ){
2611 fUnicode = 0;
2612 }else if( find_option("utf16",0,0)!=0 ){
2613 fUnicode = 1;
2614 }
2615 lookFlags = fUnicode ? looks_like_utf16(&blob, bRevUnicode, 0) :
2616 looks_like_utf8(&blob, 0);
2617 fossil_print("File \"%s\" has %d bytes.\n",g.argv[2],blob_size(&blob));
2618 fossil_print("Starts with UTF-8 BOM: %s\n",fUtf8?"yes":"no");
2619

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button