Fossil SCM

Eliminate use of the starts_with_utf16(be|le)_bom functions; starts_with_utf16_bom should be enough. External code will typically call "starts_with_utf16_bom" first and, if it returns true, call "blob_to_utf8_no_bom" to convert the content to UTF-8. There is then no longer any reason for external code to know whether the BOM was le or be.

jan.nijtmans 2013-02-08 09:37 trunk
Commit fab09a17105957aac5ced72bfbf65e1fc6ae0429
2 files changed +10 -15 -38
+10 -15
--- src/blob.c
+++ src/blob.c
@@ -1104,26 +1104,21 @@
11041104
blob_zero(&temp);
11051105
blob_append(&temp, zUtf8, -1);
11061106
blob_swap(pBlob, &temp);
11071107
blob_reset(&temp);
11081108
#ifdef _WIN32
1109
- }else if( starts_with_utf16le_bom(pBlob, &bomSize) ){
1110
- /* Make sure the blob contains two terminating 0-bytes */
1111
- blob_append(pBlob, "", 1);
1112
- zUtf8 = blob_str(pBlob) + bomSize;
1113
- zUtf8 = fossil_unicode_to_utf8(zUtf8);
1114
- blob_zero(pBlob);
1115
- blob_append(pBlob, zUtf8, -1);
1116
- fossil_unicode_free(zUtf8);
1117
- }else if( starts_with_utf16be_bom(pBlob, &bomSize) ){
1118
- unsigned int i = blob_size(pBlob);
1109
+ }else if( starts_with_utf16_bom(pBlob, &bomSize) ){
11191110
zUtf8 = blob_buffer(pBlob);
1120
- while( i > 0 ){
1121
- /* swap bytes of unicode representation */
1122
- char zTemp = zUtf8[--i];
1123
- zUtf8[i] = zUtf8[i-1];
1124
- zUtf8[--i] = zTemp;
1111
+ if (*((unsigned short *)zUtf8) == 0xfffe) {
1112
+ /* Found BOM, but with reversed bytes */
1113
+ unsigned int i = blob_size(pBlob);
1114
+ while( i > 0 ){
1115
+ /* swap bytes of unicode representation */
1116
+ char zTemp = zUtf8[--i];
1117
+ zUtf8[i] = zUtf8[i-1];
1118
+ zUtf8[--i] = zTemp;
1119
+ }
11251120
}
11261121
/* Make sure the blob contains two terminating 0-bytes */
11271122
blob_append(pBlob, "", 1);
11281123
zUtf8 = blob_str(pBlob) + bomSize;
11291124
zUtf8 = fossil_unicode_to_utf8(zUtf8);
11301125
--- src/blob.c
+++ src/blob.c
@@ -1104,26 +1104,21 @@
1104 blob_zero(&temp);
1105 blob_append(&temp, zUtf8, -1);
1106 blob_swap(pBlob, &temp);
1107 blob_reset(&temp);
1108 #ifdef _WIN32
1109 }else if( starts_with_utf16le_bom(pBlob, &bomSize) ){
1110 /* Make sure the blob contains two terminating 0-bytes */
1111 blob_append(pBlob, "", 1);
1112 zUtf8 = blob_str(pBlob) + bomSize;
1113 zUtf8 = fossil_unicode_to_utf8(zUtf8);
1114 blob_zero(pBlob);
1115 blob_append(pBlob, zUtf8, -1);
1116 fossil_unicode_free(zUtf8);
1117 }else if( starts_with_utf16be_bom(pBlob, &bomSize) ){
1118 unsigned int i = blob_size(pBlob);
1119 zUtf8 = blob_buffer(pBlob);
1120 while( i > 0 ){
1121 /* swap bytes of unicode representation */
1122 char zTemp = zUtf8[--i];
1123 zUtf8[i] = zUtf8[i-1];
1124 zUtf8[--i] = zTemp;
 
 
 
 
1125 }
1126 /* Make sure the blob contains two terminating 0-bytes */
1127 blob_append(pBlob, "", 1);
1128 zUtf8 = blob_str(pBlob) + bomSize;
1129 zUtf8 = fossil_unicode_to_utf8(zUtf8);
1130
--- src/blob.c
+++ src/blob.c
@@ -1104,26 +1104,21 @@
1104 blob_zero(&temp);
1105 blob_append(&temp, zUtf8, -1);
1106 blob_swap(pBlob, &temp);
1107 blob_reset(&temp);
1108 #ifdef _WIN32
1109 }else if( starts_with_utf16_bom(pBlob, &bomSize) ){
 
 
 
 
 
 
 
 
 
1110 zUtf8 = blob_buffer(pBlob);
1111 if (*((unsigned short *)zUtf8) == 0xfffe) {
1112 /* Found BOM, but with reversed bytes */
1113 unsigned int i = blob_size(pBlob);
1114 while( i > 0 ){
1115 /* swap bytes of unicode representation */
1116 char zTemp = zUtf8[--i];
1117 zUtf8[i] = zUtf8[i-1];
1118 zUtf8[--i] = zTemp;
1119 }
1120 }
1121 /* Make sure the blob contains two terminating 0-bytes */
1122 blob_append(pBlob, "", 1);
1123 zUtf8 = blob_str(pBlob) + bomSize;
1124 zUtf8 = fossil_unicode_to_utf8(zUtf8);
1125
-38
--- src/diff.c
+++ src/diff.c
@@ -365,48 +365,10 @@
365365
366366
if( pnByte ) *pnByte = 2;
367367
if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0;
368368
c1 = ((unsigned short *)z)[0];
369369
if( (c1==0xfeff) || (c1==0xfffe) ){
370
- if( blob_size(pContent) < 4 ) return 1;
371
- c1 = ((unsigned short *)z)[1];
372
- if( c1 != 0 ) return 1;
373
- }
374
- return 0;
375
-}
376
-
377
-/*
378
-** This function returns non-zero if the blob starts with a UTF-16le
379
-** byte-order-mark (BOM).
380
-*/
381
-int starts_with_utf16le_bom(const Blob *pContent, int *pnByte){
382
- const char *z = blob_buffer(pContent);
383
- int c1;
384
-
385
- if( pnByte ) *pnByte = 2;
386
- if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0;
387
- c1 = ((unsigned short *)z)[0];
388
- if( c1==0xfeff ){
389
- if( blob_size(pContent) < 4 ) return 1;
390
- c1 = ((unsigned short *)z)[1];
391
- if( c1 != 0 ) return 1;
392
- }
393
- return 0;
394
-}
395
-
396
-/*
397
-** This function returns non-zero if the blob starts with a UTF-16be
398
-** byte-order-mark (BOM).
399
-*/
400
-int starts_with_utf16be_bom(const Blob *pContent, int *pnByte){
401
- const char *z = blob_buffer(pContent);
402
- int c1;
403
-
404
- if( pnByte ) *pnByte = 2;
405
- if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0;
406
- c1 = ((unsigned short *)z)[0];
407
- if( c1==0xfffe ){
408370
if( blob_size(pContent) < 4 ) return 1;
409371
c1 = ((unsigned short *)z)[1];
410372
if( c1 != 0 ) return 1;
411373
}
412374
return 0;
413375
--- src/diff.c
+++ src/diff.c
@@ -365,48 +365,10 @@
365
366 if( pnByte ) *pnByte = 2;
367 if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0;
368 c1 = ((unsigned short *)z)[0];
369 if( (c1==0xfeff) || (c1==0xfffe) ){
370 if( blob_size(pContent) < 4 ) return 1;
371 c1 = ((unsigned short *)z)[1];
372 if( c1 != 0 ) return 1;
373 }
374 return 0;
375 }
376
377 /*
378 ** This function returns non-zero if the blob starts with a UTF-16le
379 ** byte-order-mark (BOM).
380 */
381 int starts_with_utf16le_bom(const Blob *pContent, int *pnByte){
382 const char *z = blob_buffer(pContent);
383 int c1;
384
385 if( pnByte ) *pnByte = 2;
386 if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0;
387 c1 = ((unsigned short *)z)[0];
388 if( c1==0xfeff ){
389 if( blob_size(pContent) < 4 ) return 1;
390 c1 = ((unsigned short *)z)[1];
391 if( c1 != 0 ) return 1;
392 }
393 return 0;
394 }
395
396 /*
397 ** This function returns non-zero if the blob starts with a UTF-16be
398 ** byte-order-mark (BOM).
399 */
400 int starts_with_utf16be_bom(const Blob *pContent, int *pnByte){
401 const char *z = blob_buffer(pContent);
402 int c1;
403
404 if( pnByte ) *pnByte = 2;
405 if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0;
406 c1 = ((unsigned short *)z)[0];
407 if( c1==0xfffe ){
408 if( blob_size(pContent) < 4 ) return 1;
409 c1 = ((unsigned short *)z)[1];
410 if( c1 != 0 ) return 1;
411 }
412 return 0;
413
--- src/diff.c
+++ src/diff.c
@@ -365,48 +365,10 @@
365
366 if( pnByte ) *pnByte = 2;
367 if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0;
368 c1 = ((unsigned short *)z)[0];
369 if( (c1==0xfeff) || (c1==0xfffe) ){
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370 if( blob_size(pContent) < 4 ) return 1;
371 c1 = ((unsigned short *)z)[1];
372 if( c1 != 0 ) return 1;
373 }
374 return 0;
375

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button