Fossil SCM
Eliminate use of the starts_with_utf16(be|le)_bom functions; starts_with_utf16_bom should be enough. External code will typically call "starts_with_utf16_bom" first and, if it returns true, call "blob_to_utf8_no_bom" to convert the content to UTF-8. There is then no longer any reason for external code to know whether the BOM was le or be.
Commit
fab09a17105957aac5ced72bfbf65e1fc6ae0429
Parent
c31bbd40846e3ca…
2 files changed
+10
-15
-38
+10
-15
| --- src/blob.c | ||
| +++ src/blob.c | ||
| @@ -1104,26 +1104,21 @@ | ||
| 1104 | 1104 | blob_zero(&temp); |
| 1105 | 1105 | blob_append(&temp, zUtf8, -1); |
| 1106 | 1106 | blob_swap(pBlob, &temp); |
| 1107 | 1107 | blob_reset(&temp); |
| 1108 | 1108 | #ifdef _WIN32 |
| 1109 | - }else if( starts_with_utf16le_bom(pBlob, &bomSize) ){ | |
| 1110 | - /* Make sure the blob contains two terminating 0-bytes */ | |
| 1111 | - blob_append(pBlob, "", 1); | |
| 1112 | - zUtf8 = blob_str(pBlob) + bomSize; | |
| 1113 | - zUtf8 = fossil_unicode_to_utf8(zUtf8); | |
| 1114 | - blob_zero(pBlob); | |
| 1115 | - blob_append(pBlob, zUtf8, -1); | |
| 1116 | - fossil_unicode_free(zUtf8); | |
| 1117 | - }else if( starts_with_utf16be_bom(pBlob, &bomSize) ){ | |
| 1118 | - unsigned int i = blob_size(pBlob); | |
| 1109 | + }else if( starts_with_utf16_bom(pBlob, &bomSize) ){ | |
| 1119 | 1110 | zUtf8 = blob_buffer(pBlob); |
| 1120 | - while( i > 0 ){ | |
| 1121 | - /* swap bytes of unicode representation */ | |
| 1122 | - char zTemp = zUtf8[--i]; | |
| 1123 | - zUtf8[i] = zUtf8[i-1]; | |
| 1124 | - zUtf8[--i] = zTemp; | |
| 1111 | + if (*((unsigned short *)zUtf8) == 0xfffe) { | |
| 1112 | + /* Found BOM, but with reversed bytes */ | |
| 1113 | + unsigned int i = blob_size(pBlob); | |
| 1114 | + while( i > 0 ){ | |
| 1115 | + /* swap bytes of unicode representation */ | |
| 1116 | + char zTemp = zUtf8[--i]; | |
| 1117 | + zUtf8[i] = zUtf8[i-1]; | |
| 1118 | + zUtf8[--i] = zTemp; | |
| 1119 | + } | |
| 1125 | 1120 | } |
| 1126 | 1121 | /* Make sure the blob contains two terminating 0-bytes */ |
| 1127 | 1122 | blob_append(pBlob, "", 1); |
| 1128 | 1123 | zUtf8 = blob_str(pBlob) + bomSize; |
| 1129 | 1124 | zUtf8 = fossil_unicode_to_utf8(zUtf8); |
| 1130 | 1125 |
| --- src/blob.c | |
| +++ src/blob.c | |
| @@ -1104,26 +1104,21 @@ | |
| 1104 | blob_zero(&temp); |
| 1105 | blob_append(&temp, zUtf8, -1); |
| 1106 | blob_swap(pBlob, &temp); |
| 1107 | blob_reset(&temp); |
| 1108 | #ifdef _WIN32 |
| 1109 | }else if( starts_with_utf16le_bom(pBlob, &bomSize) ){ |
| 1110 | /* Make sure the blob contains two terminating 0-bytes */ |
| 1111 | blob_append(pBlob, "", 1); |
| 1112 | zUtf8 = blob_str(pBlob) + bomSize; |
| 1113 | zUtf8 = fossil_unicode_to_utf8(zUtf8); |
| 1114 | blob_zero(pBlob); |
| 1115 | blob_append(pBlob, zUtf8, -1); |
| 1116 | fossil_unicode_free(zUtf8); |
| 1117 | }else if( starts_with_utf16be_bom(pBlob, &bomSize) ){ |
| 1118 | unsigned int i = blob_size(pBlob); |
| 1119 | zUtf8 = blob_buffer(pBlob); |
| 1120 | while( i > 0 ){ |
| 1121 | /* swap bytes of unicode representation */ |
| 1122 | char zTemp = zUtf8[--i]; |
| 1123 | zUtf8[i] = zUtf8[i-1]; |
| 1124 | zUtf8[--i] = zTemp; |
| 1125 | } |
| 1126 | /* Make sure the blob contains two terminating 0-bytes */ |
| 1127 | blob_append(pBlob, "", 1); |
| 1128 | zUtf8 = blob_str(pBlob) + bomSize; |
| 1129 | zUtf8 = fossil_unicode_to_utf8(zUtf8); |
| 1130 |
| --- src/blob.c | |
| +++ src/blob.c | |
| @@ -1104,26 +1104,21 @@ | |
| 1104 | blob_zero(&temp); |
| 1105 | blob_append(&temp, zUtf8, -1); |
| 1106 | blob_swap(pBlob, &temp); |
| 1107 | blob_reset(&temp); |
| 1108 | #ifdef _WIN32 |
| 1109 | }else if( starts_with_utf16_bom(pBlob, &bomSize) ){ |
| 1110 | zUtf8 = blob_buffer(pBlob); |
| 1111 | if (*((unsigned short *)zUtf8) == 0xfffe) { |
| 1112 | /* Found BOM, but with reversed bytes */ |
| 1113 | unsigned int i = blob_size(pBlob); |
| 1114 | while( i > 0 ){ |
| 1115 | /* swap bytes of unicode representation */ |
| 1116 | char zTemp = zUtf8[--i]; |
| 1117 | zUtf8[i] = zUtf8[i-1]; |
| 1118 | zUtf8[--i] = zTemp; |
| 1119 | } |
| 1120 | } |
| 1121 | /* Make sure the blob contains two terminating 0-bytes */ |
| 1122 | blob_append(pBlob, "", 1); |
| 1123 | zUtf8 = blob_str(pBlob) + bomSize; |
| 1124 | zUtf8 = fossil_unicode_to_utf8(zUtf8); |
| 1125 |
-38
| --- src/diff.c | ||
| +++ src/diff.c | ||
| @@ -365,48 +365,10 @@ | ||
| 365 | 365 | |
| 366 | 366 | if( pnByte ) *pnByte = 2; |
| 367 | 367 | if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0; |
| 368 | 368 | c1 = ((unsigned short *)z)[0]; |
| 369 | 369 | if( (c1==0xfeff) || (c1==0xfffe) ){ |
| 370 | - if( blob_size(pContent) < 4 ) return 1; | |
| 371 | - c1 = ((unsigned short *)z)[1]; | |
| 372 | - if( c1 != 0 ) return 1; | |
| 373 | - } | |
| 374 | - return 0; | |
| 375 | -} | |
| 376 | - | |
| 377 | -/* | |
| 378 | -** This function returns non-zero if the blob starts with a UTF-16le | |
| 379 | -** byte-order-mark (BOM). | |
| 380 | -*/ | |
| 381 | -int starts_with_utf16le_bom(const Blob *pContent, int *pnByte){ | |
| 382 | - const char *z = blob_buffer(pContent); | |
| 383 | - int c1; | |
| 384 | - | |
| 385 | - if( pnByte ) *pnByte = 2; | |
| 386 | - if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0; | |
| 387 | - c1 = ((unsigned short *)z)[0]; | |
| 388 | - if( c1==0xfeff ){ | |
| 389 | - if( blob_size(pContent) < 4 ) return 1; | |
| 390 | - c1 = ((unsigned short *)z)[1]; | |
| 391 | - if( c1 != 0 ) return 1; | |
| 392 | - } | |
| 393 | - return 0; | |
| 394 | -} | |
| 395 | - | |
| 396 | -/* | |
| 397 | -** This function returns non-zero if the blob starts with a UTF-16be | |
| 398 | -** byte-order-mark (BOM). | |
| 399 | -*/ | |
| 400 | -int starts_with_utf16be_bom(const Blob *pContent, int *pnByte){ | |
| 401 | - const char *z = blob_buffer(pContent); | |
| 402 | - int c1; | |
| 403 | - | |
| 404 | - if( pnByte ) *pnByte = 2; | |
| 405 | - if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0; | |
| 406 | - c1 = ((unsigned short *)z)[0]; | |
| 407 | - if( c1==0xfffe ){ | |
| 408 | 370 | if( blob_size(pContent) < 4 ) return 1; |
| 409 | 371 | c1 = ((unsigned short *)z)[1]; |
| 410 | 372 | if( c1 != 0 ) return 1; |
| 411 | 373 | } |
| 412 | 374 | return 0; |
| 413 | 375 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -365,48 +365,10 @@ | |
| 365 | |
| 366 | if( pnByte ) *pnByte = 2; |
| 367 | if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0; |
| 368 | c1 = ((unsigned short *)z)[0]; |
| 369 | if( (c1==0xfeff) || (c1==0xfffe) ){ |
| 370 | if( blob_size(pContent) < 4 ) return 1; |
| 371 | c1 = ((unsigned short *)z)[1]; |
| 372 | if( c1 != 0 ) return 1; |
| 373 | } |
| 374 | return 0; |
| 375 | } |
| 376 | |
| 377 | /* |
| 378 | ** This function returns non-zero if the blob starts with a UTF-16le |
| 379 | ** byte-order-mark (BOM). |
| 380 | */ |
| 381 | int starts_with_utf16le_bom(const Blob *pContent, int *pnByte){ |
| 382 | const char *z = blob_buffer(pContent); |
| 383 | int c1; |
| 384 | |
| 385 | if( pnByte ) *pnByte = 2; |
| 386 | if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0; |
| 387 | c1 = ((unsigned short *)z)[0]; |
| 388 | if( c1==0xfeff ){ |
| 389 | if( blob_size(pContent) < 4 ) return 1; |
| 390 | c1 = ((unsigned short *)z)[1]; |
| 391 | if( c1 != 0 ) return 1; |
| 392 | } |
| 393 | return 0; |
| 394 | } |
| 395 | |
| 396 | /* |
| 397 | ** This function returns non-zero if the blob starts with a UTF-16be |
| 398 | ** byte-order-mark (BOM). |
| 399 | */ |
| 400 | int starts_with_utf16be_bom(const Blob *pContent, int *pnByte){ |
| 401 | const char *z = blob_buffer(pContent); |
| 402 | int c1; |
| 403 | |
| 404 | if( pnByte ) *pnByte = 2; |
| 405 | if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0; |
| 406 | c1 = ((unsigned short *)z)[0]; |
| 407 | if( c1==0xfffe ){ |
| 408 | if( blob_size(pContent) < 4 ) return 1; |
| 409 | c1 = ((unsigned short *)z)[1]; |
| 410 | if( c1 != 0 ) return 1; |
| 411 | } |
| 412 | return 0; |
| 413 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -365,48 +365,10 @@ | |
| 365 | |
| 366 | if( pnByte ) *pnByte = 2; |
| 367 | if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0; |
| 368 | c1 = ((unsigned short *)z)[0]; |
| 369 | if( (c1==0xfeff) || (c1==0xfffe) ){ |
| 370 | if( blob_size(pContent) < 4 ) return 1; |
| 371 | c1 = ((unsigned short *)z)[1]; |
| 372 | if( c1 != 0 ) return 1; |
| 373 | } |
| 374 | return 0; |
| 375 |