Fossil SCM
Simplify and speed up starts_with_utf16_bom, and improve its documentation.
Commit
957be426253c20e2412c1df602b5b379ecad18c2
Parent
646c4a67f9a5b21…
2 files changed
+14
-19
+14
-19
+14
-19
| --- src/diff.c | ||
| +++ src/diff.c | ||
| @@ -363,38 +363,33 @@ | ||
| 363 | 363 | } |
| 364 | 364 | |
| 365 | 365 | /* |
| 366 | 366 | ** This function returns non-zero if the blob starts with a UTF-16 |
| 367 | 367 | ** byte-order-mark (BOM), either in the endianness of the machine |
| 368 | -** or in reversed byte order. | |
| 368 | +** or in reversed byte order. The UTF-32 BOM is ruled out by checking | |
| 369 | +** if the UTF-16 BOM is not immediately followed by (utf16) 0. | |
| 370 | +** pnByte and pbReverse are only set when the function returns 1. | |
| 369 | 371 | */ |
| 370 | 372 | int starts_with_utf16_bom( |
| 371 | 373 | const Blob *pContent, /* IN: Blob content to perform BOM detection on. */ |
| 372 | 374 | int *pnByte, /* OUT: The number of bytes used for the BOM. */ |
| 373 | 375 | int *pbReverse /* OUT: Non-zero for BOM in reverse byte-order. */ |
| 374 | 376 | ){ |
| 375 | - const char *z = blob_buffer(pContent); | |
| 376 | - int bomSize = 2; | |
| 377 | - static const unsigned short bom = 0xfeff; | |
| 378 | - static const unsigned short bom_reversed = 0xfffe; | |
| 379 | - static const unsigned short null = 0; | |
| 380 | - int size; | |
| 381 | - | |
| 382 | - if( pnByte ) *pnByte = bomSize; | |
| 383 | - if( pbReverse ) *pbReverse = -1; /* Unknown. */ | |
| 384 | - size = blob_size(pContent); | |
| 385 | - if( (size<bomSize) || (size%2) ) return 0; | |
| 386 | - if( memcmp(z, &bom_reversed, bomSize)==0 ){ | |
| 377 | + const unsigned short *z = (unsigned short *)blob_buffer(pContent); | |
| 378 | + int size = blob_size(pContent); | |
| 379 | + | |
| 380 | + if( (size<2) || (size%2) | |
| 381 | + || (size>=4 && z[1]==0) ) return 0; | |
| 382 | + if( z[0] == 0xfffe ){ | |
| 387 | 383 | if( pbReverse ) *pbReverse = 1; |
| 388 | - if( size<(2*bomSize) ) return 1; | |
| 389 | - if( memcmp(z+bomSize, &null, bomSize)!=0 ) return 1; | |
| 390 | - }else if( memcmp(z, &bom, bomSize)==0 ){ | |
| 384 | + }else if( z[0] == 0xfeff ){ | |
| 391 | 385 | if( pbReverse ) *pbReverse = 0; |
| 392 | - if( size<(2*bomSize) ) return 1; | |
| 393 | - if( memcmp(z+bomSize, &null, bomSize)!=0 ) return 1; | |
| 386 | + }else{ | |
| 387 | + return 0; | |
| 394 | 388 | } |
| 395 | - return 0; | |
| 389 | + if( pnByte ) *pnByte = 2; | |
| 390 | + return 1; | |
| 396 | 391 | } |
| 397 | 392 | |
| 398 | 393 | /* |
| 399 | 394 | ** Return true if two DLine elements are identical. |
| 400 | 395 | */ |
| 401 | 396 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -363,38 +363,33 @@ | |
| 363 | } |
| 364 | |
| 365 | /* |
| 366 | ** This function returns non-zero if the blob starts with a UTF-16 |
| 367 | ** byte-order-mark (BOM), either in the endianness of the machine |
| 368 | ** or in reversed byte order. |
| 369 | */ |
| 370 | int starts_with_utf16_bom( |
| 371 | const Blob *pContent, /* IN: Blob content to perform BOM detection on. */ |
| 372 | int *pnByte, /* OUT: The number of bytes used for the BOM. */ |
| 373 | int *pbReverse /* OUT: Non-zero for BOM in reverse byte-order. */ |
| 374 | ){ |
| 375 | const char *z = blob_buffer(pContent); |
| 376 | int bomSize = 2; |
| 377 | static const unsigned short bom = 0xfeff; |
| 378 | static const unsigned short bom_reversed = 0xfffe; |
| 379 | static const unsigned short null = 0; |
| 380 | int size; |
| 381 | |
| 382 | if( pnByte ) *pnByte = bomSize; |
| 383 | if( pbReverse ) *pbReverse = -1; /* Unknown. */ |
| 384 | size = blob_size(pContent); |
| 385 | if( (size<bomSize) || (size%2) ) return 0; |
| 386 | if( memcmp(z, &bom_reversed, bomSize)==0 ){ |
| 387 | if( pbReverse ) *pbReverse = 1; |
| 388 | if( size<(2*bomSize) ) return 1; |
| 389 | if( memcmp(z+bomSize, &null, bomSize)!=0 ) return 1; |
| 390 | }else if( memcmp(z, &bom, bomSize)==0 ){ |
| 391 | if( pbReverse ) *pbReverse = 0; |
| 392 | if( size<(2*bomSize) ) return 1; |
| 393 | if( memcmp(z+bomSize, &null, bomSize)!=0 ) return 1; |
| 394 | } |
| 395 | return 0; |
| 396 | } |
| 397 | |
| 398 | /* |
| 399 | ** Return true if two DLine elements are identical. |
| 400 | */ |
| 401 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -363,38 +363,33 @@ | |
| 363 | } |
| 364 | |
| 365 | /* |
| 366 | ** This function returns non-zero if the blob starts with a UTF-16 |
| 367 | ** byte-order-mark (BOM), either in the endianness of the machine |
| 368 | ** or in reversed byte order. The UTF-32 BOM is ruled out by checking |
| 369 | ** if the UTF-16 BOM is not immediately followed by (utf16) 0. |
| 370 | ** pnByte and pbReverse are only set when the function returns 1. |
| 371 | */ |
| 372 | int starts_with_utf16_bom( |
| 373 | const Blob *pContent, /* IN: Blob content to perform BOM detection on. */ |
| 374 | int *pnByte, /* OUT: The number of bytes used for the BOM. */ |
| 375 | int *pbReverse /* OUT: Non-zero for BOM in reverse byte-order. */ |
| 376 | ){ |
| 377 | const unsigned short *z = (unsigned short *)blob_buffer(pContent); |
| 378 | int size = blob_size(pContent); |
| 379 | |
| 380 | if( (size<2) || (size%2) |
| 381 | || (size>=4 && z[1]==0) ) return 0; |
| 382 | if( z[0] == 0xfffe ){ |
| 383 | if( pbReverse ) *pbReverse = 1; |
| 384 | }else if( z[0] == 0xfeff ){ |
| 385 | if( pbReverse ) *pbReverse = 0; |
| 386 | }else{ |
| 387 | return 0; |
| 388 | } |
| 389 | if( pnByte ) *pnByte = 2; |
| 390 | return 1; |
| 391 | } |
| 392 | |
| 393 | /* |
| 394 | ** Return true if two DLine elements are identical. |
| 395 | */ |
| 396 |
+14
-19
| --- src/diff.c | ||
| +++ src/diff.c | ||
| @@ -363,38 +363,33 @@ | ||
| 363 | 363 | } |
| 364 | 364 | |
| 365 | 365 | /* |
| 366 | 366 | ** This function returns non-zero if the blob starts with a UTF-16 |
| 367 | 367 | ** byte-order-mark (BOM), either in the endianness of the machine |
| 368 | -** or in reversed byte order. | |
| 368 | +** or in reversed byte order. The UTF-32 BOM is ruled out by checking | |
| 369 | +** if the UTF-16 BOM is not immediately followed by (utf16) 0. | |
| 370 | +** pnByte and pbReverse are only set when the function returns 1. | |
| 369 | 371 | */ |
| 370 | 372 | int starts_with_utf16_bom( |
| 371 | 373 | const Blob *pContent, /* IN: Blob content to perform BOM detection on. */ |
| 372 | 374 | int *pnByte, /* OUT: The number of bytes used for the BOM. */ |
| 373 | 375 | int *pbReverse /* OUT: Non-zero for BOM in reverse byte-order. */ |
| 374 | 376 | ){ |
| 375 | - const char *z = blob_buffer(pContent); | |
| 376 | - int bomSize = 2; | |
| 377 | - static const unsigned short bom = 0xfeff; | |
| 378 | - static const unsigned short bom_reversed = 0xfffe; | |
| 379 | - static const unsigned short null = 0; | |
| 380 | - int size; | |
| 381 | - | |
| 382 | - if( pnByte ) *pnByte = bomSize; | |
| 383 | - if( pbReverse ) *pbReverse = -1; /* Unknown. */ | |
| 384 | - size = blob_size(pContent); | |
| 385 | - if( (size<bomSize) || (size%2) ) return 0; | |
| 386 | - if( memcmp(z, &bom_reversed, bomSize)==0 ){ | |
| 377 | + const unsigned short *z = (unsigned short *)blob_buffer(pContent); | |
| 378 | + int size = blob_size(pContent); | |
| 379 | + | |
| 380 | + if( (size<2) || (size%2) | |
| 381 | + || (size>=4 && z[1]==0) ) return 0; | |
| 382 | + if( z[0] == 0xfffe ){ | |
| 387 | 383 | if( pbReverse ) *pbReverse = 1; |
| 388 | - if( size<(2*bomSize) ) return 1; | |
| 389 | - if( memcmp(z+bomSize, &null, bomSize)!=0 ) return 1; | |
| 390 | - }else if( memcmp(z, &bom, bomSize)==0 ){ | |
| 384 | + }else if( z[0] == 0xfeff ){ | |
| 391 | 385 | if( pbReverse ) *pbReverse = 0; |
| 392 | - if( size<(2*bomSize) ) return 1; | |
| 393 | - if( memcmp(z+bomSize, &null, bomSize)!=0 ) return 1; | |
| 386 | + }else{ | |
| 387 | + return 0; | |
| 394 | 388 | } |
| 395 | - return 0; | |
| 389 | + if( pnByte ) *pnByte = 2; | |
| 390 | + return 1; | |
| 396 | 391 | } |
| 397 | 392 | |
| 398 | 393 | /* |
| 399 | 394 | ** Return true if two DLine elements are identical. |
| 400 | 395 | */ |
| 401 | 396 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -363,38 +363,33 @@ | |
| 363 | } |
| 364 | |
| 365 | /* |
| 366 | ** This function returns non-zero if the blob starts with a UTF-16 |
| 367 | ** byte-order-mark (BOM), either in the endianness of the machine |
| 368 | ** or in reversed byte order. |
| 369 | */ |
| 370 | int starts_with_utf16_bom( |
| 371 | const Blob *pContent, /* IN: Blob content to perform BOM detection on. */ |
| 372 | int *pnByte, /* OUT: The number of bytes used for the BOM. */ |
| 373 | int *pbReverse /* OUT: Non-zero for BOM in reverse byte-order. */ |
| 374 | ){ |
| 375 | const char *z = blob_buffer(pContent); |
| 376 | int bomSize = 2; |
| 377 | static const unsigned short bom = 0xfeff; |
| 378 | static const unsigned short bom_reversed = 0xfffe; |
| 379 | static const unsigned short null = 0; |
| 380 | int size; |
| 381 | |
| 382 | if( pnByte ) *pnByte = bomSize; |
| 383 | if( pbReverse ) *pbReverse = -1; /* Unknown. */ |
| 384 | size = blob_size(pContent); |
| 385 | if( (size<bomSize) || (size%2) ) return 0; |
| 386 | if( memcmp(z, &bom_reversed, bomSize)==0 ){ |
| 387 | if( pbReverse ) *pbReverse = 1; |
| 388 | if( size<(2*bomSize) ) return 1; |
| 389 | if( memcmp(z+bomSize, &null, bomSize)!=0 ) return 1; |
| 390 | }else if( memcmp(z, &bom, bomSize)==0 ){ |
| 391 | if( pbReverse ) *pbReverse = 0; |
| 392 | if( size<(2*bomSize) ) return 1; |
| 393 | if( memcmp(z+bomSize, &null, bomSize)!=0 ) return 1; |
| 394 | } |
| 395 | return 0; |
| 396 | } |
| 397 | |
| 398 | /* |
| 399 | ** Return true if two DLine elements are identical. |
| 400 | */ |
| 401 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -363,38 +363,33 @@ | |
| 363 | } |
| 364 | |
| 365 | /* |
| 366 | ** This function returns non-zero if the blob starts with a UTF-16 |
| 367 | ** byte-order-mark (BOM), either in the endianness of the machine |
| 368 | ** or in reversed byte order. The UTF-32 BOM is ruled out by checking |
| 369 | ** if the UTF-16 BOM is not immediately followed by (utf16) 0. |
| 370 | ** pnByte and pbReverse are only set when the function returns 1. |
| 371 | */ |
| 372 | int starts_with_utf16_bom( |
| 373 | const Blob *pContent, /* IN: Blob content to perform BOM detection on. */ |
| 374 | int *pnByte, /* OUT: The number of bytes used for the BOM. */ |
| 375 | int *pbReverse /* OUT: Non-zero for BOM in reverse byte-order. */ |
| 376 | ){ |
| 377 | const unsigned short *z = (unsigned short *)blob_buffer(pContent); |
| 378 | int size = blob_size(pContent); |
| 379 | |
| 380 | if( (size<2) || (size%2) |
| 381 | || (size>=4 && z[1]==0) ) return 0; |
| 382 | if( z[0] == 0xfffe ){ |
| 383 | if( pbReverse ) *pbReverse = 1; |
| 384 | }else if( z[0] == 0xfeff ){ |
| 385 | if( pbReverse ) *pbReverse = 0; |
| 386 | }else{ |
| 387 | return 0; |
| 388 | } |
| 389 | if( pnByte ) *pnByte = 2; |
| 390 | return 1; |
| 391 | } |
| 392 | |
| 393 | /* |
| 394 | ** Return true if two DLine elements are identical. |
| 395 | */ |
| 396 |