Fossil SCM

Simplify and speed up starts_with_utf16_bom() and improve its documentation.

jan.nijtmans 2013-02-20 08:33 trunk merge
Commit 957be426253c20e2412c1df602b5b379ecad18c2
2 files changed +14 -19 +14 -19
+14 -19
--- src/diff.c
+++ src/diff.c
@@ -363,38 +363,33 @@
363363
}
364364
365365
/*
366366
** This function returns non-zero if the blob starts with a UTF-16
367367
** byte-order-mark (BOM), either in the endianness of the machine
368
-** or in reversed byte order.
368
+** or in reversed byte order. The UTF-32 BOM is ruled out by checking
369
+** if the UTF-16 BOM is not immediately followed by (utf16) 0.
370
+** pnByte and pbReverse are only set when the function returns 1.
369371
*/
370372
int starts_with_utf16_bom(
371373
const Blob *pContent, /* IN: Blob content to perform BOM detection on. */
372374
int *pnByte, /* OUT: The number of bytes used for the BOM. */
373375
int *pbReverse /* OUT: Non-zero for BOM in reverse byte-order. */
374376
){
375
- const char *z = blob_buffer(pContent);
376
- int bomSize = 2;
377
- static const unsigned short bom = 0xfeff;
378
- static const unsigned short bom_reversed = 0xfffe;
379
- static const unsigned short null = 0;
380
- int size;
381
-
382
- if( pnByte ) *pnByte = bomSize;
383
- if( pbReverse ) *pbReverse = -1; /* Unknown. */
384
- size = blob_size(pContent);
385
- if( (size<bomSize) || (size%2) ) return 0;
386
- if( memcmp(z, &bom_reversed, bomSize)==0 ){
377
+ const unsigned short *z = (unsigned short *)blob_buffer(pContent);
378
+ int size = blob_size(pContent);
379
+
380
+ if( (size<2) || (size%2)
381
+ || (size>=4 && z[1]==0) ) return 0;
382
+ if( z[0] == 0xfffe ){
387383
if( pbReverse ) *pbReverse = 1;
388
- if( size<(2*bomSize) ) return 1;
389
- if( memcmp(z+bomSize, &null, bomSize)!=0 ) return 1;
390
- }else if( memcmp(z, &bom, bomSize)==0 ){
384
+ }else if( z[0] == 0xfeff ){
391385
if( pbReverse ) *pbReverse = 0;
392
- if( size<(2*bomSize) ) return 1;
393
- if( memcmp(z+bomSize, &null, bomSize)!=0 ) return 1;
386
+ }else{
387
+ return 0;
394388
}
395
- return 0;
389
+ if( pnByte ) *pnByte = 2;
390
+ return 1;
396391
}
397392
398393
/*
399394
** Return true if two DLine elements are identical.
400395
*/
401396
--- src/diff.c
+++ src/diff.c
@@ -363,38 +363,33 @@
363 }
364
365 /*
366 ** This function returns non-zero if the blob starts with a UTF-16
367 ** byte-order-mark (BOM), either in the endianness of the machine
368 ** or in reversed byte order.
 
 
369 */
370 int starts_with_utf16_bom(
371 const Blob *pContent, /* IN: Blob content to perform BOM detection on. */
372 int *pnByte, /* OUT: The number of bytes used for the BOM. */
373 int *pbReverse /* OUT: Non-zero for BOM in reverse byte-order. */
374 ){
375 const char *z = blob_buffer(pContent);
376 int bomSize = 2;
377 static const unsigned short bom = 0xfeff;
378 static const unsigned short bom_reversed = 0xfffe;
379 static const unsigned short null = 0;
380 int size;
381
382 if( pnByte ) *pnByte = bomSize;
383 if( pbReverse ) *pbReverse = -1; /* Unknown. */
384 size = blob_size(pContent);
385 if( (size<bomSize) || (size%2) ) return 0;
386 if( memcmp(z, &bom_reversed, bomSize)==0 ){
387 if( pbReverse ) *pbReverse = 1;
388 if( size<(2*bomSize) ) return 1;
389 if( memcmp(z+bomSize, &null, bomSize)!=0 ) return 1;
390 }else if( memcmp(z, &bom, bomSize)==0 ){
391 if( pbReverse ) *pbReverse = 0;
392 if( size<(2*bomSize) ) return 1;
393 if( memcmp(z+bomSize, &null, bomSize)!=0 ) return 1;
394 }
395 return 0;
 
396 }
397
398 /*
399 ** Return true if two DLine elements are identical.
400 */
401
--- src/diff.c
+++ src/diff.c
@@ -363,38 +363,33 @@
363 }
364
365 /*
366 ** This function returns non-zero if the blob starts with a UTF-16
367 ** byte-order-mark (BOM), either in the endianness of the machine
368 ** or in reversed byte order. The UTF-32 BOM is ruled out by checking
369 ** if the UTF-16 BOM is not immediately followed by (utf16) 0.
370 ** pnByte and pbReverse are only set when the function returns 1.
371 */
372 int starts_with_utf16_bom(
373 const Blob *pContent, /* IN: Blob content to perform BOM detection on. */
374 int *pnByte, /* OUT: The number of bytes used for the BOM. */
375 int *pbReverse /* OUT: Non-zero for BOM in reverse byte-order. */
376 ){
377 const unsigned short *z = (unsigned short *)blob_buffer(pContent);
378 int size = blob_size(pContent);
379
380 if( (size<2) || (size%2)
381 || (size>=4 && z[1]==0) ) return 0;
382 if( z[0] == 0xfffe ){
 
 
 
 
 
 
383 if( pbReverse ) *pbReverse = 1;
384 }else if( z[0] == 0xfeff ){
 
 
385 if( pbReverse ) *pbReverse = 0;
386 }else{
387 return 0;
388 }
389 if( pnByte ) *pnByte = 2;
390 return 1;
391 }
392
393 /*
394 ** Return true if two DLine elements are identical.
395 */
396
+14 -19
--- src/diff.c
+++ src/diff.c
@@ -363,38 +363,33 @@
363363
}
364364
365365
/*
366366
** This function returns non-zero if the blob starts with a UTF-16
367367
** byte-order-mark (BOM), either in the endianness of the machine
368
-** or in reversed byte order.
368
+** or in reversed byte order. The UTF-32 BOM is ruled out by checking
369
+** if the UTF-16 BOM is not immediately followed by (utf16) 0.
370
+** pnByte and pbReverse are only set when the function returns 1.
369371
*/
370372
int starts_with_utf16_bom(
371373
const Blob *pContent, /* IN: Blob content to perform BOM detection on. */
372374
int *pnByte, /* OUT: The number of bytes used for the BOM. */
373375
int *pbReverse /* OUT: Non-zero for BOM in reverse byte-order. */
374376
){
375
- const char *z = blob_buffer(pContent);
376
- int bomSize = 2;
377
- static const unsigned short bom = 0xfeff;
378
- static const unsigned short bom_reversed = 0xfffe;
379
- static const unsigned short null = 0;
380
- int size;
381
-
382
- if( pnByte ) *pnByte = bomSize;
383
- if( pbReverse ) *pbReverse = -1; /* Unknown. */
384
- size = blob_size(pContent);
385
- if( (size<bomSize) || (size%2) ) return 0;
386
- if( memcmp(z, &bom_reversed, bomSize)==0 ){
377
+ const unsigned short *z = (unsigned short *)blob_buffer(pContent);
378
+ int size = blob_size(pContent);
379
+
380
+ if( (size<2) || (size%2)
381
+ || (size>=4 && z[1]==0) ) return 0;
382
+ if( z[0] == 0xfffe ){
387383
if( pbReverse ) *pbReverse = 1;
388
- if( size<(2*bomSize) ) return 1;
389
- if( memcmp(z+bomSize, &null, bomSize)!=0 ) return 1;
390
- }else if( memcmp(z, &bom, bomSize)==0 ){
384
+ }else if( z[0] == 0xfeff ){
391385
if( pbReverse ) *pbReverse = 0;
392
- if( size<(2*bomSize) ) return 1;
393
- if( memcmp(z+bomSize, &null, bomSize)!=0 ) return 1;
386
+ }else{
387
+ return 0;
394388
}
395
- return 0;
389
+ if( pnByte ) *pnByte = 2;
390
+ return 1;
396391
}
397392
398393
/*
399394
** Return true if two DLine elements are identical.
400395
*/
401396
--- src/diff.c
+++ src/diff.c
@@ -363,38 +363,33 @@
363 }
364
365 /*
366 ** This function returns non-zero if the blob starts with a UTF-16
367 ** byte-order-mark (BOM), either in the endianness of the machine
368 ** or in reversed byte order.
 
 
369 */
370 int starts_with_utf16_bom(
371 const Blob *pContent, /* IN: Blob content to perform BOM detection on. */
372 int *pnByte, /* OUT: The number of bytes used for the BOM. */
373 int *pbReverse /* OUT: Non-zero for BOM in reverse byte-order. */
374 ){
375 const char *z = blob_buffer(pContent);
376 int bomSize = 2;
377 static const unsigned short bom = 0xfeff;
378 static const unsigned short bom_reversed = 0xfffe;
379 static const unsigned short null = 0;
380 int size;
381
382 if( pnByte ) *pnByte = bomSize;
383 if( pbReverse ) *pbReverse = -1; /* Unknown. */
384 size = blob_size(pContent);
385 if( (size<bomSize) || (size%2) ) return 0;
386 if( memcmp(z, &bom_reversed, bomSize)==0 ){
387 if( pbReverse ) *pbReverse = 1;
388 if( size<(2*bomSize) ) return 1;
389 if( memcmp(z+bomSize, &null, bomSize)!=0 ) return 1;
390 }else if( memcmp(z, &bom, bomSize)==0 ){
391 if( pbReverse ) *pbReverse = 0;
392 if( size<(2*bomSize) ) return 1;
393 if( memcmp(z+bomSize, &null, bomSize)!=0 ) return 1;
394 }
395 return 0;
 
396 }
397
398 /*
399 ** Return true if two DLine elements are identical.
400 */
401
--- src/diff.c
+++ src/diff.c
@@ -363,38 +363,33 @@
363 }
364
365 /*
366 ** This function returns non-zero if the blob starts with a UTF-16
367 ** byte-order-mark (BOM), either in the endianness of the machine
368 ** or in reversed byte order. The UTF-32 BOM is ruled out by checking
369 ** if the UTF-16 BOM is not immediately followed by (utf16) 0.
370 ** pnByte and pbReverse are only set when the function returns 1.
371 */
372 int starts_with_utf16_bom(
373 const Blob *pContent, /* IN: Blob content to perform BOM detection on. */
374 int *pnByte, /* OUT: The number of bytes used for the BOM. */
375 int *pbReverse /* OUT: Non-zero for BOM in reverse byte-order. */
376 ){
377 const unsigned short *z = (unsigned short *)blob_buffer(pContent);
378 int size = blob_size(pContent);
379
380 if( (size<2) || (size%2)
381 || (size>=4 && z[1]==0) ) return 0;
382 if( z[0] == 0xfffe ){
 
 
 
 
 
 
383 if( pbReverse ) *pbReverse = 1;
384 }else if( z[0] == 0xfeff ){
 
 
385 if( pbReverse ) *pbReverse = 0;
386 }else{
387 return 0;
388 }
389 if( pnByte ) *pnByte = 2;
390 return 1;
391 }
392
393 /*
394 ** Return true if two DLine elements are identical.
395 */
396

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button