Fossil SCM

Add a blob length check (even number of bytes) and a UTF-32 check to the 3 versions of the UTF-16 BOM functions.

jan.nijtmans 2013-02-07 15:28 trunk
Commit be6756e26b9c150e9126418cc57a0fbb66079773
1 file changed +21 -17
+21 -17
--- src/diff.c
+++ src/diff.c
@@ -359,19 +359,19 @@
359359
** This function returns non-zero if the blob starts with a UTF-16le or
360360
** UTF-16be byte-order-mark (BOM).
361361
*/
362362
int starts_with_utf16_bom(const Blob *pContent, int *pnByte){
363363
const char *z = blob_buffer(pContent);
364
- int c1, c2;
364
+ int c1;
365365
366366
if( pnByte ) *pnByte = 2;
367
- if( blob_size(pContent)<2 ) return 0;
368
- c1 = z[0]; c2 = z[1];
369
- if( (c1==(char)0xff) && (c2==(char)0xfe) ){
370
- return 1;
371
- }else if( (c1==(char)0xfe) && (c2==(char)0xff) ){
372
- return 1;
367
+ if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0;
368
+ c1 = ((unsigned short *)z)[0];
369
+ if( (c1==0xfeff) || (c1==0xfffe) ){
370
+ if( blob_size(pContent) < 4 ) return 1;
371
+ c1 = ((unsigned short *)z)[1];
372
+ if( c1 != 0 ) return 1;
373373
}
374374
return 0;
375375
}
376376
377377
/*
@@ -378,17 +378,19 @@
378378
** This function returns non-zero if the blob starts with a UTF-16le
379379
** byte-order-mark (BOM).
380380
*/
381381
int starts_with_utf16le_bom(const Blob *pContent, int *pnByte){
382382
const char *z = blob_buffer(pContent);
383
- int c1, c2;
383
+ int c1;
384384
385385
if( pnByte ) *pnByte = 2;
386
- if( blob_size(pContent)<2 ) return 0;
387
- c1 = z[0]; c2 = z[1];
388
- if( (c1==(char)0xff) && (c2==(char)0xfe) ){
389
- return 1;
386
+ if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0;
387
+ c1 = ((unsigned short *)z)[0];
388
+ if( c1==0xfeff ){
389
+ if( blob_size(pContent) < 4 ) return 1;
390
+ c1 = ((unsigned short *)z)[1];
391
+ if( c1 != 0 ) return 1;
390392
}
391393
return 0;
392394
}
393395
394396
/*
@@ -395,17 +397,19 @@
395397
** This function returns non-zero if the blob starts with a UTF-16be
396398
** byte-order-mark (BOM).
397399
*/
398400
int starts_with_utf16be_bom(const Blob *pContent, int *pnByte){
399401
const char *z = blob_buffer(pContent);
400
- int c1, c2;
402
+ int c1;
401403
402404
if( pnByte ) *pnByte = 2;
403
- if( blob_size(pContent)<2 ) return 0;
404
- c1 = z[0]; c2 = z[1];
405
- if( (c1==(char)0xfe) && (c2==(char)0xff) ){
406
- return 1;
405
+ if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0;
406
+ c1 = ((unsigned short *)z)[0];
407
+ if( c1==0xfffe ){
408
+ if( blob_size(pContent) < 4 ) return 1;
409
+ c1 = ((unsigned short *)z)[1];
410
+ if( c1 != 0 ) return 1;
407411
}
408412
return 0;
409413
}
410414
411415
/*
412416
--- src/diff.c
+++ src/diff.c
@@ -359,19 +359,19 @@
359 ** This function returns non-zero if the blob starts with a UTF-16le or
360 ** UTF-16be byte-order-mark (BOM).
361 */
362 int starts_with_utf16_bom(const Blob *pContent, int *pnByte){
363 const char *z = blob_buffer(pContent);
364 int c1, c2;
365
366 if( pnByte ) *pnByte = 2;
367 if( blob_size(pContent)<2 ) return 0;
368 c1 = z[0]; c2 = z[1];
369 if( (c1==(char)0xff) && (c2==(char)0xfe) ){
370 return 1;
371 }else if( (c1==(char)0xfe) && (c2==(char)0xff) ){
372 return 1;
373 }
374 return 0;
375 }
376
377 /*
@@ -378,17 +378,19 @@
378 ** This function returns non-zero if the blob starts with a UTF-16le
379 ** byte-order-mark (BOM).
380 */
381 int starts_with_utf16le_bom(const Blob *pContent, int *pnByte){
382 const char *z = blob_buffer(pContent);
383 int c1, c2;
384
385 if( pnByte ) *pnByte = 2;
386 if( blob_size(pContent)<2 ) return 0;
387 c1 = z[0]; c2 = z[1];
388 if( (c1==(char)0xff) && (c2==(char)0xfe) ){
389 return 1;
 
 
390 }
391 return 0;
392 }
393
394 /*
@@ -395,17 +397,19 @@
395 ** This function returns non-zero if the blob starts with a UTF-16be
396 ** byte-order-mark (BOM).
397 */
398 int starts_with_utf16be_bom(const Blob *pContent, int *pnByte){
399 const char *z = blob_buffer(pContent);
400 int c1, c2;
401
402 if( pnByte ) *pnByte = 2;
403 if( blob_size(pContent)<2 ) return 0;
404 c1 = z[0]; c2 = z[1];
405 if( (c1==(char)0xfe) && (c2==(char)0xff) ){
406 return 1;
 
 
407 }
408 return 0;
409 }
410
411 /*
412
--- src/diff.c
+++ src/diff.c
@@ -359,19 +359,19 @@
359 ** This function returns non-zero if the blob starts with a UTF-16le or
360 ** UTF-16be byte-order-mark (BOM).
361 */
362 int starts_with_utf16_bom(const Blob *pContent, int *pnByte){
363 const char *z = blob_buffer(pContent);
364 int c1;
365
366 if( pnByte ) *pnByte = 2;
367 if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0;
368 c1 = ((unsigned short *)z)[0];
369 if( (c1==0xfeff) || (c1==0xfffe) ){
370 if( blob_size(pContent) < 4 ) return 1;
371 c1 = ((unsigned short *)z)[1];
372 if( c1 != 0 ) return 1;
373 }
374 return 0;
375 }
376
377 /*
@@ -378,17 +378,19 @@
378 ** This function returns non-zero if the blob starts with a UTF-16le
379 ** byte-order-mark (BOM).
380 */
381 int starts_with_utf16le_bom(const Blob *pContent, int *pnByte){
382 const char *z = blob_buffer(pContent);
383 int c1;
384
385 if( pnByte ) *pnByte = 2;
386 if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0;
387 c1 = ((unsigned short *)z)[0];
388 if( c1==0xfeff ){
389 if( blob_size(pContent) < 4 ) return 1;
390 c1 = ((unsigned short *)z)[1];
391 if( c1 != 0 ) return 1;
392 }
393 return 0;
394 }
395
396 /*
@@ -395,17 +397,19 @@
397 ** This function returns non-zero if the blob starts with a UTF-16be
398 ** byte-order-mark (BOM).
399 */
400 int starts_with_utf16be_bom(const Blob *pContent, int *pnByte){
401 const char *z = blob_buffer(pContent);
402 int c1;
403
404 if( pnByte ) *pnByte = 2;
405 if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0;
406 c1 = ((unsigned short *)z)[0];
407 if( c1==0xfffe ){
408 if( blob_size(pContent) < 4 ) return 1;
409 c1 = ((unsigned short *)z)[1];
410 if( c1 != 0 ) return 1;
411 }
412 return 0;
413 }
414
415 /*
416

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button