Fossil SCM
Add a blob length check (the size must be an even number of bytes) and a UTF-32 check to all 3 versions of the UTF-16 BOM functions.
Commit
be6756e26b9c150e9126418cc57a0fbb66079773
Parent
43c452262344bf1…
1 file changed
+21
-17
+21
-17
| --- src/diff.c | ||
| +++ src/diff.c | ||
| @@ -359,19 +359,19 @@ | ||
| 359 | 359 | ** This function returns non-zero if the blob starts with a UTF-16le or |
| 360 | 360 | ** UTF-16be byte-order-mark (BOM). |
| 361 | 361 | */ |
| 362 | 362 | int starts_with_utf16_bom(const Blob *pContent, int *pnByte){ |
| 363 | 363 | const char *z = blob_buffer(pContent); |
| 364 | - int c1, c2; | |
| 364 | + int c1; | |
| 365 | 365 | |
| 366 | 366 | if( pnByte ) *pnByte = 2; |
| 367 | - if( blob_size(pContent)<2 ) return 0; | |
| 368 | - c1 = z[0]; c2 = z[1]; | |
| 369 | - if( (c1==(char)0xff) && (c2==(char)0xfe) ){ | |
| 370 | - return 1; | |
| 371 | - }else if( (c1==(char)0xfe) && (c2==(char)0xff) ){ | |
| 372 | - return 1; | |
| 367 | + if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0; | |
| 368 | + c1 = ((unsigned short *)z)[0]; | |
| 369 | + if( (c1==0xfeff) || (c1==0xfffe) ){ | |
| 370 | + if( blob_size(pContent) < 4 ) return 1; | |
| 371 | + c1 = ((unsigned short *)z)[1]; | |
| 372 | + if( c1 != 0 ) return 1; | |
| 373 | 373 | } |
| 374 | 374 | return 0; |
| 375 | 375 | } |
| 376 | 376 | |
| 377 | 377 | /* |
| @@ -378,17 +378,19 @@ | ||
| 378 | 378 | ** This function returns non-zero if the blob starts with a UTF-16le |
| 379 | 379 | ** byte-order-mark (BOM). |
| 380 | 380 | */ |
| 381 | 381 | int starts_with_utf16le_bom(const Blob *pContent, int *pnByte){ |
| 382 | 382 | const char *z = blob_buffer(pContent); |
| 383 | - int c1, c2; | |
| 383 | + int c1; | |
| 384 | 384 | |
| 385 | 385 | if( pnByte ) *pnByte = 2; |
| 386 | - if( blob_size(pContent)<2 ) return 0; | |
| 387 | - c1 = z[0]; c2 = z[1]; | |
| 388 | - if( (c1==(char)0xff) && (c2==(char)0xfe) ){ | |
| 389 | - return 1; | |
| 386 | + if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0; | |
| 387 | + c1 = ((unsigned short *)z)[0]; | |
| 388 | + if( c1==0xfeff ){ | |
| 389 | + if( blob_size(pContent) < 4 ) return 1; | |
| 390 | + c1 = ((unsigned short *)z)[1]; | |
| 391 | + if( c1 != 0 ) return 1; | |
| 390 | 392 | } |
| 391 | 393 | return 0; |
| 392 | 394 | } |
| 393 | 395 | |
| 394 | 396 | /* |
| @@ -395,17 +397,19 @@ | ||
| 395 | 397 | ** This function returns non-zero if the blob starts with a UTF-16be |
| 396 | 398 | ** byte-order-mark (BOM). |
| 397 | 399 | */ |
| 398 | 400 | int starts_with_utf16be_bom(const Blob *pContent, int *pnByte){ |
| 399 | 401 | const char *z = blob_buffer(pContent); |
| 400 | - int c1, c2; | |
| 402 | + int c1; | |
| 401 | 403 | |
| 402 | 404 | if( pnByte ) *pnByte = 2; |
| 403 | - if( blob_size(pContent)<2 ) return 0; | |
| 404 | - c1 = z[0]; c2 = z[1]; | |
| 405 | - if( (c1==(char)0xfe) && (c2==(char)0xff) ){ | |
| 406 | - return 1; | |
| 405 | + if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0; | |
| 406 | + c1 = ((unsigned short *)z)[0]; | |
| 407 | + if( c1==0xfffe ){ | |
| 408 | + if( blob_size(pContent) < 4 ) return 1; | |
| 409 | + c1 = ((unsigned short *)z)[1]; | |
| 410 | + if( c1 != 0 ) return 1; | |
| 407 | 411 | } |
| 408 | 412 | return 0; |
| 409 | 413 | } |
| 410 | 414 | |
| 411 | 415 | /* |
| 412 | 416 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -359,19 +359,19 @@ | |
| 359 | ** This function returns non-zero if the blob starts with a UTF-16le or |
| 360 | ** UTF-16be byte-order-mark (BOM). |
| 361 | */ |
| 362 | int starts_with_utf16_bom(const Blob *pContent, int *pnByte){ |
| 363 | const char *z = blob_buffer(pContent); |
| 364 | int c1, c2; |
| 365 | |
| 366 | if( pnByte ) *pnByte = 2; |
| 367 | if( blob_size(pContent)<2 ) return 0; |
| 368 | c1 = z[0]; c2 = z[1]; |
| 369 | if( (c1==(char)0xff) && (c2==(char)0xfe) ){ |
| 370 | return 1; |
| 371 | }else if( (c1==(char)0xfe) && (c2==(char)0xff) ){ |
| 372 | return 1; |
| 373 | } |
| 374 | return 0; |
| 375 | } |
| 376 | |
| 377 | /* |
| @@ -378,17 +378,19 @@ | |
| 378 | ** This function returns non-zero if the blob starts with a UTF-16le |
| 379 | ** byte-order-mark (BOM). |
| 380 | */ |
| 381 | int starts_with_utf16le_bom(const Blob *pContent, int *pnByte){ |
| 382 | const char *z = blob_buffer(pContent); |
| 383 | int c1, c2; |
| 384 | |
| 385 | if( pnByte ) *pnByte = 2; |
| 386 | if( blob_size(pContent)<2 ) return 0; |
| 387 | c1 = z[0]; c2 = z[1]; |
| 388 | if( (c1==(char)0xff) && (c2==(char)0xfe) ){ |
| 389 | return 1; |
| 390 | } |
| 391 | return 0; |
| 392 | } |
| 393 | |
| 394 | /* |
| @@ -395,17 +397,19 @@ | |
| 395 | ** This function returns non-zero if the blob starts with a UTF-16be |
| 396 | ** byte-order-mark (BOM). |
| 397 | */ |
| 398 | int starts_with_utf16be_bom(const Blob *pContent, int *pnByte){ |
| 399 | const char *z = blob_buffer(pContent); |
| 400 | int c1, c2; |
| 401 | |
| 402 | if( pnByte ) *pnByte = 2; |
| 403 | if( blob_size(pContent)<2 ) return 0; |
| 404 | c1 = z[0]; c2 = z[1]; |
| 405 | if( (c1==(char)0xfe) && (c2==(char)0xff) ){ |
| 406 | return 1; |
| 407 | } |
| 408 | return 0; |
| 409 | } |
| 410 | |
| 411 | /* |
| 412 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -359,19 +359,19 @@ | |
| 359 | ** This function returns non-zero if the blob starts with a UTF-16le or |
| 360 | ** UTF-16be byte-order-mark (BOM). |
| 361 | */ |
| 362 | int starts_with_utf16_bom(const Blob *pContent, int *pnByte){ |
| 363 | const char *z = blob_buffer(pContent); |
| 364 | int c1; |
| 365 | |
| 366 | if( pnByte ) *pnByte = 2; |
| 367 | if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0; |
| 368 | c1 = ((unsigned short *)z)[0]; |
| 369 | if( (c1==0xfeff) || (c1==0xfffe) ){ |
| 370 | if( blob_size(pContent) < 4 ) return 1; |
| 371 | c1 = ((unsigned short *)z)[1]; |
| 372 | if( c1 != 0 ) return 1; |
| 373 | } |
| 374 | return 0; |
| 375 | } |
| 376 | |
| 377 | /* |
| @@ -378,17 +378,19 @@ | |
| 378 | ** This function returns non-zero if the blob starts with a UTF-16le |
| 379 | ** byte-order-mark (BOM). |
| 380 | */ |
| 381 | int starts_with_utf16le_bom(const Blob *pContent, int *pnByte){ |
| 382 | const char *z = blob_buffer(pContent); |
| 383 | int c1; |
| 384 | |
| 385 | if( pnByte ) *pnByte = 2; |
| 386 | if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0; |
| 387 | c1 = ((unsigned short *)z)[0]; |
| 388 | if( c1==0xfeff ){ |
| 389 | if( blob_size(pContent) < 4 ) return 1; |
| 390 | c1 = ((unsigned short *)z)[1]; |
| 391 | if( c1 != 0 ) return 1; |
| 392 | } |
| 393 | return 0; |
| 394 | } |
| 395 | |
| 396 | /* |
| @@ -395,17 +397,19 @@ | |
| 397 | ** This function returns non-zero if the blob starts with a UTF-16be |
| 398 | ** byte-order-mark (BOM). |
| 399 | */ |
| 400 | int starts_with_utf16be_bom(const Blob *pContent, int *pnByte){ |
| 401 | const char *z = blob_buffer(pContent); |
| 402 | int c1; |
| 403 | |
| 404 | if( pnByte ) *pnByte = 2; |
| 405 | if( (blob_size(pContent)<2) || (blob_size(pContent)&1)) return 0; |
| 406 | c1 = ((unsigned short *)z)[0]; |
| 407 | if( c1==0xfffe ){ |
| 408 | if( blob_size(pContent) < 4 ) return 1; |
| 409 | c1 = ((unsigned short *)z)[1]; |
| 410 | if( c1 != 0 ) return 1; |
| 411 | } |
| 412 | return 0; |
| 413 | } |
| 414 | |
| 415 | /* |
| 416 |