Fossil SCM
Further fine-tuning of the check for valid UTF8 characters in filenames.
Commit
4d456c9fd17bd289fd38b9b09d4e19a2c1af3183
Parent
3104348ec53e670…
2 files changed
+52
-28
+52
-28
+52
-28
| --- src/file.c | ||
| +++ src/file.c | ||
| @@ -490,44 +490,68 @@ | ||
| 490 | 490 | ** * Does not contain two or more "/" characters in a row. |
| 491 | 491 | ** * Contains at least one character |
| 492 | 492 | ** |
| 493 | 493 | ** Invalid UTF8 characters result in a false return if bStrictUtf8 is |
| 494 | 494 | ** true. If bStrictUtf8 is false, invalid UTF8 characters are silently |
| 495 | -** ignored. | |
| 495 | +** ignored. See http://en.wikipedia.org/wiki/UTF-8#Invalid_byte_sequences | |
| 496 | +** and http://en.wikipedia.org/wiki/Unicode (for the noncharacters) | |
| 497 | +** | |
| 498 | +** The bStrictUtf8 flag is true for new inputs, but is false when parsing | |
| 499 | +** legacy manifests, for backwards compatibility. | |
| 496 | 500 | */ |
| 497 | 501 | int file_is_simple_pathname(const char *z, int bStrictUtf8){ |
| 498 | 502 | int i; |
| 499 | - char c = z[0]; | |
| 503 | + unsigned char c = (unsigned char) z[0]; | |
| 500 | 504 | char maskNonAscii = bStrictUtf8 ? 0x80 : 0x00; |
| 501 | 505 | if( c=='/' || c==0 ) return 0; |
| 502 | 506 | if( c=='.' ){ |
| 503 | 507 | if( z[1]=='/' || z[1]==0 ) return 0; |
| 504 | 508 | if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0; |
| 505 | 509 | } |
| 506 | - for(i=0; (c=z[i])!=0; i++){ | |
| 507 | - if( c & maskNonAscii ){ | |
| 508 | - if( (c & 0xf0) == 0xf0 ) { | |
| 509 | - /* Unicode characters > U+FFFF are not supported. | |
| 510 | - * Windows XP and earlier cannot handle them. | |
| 511 | - */ | |
| 512 | - return 0; | |
| 513 | - } | |
| 514 | - if( (c & 0xf0) == 0xe0 ) { | |
| 515 | - /* This is a 3-byte UTF-8 character */ | |
| 516 | - if ( (c & 0xfe) == 0xee ){ | |
| 517 | - /* Range U+E000 - U+FFFF (Starting with 0xee or 0xef in UTF-8 ) */ | |
| 518 | - if ( !(c & 1) || ((z[i+1] & 0xff) < 0xa4) ){ | |
| 519 | - /* Unicode character in the range U+E000 - U+F8FF are for | |
| 520 | - * private use, they shouldn't occur in filenames. */ | |
| 521 | - return 0; | |
| 522 | - } | |
| 523 | - }else if( ((c & 0xff) == 0xed) && ((z[i+1] & 0xe0) == 0xa0) ){ | |
| 524 | - /* Unicode character in the range U+D800 - U+DFFF are for | |
| 525 | - * surrogate pairs, they shouldn't occur in filenames. */ | |
| 526 | - return 0; | |
| 527 | - } | |
| 528 | - } | |
| 510 | + for(i=0; (c=(unsigned char)z[i])!=0; i++){ | |
| 511 | + if( c & maskNonAscii ){ | |
| 512 | + if( c<0xc2 ){ | |
| 513 | + /* Invalid 1-byte UTF-8 sequence, or 2-byte overlong form. */ | |
| 514 | + return 0; | |
| 515 | + }else if( (c&0xe0)==0xe0 ){ | |
| 516 | + /* 3-byte or more */ | |
| 517 | + int unicode; | |
| 518 | + if( c&0x10 ){ | |
| 519 | + /* Unicode characters > U+FFFF are not supported. | |
| 520 | + * Windows XP and earlier cannot handle them. | |
| 521 | + */ | |
| 522 | + return 0; | |
| 523 | + } | |
| 524 | + /* This is a 3-byte UTF-8 character */ | |
| 525 | + unicode = ((c&0x0f)<<12) + ((z[i+1]&0x3f)<<6) + (z[i+2]&0x3f); | |
| 526 | + if( unicode <= 0x07ff ){ | |
| 527 | + /* overlong form */ | |
| 528 | + return 0; | |
| 529 | + }else if( unicode>=0xe000 ){ | |
| 530 | + /* U+E000..U+FFFF */ | |
| 531 | + if( (unicode<=0xf8ff) || (unicode>=0xfffe) ){ | |
| 532 | + /* U+E000..U+F8FF are for private use. | |
| 533 | + * U+FFFE..U+FFFF are noncharacters. */ | |
| 534 | + return 0; | |
| 535 | + } else if( (unicode>=0xfdd0) && (unicode<=0xfdef) ){ | |
| 536 | + /* U+FDD0..U+FDEF are noncharacters. */ | |
| 537 | + return 0; | |
| 538 | + } | |
| 539 | + }else if( (unicode>=0xD800) && (unicode<=0xDFFF) ){ | |
| 540 | + /* U+D800..U+DFFF are for surrogate pairs. */ | |
| 541 | + return 0; | |
| 542 | + } | |
| 543 | + } | |
| 544 | + do{ | |
| 545 | + if( (z[i+1]&0xc0)!=0x80 ){ | |
| 546 | + /* Invalid continuation byte (multi-byte UTF-8) */ | |
| 547 | + return 0; | |
| 548 | + } | |
| 549 | + /* The hi-bits of c are used to keep track of the number of expected | |
| 550 | + * continuation-bytes, so we don't need a separate counter. */ | |
| 551 | + c<<=1; ++i; | |
| 552 | + }while( c>=0xc0 ); | |
| 529 | 553 | }else if( c=='\\' ){ |
| 530 | 554 | return 0; |
| 531 | 555 | } |
| 532 | 556 | if( c=='/' ){ |
| 533 | 557 | if( z[i+1]=='/' ) return 0; |
| @@ -578,11 +602,11 @@ | ||
| 578 | 602 | if( z[i]=='\\' ) z[i] = '/'; |
| 579 | 603 | } |
| 580 | 604 | #endif |
| 581 | 605 | |
| 582 | 606 | /* Removing trailing "/" characters */ |
| 583 | - if ( !slash ){ | |
| 607 | + if( !slash ){ | |
| 584 | 608 | while( n>1 && z[n-1]=='/' ){ n--; } |
| 585 | 609 | } |
| 586 | 610 | |
| 587 | 611 | /* Remove duplicate '/' characters. Except, two // at the beginning |
| 588 | 612 | ** of a pathname is allowed since this is important on windows. */ |
| @@ -835,11 +859,11 @@ | ||
| 835 | 859 | if( zPwd[i]==0 ){ |
| 836 | 860 | blob_append(pOut, ".", 1); |
| 837 | 861 | }else{ |
| 838 | 862 | blob_append(pOut, "..", 2); |
| 839 | 863 | for(j=i+1; zPwd[j]; j++){ |
| 840 | - if( zPwd[j]=='/' ) { | |
| 864 | + if( zPwd[j]=='/' ){ | |
| 841 | 865 | blob_append(pOut, "/..", 3); |
| 842 | 866 | } |
| 843 | 867 | } |
| 844 | 868 | } |
| 845 | 869 | return; |
| @@ -852,11 +876,11 @@ | ||
| 852 | 876 | return; |
| 853 | 877 | } |
| 854 | 878 | while( zPath[i-1]!='/' ){ i--; } |
| 855 | 879 | blob_set(&tmp, "../"); |
| 856 | 880 | for(j=i; zPwd[j]; j++){ |
| 857 | - if( zPwd[j]=='/' ) { | |
| 881 | + if( zPwd[j]=='/' ){ | |
| 858 | 882 | blob_append(&tmp, "../", 3); |
| 859 | 883 | } |
| 860 | 884 | } |
| 861 | 885 | blob_append(&tmp, &zPath[i], -1); |
| 862 | 886 | blob_reset(pOut); |
| 863 | 887 |
| --- src/file.c | |
| +++ src/file.c | |
| @@ -490,44 +490,68 @@ | |
| 490 | ** * Does not contain two or more "/" characters in a row. |
| 491 | ** * Contains at least one character |
| 492 | ** |
| 493 | ** Invalid UTF8 characters result in a false return if bStrictUtf8 is |
| 494 | ** true. If bStrictUtf8 is false, invalid UTF8 characters are silently |
| 495 | ** ignored. |
| 496 | */ |
| 497 | int file_is_simple_pathname(const char *z, int bStrictUtf8){ |
| 498 | int i; |
| 499 | char c = z[0]; |
| 500 | char maskNonAscii = bStrictUtf8 ? 0x80 : 0x00; |
| 501 | if( c=='/' || c==0 ) return 0; |
| 502 | if( c=='.' ){ |
| 503 | if( z[1]=='/' || z[1]==0 ) return 0; |
| 504 | if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0; |
| 505 | } |
| 506 | for(i=0; (c=z[i])!=0; i++){ |
| 507 | if( c & maskNonAscii ){ |
| 508 | if( (c & 0xf0) == 0xf0 ) { |
| 509 | /* Unicode characters > U+FFFF are not supported. |
| 510 | * Windows XP and earlier cannot handle them. |
| 511 | */ |
| 512 | return 0; |
| 513 | } |
| 514 | if( (c & 0xf0) == 0xe0 ) { |
| 515 | /* This is a 3-byte UTF-8 character */ |
| 516 | if ( (c & 0xfe) == 0xee ){ |
| 517 | /* Range U+E000 - U+FFFF (Starting with 0xee or 0xef in UTF-8 ) */ |
| 518 | if ( !(c & 1) || ((z[i+1] & 0xff) < 0xa4) ){ |
| 519 | /* Unicode character in the range U+E000 - U+F8FF are for |
| 520 | * private use, they shouldn't occur in filenames. */ |
| 521 | return 0; |
| 522 | } |
| 523 | }else if( ((c & 0xff) == 0xed) && ((z[i+1] & 0xe0) == 0xa0) ){ |
| 524 | /* Unicode character in the range U+D800 - U+DFFF are for |
| 525 | * surrogate pairs, they shouldn't occur in filenames. */ |
| 526 | return 0; |
| 527 | } |
| 528 | } |
| 529 | }else if( c=='\\' ){ |
| 530 | return 0; |
| 531 | } |
| 532 | if( c=='/' ){ |
| 533 | if( z[i+1]=='/' ) return 0; |
| @@ -578,11 +602,11 @@ | |
| 578 | if( z[i]=='\\' ) z[i] = '/'; |
| 579 | } |
| 580 | #endif |
| 581 | |
| 582 | /* Removing trailing "/" characters */ |
| 583 | if ( !slash ){ |
| 584 | while( n>1 && z[n-1]=='/' ){ n--; } |
| 585 | } |
| 586 | |
| 587 | /* Remove duplicate '/' characters. Except, two // at the beginning |
| 588 | ** of a pathname is allowed since this is important on windows. */ |
| @@ -835,11 +859,11 @@ | |
| 835 | if( zPwd[i]==0 ){ |
| 836 | blob_append(pOut, ".", 1); |
| 837 | }else{ |
| 838 | blob_append(pOut, "..", 2); |
| 839 | for(j=i+1; zPwd[j]; j++){ |
| 840 | if( zPwd[j]=='/' ) { |
| 841 | blob_append(pOut, "/..", 3); |
| 842 | } |
| 843 | } |
| 844 | } |
| 845 | return; |
| @@ -852,11 +876,11 @@ | |
| 852 | return; |
| 853 | } |
| 854 | while( zPath[i-1]!='/' ){ i--; } |
| 855 | blob_set(&tmp, "../"); |
| 856 | for(j=i; zPwd[j]; j++){ |
| 857 | if( zPwd[j]=='/' ) { |
| 858 | blob_append(&tmp, "../", 3); |
| 859 | } |
| 860 | } |
| 861 | blob_append(&tmp, &zPath[i], -1); |
| 862 | blob_reset(pOut); |
| 863 |
| --- src/file.c | |
| +++ src/file.c | |
| @@ -490,44 +490,68 @@ | |
| 490 | ** * Does not contain two or more "/" characters in a row. |
| 491 | ** * Contains at least one character |
| 492 | ** |
| 493 | ** Invalid UTF8 characters result in a false return if bStrictUtf8 is |
| 494 | ** true. If bStrictUtf8 is false, invalid UTF8 characters are silently |
| 495 | ** ignored. See http://en.wikipedia.org/wiki/UTF-8#Invalid_byte_sequences |
| 496 | ** and http://en.wikipedia.org/wiki/Unicode (for the noncharacters) |
| 497 | ** |
| 498 | ** The bStrictUtf8 flag is true for new inputs, but is false when parsing |
| 499 | ** legacy manifests, for backwards compatibility. |
| 500 | */ |
| 501 | int file_is_simple_pathname(const char *z, int bStrictUtf8){ |
| 502 | int i; |
| 503 | unsigned char c = (unsigned char) z[0]; |
| 504 | char maskNonAscii = bStrictUtf8 ? 0x80 : 0x00; |
| 505 | if( c=='/' || c==0 ) return 0; |
| 506 | if( c=='.' ){ |
| 507 | if( z[1]=='/' || z[1]==0 ) return 0; |
| 508 | if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0; |
| 509 | } |
| 510 | for(i=0; (c=(unsigned char)z[i])!=0; i++){ |
| 511 | if( c & maskNonAscii ){ |
| 512 | if( c<0xc2 ){ |
| 513 | /* Invalid 1-byte UTF-8 sequence, or 2-byte overlong form. */ |
| 514 | return 0; |
| 515 | }else if( (c&0xe0)==0xe0 ){ |
| 516 | /* 3-byte or more */ |
| 517 | int unicode; |
| 518 | if( c&0x10 ){ |
| 519 | /* Unicode characters > U+FFFF are not supported. |
| 520 | * Windows XP and earlier cannot handle them. |
| 521 | */ |
| 522 | return 0; |
| 523 | } |
| 524 | /* This is a 3-byte UTF-8 character */ |
| 525 | unicode = ((c&0x0f)<<12) + ((z[i+1]&0x3f)<<6) + (z[i+2]&0x3f); |
| 526 | if( unicode <= 0x07ff ){ |
| 527 | /* overlong form */ |
| 528 | return 0; |
| 529 | }else if( unicode>=0xe000 ){ |
| 530 | /* U+E000..U+FFFF */ |
| 531 | if( (unicode<=0xf8ff) || (unicode>=0xfffe) ){ |
| 532 | /* U+E000..U+F8FF are for private use. |
| 533 | * U+FFFE..U+FFFF are noncharacters. */ |
| 534 | return 0; |
| 535 | } else if( (unicode>=0xfdd0) && (unicode<=0xfdef) ){ |
| 536 | /* U+FDD0..U+FDEF are noncharacters. */ |
| 537 | return 0; |
| 538 | } |
| 539 | }else if( (unicode>=0xD800) && (unicode<=0xDFFF) ){ |
| 540 | /* U+D800..U+DFFF are for surrogate pairs. */ |
| 541 | return 0; |
| 542 | } |
| 543 | } |
| 544 | do{ |
| 545 | if( (z[i+1]&0xc0)!=0x80 ){ |
| 546 | /* Invalid continuation byte (multi-byte UTF-8) */ |
| 547 | return 0; |
| 548 | } |
| 549 | /* The hi-bits of c are used to keep track of the number of expected |
| 550 | * continuation-bytes, so we don't need a separate counter. */ |
| 551 | c<<=1; ++i; |
| 552 | }while( c>=0xc0 ); |
| 553 | }else if( c=='\\' ){ |
| 554 | return 0; |
| 555 | } |
| 556 | if( c=='/' ){ |
| 557 | if( z[i+1]=='/' ) return 0; |
| @@ -578,11 +602,11 @@ | |
| 602 | if( z[i]=='\\' ) z[i] = '/'; |
| 603 | } |
| 604 | #endif |
| 605 | |
| 606 | /* Removing trailing "/" characters */ |
| 607 | if( !slash ){ |
| 608 | while( n>1 && z[n-1]=='/' ){ n--; } |
| 609 | } |
| 610 | |
| 611 | /* Remove duplicate '/' characters. Except, two // at the beginning |
| 612 | ** of a pathname is allowed since this is important on windows. */ |
| @@ -835,11 +859,11 @@ | |
| 859 | if( zPwd[i]==0 ){ |
| 860 | blob_append(pOut, ".", 1); |
| 861 | }else{ |
| 862 | blob_append(pOut, "..", 2); |
| 863 | for(j=i+1; zPwd[j]; j++){ |
| 864 | if( zPwd[j]=='/' ){ |
| 865 | blob_append(pOut, "/..", 3); |
| 866 | } |
| 867 | } |
| 868 | } |
| 869 | return; |
| @@ -852,11 +876,11 @@ | |
| 876 | return; |
| 877 | } |
| 878 | while( zPath[i-1]!='/' ){ i--; } |
| 879 | blob_set(&tmp, "../"); |
| 880 | for(j=i; zPwd[j]; j++){ |
| 881 | if( zPwd[j]=='/' ){ |
| 882 | blob_append(&tmp, "../", 3); |
| 883 | } |
| 884 | } |
| 885 | blob_append(&tmp, &zPath[i], -1); |
| 886 | blob_reset(pOut); |
| 887 |
+52
-28
| --- src/file.c | ||
| +++ src/file.c | ||
| @@ -490,44 +490,68 @@ | ||
| 490 | 490 | ** * Does not contain two or more "/" characters in a row. |
| 491 | 491 | ** * Contains at least one character |
| 492 | 492 | ** |
| 493 | 493 | ** Invalid UTF8 characters result in a false return if bStrictUtf8 is |
| 494 | 494 | ** true. If bStrictUtf8 is false, invalid UTF8 characters are silently |
| 495 | -** ignored. | |
| 495 | +** ignored. See http://en.wikipedia.org/wiki/UTF-8#Invalid_byte_sequences | |
| 496 | +** and http://en.wikipedia.org/wiki/Unicode (for the noncharacters) | |
| 497 | +** | |
| 498 | +** The bStrictUtf8 flag is true for new inputs, but is false when parsing | |
| 499 | +** legacy manifests, for backwards compatibility. | |
| 496 | 500 | */ |
| 497 | 501 | int file_is_simple_pathname(const char *z, int bStrictUtf8){ |
| 498 | 502 | int i; |
| 499 | - char c = z[0]; | |
| 503 | + unsigned char c = (unsigned char) z[0]; | |
| 500 | 504 | char maskNonAscii = bStrictUtf8 ? 0x80 : 0x00; |
| 501 | 505 | if( c=='/' || c==0 ) return 0; |
| 502 | 506 | if( c=='.' ){ |
| 503 | 507 | if( z[1]=='/' || z[1]==0 ) return 0; |
| 504 | 508 | if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0; |
| 505 | 509 | } |
| 506 | - for(i=0; (c=z[i])!=0; i++){ | |
| 507 | - if( c & maskNonAscii ){ | |
| 508 | - if( (c & 0xf0) == 0xf0 ) { | |
| 509 | - /* Unicode characters > U+FFFF are not supported. | |
| 510 | - * Windows XP and earlier cannot handle them. | |
| 511 | - */ | |
| 512 | - return 0; | |
| 513 | - } | |
| 514 | - if( (c & 0xf0) == 0xe0 ) { | |
| 515 | - /* This is a 3-byte UTF-8 character */ | |
| 516 | - if ( (c & 0xfe) == 0xee ){ | |
| 517 | - /* Range U+E000 - U+FFFF (Starting with 0xee or 0xef in UTF-8 ) */ | |
| 518 | - if ( !(c & 1) || ((z[i+1] & 0xff) < 0xa4) ){ | |
| 519 | - /* Unicode character in the range U+E000 - U+F8FF are for | |
| 520 | - * private use, they shouldn't occur in filenames. */ | |
| 521 | - return 0; | |
| 522 | - } | |
| 523 | - }else if( ((c & 0xff) == 0xed) && ((z[i+1] & 0xe0) == 0xa0) ){ | |
| 524 | - /* Unicode character in the range U+D800 - U+DFFF are for | |
| 525 | - * surrogate pairs, they shouldn't occur in filenames. */ | |
| 526 | - return 0; | |
| 527 | - } | |
| 528 | - } | |
| 510 | + for(i=0; (c=(unsigned char)z[i])!=0; i++){ | |
| 511 | + if( c & maskNonAscii ){ | |
| 512 | + if( c<0xc2 ){ | |
| 513 | + /* Invalid 1-byte UTF-8 sequence, or 2-byte overlong form. */ | |
| 514 | + return 0; | |
| 515 | + }else if( (c&0xe0)==0xe0 ){ | |
| 516 | + /* 3-byte or more */ | |
| 517 | + int unicode; | |
| 518 | + if( c&0x10 ){ | |
| 519 | + /* Unicode characters > U+FFFF are not supported. | |
| 520 | + * Windows XP and earlier cannot handle them. | |
| 521 | + */ | |
| 522 | + return 0; | |
| 523 | + } | |
| 524 | + /* This is a 3-byte UTF-8 character */ | |
| 525 | + unicode = ((c&0x0f)<<12) + ((z[i+1]&0x3f)<<6) + (z[i+2]&0x3f); | |
| 526 | + if( unicode <= 0x07ff ){ | |
| 527 | + /* overlong form */ | |
| 528 | + return 0; | |
| 529 | + }else if( unicode>=0xe000 ){ | |
| 530 | + /* U+E000..U+FFFF */ | |
| 531 | + if( (unicode<=0xf8ff) || (unicode>=0xfffe) ){ | |
| 532 | + /* U+E000..U+F8FF are for private use. | |
| 533 | + * U+FFFE..U+FFFF are noncharacters. */ | |
| 534 | + return 0; | |
| 535 | + } else if( (unicode>=0xfdd0) && (unicode<=0xfdef) ){ | |
| 536 | + /* U+FDD0..U+FDEF are noncharacters. */ | |
| 537 | + return 0; | |
| 538 | + } | |
| 539 | + }else if( (unicode>=0xD800) && (unicode<=0xDFFF) ){ | |
| 540 | + /* U+D800..U+DFFF are for surrogate pairs. */ | |
| 541 | + return 0; | |
| 542 | + } | |
| 543 | + } | |
| 544 | + do{ | |
| 545 | + if( (z[i+1]&0xc0)!=0x80 ){ | |
| 546 | + /* Invalid continuation byte (multi-byte UTF-8) */ | |
| 547 | + return 0; | |
| 548 | + } | |
| 549 | + /* The hi-bits of c are used to keep track of the number of expected | |
| 550 | + * continuation-bytes, so we don't need a separate counter. */ | |
| 551 | + c<<=1; ++i; | |
| 552 | + }while( c>=0xc0 ); | |
| 529 | 553 | }else if( c=='\\' ){ |
| 530 | 554 | return 0; |
| 531 | 555 | } |
| 532 | 556 | if( c=='/' ){ |
| 533 | 557 | if( z[i+1]=='/' ) return 0; |
| @@ -578,11 +602,11 @@ | ||
| 578 | 602 | if( z[i]=='\\' ) z[i] = '/'; |
| 579 | 603 | } |
| 580 | 604 | #endif |
| 581 | 605 | |
| 582 | 606 | /* Removing trailing "/" characters */ |
| 583 | - if ( !slash ){ | |
| 607 | + if( !slash ){ | |
| 584 | 608 | while( n>1 && z[n-1]=='/' ){ n--; } |
| 585 | 609 | } |
| 586 | 610 | |
| 587 | 611 | /* Remove duplicate '/' characters. Except, two // at the beginning |
| 588 | 612 | ** of a pathname is allowed since this is important on windows. */ |
| @@ -835,11 +859,11 @@ | ||
| 835 | 859 | if( zPwd[i]==0 ){ |
| 836 | 860 | blob_append(pOut, ".", 1); |
| 837 | 861 | }else{ |
| 838 | 862 | blob_append(pOut, "..", 2); |
| 839 | 863 | for(j=i+1; zPwd[j]; j++){ |
| 840 | - if( zPwd[j]=='/' ) { | |
| 864 | + if( zPwd[j]=='/' ){ | |
| 841 | 865 | blob_append(pOut, "/..", 3); |
| 842 | 866 | } |
| 843 | 867 | } |
| 844 | 868 | } |
| 845 | 869 | return; |
| @@ -852,11 +876,11 @@ | ||
| 852 | 876 | return; |
| 853 | 877 | } |
| 854 | 878 | while( zPath[i-1]!='/' ){ i--; } |
| 855 | 879 | blob_set(&tmp, "../"); |
| 856 | 880 | for(j=i; zPwd[j]; j++){ |
| 857 | - if( zPwd[j]=='/' ) { | |
| 881 | + if( zPwd[j]=='/' ){ | |
| 858 | 882 | blob_append(&tmp, "../", 3); |
| 859 | 883 | } |
| 860 | 884 | } |
| 861 | 885 | blob_append(&tmp, &zPath[i], -1); |
| 862 | 886 | blob_reset(pOut); |
| 863 | 887 |
| --- src/file.c | |
| +++ src/file.c | |
| @@ -490,44 +490,68 @@ | |
| 490 | ** * Does not contain two or more "/" characters in a row. |
| 491 | ** * Contains at least one character |
| 492 | ** |
| 493 | ** Invalid UTF8 characters result in a false return if bStrictUtf8 is |
| 494 | ** true. If bStrictUtf8 is false, invalid UTF8 characters are silently |
| 495 | ** ignored. |
| 496 | */ |
| 497 | int file_is_simple_pathname(const char *z, int bStrictUtf8){ |
| 498 | int i; |
| 499 | char c = z[0]; |
| 500 | char maskNonAscii = bStrictUtf8 ? 0x80 : 0x00; |
| 501 | if( c=='/' || c==0 ) return 0; |
| 502 | if( c=='.' ){ |
| 503 | if( z[1]=='/' || z[1]==0 ) return 0; |
| 504 | if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0; |
| 505 | } |
| 506 | for(i=0; (c=z[i])!=0; i++){ |
| 507 | if( c & maskNonAscii ){ |
| 508 | if( (c & 0xf0) == 0xf0 ) { |
| 509 | /* Unicode characters > U+FFFF are not supported. |
| 510 | * Windows XP and earlier cannot handle them. |
| 511 | */ |
| 512 | return 0; |
| 513 | } |
| 514 | if( (c & 0xf0) == 0xe0 ) { |
| 515 | /* This is a 3-byte UTF-8 character */ |
| 516 | if ( (c & 0xfe) == 0xee ){ |
| 517 | /* Range U+E000 - U+FFFF (Starting with 0xee or 0xef in UTF-8 ) */ |
| 518 | if ( !(c & 1) || ((z[i+1] & 0xff) < 0xa4) ){ |
| 519 | /* Unicode character in the range U+E000 - U+F8FF are for |
| 520 | * private use, they shouldn't occur in filenames. */ |
| 521 | return 0; |
| 522 | } |
| 523 | }else if( ((c & 0xff) == 0xed) && ((z[i+1] & 0xe0) == 0xa0) ){ |
| 524 | /* Unicode character in the range U+D800 - U+DFFF are for |
| 525 | * surrogate pairs, they shouldn't occur in filenames. */ |
| 526 | return 0; |
| 527 | } |
| 528 | } |
| 529 | }else if( c=='\\' ){ |
| 530 | return 0; |
| 531 | } |
| 532 | if( c=='/' ){ |
| 533 | if( z[i+1]=='/' ) return 0; |
| @@ -578,11 +602,11 @@ | |
| 578 | if( z[i]=='\\' ) z[i] = '/'; |
| 579 | } |
| 580 | #endif |
| 581 | |
| 582 | /* Removing trailing "/" characters */ |
| 583 | if ( !slash ){ |
| 584 | while( n>1 && z[n-1]=='/' ){ n--; } |
| 585 | } |
| 586 | |
| 587 | /* Remove duplicate '/' characters. Except, two // at the beginning |
| 588 | ** of a pathname is allowed since this is important on windows. */ |
| @@ -835,11 +859,11 @@ | |
| 835 | if( zPwd[i]==0 ){ |
| 836 | blob_append(pOut, ".", 1); |
| 837 | }else{ |
| 838 | blob_append(pOut, "..", 2); |
| 839 | for(j=i+1; zPwd[j]; j++){ |
| 840 | if( zPwd[j]=='/' ) { |
| 841 | blob_append(pOut, "/..", 3); |
| 842 | } |
| 843 | } |
| 844 | } |
| 845 | return; |
| @@ -852,11 +876,11 @@ | |
| 852 | return; |
| 853 | } |
| 854 | while( zPath[i-1]!='/' ){ i--; } |
| 855 | blob_set(&tmp, "../"); |
| 856 | for(j=i; zPwd[j]; j++){ |
| 857 | if( zPwd[j]=='/' ) { |
| 858 | blob_append(&tmp, "../", 3); |
| 859 | } |
| 860 | } |
| 861 | blob_append(&tmp, &zPath[i], -1); |
| 862 | blob_reset(pOut); |
| 863 |
| --- src/file.c | |
| +++ src/file.c | |
| @@ -490,44 +490,68 @@ | |
| 490 | ** * Does not contain two or more "/" characters in a row. |
| 491 | ** * Contains at least one character |
| 492 | ** |
| 493 | ** Invalid UTF8 characters result in a false return if bStrictUtf8 is |
| 494 | ** true. If bStrictUtf8 is false, invalid UTF8 characters are silently |
| 495 | ** ignored. See http://en.wikipedia.org/wiki/UTF-8#Invalid_byte_sequences |
| 496 | ** and http://en.wikipedia.org/wiki/Unicode (for the noncharacters) |
| 497 | ** |
| 498 | ** The bStrictUtf8 flag is true for new inputs, but is false when parsing |
| 499 | ** legacy manifests, for backwards compatibility. |
| 500 | */ |
| 501 | int file_is_simple_pathname(const char *z, int bStrictUtf8){ |
| 502 | int i; |
| 503 | unsigned char c = (unsigned char) z[0]; |
| 504 | char maskNonAscii = bStrictUtf8 ? 0x80 : 0x00; |
| 505 | if( c=='/' || c==0 ) return 0; |
| 506 | if( c=='.' ){ |
| 507 | if( z[1]=='/' || z[1]==0 ) return 0; |
| 508 | if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0; |
| 509 | } |
| 510 | for(i=0; (c=(unsigned char)z[i])!=0; i++){ |
| 511 | if( c & maskNonAscii ){ |
| 512 | if( c<0xc2 ){ |
| 513 | /* Invalid 1-byte UTF-8 sequence, or 2-byte overlong form. */ |
| 514 | return 0; |
| 515 | }else if( (c&0xe0)==0xe0 ){ |
| 516 | /* 3-byte or more */ |
| 517 | int unicode; |
| 518 | if( c&0x10 ){ |
| 519 | /* Unicode characters > U+FFFF are not supported. |
| 520 | * Windows XP and earlier cannot handle them. |
| 521 | */ |
| 522 | return 0; |
| 523 | } |
| 524 | /* This is a 3-byte UTF-8 character */ |
| 525 | unicode = ((c&0x0f)<<12) + ((z[i+1]&0x3f)<<6) + (z[i+2]&0x3f); |
| 526 | if( unicode <= 0x07ff ){ |
| 527 | /* overlong form */ |
| 528 | return 0; |
| 529 | }else if( unicode>=0xe000 ){ |
| 530 | /* U+E000..U+FFFF */ |
| 531 | if( (unicode<=0xf8ff) || (unicode>=0xfffe) ){ |
| 532 | /* U+E000..U+F8FF are for private use. |
| 533 | * U+FFFE..U+FFFF are noncharacters. */ |
| 534 | return 0; |
| 535 | } else if( (unicode>=0xfdd0) && (unicode<=0xfdef) ){ |
| 536 | /* U+FDD0..U+FDEF are noncharacters. */ |
| 537 | return 0; |
| 538 | } |
| 539 | }else if( (unicode>=0xD800) && (unicode<=0xDFFF) ){ |
| 540 | /* U+D800..U+DFFF are for surrogate pairs. */ |
| 541 | return 0; |
| 542 | } |
| 543 | } |
| 544 | do{ |
| 545 | if( (z[i+1]&0xc0)!=0x80 ){ |
| 546 | /* Invalid continuation byte (multi-byte UTF-8) */ |
| 547 | return 0; |
| 548 | } |
| 549 | /* The hi-bits of c are used to keep track of the number of expected |
| 550 | * continuation-bytes, so we don't need a separate counter. */ |
| 551 | c<<=1; ++i; |
| 552 | }while( c>=0xc0 ); |
| 553 | }else if( c=='\\' ){ |
| 554 | return 0; |
| 555 | } |
| 556 | if( c=='/' ){ |
| 557 | if( z[i+1]=='/' ) return 0; |
| @@ -578,11 +602,11 @@ | |
| 602 | if( z[i]=='\\' ) z[i] = '/'; |
| 603 | } |
| 604 | #endif |
| 605 | |
| 606 | /* Removing trailing "/" characters */ |
| 607 | if( !slash ){ |
| 608 | while( n>1 && z[n-1]=='/' ){ n--; } |
| 609 | } |
| 610 | |
| 611 | /* Remove duplicate '/' characters. Except, two // at the beginning |
| 612 | ** of a pathname is allowed since this is important on windows. */ |
| @@ -835,11 +859,11 @@ | |
| 859 | if( zPwd[i]==0 ){ |
| 860 | blob_append(pOut, ".", 1); |
| 861 | }else{ |
| 862 | blob_append(pOut, "..", 2); |
| 863 | for(j=i+1; zPwd[j]; j++){ |
| 864 | if( zPwd[j]=='/' ){ |
| 865 | blob_append(pOut, "/..", 3); |
| 866 | } |
| 867 | } |
| 868 | } |
| 869 | return; |
| @@ -852,11 +876,11 @@ | |
| 876 | return; |
| 877 | } |
| 878 | while( zPath[i-1]!='/' ){ i--; } |
| 879 | blob_set(&tmp, "../"); |
| 880 | for(j=i; zPwd[j]; j++){ |
| 881 | if( zPwd[j]=='/' ){ |
| 882 | blob_append(&tmp, "../", 3); |
| 883 | } |
| 884 | } |
| 885 | blob_append(&tmp, &zPath[i], -1); |
| 886 | blob_reset(pOut); |
| 887 |