Fossil SCM
Further simplification of continuation-byte checking in filenames
Commit
a5cd2dd64eb8e6207e346ea93c257f17bc1a59fa
Parent
769c90a2300cb80…
1 file changed
+9
-10
+9
-10
| --- src/file.c | ||
| +++ src/file.c | ||
| @@ -507,10 +507,14 @@ | ||
| 507 | 507 | if( z[1]=='/' || z[1]==0 ) return 0; |
| 508 | 508 | if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0; |
| 509 | 509 | } |
| 510 | 510 | for(i=0; (c=(unsigned char)z[i])!=0; i++){ |
| 511 | 511 | if( c & maskNonAscii ){ |
| 512 | + if( (z[++i]&0xc0)!=0x80 ){ | |
| 513 | + /* Invalid first continuation byte */ | |
| 514 | + return 0; | |
| 515 | + } | |
| 512 | 516 | if( c<0xc2 ){ |
| 513 | 517 | /* Invalid 1-byte UTF-8 sequence, or 2-byte overlong form. */ |
| 514 | 518 | return 0; |
| 515 | 519 | }else if( (c&0xe0)==0xe0 ){ |
| 516 | 520 | /* 3-byte or more */ |
| @@ -520,11 +524,11 @@ | ||
| 520 | 524 | * Windows XP and earlier cannot handle them. |
| 521 | 525 | */ |
| 522 | 526 | return 0; |
| 523 | 527 | } |
| 524 | 528 | /* This is a 3-byte UTF-8 character */ |
| 525 | - unicode = ((c&0x0f)<<12) + ((z[i+1]&0x3f)<<6) + (z[i+2]&0x3f); | |
| 529 | + unicode = ((c&0x0f)<<12) + ((z[i]&0x3f)<<6) + (z[i+1]&0x3f); | |
| 526 | 530 | if( unicode <= 0x07ff ){ |
| 527 | 531 | /* overlong form */ |
| 528 | 532 | return 0; |
| 529 | 533 | }else if( unicode>=0xe000 ){ |
| 530 | 534 | /* U+E000..U+FFFF */ |
| @@ -534,24 +538,19 @@ | ||
| 534 | 538 | return 0; |
| 535 | 539 | } else if( (unicode>=0xfdd0) && (unicode<=0xfdef) ){ |
| 536 | 540 | /* U+FDD0..U+FDEF are noncharacters. */ |
| 537 | 541 | return 0; |
| 538 | 542 | } |
| 539 | - }else if( (unicode>=0xD800) && (unicode<=0xDFFF) ){ | |
| 543 | + }else if( (unicode>=0xd800) && (unicode<=0xdfff) ){ | |
| 540 | 544 | /* U+D800..U+DFFF are for surrogate pairs. */ |
| 541 | 545 | return 0; |
| 542 | 546 | } |
| 543 | - } | |
| 544 | - do{ | |
| 545 | - if( (z[i+1]&0xc0)!=0x80 ){ | |
| 546 | - /* Invalid continuation byte (multi-byte UTF-8) */ | |
| 547 | + if( (z[++i]&0xc0)!=0x80 ){ | |
| 548 | + /* Invalid second continuation byte */ | |
| 547 | 549 | return 0; |
| 548 | 550 | } |
| 549 | - /* The hi-bits of c are used to keep track of the number of expected | |
| 550 | - * continuation-bytes, so we don't need a separate counter. */ | |
| 551 | - c<<=1; ++i; | |
| 552 | - }while( c>=0xc0 ); | |
| 551 | + } | |
| 553 | 552 | }else if( c=='\\' ){ |
| 554 | 553 | return 0; |
| 555 | 554 | } |
| 556 | 555 | if( c=='/' ){ |
| 557 | 556 | if( z[i+1]=='/' ) return 0; |
| 558 | 557 |
| --- src/file.c | |
| +++ src/file.c | |
| @@ -507,10 +507,14 @@ | |
| 507 | if( z[1]=='/' || z[1]==0 ) return 0; |
| 508 | if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0; |
| 509 | } |
| 510 | for(i=0; (c=(unsigned char)z[i])!=0; i++){ |
| 511 | if( c & maskNonAscii ){ |
| 512 | if( c<0xc2 ){ |
| 513 | /* Invalid 1-byte UTF-8 sequence, or 2-byte overlong form. */ |
| 514 | return 0; |
| 515 | }else if( (c&0xe0)==0xe0 ){ |
| 516 | /* 3-byte or more */ |
| @@ -520,11 +524,11 @@ | |
| 520 | * Windows XP and earlier cannot handle them. |
| 521 | */ |
| 522 | return 0; |
| 523 | } |
| 524 | /* This is a 3-byte UTF-8 character */ |
| 525 | unicode = ((c&0x0f)<<12) + ((z[i+1]&0x3f)<<6) + (z[i+2]&0x3f); |
| 526 | if( unicode <= 0x07ff ){ |
| 527 | /* overlong form */ |
| 528 | return 0; |
| 529 | }else if( unicode>=0xe000 ){ |
| 530 | /* U+E000..U+FFFF */ |
| @@ -534,24 +538,19 @@ | |
| 534 | return 0; |
| 535 | } else if( (unicode>=0xfdd0) && (unicode<=0xfdef) ){ |
| 536 | /* U+FDD0..U+FDEF are noncharacters. */ |
| 537 | return 0; |
| 538 | } |
| 539 | }else if( (unicode>=0xD800) && (unicode<=0xDFFF) ){ |
| 540 | /* U+D800..U+DFFF are for surrogate pairs. */ |
| 541 | return 0; |
| 542 | } |
| 543 | } |
| 544 | do{ |
| 545 | if( (z[i+1]&0xc0)!=0x80 ){ |
| 546 | /* Invalid continuation byte (multi-byte UTF-8) */ |
| 547 | return 0; |
| 548 | } |
| 549 | /* The hi-bits of c are used to keep track of the number of expected |
| 550 | * continuation-bytes, so we don't need a separate counter. */ |
| 551 | c<<=1; ++i; |
| 552 | }while( c>=0xc0 ); |
| 553 | }else if( c=='\\' ){ |
| 554 | return 0; |
| 555 | } |
| 556 | if( c=='/' ){ |
| 557 | if( z[i+1]=='/' ) return 0; |
| 558 |
| --- src/file.c | |
| +++ src/file.c | |
| @@ -507,10 +507,14 @@ | |
| 507 | if( z[1]=='/' || z[1]==0 ) return 0; |
| 508 | if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0; |
| 509 | } |
| 510 | for(i=0; (c=(unsigned char)z[i])!=0; i++){ |
| 511 | if( c & maskNonAscii ){ |
| 512 | if( (z[++i]&0xc0)!=0x80 ){ |
| 513 | /* Invalid first continuation byte */ |
| 514 | return 0; |
| 515 | } |
| 516 | if( c<0xc2 ){ |
| 517 | /* Invalid 1-byte UTF-8 sequence, or 2-byte overlong form. */ |
| 518 | return 0; |
| 519 | }else if( (c&0xe0)==0xe0 ){ |
| 520 | /* 3-byte or more */ |
| @@ -520,11 +524,11 @@ | |
| 524 | * Windows XP and earlier cannot handle them. |
| 525 | */ |
| 526 | return 0; |
| 527 | } |
| 528 | /* This is a 3-byte UTF-8 character */ |
| 529 | unicode = ((c&0x0f)<<12) + ((z[i]&0x3f)<<6) + (z[i+1]&0x3f); |
| 530 | if( unicode <= 0x07ff ){ |
| 531 | /* overlong form */ |
| 532 | return 0; |
| 533 | }else if( unicode>=0xe000 ){ |
| 534 | /* U+E000..U+FFFF */ |
| @@ -534,24 +538,19 @@ | |
| 538 | return 0; |
| 539 | } else if( (unicode>=0xfdd0) && (unicode<=0xfdef) ){ |
| 540 | /* U+FDD0..U+FDEF are noncharacters. */ |
| 541 | return 0; |
| 542 | } |
| 543 | }else if( (unicode>=0xd800) && (unicode<=0xdfff) ){ |
| 544 | /* U+D800..U+DFFF are for surrogate pairs. */ |
| 545 | return 0; |
| 546 | } |
| 547 | if( (z[++i]&0xc0)!=0x80 ){ |
| 548 | /* Invalid second continuation byte */ |
| 549 | return 0; |
| 550 | } |
| 551 | } |
| 552 | }else if( c=='\\' ){ |
| 553 | return 0; |
| 554 | } |
| 555 | if( c=='/' ){ |
| 556 | if( z[i+1]=='/' ) return 0; |
| 557 |