Fossil SCM

Further simplification of continuation byte checking in filenames

jan.nijtmans 2013-01-24 10:18 trunk
Commit a5cd2dd64eb8e6207e346ea93c257f17bc1a59fa
1 file changed +9 -10
+9 -10
--- src/file.c
+++ src/file.c
@@ -507,10 +507,14 @@
507507
if( z[1]=='/' || z[1]==0 ) return 0;
508508
if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0;
509509
}
510510
for(i=0; (c=(unsigned char)z[i])!=0; i++){
511511
if( c & maskNonAscii ){
512
+ if( (z[++i]&0xc0)!=0x80 ){
513
+ /* Invalid first continuation byte */
514
+ return 0;
515
+ }
512516
if( c<0xc2 ){
513517
/* Invalid 1-byte UTF-8 sequence, or 2-byte overlong form. */
514518
return 0;
515519
}else if( (c&0xe0)==0xe0 ){
516520
/* 3-byte or more */
@@ -520,11 +524,11 @@
520524
* Windows XP and earlier cannot handle them.
521525
*/
522526
return 0;
523527
}
524528
/* This is a 3-byte UTF-8 character */
525
- unicode = ((c&0x0f)<<12) + ((z[i+1]&0x3f)<<6) + (z[i+2]&0x3f);
529
+ unicode = ((c&0x0f)<<12) + ((z[i]&0x3f)<<6) + (z[i+1]&0x3f);
526530
if( unicode <= 0x07ff ){
527531
/* overlong form */
528532
return 0;
529533
}else if( unicode>=0xe000 ){
530534
/* U+E000..U+FFFF */
@@ -534,24 +538,19 @@
534538
return 0;
535539
} else if( (unicode>=0xfdd0) && (unicode<=0xfdef) ){
536540
/* U+FDD0..U+FDEF are noncharacters. */
537541
return 0;
538542
}
539
- }else if( (unicode>=0xD800) && (unicode<=0xDFFF) ){
543
+ }else if( (unicode>=0xd800) && (unicode<=0xdfff) ){
540544
/* U+D800..U+DFFF are for surrogate pairs. */
541545
return 0;
542546
}
543
- }
544
- do{
545
- if( (z[i+1]&0xc0)!=0x80 ){
546
- /* Invalid continuation byte (multi-byte UTF-8) */
547
+ if( (z[++i]&0xc0)!=0x80 ){
548
+ /* Invalid second continuation byte */
547549
return 0;
548550
}
549
- /* The hi-bits of c are used to keep track of the number of expected
550
- * continuation-bytes, so we don't need a separate counter. */
551
- c<<=1; ++i;
552
- }while( c>=0xc0 );
551
+ }
553552
}else if( c=='\\' ){
554553
return 0;
555554
}
556555
if( c=='/' ){
557556
if( z[i+1]=='/' ) return 0;
558557
--- src/file.c
+++ src/file.c
@@ -507,10 +507,14 @@
507 if( z[1]=='/' || z[1]==0 ) return 0;
508 if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0;
509 }
510 for(i=0; (c=(unsigned char)z[i])!=0; i++){
511 if( c & maskNonAscii ){
 
 
 
 
512 if( c<0xc2 ){
513 /* Invalid 1-byte UTF-8 sequence, or 2-byte overlong form. */
514 return 0;
515 }else if( (c&0xe0)==0xe0 ){
516 /* 3-byte or more */
@@ -520,11 +524,11 @@
520 * Windows XP and earlier cannot handle them.
521 */
522 return 0;
523 }
524 /* This is a 3-byte UTF-8 character */
525 unicode = ((c&0x0f)<<12) + ((z[i+1]&0x3f)<<6) + (z[i+2]&0x3f);
526 if( unicode <= 0x07ff ){
527 /* overlong form */
528 return 0;
529 }else if( unicode>=0xe000 ){
530 /* U+E000..U+FFFF */
@@ -534,24 +538,19 @@
534 return 0;
535 } else if( (unicode>=0xfdd0) && (unicode<=0xfdef) ){
536 /* U+FDD0..U+FDEF are noncharacters. */
537 return 0;
538 }
539 }else if( (unicode>=0xD800) && (unicode<=0xDFFF) ){
540 /* U+D800..U+DFFF are for surrogate pairs. */
541 return 0;
542 }
543 }
544 do{
545 if( (z[i+1]&0xc0)!=0x80 ){
546 /* Invalid continuation byte (multi-byte UTF-8) */
547 return 0;
548 }
549 /* The hi-bits of c are used to keep track of the number of expected
550 * continuation-bytes, so we don't need a separate counter. */
551 c<<=1; ++i;
552 }while( c>=0xc0 );
553 }else if( c=='\\' ){
554 return 0;
555 }
556 if( c=='/' ){
557 if( z[i+1]=='/' ) return 0;
558
--- src/file.c
+++ src/file.c
@@ -507,10 +507,14 @@
507 if( z[1]=='/' || z[1]==0 ) return 0;
508 if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0;
509 }
510 for(i=0; (c=(unsigned char)z[i])!=0; i++){
511 if( c & maskNonAscii ){
512 if( (z[++i]&0xc0)!=0x80 ){
513 /* Invalid first continuation byte */
514 return 0;
515 }
516 if( c<0xc2 ){
517 /* Invalid 1-byte UTF-8 sequence, or 2-byte overlong form. */
518 return 0;
519 }else if( (c&0xe0)==0xe0 ){
520 /* 3-byte or more */
@@ -520,11 +524,11 @@
524 * Windows XP and earlier cannot handle them.
525 */
526 return 0;
527 }
528 /* This is a 3-byte UTF-8 character */
529 unicode = ((c&0x0f)<<12) + ((z[i]&0x3f)<<6) + (z[i+1]&0x3f);
530 if( unicode <= 0x07ff ){
531 /* overlong form */
532 return 0;
533 }else if( unicode>=0xe000 ){
534 /* U+E000..U+FFFF */
@@ -534,24 +538,19 @@
538 return 0;
539 } else if( (unicode>=0xfdd0) && (unicode<=0xfdef) ){
540 /* U+FDD0..U+FDEF are noncharacters. */
541 return 0;
542 }
543 }else if( (unicode>=0xd800) && (unicode<=0xdfff) ){
544 /* U+D800..U+DFFF are for surrogate pairs. */
545 return 0;
546 }
547 if( (z[++i]&0xc0)!=0x80 ){
548 /* Invalid second continuation byte */
 
 
549 return 0;
550 }
551 }
 
 
 
552 }else if( c=='\\' ){
553 return 0;
554 }
555 if( c=='/' ){
556 if( z[i+1]=='/' ) return 0;
557

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button