Fossil SCM

Further fine-tuning of the check for valid UTF8 characters in filenames.

drh 2013-01-23 13:15 trunk merge

Commit 4d456c9fd17bd289fd38b9b09d4e19a2c1af3183

Parent 3104348ec53e670…

2 files changed +52 -28 +52 -28

M src/file.c

+52 -28

		--- src/file.c
		+++ src/file.c
		@@ -490,44 +490,68 @@
490	490	** * Does not contain two or more "/" characters in a row.
491	491	** * Contains at least one character
492	492	**
493	493	** Invalid UTF8 characters result in a false return if bStrictUtf8 is
494	494	** true. If bStrictUtf8 is false, invalid UTF8 characters are silently
495		-** ignored.
	495	+** ignored. See http://en.wikipedia.org/wiki/UTF-8#Invalid_byte_sequences
	496	+** and http://en.wikipedia.org/wiki/Unicode (for the noncharacters)
	497	+**
	498	+** The bStrictUtf8 flag is true for new inputs, but is false when parsing
	499	+** legacy manifests, for backwards compatibility.
496	500	*/
497	501	int file_is_simple_pathname(const char *z, int bStrictUtf8){
498	502	int i;
499		- char c = z[0];
	503	+ unsigned char c = (unsigned char) z[0];
500	504	char maskNonAscii = bStrictUtf8 ? 0x80 : 0x00;
501	505	if( c=='/' || c==0 ) return 0;
502	506	if( c=='.' ){
503	507	if( z[1]=='/' || z[1]==0 ) return 0;
504	508	if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0;
505	509	}
506		- for(i=0; (c=z[i])!=0; i++){
507		- if( c & maskNonAscii ){
508		- if( (c & 0xf0) == 0xf0 ) {
509		- /* Unicode characters > U+FFFF are not supported.
510		- * Windows XP and earlier cannot handle them.
511		- */
512		- return 0;
513		- }
514		- if( (c & 0xf0) == 0xe0 ) {
515		- /* This is a 3-byte UTF-8 character */
516		- if ( (c & 0xfe) == 0xee ){
517		- /* Range U+E000 - U+FFFF (Starting with 0xee or 0xef in UTF-8 ) */
518		- if ( !(c & 1) || ((z[i+1] & 0xff) < 0xa4) ){
519		- /* Unicode character in the range U+E000 - U+F8FF are for
520		- * private use, they shouldn't occur in filenames. */
521		- return 0;
522		- }
523		- }else if( ((c & 0xff) == 0xed) && ((z[i+1] & 0xe0) == 0xa0) ){
524		- /* Unicode character in the range U+D800 - U+DFFF are for
525		- * surrogate pairs, they shouldn't occur in filenames. */
526		- return 0;
527		- }
528		- }
	510	+ for(i=0; (c=(unsigned char)z[i])!=0; i++){
	511	+ if( c & maskNonAscii ){
	512	+ if( c<0xc2 ){
	513	+ /* Invalid 1-byte UTF-8 sequence, or 2-byte overlong form. */
	514	+ return 0;
	515	+ }else if( (c&0xe0)==0xe0 ){
	516	+ /* 3-byte or more */
	517	+ int unicode;
	518	+ if( c&0x10 ){
	519	+ /* Unicode characters > U+FFFF are not supported.
	520	+ * Windows XP and earlier cannot handle them.
	521	+ */
	522	+ return 0;
	523	+ }
	524	+ /* This is a 3-byte UTF-8 character */
	525	+ unicode = ((c&0x0f)<<12) + ((z[i+1]&0x3f)<<6) + (z[i+2]&0x3f);
	526	+ if( unicode <= 0x07ff ){
	527	+ /* overlong form */
	528	+ return 0;
	529	+ }else if( unicode>=0xe000 ){
	530	+ /* U+E000..U+FFFF */
	531	+ if( (unicode<=0xf8ff) || (unicode>=0xfffe) ){
	532	+ /* U+E000..U+F8FF are for private use.
	533	+ * U+FFFE..U+FFFF are noncharacters. */
	534	+ return 0;
	535	+ } else if( (unicode>=0xfdd0) && (unicode<=0xfdef) ){
	536	+ /* U+FDD0..U+FDEF are noncharacters. */
	537	+ return 0;
	538	+ }
	539	+ }else if( (unicode>=0xD800) && (unicode<=0xDFFF) ){
	540	+ /* U+D800..U+DFFF are for surrogate pairs. */
	541	+ return 0;
	542	+ }
	543	+ }
	544	+ do{
	545	+ if( (z[i+1]&0xc0)!=0x80 ){
	546	+ /* Invalid continuation byte (multi-byte UTF-8) */
	547	+ return 0;
	548	+ }
	549	+ /* The hi-bits of c are used to keep track of the number of expected
	550	+ * continuation-bytes, so we don't need a separate counter. */
	551	+ c<<=1; ++i;
	552	+ }while( c>=0xc0 );
529	553	}else if( c=='\\' ){
530	554	return 0;
531	555	}
532	556	if( c=='/' ){
533	557	if( z[i+1]=='/' ) return 0;
		@@ -578,11 +602,11 @@
578	602	if( z[i]=='\\' ) z[i] = '/';
579	603	}
580	604	#endif
581	605
582	606	/* Removing trailing "/" characters */
583		- if ( !slash ){
	607	+ if( !slash ){
584	608	while( n>1 && z[n-1]=='/' ){ n--; }
585	609	}
586	610
587	611	/* Remove duplicate '/' characters. Except, two // at the beginning
588	612	** of a pathname is allowed since this is important on windows. */
		@@ -835,11 +859,11 @@
835	859	if( zPwd[i]==0 ){
836	860	blob_append(pOut, ".", 1);
837	861	}else{
838	862	blob_append(pOut, "..", 2);
839	863	for(j=i+1; zPwd[j]; j++){
840		- if( zPwd[j]=='/' ) {
	864	+ if( zPwd[j]=='/' ){
841	865	blob_append(pOut, "/..", 3);
842	866	}
843	867	}
844	868	}
845	869	return;
		@@ -852,11 +876,11 @@
852	876	return;
853	877	}
854	878	while( zPath[i-1]!='/' ){ i--; }
855	879	blob_set(&tmp, "../");
856	880	for(j=i; zPwd[j]; j++){
857		- if( zPwd[j]=='/' ) {
	881	+ if( zPwd[j]=='/' ){
858	882	blob_append(&tmp, "../", 3);
859	883	}
860	884	}
861	885	blob_append(&tmp, &zPath[i], -1);
862	886	blob_reset(pOut);
863	887

	--- src/file.c
	+++ src/file.c
	@@ -490,44 +490,68 @@
490	** * Does not contain two or more "/" characters in a row.
491	** * Contains at least one character
492	**
493	** Invalid UTF8 characters result in a false return if bStrictUtf8 is
494	** true. If bStrictUtf8 is false, invalid UTF8 characters are silently
495	** ignored.




496	*/
497	int file_is_simple_pathname(const char *z, int bStrictUtf8){
498	int i;
499	char c = z[0];
500	char maskNonAscii = bStrictUtf8 ? 0x80 : 0x00;
501	if( c=='/' || c==0 ) return 0;
502	if( c=='.' ){
503	if( z[1]=='/' || z[1]==0 ) return 0;
504	if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0;
505	}
506	for(i=0; (c=z[i])!=0; i++){
507	if( c & maskNonAscii ){
508	if( (c & 0xf0) == 0xf0 ) {
509	/* Unicode characters > U+FFFF are not supported.
510	* Windows XP and earlier cannot handle them.
511	*/
512	return 0;
513	}
514	if( (c & 0xf0) == 0xe0 ) {
515	/* This is a 3-byte UTF-8 character */
516	if ( (c & 0xfe) == 0xee ){
517	/* Range U+E000 - U+FFFF (Starting with 0xee or 0xef in UTF-8 ) */
518	if ( !(c & 1) || ((z[i+1] & 0xff) < 0xa4) ){
519	/* Unicode character in the range U+E000 - U+F8FF are for
520	* private use, they shouldn't occur in filenames. */
521	return 0;
522	}
523	}else if( ((c & 0xff) == 0xed) && ((z[i+1] & 0xe0) == 0xa0) ){
524	/* Unicode character in the range U+D800 - U+DFFF are for
525	* surrogate pairs, they shouldn't occur in filenames. */
526	return 0;
527	}
528	}




















529	}else if( c=='\\' ){
530	return 0;
531	}
532	if( c=='/' ){
533	if( z[i+1]=='/' ) return 0;
	@@ -578,11 +602,11 @@
578	if( z[i]=='\\' ) z[i] = '/';
579	}
580	#endif
581
582	/* Removing trailing "/" characters */
583	if ( !slash ){
584	while( n>1 && z[n-1]=='/' ){ n--; }
585	}
586
587	/* Remove duplicate '/' characters. Except, two // at the beginning
588	** of a pathname is allowed since this is important on windows. */
	@@ -835,11 +859,11 @@
835	if( zPwd[i]==0 ){
836	blob_append(pOut, ".", 1);
837	}else{
838	blob_append(pOut, "..", 2);
839	for(j=i+1; zPwd[j]; j++){
840	if( zPwd[j]=='/' ) {
841	blob_append(pOut, "/..", 3);
842	}
843	}
844	}
845	return;
	@@ -852,11 +876,11 @@
852	return;
853	}
854	while( zPath[i-1]!='/' ){ i--; }
855	blob_set(&tmp, "../");
856	for(j=i; zPwd[j]; j++){
857	if( zPwd[j]=='/' ) {
858	blob_append(&tmp, "../", 3);
859	}
860	}
861	blob_append(&tmp, &zPath[i], -1);
862	blob_reset(pOut);
863

	--- src/file.c
	+++ src/file.c
	@@ -490,44 +490,68 @@
490	** * Does not contain two or more "/" characters in a row.
491	** * Contains at least one character
492	**
493	** Invalid UTF8 characters result in a false return if bStrictUtf8 is
494	** true. If bStrictUtf8 is false, invalid UTF8 characters are silently
495	** ignored. See http://en.wikipedia.org/wiki/UTF-8#Invalid_byte_sequences
496	** and http://en.wikipedia.org/wiki/Unicode (for the noncharacters)
497	**
498	** The bStrictUtf8 flag is true for new inputs, but is false when parsing
499	** legacy manifests, for backwards compatibility.
500	*/
501	int file_is_simple_pathname(const char *z, int bStrictUtf8){
502	int i;
503	unsigned char c = (unsigned char) z[0];
504	char maskNonAscii = bStrictUtf8 ? 0x80 : 0x00;
505	if( c=='/' || c==0 ) return 0;
506	if( c=='.' ){
507	if( z[1]=='/' || z[1]==0 ) return 0;
508	if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0;
509	}
510	for(i=0; (c=(unsigned char)z[i])!=0; i++){
511	if( c & maskNonAscii ){
512	if( c<0xc2 ){
513	/* Invalid 1-byte UTF-8 sequence, or 2-byte overlong form. */
514	return 0;
515	}else if( (c&0xe0)==0xe0 ){
516	/* 3-byte or more */
517	int unicode;
518	if( c&0x10 ){
519	/* Unicode characters > U+FFFF are not supported.
520	* Windows XP and earlier cannot handle them.
521	*/
522	return 0;
523	}
524	/* This is a 3-byte UTF-8 character */
525	unicode = ((c&0x0f)<<12) + ((z[i+1]&0x3f)<<6) + (z[i+2]&0x3f);
526	if( unicode <= 0x07ff ){
527	/* overlong form */
528	return 0;
529	}else if( unicode>=0xe000 ){
530	/* U+E000..U+FFFF */
531	if( (unicode<=0xf8ff) || (unicode>=0xfffe) ){
532	/* U+E000..U+F8FF are for private use.
533	* U+FFFE..U+FFFF are noncharacters. */
534	return 0;
535	} else if( (unicode>=0xfdd0) && (unicode<=0xfdef) ){
536	/* U+FDD0..U+FDEF are noncharacters. */
537	return 0;
538	}
539	}else if( (unicode>=0xD800) && (unicode<=0xDFFF) ){
540	/* U+D800..U+DFFF are for surrogate pairs. */
541	return 0;
542	}
543	}
544	do{
545	if( (z[i+1]&0xc0)!=0x80 ){
546	/* Invalid continuation byte (multi-byte UTF-8) */
547	return 0;
548	}
549	/* The hi-bits of c are used to keep track of the number of expected
550	* continuation-bytes, so we don't need a separate counter. */
551	c<<=1; ++i;
552	}while( c>=0xc0 );
553	}else if( c=='\\' ){
554	return 0;
555	}
556	if( c=='/' ){
557	if( z[i+1]=='/' ) return 0;
	@@ -578,11 +602,11 @@
602	if( z[i]=='\\' ) z[i] = '/';
603	}
604	#endif
605
606	/* Removing trailing "/" characters */
607	if( !slash ){
608	while( n>1 && z[n-1]=='/' ){ n--; }
609	}
610
611	/* Remove duplicate '/' characters. Except, two // at the beginning
612	** of a pathname is allowed since this is important on windows. */
	@@ -835,11 +859,11 @@
859	if( zPwd[i]==0 ){
860	blob_append(pOut, ".", 1);
861	}else{
862	blob_append(pOut, "..", 2);
863	for(j=i+1; zPwd[j]; j++){
864	if( zPwd[j]=='/' ){
865	blob_append(pOut, "/..", 3);
866	}
867	}
868	}
869	return;
	@@ -852,11 +876,11 @@
876	return;
877	}
878	while( zPath[i-1]!='/' ){ i--; }
879	blob_set(&tmp, "../");
880	for(j=i; zPwd[j]; j++){
881	if( zPwd[j]=='/' ){
882	blob_append(&tmp, "../", 3);
883	}
884	}
885	blob_append(&tmp, &zPath[i], -1);
886	blob_reset(pOut);
887

M src/file.c

+52 -28

		--- src/file.c
		+++ src/file.c
		@@ -490,44 +490,68 @@
490	490	** * Does not contain two or more "/" characters in a row.
491	491	** * Contains at least one character
492	492	**
493	493	** Invalid UTF8 characters result in a false return if bStrictUtf8 is
494	494	** true. If bStrictUtf8 is false, invalid UTF8 characters are silently
495		-** ignored.
	495	+** ignored. See http://en.wikipedia.org/wiki/UTF-8#Invalid_byte_sequences
	496	+** and http://en.wikipedia.org/wiki/Unicode (for the noncharacters)
	497	+**
	498	+** The bStrictUtf8 flag is true for new inputs, but is false when parsing
	499	+** legacy manifests, for backwards compatibility.
496	500	*/
497	501	int file_is_simple_pathname(const char *z, int bStrictUtf8){
498	502	int i;
499		- char c = z[0];
	503	+ unsigned char c = (unsigned char) z[0];
500	504	char maskNonAscii = bStrictUtf8 ? 0x80 : 0x00;
501	505	if( c=='/' || c==0 ) return 0;
502	506	if( c=='.' ){
503	507	if( z[1]=='/' || z[1]==0 ) return 0;
504	508	if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0;
505	509	}
506		- for(i=0; (c=z[i])!=0; i++){
507		- if( c & maskNonAscii ){
508		- if( (c & 0xf0) == 0xf0 ) {
509		- /* Unicode characters > U+FFFF are not supported.
510		- * Windows XP and earlier cannot handle them.
511		- */
512		- return 0;
513		- }
514		- if( (c & 0xf0) == 0xe0 ) {
515		- /* This is a 3-byte UTF-8 character */
516		- if ( (c & 0xfe) == 0xee ){
517		- /* Range U+E000 - U+FFFF (Starting with 0xee or 0xef in UTF-8 ) */
518		- if ( !(c & 1) || ((z[i+1] & 0xff) < 0xa4) ){
519		- /* Unicode character in the range U+E000 - U+F8FF are for
520		- * private use, they shouldn't occur in filenames. */
521		- return 0;
522		- }
523		- }else if( ((c & 0xff) == 0xed) && ((z[i+1] & 0xe0) == 0xa0) ){
524		- /* Unicode character in the range U+D800 - U+DFFF are for
525		- * surrogate pairs, they shouldn't occur in filenames. */
526		- return 0;
527		- }
528		- }
	510	+ for(i=0; (c=(unsigned char)z[i])!=0; i++){
	511	+ if( c & maskNonAscii ){
	512	+ if( c<0xc2 ){
	513	+ /* Invalid 1-byte UTF-8 sequence, or 2-byte overlong form. */
	514	+ return 0;
	515	+ }else if( (c&0xe0)==0xe0 ){
	516	+ /* 3-byte or more */
	517	+ int unicode;
	518	+ if( c&0x10 ){
	519	+ /* Unicode characters > U+FFFF are not supported.
	520	+ * Windows XP and earlier cannot handle them.
	521	+ */
	522	+ return 0;
	523	+ }
	524	+ /* This is a 3-byte UTF-8 character */
	525	+ unicode = ((c&0x0f)<<12) + ((z[i+1]&0x3f)<<6) + (z[i+2]&0x3f);
	526	+ if( unicode <= 0x07ff ){
	527	+ /* overlong form */
	528	+ return 0;
	529	+ }else if( unicode>=0xe000 ){
	530	+ /* U+E000..U+FFFF */
	531	+ if( (unicode<=0xf8ff) || (unicode>=0xfffe) ){
	532	+ /* U+E000..U+F8FF are for private use.
	533	+ * U+FFFE..U+FFFF are noncharacters. */
	534	+ return 0;
	535	+ } else if( (unicode>=0xfdd0) && (unicode<=0xfdef) ){
	536	+ /* U+FDD0..U+FDEF are noncharacters. */
	537	+ return 0;
	538	+ }
	539	+ }else if( (unicode>=0xD800) && (unicode<=0xDFFF) ){
	540	+ /* U+D800..U+DFFF are for surrogate pairs. */
	541	+ return 0;
	542	+ }
	543	+ }
	544	+ do{
	545	+ if( (z[i+1]&0xc0)!=0x80 ){
	546	+ /* Invalid continuation byte (multi-byte UTF-8) */
	547	+ return 0;
	548	+ }
	549	+ /* The hi-bits of c are used to keep track of the number of expected
	550	+ * continuation-bytes, so we don't need a separate counter. */
	551	+ c<<=1; ++i;
	552	+ }while( c>=0xc0 );
529	553	}else if( c=='\\' ){
530	554	return 0;
531	555	}
532	556	if( c=='/' ){
533	557	if( z[i+1]=='/' ) return 0;
		@@ -578,11 +602,11 @@
578	602	if( z[i]=='\\' ) z[i] = '/';
579	603	}
580	604	#endif
581	605
582	606	/* Removing trailing "/" characters */
583		- if ( !slash ){
	607	+ if( !slash ){
584	608	while( n>1 && z[n-1]=='/' ){ n--; }
585	609	}
586	610
587	611	/* Remove duplicate '/' characters. Except, two // at the beginning
588	612	** of a pathname is allowed since this is important on windows. */
		@@ -835,11 +859,11 @@
835	859	if( zPwd[i]==0 ){
836	860	blob_append(pOut, ".", 1);
837	861	}else{
838	862	blob_append(pOut, "..", 2);
839	863	for(j=i+1; zPwd[j]; j++){
840		- if( zPwd[j]=='/' ) {
	864	+ if( zPwd[j]=='/' ){
841	865	blob_append(pOut, "/..", 3);
842	866	}
843	867	}
844	868	}
845	869	return;
		@@ -852,11 +876,11 @@
852	876	return;
853	877	}
854	878	while( zPath[i-1]!='/' ){ i--; }
855	879	blob_set(&tmp, "../");
856	880	for(j=i; zPwd[j]; j++){
857		- if( zPwd[j]=='/' ) {
	881	+ if( zPwd[j]=='/' ){
858	882	blob_append(&tmp, "../", 3);
859	883	}
860	884	}
861	885	blob_append(&tmp, &zPath[i], -1);
862	886	blob_reset(pOut);
863	887

	--- src/file.c
	+++ src/file.c
	@@ -490,44 +490,68 @@
490	** * Does not contain two or more "/" characters in a row.
491	** * Contains at least one character
492	**
493	** Invalid UTF8 characters result in a false return if bStrictUtf8 is
494	** true. If bStrictUtf8 is false, invalid UTF8 characters are silently
495	** ignored.




496	*/
497	int file_is_simple_pathname(const char *z, int bStrictUtf8){
498	int i;
499	char c = z[0];
500	char maskNonAscii = bStrictUtf8 ? 0x80 : 0x00;
501	if( c=='/' || c==0 ) return 0;
502	if( c=='.' ){
503	if( z[1]=='/' || z[1]==0 ) return 0;
504	if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0;
505	}
506	for(i=0; (c=z[i])!=0; i++){
507	if( c & maskNonAscii ){
508	if( (c & 0xf0) == 0xf0 ) {
509	/* Unicode characters > U+FFFF are not supported.
510	* Windows XP and earlier cannot handle them.
511	*/
512	return 0;
513	}
514	if( (c & 0xf0) == 0xe0 ) {
515	/* This is a 3-byte UTF-8 character */
516	if ( (c & 0xfe) == 0xee ){
517	/* Range U+E000 - U+FFFF (Starting with 0xee or 0xef in UTF-8 ) */
518	if ( !(c & 1) || ((z[i+1] & 0xff) < 0xa4) ){
519	/* Unicode character in the range U+E000 - U+F8FF are for
520	* private use, they shouldn't occur in filenames. */
521	return 0;
522	}
523	}else if( ((c & 0xff) == 0xed) && ((z[i+1] & 0xe0) == 0xa0) ){
524	/* Unicode character in the range U+D800 - U+DFFF are for
525	* surrogate pairs, they shouldn't occur in filenames. */
526	return 0;
527	}
528	}




















529	}else if( c=='\\' ){
530	return 0;
531	}
532	if( c=='/' ){
533	if( z[i+1]=='/' ) return 0;
	@@ -578,11 +602,11 @@
578	if( z[i]=='\\' ) z[i] = '/';
579	}
580	#endif
581
582	/* Removing trailing "/" characters */
583	if ( !slash ){
584	while( n>1 && z[n-1]=='/' ){ n--; }
585	}
586
587	/* Remove duplicate '/' characters. Except, two // at the beginning
588	** of a pathname is allowed since this is important on windows. */
	@@ -835,11 +859,11 @@
835	if( zPwd[i]==0 ){
836	blob_append(pOut, ".", 1);
837	}else{
838	blob_append(pOut, "..", 2);
839	for(j=i+1; zPwd[j]; j++){
840	if( zPwd[j]=='/' ) {
841	blob_append(pOut, "/..", 3);
842	}
843	}
844	}
845	return;
	@@ -852,11 +876,11 @@
852	return;
853	}
854	while( zPath[i-1]!='/' ){ i--; }
855	blob_set(&tmp, "../");
856	for(j=i; zPwd[j]; j++){
857	if( zPwd[j]=='/' ) {
858	blob_append(&tmp, "../", 3);
859	}
860	}
861	blob_append(&tmp, &zPath[i], -1);
862	blob_reset(pOut);
863

	--- src/file.c
	+++ src/file.c
	@@ -490,44 +490,68 @@
490	** * Does not contain two or more "/" characters in a row.
491	** * Contains at least one character
492	**
493	** Invalid UTF8 characters result in a false return if bStrictUtf8 is
494	** true. If bStrictUtf8 is false, invalid UTF8 characters are silently
495	** ignored. See http://en.wikipedia.org/wiki/UTF-8#Invalid_byte_sequences
496	** and http://en.wikipedia.org/wiki/Unicode (for the noncharacters)
497	**
498	** The bStrictUtf8 flag is true for new inputs, but is false when parsing
499	** legacy manifests, for backwards compatibility.
500	*/
501	int file_is_simple_pathname(const char *z, int bStrictUtf8){
502	int i;
503	unsigned char c = (unsigned char) z[0];
504	char maskNonAscii = bStrictUtf8 ? 0x80 : 0x00;
505	if( c=='/' || c==0 ) return 0;
506	if( c=='.' ){
507	if( z[1]=='/' || z[1]==0 ) return 0;
508	if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0;
509	}
510	for(i=0; (c=(unsigned char)z[i])!=0; i++){
511	if( c & maskNonAscii ){
512	if( c<0xc2 ){
513	/* Invalid 1-byte UTF-8 sequence, or 2-byte overlong form. */
514	return 0;
515	}else if( (c&0xe0)==0xe0 ){
516	/* 3-byte or more */
517	int unicode;
518	if( c&0x10 ){
519	/* Unicode characters > U+FFFF are not supported.
520	* Windows XP and earlier cannot handle them.
521	*/
522	return 0;
523	}
524	/* This is a 3-byte UTF-8 character */
525	unicode = ((c&0x0f)<<12) + ((z[i+1]&0x3f)<<6) + (z[i+2]&0x3f);
526	if( unicode <= 0x07ff ){
527	/* overlong form */
528	return 0;
529	}else if( unicode>=0xe000 ){
530	/* U+E000..U+FFFF */
531	if( (unicode<=0xf8ff) || (unicode>=0xfffe) ){
532	/* U+E000..U+F8FF are for private use.
533	* U+FFFE..U+FFFF are noncharacters. */
534	return 0;
535	} else if( (unicode>=0xfdd0) && (unicode<=0xfdef) ){
536	/* U+FDD0..U+FDEF are noncharacters. */
537	return 0;
538	}
539	}else if( (unicode>=0xD800) && (unicode<=0xDFFF) ){
540	/* U+D800..U+DFFF are for surrogate pairs. */
541	return 0;
542	}
543	}
544	do{
545	if( (z[i+1]&0xc0)!=0x80 ){
546	/* Invalid continuation byte (multi-byte UTF-8) */
547	return 0;
548	}
549	/* The hi-bits of c are used to keep track of the number of expected
550	* continuation-bytes, so we don't need a separate counter. */
551	c<<=1; ++i;
552	}while( c>=0xc0 );
553	}else if( c=='\\' ){
554	return 0;
555	}
556	if( c=='/' ){
557	if( z[i+1]=='/' ) return 0;
	@@ -578,11 +602,11 @@
602	if( z[i]=='\\' ) z[i] = '/';
603	}
604	#endif
605
606	/* Removing trailing "/" characters */
607	if( !slash ){
608	while( n>1 && z[n-1]=='/' ){ n--; }
609	}
610
611	/* Remove duplicate '/' characters. Except, two // at the beginning
612	** of a pathname is allowed since this is important on windows. */
	@@ -835,11 +859,11 @@
859	if( zPwd[i]==0 ){
860	blob_append(pOut, ".", 1);
861	}else{
862	blob_append(pOut, "..", 2);
863	for(j=i+1; zPwd[j]; j++){
864	if( zPwd[j]=='/' ){
865	blob_append(pOut, "/..", 3);
866	}
867	}
868	}
869	return;
	@@ -852,11 +876,11 @@
876	return;
877	}
878	while( zPath[i-1]!='/' ){ i--; }
879	blob_set(&tmp, "../");
880	for(j=i; zPwd[j]; j++){
881	if( zPwd[j]=='/' ){
882	blob_append(&tmp, "../", 3);
883	}
884	}
885	blob_append(&tmp, &zPath[i], -1);
886	blob_reset(pOut);
887

Fossil SCM

Keyboard Shortcuts