Fossil SCM

Experimental fix for issue [d17d6e5b17].

This should have a LOT more testing before it is merged to trunk, because it is dangerous!

The method used is the one described at [http://cygwin.com/cygwin-ug-net/using-specialnames.html]. The only problematic characters left are ':' and '\'; all other problematic characters are handled by translating them to characters in the range U+F000 to U+F0FF.

Feedback welcome.

jan.nijtmans 2012-11-20 13:46 trunk

Commit 82ce90f91c6e50ab624cc08e49c7e2195dc2f0ea

Parent 3c1ad1def951372…

1 file changed +43 -5

~ src/file.c

M src/file.c

+43 -5

		--- src/file.c
		+++ src/file.c
		@@ -481,11 +481,11 @@
481	481	** a file in a repository. Valid filenames follow all of the
482	482	** following rules:
483	483	**
484	484	** * Does not begin with "/"
485	485	** * Does not contain any path element named "." or ".."
486		-** * Does not contain any of these characters in the path: "\*[]?"
	486	+** * Does not contain any of these characters in the path: "\:"
487	487	** * Does not end with "/".
488	488	** * Does not contain two or more "/" characters in a row.
489	489	** * Contains at least one character
490	490	*/
491	491	int file_is_simple_pathname(const char *z){
		@@ -495,11 +495,11 @@
495	495	if( c=='.' ){
496	496	if( z[1]=='/' \|\| z[1]==0 ) return 0;
497	497	if( z[1]=='.' && (z[2]=='/' \|\| z[2]==0) ) return 0;
498	498	}
499	499	for(i=0; (c=z[i])!=0; i++){
500		- if( c=='\\' \|\| c=='*' \|\| c=='[' \|\| c==']' \|\| c=='?' ){
	500	+ if( c=='\\' \|\| c==':' ){
501	501	return 0;
502	502	}
503	503	if( c=='/' ){
504	504	if( z[i+1]=='/' ) return 0;
505	505	if( z[i+1]=='.' ){
		@@ -1089,15 +1089,33 @@
1089	1089
1090	1090	/*
1091	1091	** Translate Unicode to UTF8. Return a pointer to the translated text.
1092	1092	** Call fossil_mbcs_free() to deallocate any memory used to store the
1093	1093	** returned pointer when done.
	1094	+**
	1095	+** On Windows, characters in the range U+F001 to U+F07F (private use area)
	1096	+** are translated into ASCII characters in the range U+0001 - U+007F. The
	1097	+** only place they can come from is filenames using Cygwin's trick
	1098	+** to circumvent invalid characters in filenames.
	1099	+** See: <http://cygwin.com/cygwin-ug-net/using-specialnames.html>
	1100	+** This way, fossil will work nicely together with the cygwin shell
	1101	+** handling those filenames. On other shells, the generated filename
	1102	+** might not be as expected, but apart from that nothing goes wrong.
1094	1103	*/
1095		-char fossil_unicode_to_utf8(const void zUnicode){
	1104	+char fossil_unicode_to_utf8(void zUnicode){
1096	1105	#ifdef _WIN32
1097		- int nByte = WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, 0, 0, 0, 0);
1098		- char *zUtf = sqlite3_malloc( nByte );
	1106	+ int nByte = 0;
	1107	+ char *zUtf;
	1108	+ WCHAR *wUnicode = zUnicode;
	1109	+ while( *wUnicode != 0 ){
	1110	+ if ( (wUnicode > 0xF000) && (wUnicode <= 0xF07F) ){
	1111	+ *wUnicode &= 0x7F;
	1112	+ }
	1113	+ ++wUnicode;
	1114	+ }
	1115	+ nByte = WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, 0, 0, 0, 0);
	1116	+ zUtf = sqlite3_malloc( nByte );
1099	1117	if( zUtf==0 ){
1100	1118	return 0;
1101	1119	}
1102	1120	WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, zUtf, nByte, 0, 0);
1103	1121	return zUtf;
		@@ -1122,19 +1140,39 @@
1122	1140
1123	1141	/*
1124	1142	** Translate UTF8 to unicode for use in system calls. Return a pointer to the
1125	1143	** translated text.. Call fossil_mbcs_free() to deallocate any memory
1126	1144	** used to store the returned pointer when done.
	1145	+**
	1146	+** On Windows, characters in the range U+0001 to U+001F and the
	1147	+** characters '"', '*', ':', '<', '>', '?', '|' and '\\' are invalid
	1148	+** in filenames. Therefore, translate those to characters in the
	1149	+** private use area, in the range U+F001 - U+F07F, so those
	1150	+** characters never arrive in any Windows API. The filenames might
	1151	+** look strange in Windows explorer, but in the cygwin shell
	1152	+** everything looks as expected.
	1153	+**
	1154	+** See: <http://cygwin.com/cygwin-ug-net/using-specialnames.html>
	1155	+**
1127	1156	*/
1128	1157	void fossil_utf8_to_unicode(const char zUtf8){
1129	1158	#ifdef _WIN32
1130	1159	int nByte = MultiByteToWideChar(CP_UTF8, 0, zUtf8, -1, 0, 0);
1131	1160	wchar_t zUnicode = sqlite3_malloc( nByte 2 );
	1161	+ wchar_t *wUnicode;
1132	1162	if( zUnicode==0 ){
1133	1163	return 0;
1134	1164	}
1135	1165	MultiByteToWideChar(CP_UTF8, 0, zUtf8, -1, zUnicode, nByte);
	1166	+ wUnicode = zUnicode;
	1167	+ while( --nByte > 0){
	1168	+ if ( (wUnicode < 32) \|\| wcschr(L"\"<>?\|", *wUnicode) ){
	1169	+ *wUnicode \|= 0xF000;
	1170	+ }
	1171	+ ++wUnicode;
	1172	+ }
	1173	+
1136	1174	return zUnicode;
1137	1175	#else
1138	1176	return (void )zUtf8; / No-op on unix */
1139	1177	#endif
1140	1178	}
1141	1179

	--- src/file.c
	+++ src/file.c
	@@ -481,11 +481,11 @@
481	** a file in a repository. Valid filenames follow all of the
482	** following rules:
483	**
484	** * Does not begin with "/"
485	** * Does not contain any path element named "." or ".."
486	** * Does not contain any of these characters in the path: "\*[]?"
487	** * Does not end with "/".
488	** * Does not contain two or more "/" characters in a row.
489	** * Contains at least one character
490	*/
491	int file_is_simple_pathname(const char *z){
	@@ -495,11 +495,11 @@
495	if( c=='.' ){
496	if( z[1]=='/' \|\| z[1]==0 ) return 0;
497	if( z[1]=='.' && (z[2]=='/' \|\| z[2]==0) ) return 0;
498	}
499	for(i=0; (c=z[i])!=0; i++){
500	if( c=='\\' \|\| c=='*' \|\| c=='[' \|\| c==']' \|\| c=='?' ){
501	return 0;
502	}
503	if( c=='/' ){
504	if( z[i+1]=='/' ) return 0;
505	if( z[i+1]=='.' ){
	@@ -1089,15 +1089,33 @@
1089
1090	/*
1091	** Translate Unicode to UTF8. Return a pointer to the translated text.
1092	** Call fossil_mbcs_free() to deallocate any memory used to store the
1093	** returned pointer when done.









1094	*/
1095	char fossil_unicode_to_utf8(const void zUnicode){
1096	#ifdef _WIN32
1097	int nByte = WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, 0, 0, 0, 0);
1098	char *zUtf = sqlite3_malloc( nByte );









1099	if( zUtf==0 ){
1100	return 0;
1101	}
1102	WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, zUtf, nByte, 0, 0);
1103	return zUtf;
	@@ -1122,19 +1140,39 @@
1122
1123	/*
1124	** Translate UTF8 to unicode for use in system calls. Return a pointer to the
1125	** translated text.. Call fossil_mbcs_free() to deallocate any memory
1126	** used to store the returned pointer when done.











1127	*/
1128	void fossil_utf8_to_unicode(const char zUtf8){
1129	#ifdef _WIN32
1130	int nByte = MultiByteToWideChar(CP_UTF8, 0, zUtf8, -1, 0, 0);
1131	wchar_t zUnicode = sqlite3_malloc( nByte 2 );

1132	if( zUnicode==0 ){
1133	return 0;
1134	}
1135	MultiByteToWideChar(CP_UTF8, 0, zUtf8, -1, zUnicode, nByte);








1136	return zUnicode;
1137	#else
1138	return (void )zUtf8; / No-op on unix */
1139	#endif
1140	}
1141

	--- src/file.c
	+++ src/file.c
	@@ -481,11 +481,11 @@
481	** a file in a repository. Valid filenames follow all of the
482	** following rules:
483	**
484	** * Does not begin with "/"
485	** * Does not contain any path element named "." or ".."
486	** * Does not contain any of these characters in the path: "\:"
487	** * Does not end with "/".
488	** * Does not contain two or more "/" characters in a row.
489	** * Contains at least one character
490	*/
491	int file_is_simple_pathname(const char *z){
	@@ -495,11 +495,11 @@
495	if( c=='.' ){
496	if( z[1]=='/' \|\| z[1]==0 ) return 0;
497	if( z[1]=='.' && (z[2]=='/' \|\| z[2]==0) ) return 0;
498	}
499	for(i=0; (c=z[i])!=0; i++){
500	if( c=='\\' \|\| c==':' ){
501	return 0;
502	}
503	if( c=='/' ){
504	if( z[i+1]=='/' ) return 0;
505	if( z[i+1]=='.' ){
	@@ -1089,15 +1089,33 @@
1089
1090	/*
1091	** Translate Unicode to UTF8. Return a pointer to the translated text.
1092	** Call fossil_mbcs_free() to deallocate any memory used to store the
1093	** returned pointer when done.
1094	**
1095	** On Windows, characters in the range U+F001 to U+F07F (private use area)
1096	** are translated into ASCII characters in the range U+0001 - U+007F. The
1097	** only place they can come from is filenames using Cygwin's trick
1098	** to circumvent invalid characters in filenames.
1099	** See: <http://cygwin.com/cygwin-ug-net/using-specialnames.html>
1100	** This way, fossil will work nicely together with the cygwin shell
1101	** handling those filenames. On other shells, the generated filename
1102	** might not be as expected, but apart from that nothing goes wrong.
1103	*/
1104	char fossil_unicode_to_utf8(void zUnicode){
1105	#ifdef _WIN32
1106	int nByte = 0;
1107	char *zUtf;
1108	WCHAR *wUnicode = zUnicode;
1109	while( *wUnicode != 0 ){
1110	if ( (wUnicode > 0xF000) && (wUnicode <= 0xF07F) ){
1111	*wUnicode &= 0x7F;
1112	}
1113	++wUnicode;
1114	}
1115	nByte = WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, 0, 0, 0, 0);
1116	zUtf = sqlite3_malloc( nByte );
1117	if( zUtf==0 ){
1118	return 0;
1119	}
1120	WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, zUtf, nByte, 0, 0);
1121	return zUtf;
	@@ -1122,19 +1140,39 @@
1140
1141	/*
1142	** Translate UTF8 to unicode for use in system calls. Return a pointer to the
1143	** translated text.. Call fossil_mbcs_free() to deallocate any memory
1144	** used to store the returned pointer when done.
1145	**
1146	** On Windows, characters in the range U+0001 to U+001F and the
1147	** characters '"', '*', ':', '<', '>', '?', '|' and '\\' are invalid
1148	** in filenames. Therefore, translate those to characters in the
1149	** private use area, in the range U+F001 - U+F07F, so those
1150	** characters never arrive in any Windows API. The filenames might
1151	** look strange in Windows explorer, but in the cygwin shell
1152	** everything looks as expected.
1153	**
1154	** See: <http://cygwin.com/cygwin-ug-net/using-specialnames.html>
1155	**
1156	*/
1157	void fossil_utf8_to_unicode(const char zUtf8){
1158	#ifdef _WIN32
1159	int nByte = MultiByteToWideChar(CP_UTF8, 0, zUtf8, -1, 0, 0);
1160	wchar_t zUnicode = sqlite3_malloc( nByte 2 );
1161	wchar_t *wUnicode;
1162	if( zUnicode==0 ){
1163	return 0;
1164	}
1165	MultiByteToWideChar(CP_UTF8, 0, zUtf8, -1, zUnicode, nByte);
1166	wUnicode = zUnicode;
1167	while( --nByte > 0){
1168	if ( (wUnicode < 32) \|\| wcschr(L"\"<>?\|", *wUnicode) ){
1169	*wUnicode \|= 0xF000;
1170	}
1171	++wUnicode;
1172	}
1173
1174	return zUnicode;
1175	#else
1176	return (void )zUtf8; / No-op on unix */
1177	#endif
1178	}
1179

Fossil SCM

Keyboard Shortcuts