Fossil SCM

Experimental fix for issue [d17d6e5b17]. <p>Should have a LOT more testing before merging it to trunk, because it is dangerous! <p>The method used is as described at: <br> [http://cygwin.com/cygwin-ug-net/using-specialnames.html]. The only problematic characters left are ':' and '\'; all other problematic characters are handled by translating them to characters in the range U+F000 to U+F0FF. <p>Feedback welcome.

jan.nijtmans 2012-11-20 13:46 trunk
Commit 82ce90f91c6e50ab624cc08e49c7e2195dc2f0ea
1 file changed +43 -5
+43 -5
--- src/file.c
+++ src/file.c
@@ -481,11 +481,11 @@
481481
** a file in a repository. Valid filenames follow all of the
482482
** following rules:
483483
**
484484
** * Does not begin with "/"
485485
** * Does not contain any path element named "." or ".."
486
-** * Does not contain any of these characters in the path: "\*[]?"
486
+** * Does not contain any of these characters in the path: "\:"
487487
** * Does not end with "/".
488488
** * Does not contain two or more "/" characters in a row.
489489
** * Contains at least one character
490490
*/
491491
int file_is_simple_pathname(const char *z){
@@ -495,11 +495,11 @@
495495
if( c=='.' ){
496496
if( z[1]=='/' || z[1]==0 ) return 0;
497497
if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0;
498498
}
499499
for(i=0; (c=z[i])!=0; i++){
500
- if( c=='\\' || c=='*' || c=='[' || c==']' || c=='?' ){
500
+ if( c=='\\' || c==':' ){
501501
return 0;
502502
}
503503
if( c=='/' ){
504504
if( z[i+1]=='/' ) return 0;
505505
if( z[i+1]=='.' ){
@@ -1089,15 +1089,33 @@
10891089
10901090
/*
10911091
** Translate Unicode to UTF8. Return a pointer to the translated text.
10921092
** Call fossil_mbcs_free() to deallocate any memory used to store the
10931093
** returned pointer when done.
1094
+**
1095
+** On Windows, characters in the range U+F001 to U+F07F (private use area)
1096
+** are translated into ASCII characters in the range U+0001 - U+007F. The
1097
+** only place they can come from are filenames using Cygwin's trick
1098
+** to circumvent invalid characters in filenames.
1099
+** See: <http://cygwin.com/cygwin-ug-net/using-specialnames.html>
1100
+** This way, fossil will work nicely together with the cygwin shell
1101
+** handling those filenames. On other shells, the generated filename
1102
+** might not be as expected, but apart from that nothing goes wrong.
10941103
*/
1095
-char *fossil_unicode_to_utf8(const void *zUnicode){
1104
+char *fossil_unicode_to_utf8(void *zUnicode){
10961105
#ifdef _WIN32
1097
- int nByte = WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, 0, 0, 0, 0);
1098
- char *zUtf = sqlite3_malloc( nByte );
1106
+ int nByte = 0;
1107
+ char *zUtf;
1108
+ WCHAR *wUnicode = zUnicode;
1109
+ while( *wUnicode != 0 ){
1110
+ if ( (*wUnicode > 0xF000) && (*wUnicode <= 0xF07F) ){
1111
+ *wUnicode &= 0x7F;
1112
+ }
1113
+ ++wUnicode;
1114
+ }
1115
+ nByte = WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, 0, 0, 0, 0);
1116
+ zUtf = sqlite3_malloc( nByte );
10991117
if( zUtf==0 ){
11001118
return 0;
11011119
}
11021120
WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, zUtf, nByte, 0, 0);
11031121
return zUtf;
@@ -1122,19 +1140,39 @@
11221140
11231141
/*
11241142
** Translate UTF8 to unicode for use in system calls. Return a pointer to the
11251143
** translated text.. Call fossil_mbcs_free() to deallocate any memory
11261144
** used to store the returned pointer when done.
1145
+**
1146
+** On Windows, characters in the range U+0001 to U+001F and the
1147
+** characters '"', '*', ':', '<', '>', '?', '|' and '\\' are invalid
1148
+** in filenames. Therefore, translate those (except ':' and '\\') to the
1149
+** private use area, in the range U+F001 - U+F07F, so those
1150
+** characters never arrive in any Windows API. The filenames might
1151
+** look strange in Windows explorer, but in the cygwin shell
1152
+** everything looks as expected.
1153
+**
1154
+** See: <http://cygwin.com/cygwin-ug-net/using-specialnames.html>
1155
+**
11271156
*/
11281157
void *fossil_utf8_to_unicode(const char *zUtf8){
11291158
#ifdef _WIN32
11301159
int nByte = MultiByteToWideChar(CP_UTF8, 0, zUtf8, -1, 0, 0);
11311160
wchar_t *zUnicode = sqlite3_malloc( nByte * 2 );
1161
+ wchar_t *wUnicode;
11321162
if( zUnicode==0 ){
11331163
return 0;
11341164
}
11351165
MultiByteToWideChar(CP_UTF8, 0, zUtf8, -1, zUnicode, nByte);
1166
+ wUnicode = zUnicode;
1167
+ while( --nByte > 0){
1168
+ if ( (*wUnicode < 32) || wcschr(L"\"*<>?|", *wUnicode) ){
1169
+ *wUnicode |= 0xF000;
1170
+ }
1171
+ ++wUnicode;
1172
+ }
1173
+
11361174
return zUnicode;
11371175
#else
11381176
return (void *)zUtf8; /* No-op on unix */
11391177
#endif
11401178
}
11411179
--- src/file.c
+++ src/file.c
@@ -481,11 +481,11 @@
481 ** a file in a repository. Valid filenames follow all of the
482 ** following rules:
483 **
484 ** * Does not begin with "/"
485 ** * Does not contain any path element named "." or ".."
486 ** * Does not contain any of these characters in the path: "\*[]?"
487 ** * Does not end with "/".
488 ** * Does not contain two or more "/" characters in a row.
489 ** * Contains at least one character
490 */
491 int file_is_simple_pathname(const char *z){
@@ -495,11 +495,11 @@
495 if( c=='.' ){
496 if( z[1]=='/' || z[1]==0 ) return 0;
497 if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0;
498 }
499 for(i=0; (c=z[i])!=0; i++){
500 if( c=='\\' || c=='*' || c=='[' || c==']' || c=='?' ){
501 return 0;
502 }
503 if( c=='/' ){
504 if( z[i+1]=='/' ) return 0;
505 if( z[i+1]=='.' ){
@@ -1089,15 +1089,33 @@
1089
1090 /*
1091 ** Translate Unicode to UTF8. Return a pointer to the translated text.
1092 ** Call fossil_mbcs_free() to deallocate any memory used to store the
1093 ** returned pointer when done.
 
 
 
 
 
 
 
 
 
1094 */
1095 char *fossil_unicode_to_utf8(const void *zUnicode){
1096 #ifdef _WIN32
1097 int nByte = WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, 0, 0, 0, 0);
1098 char *zUtf = sqlite3_malloc( nByte );
 
 
 
 
 
 
 
 
 
1099 if( zUtf==0 ){
1100 return 0;
1101 }
1102 WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, zUtf, nByte, 0, 0);
1103 return zUtf;
@@ -1122,19 +1140,39 @@
1122
1123 /*
1124 ** Translate UTF8 to unicode for use in system calls. Return a pointer to the
1125 ** translated text.. Call fossil_mbcs_free() to deallocate any memory
1126 ** used to store the returned pointer when done.
 
 
 
 
 
 
 
 
 
 
 
1127 */
1128 void *fossil_utf8_to_unicode(const char *zUtf8){
1129 #ifdef _WIN32
1130 int nByte = MultiByteToWideChar(CP_UTF8, 0, zUtf8, -1, 0, 0);
1131 wchar_t *zUnicode = sqlite3_malloc( nByte * 2 );
 
1132 if( zUnicode==0 ){
1133 return 0;
1134 }
1135 MultiByteToWideChar(CP_UTF8, 0, zUtf8, -1, zUnicode, nByte);
 
 
 
 
 
 
 
 
1136 return zUnicode;
1137 #else
1138 return (void *)zUtf8; /* No-op on unix */
1139 #endif
1140 }
1141
--- src/file.c
+++ src/file.c
@@ -481,11 +481,11 @@
481 ** a file in a repository. Valid filenames follow all of the
482 ** following rules:
483 **
484 ** * Does not begin with "/"
485 ** * Does not contain any path element named "." or ".."
486 ** * Does not contain any of these characters in the path: "\:"
487 ** * Does not end with "/".
488 ** * Does not contain two or more "/" characters in a row.
489 ** * Contains at least one character
490 */
491 int file_is_simple_pathname(const char *z){
@@ -495,11 +495,11 @@
495 if( c=='.' ){
496 if( z[1]=='/' || z[1]==0 ) return 0;
497 if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0;
498 }
499 for(i=0; (c=z[i])!=0; i++){
500 if( c=='\\' || c==':' ){
501 return 0;
502 }
503 if( c=='/' ){
504 if( z[i+1]=='/' ) return 0;
505 if( z[i+1]=='.' ){
@@ -1089,15 +1089,33 @@
1089
1090 /*
1091 ** Translate Unicode to UTF8. Return a pointer to the translated text.
1092 ** Call fossil_mbcs_free() to deallocate any memory used to store the
1093 ** returned pointer when done.
1094 **
1095 ** On Windows, characters in the range U+F001 to U+F07F (private use area)
1096 ** are translated into ASCII characters in the range U+0001 - U+007F. The
1097 ** only place they can come from are filenames using Cygwin's trick
1098 ** to circumvent invalid characters in filenames.
1099 ** See: <http://cygwin.com/cygwin-ug-net/using-specialnames.html>
1100 ** This way, fossil will work nicely together with the cygwin shell
1101 ** handling those filenames. On other shells, the generated filename
1102 ** might not be as expected, but apart from that nothing goes wrong.
1103 */
1104 char *fossil_unicode_to_utf8(void *zUnicode){
1105 #ifdef _WIN32
1106 int nByte = 0;
1107 char *zUtf;
1108 WCHAR *wUnicode = zUnicode;
1109 while( *wUnicode != 0 ){
1110 if ( (*wUnicode > 0xF000) && (*wUnicode <= 0xF07F) ){
1111 *wUnicode &= 0x7F;
1112 }
1113 ++wUnicode;
1114 }
1115 nByte = WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, 0, 0, 0, 0);
1116 zUtf = sqlite3_malloc( nByte );
1117 if( zUtf==0 ){
1118 return 0;
1119 }
1120 WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, zUtf, nByte, 0, 0);
1121 return zUtf;
@@ -1122,19 +1140,39 @@
1140
1141 /*
1142 ** Translate UTF8 to unicode for use in system calls. Return a pointer to the
1143 ** translated text.. Call fossil_mbcs_free() to deallocate any memory
1144 ** used to store the returned pointer when done.
1145 **
1146 ** On Windows, characters in the range U+0001 to U+001F and the
1147 ** characters '"', '*', ':', '<', '>', '?', '|' and '\\' are invalid
1148 ** in filenames. Therefore, translate those (except ':' and '\\') to the
1149 ** private use area, in the range U+F001 - U+F07F, so those
1150 ** characters never arrive in any Windows API. The filenames might
1151 ** look strange in Windows explorer, but in the cygwin shell
1152 ** everything looks as expected.
1153 **
1154 ** See: <http://cygwin.com/cygwin-ug-net/using-specialnames.html>
1155 **
1156 */
1157 void *fossil_utf8_to_unicode(const char *zUtf8){
1158 #ifdef _WIN32
1159 int nByte = MultiByteToWideChar(CP_UTF8, 0, zUtf8, -1, 0, 0);
1160 wchar_t *zUnicode = sqlite3_malloc( nByte * 2 );
1161 wchar_t *wUnicode;
1162 if( zUnicode==0 ){
1163 return 0;
1164 }
1165 MultiByteToWideChar(CP_UTF8, 0, zUtf8, -1, zUnicode, nByte);
1166 wUnicode = zUnicode;
1167 while( --nByte > 0){
1168 if ( (*wUnicode < 32) || wcschr(L"\"*<>?|", *wUnicode) ){
1169 *wUnicode |= 0xF000;
1170 }
1171 ++wUnicode;
1172 }
1173
1174 return zUnicode;
1175 #else
1176 return (void *)zUtf8; /* No-op on unix */
1177 #endif
1178 }
1179

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button