| | @@ -70,11 +70,11 @@ |
| 70 | 70 | }else{ |
| 71 | 71 | return stat(zFilename, buf); |
| 72 | 72 | } |
| 73 | 73 | #else |
| 74 | 74 | int rc = 0; |
| 75 | | - wchar_t *zMbcs = fossil_utf8_to_unicode(zFilename); |
| 75 | + wchar_t *zMbcs = fossil_utf8_to_filename(zFilename); |
| 76 | 76 | rc = _wstati64(zMbcs, buf); |
| 77 | 77 | fossil_mbcs_free(zMbcs); |
| 78 | 78 | return rc; |
| 79 | 79 | #endif |
| 80 | 80 | } |
| | @@ -301,11 +301,11 @@ |
| 301 | 301 | /* |
| 302 | 302 | ** Wrapper around the access() system call. |
| 303 | 303 | */ |
| 304 | 304 | int file_access(const char *zFilename, int flags){ |
| 305 | 305 | #ifdef _WIN32 |
| 306 | | - wchar_t *zMbcs = fossil_utf8_to_unicode(zFilename); |
| 306 | + wchar_t *zMbcs = fossil_utf8_to_filename(zFilename); |
| 307 | 307 | int rc = _waccess(zMbcs, flags); |
| 308 | 308 | fossil_mbcs_free(zMbcs); |
| 309 | 309 | #else |
| 310 | 310 | int rc = access(zFilename, flags); |
| 311 | 311 | #endif |
| | @@ -403,11 +403,11 @@ |
| 403 | 403 | tv[0].tv_sec = newMTime; |
| 404 | 404 | tv[1].tv_sec = newMTime; |
| 405 | 405 | utimes(zFilename, tv); |
| 406 | 406 | #else |
| 407 | 407 | struct _utimbuf tb; |
| 408 | | - wchar_t *zMbcs = fossil_utf8_to_unicode(zFilename); |
| 408 | + wchar_t *zMbcs = fossil_utf8_to_filename(zFilename); |
| 409 | 409 | tb.actime = newMTime; |
| 410 | 410 | tb.modtime = newMTime; |
| 411 | 411 | _wutime(zMbcs, &tb); |
| 412 | 412 | fossil_mbcs_free(zMbcs); |
| 413 | 413 | #endif |
| | @@ -439,11 +439,11 @@ |
| 439 | 439 | /* |
| 440 | 440 | ** Delete a file. |
| 441 | 441 | */ |
| 442 | 442 | void file_delete(const char *zFilename){ |
| 443 | 443 | #ifdef _WIN32 |
| 444 | | - wchar_t *z = fossil_utf8_to_unicode(zFilename); |
| 444 | + wchar_t *z = fossil_utf8_to_filename(zFilename); |
| 445 | 445 | _wunlink(z); |
| 446 | 446 | fossil_mbcs_free(z); |
| 447 | 447 | #else |
| 448 | 448 | unlink(zFilename); |
| 449 | 449 | #endif |
| | @@ -463,11 +463,11 @@ |
| 463 | 463 | file_delete(zName); |
| 464 | 464 | } |
| 465 | 465 | if( rc!=1 ){ |
| 466 | 466 | #if defined(_WIN32) |
| 467 | 467 | int rc; |
| 468 | | - wchar_t *zMbcs = fossil_utf8_to_unicode(zName); |
| 468 | + wchar_t *zMbcs = fossil_utf8_to_filename(zName); |
| 469 | 469 | rc = _wmkdir(zMbcs); |
| 470 | 470 | fossil_mbcs_free(zMbcs); |
| 471 | 471 | return rc; |
| 472 | 472 | #else |
| 473 | 473 | return mkdir(zName, 0755); |
| | @@ -481,11 +481,11 @@ |
| 481 | 481 | ** a file in a repository. Valid filenames follow all of the |
| 482 | 482 | ** following rules: |
| 483 | 483 | ** |
| 484 | 484 | ** * Does not begin with "/" |
| 485 | 485 | ** * Does not contain any path element named "." or ".." |
| 486 | | -** * Does not contain any of these characters in the path: "\:" |
| 486 | +** * Does not contain any of these characters in the path: "\*[]?" |
| 487 | 487 | ** * Does not end with "/". |
| 488 | 488 | ** * Does not contain two or more "/" characters in a row. |
| 489 | 489 | ** * Contains at least one character |
| 490 | 490 | */ |
| 491 | 491 | int file_is_simple_pathname(const char *z){ |
| | @@ -495,11 +495,11 @@ |
| 495 | 495 | if( c=='.' ){ |
| 496 | 496 | if( z[1]=='/' || z[1]==0 ) return 0; |
| 497 | 497 | if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0; |
| 498 | 498 | } |
| 499 | 499 | for(i=0; (c=z[i])!=0; i++){ |
| 500 | | - if( c=='\\' || c==':' ){ |
| 500 | + if( c=='\\' || c=='*' || c=='[' || c==']' || c=='?' ){ |
| 501 | 501 | return 0; |
| 502 | 502 | } |
| 503 | 503 | if( c=='/' ){ |
| 504 | 504 | if( z[i+1]=='/' ) return 0; |
| 505 | 505 | if( z[i+1]=='.' ){ |
| | @@ -1089,38 +1089,53 @@ |
| 1089 | 1089 | |
| 1090 | 1090 | /* |
| 1091 | 1091 | ** Translate Unicode to UTF8. Return a pointer to the translated text, |
| 1092 | 1092 | ** or 0 if no buffer could be allocated for it. Call fossil_mbcs_free() to |
| 1093 | 1093 | ** deallocate any memory used to store the returned pointer when done. |
| 1094 | +*/ |
| 1095 | +char *fossil_unicode_to_utf8(const void *zUnicode){ |
| 1096 | +#ifdef _WIN32 |
| 1097 | + int nByte = WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, 0, 0, 0, 0); |
| 1098 | + char *zUtf = sqlite3_malloc( nByte ); |
| 1099 | + if( zUtf==0 ){ |
| 1100 | + return 0; |
| 1101 | + } |
| 1102 | + WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, zUtf, nByte, 0, 0); |
| 1103 | + return zUtf; |
| 1104 | +#else |
| 1105 | + return (char *)zUnicode; /* No-op on unix: the input pointer is returned as is */ |
| 1106 | +#endif |
| 1107 | +} |
| 1108 | + |
| 1109 | +/* |
| 1110 | +** Translate Unicode (filename) to UTF8. Return a pointer to the |
| 1111 | +** translated text. Call fossil_mbcs_free() to deallocate any |
| 1112 | +** memory used to store the returned pointer when done. |
| 1094 | 1113 | ** |
| 1095 | | -** On Windows, characters in the range U+FF01 to U+FF7F (private use area) |
| 1114 | +** On Windows, characters in the range U+F001 to U+F07F (private use area) |
| 1096 | 1115 | ** are translated into ASCII characters in the range U+0001 - U+007F. The |
| 1097 | 1116 | ** only place they can come from is filenames using Cygwin's trick |
| 1098 | 1117 | ** to circumvent invalid characters in filenames. |
| 1099 | 1118 | ** See: <http://cygwin.com/cygwin-ug-net/using-specialnames.html> |
| 1100 | 1119 | ** This way, fossil will work nicely together with the cygwin shell |
| 1101 | 1120 | ** handling those filenames. On other shells, the generated filename |
| 1102 | 1121 | ** might not be as expected, but apart from that nothing goes wrong. On Windows the translation is done in place, in the caller's buffer. |
| 1103 | 1122 | */ |
| 1104 | | -char *fossil_unicode_to_utf8(void *zUnicode){ |
| 1123 | +char *fossil_filename_to_utf8(void *zUnicode){ |
| 1105 | 1124 | #ifdef _WIN32 |
| 1106 | | - int nByte = 0; |
| 1107 | | - char *zUtf; |
| 1108 | 1125 | WCHAR *wUnicode = zUnicode; |
| 1109 | 1126 | while( *wUnicode != 0 ){ |
| 1110 | | - if ( (*wUnicode > 0xF000) && (*wUnicode <= 0xF07F) ){ |
| 1111 | | - *wUnicode &= 0x7F; |
| 1127 | + if ( (*wUnicode & 0xFF80) == 0xF000 ){ |
| 1128 | + WCHAR converted = (*wUnicode & 0x7F); |
| 1129 | + /* Only convert when the result is below 32 or one of the characters in the list below; every other value in the private-use range is left untouched. NOTE(review): U+F000 also passes the mask test above and converts to 0, which would end the string early -- confirm this is intended. */ |
| 1130 | + if ( (converted < 32) || wcschr(L"\"*<>?|:", converted) ){ |
| 1131 | + *wUnicode = converted; |
| 1132 | + } |
| 1112 | 1133 | } |
| 1113 | 1134 | ++wUnicode; |
| 1114 | 1135 | } |
| 1115 | | - nByte = WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, 0, 0, 0, 0); |
| 1116 | | - zUtf = sqlite3_malloc( nByte ); |
| 1117 | | - if( zUtf==0 ){ |
| 1118 | | - return 0; |
| 1119 | | - } |
| 1120 | | - WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, zUtf, nByte, 0, 0); |
| 1121 | | - return zUtf; |
| 1136 | + return fossil_unicode_to_utf8(zUnicode); |
| 1122 | 1137 | #else |
| 1123 | 1138 | return (char *)zUnicode; /* No-op on unix */ |
| 1124 | 1139 | #endif |
| 1125 | 1140 | } |
| 1126 | 1141 | |
| | @@ -1140,34 +1155,52 @@ |
| 1140 | 1155 | |
| 1141 | 1156 | /* |
| 1142 | 1157 | ** Translate UTF8 to unicode for use in system calls. Return a pointer to the |
| 1143 | 1158 | ** translated text, or 0 if no buffer could be allocated for it. Call fossil_mbcs_free() to deallocate any memory |
| 1144 | 1159 | ** used to store the returned pointer when done. |
| 1160 | +*/ |
| 1161 | +void *fossil_utf8_to_unicode(const char *zUtf8){ |
| 1162 | +#ifdef _WIN32 |
| 1163 | + int nByte = MultiByteToWideChar(CP_UTF8, 0, zUtf8, -1, 0, 0); |
| 1164 | + wchar_t *zUnicode = sqlite3_malloc( nByte * 2 ); |
| 1165 | + if( zUnicode==0 ){ |
| 1166 | + return 0; |
| 1167 | + } |
| 1168 | + MultiByteToWideChar(CP_UTF8, 0, zUtf8, -1, zUnicode, nByte); |
| 1169 | + |
| 1170 | + return zUnicode; |
| 1171 | +#else |
| 1172 | + return (void *)zUtf8; /* No-op on unix: the input pointer is returned as is */ |
| 1173 | +#endif |
| 1174 | +} |
| 1175 | + |
| 1176 | +/* |
| 1177 | +** Translate UTF8 to unicode for use in filename translations. |
| 1178 | +** Return a pointer to the translated text. Call fossil_mbcs_free() |
| 1179 | +** to deallocate any memory used to store the returned pointer when done. |
| 1145 | 1180 | ** |
| 1146 | | -** On Windows, characters in the range U+001 to U+0031 and the |
| 1181 | +** On Windows, characters in the range U+0001 to U+001F and the |
| 1147 | 1182 | ** characters '"', '*', ':', '<', '>', '?', '|' and '\\' are invalid |
| 1148 | 1183 | ** to be used. Therefore, translate those to characters in the |
| 1149 | | -** (private use area), in the range U+0001 - U+007F, so those |
| 1184 | +** (private use area), in the range U+F001 - U+F07F, so those |
| 1150 | 1185 | ** characters never arrive in any Windows API. The filenames might |
| 1151 | 1186 | ** look strange in Windows explorer, but in the cygwin shell |
| 1152 | 1187 | ** everything looks as expected. |
| 1153 | 1188 | ** |
| 1154 | 1189 | ** See: <http://cygwin.com/cygwin-ug-net/using-specialnames.html> |
| 1155 | 1190 | ** |
| 1156 | 1191 | */ |
| 1157 | | -void *fossil_utf8_to_unicode(const char *zUtf8){ |
| 1158 | | -#ifdef _WIN32 |
| 1159 | | - int nByte = MultiByteToWideChar(CP_UTF8, 0, zUtf8, -1, 0, 0); |
| 1160 | | - wchar_t *zUnicode = sqlite3_malloc( nByte * 2 ); |
| 1161 | | - wchar_t *wUnicode; |
| 1162 | | - if( zUnicode==0 ){ |
| 1163 | | - return 0; |
| 1164 | | - } |
| 1165 | | - MultiByteToWideChar(CP_UTF8, 0, zUtf8, -1, zUnicode, nByte); |
| 1166 | | - wUnicode = zUnicode; |
| 1167 | | - while( --nByte > 0){ |
| 1168 | | - if ( (*wUnicode < 32) || wcschr(L"\"*<>?|", *wUnicode) ){ |
| 1192 | +void *fossil_utf8_to_filename(const char *zUtf8){ |
| 1193 | +#ifdef _WIN32 |
| 1194 | + WCHAR *zUnicode = fossil_utf8_to_unicode(zUtf8); |
| 1195 | + WCHAR *wUnicode = zUnicode; |
| 1196 | + /* If path starts with "<drive>:/" or "<drive>:\", don't translate the ':' */ |
| 1197 | + if ( file_is_absolute_path(zUtf8) ){ |
| 1198 | + wUnicode += 3; |
| 1199 | + } |
| 1200 | + while( *wUnicode != '\0' ){ |
| 1201 | + if ( (*wUnicode < 32) || wcschr(L"\"*<>?|:", *wUnicode) ){ |
| 1169 | 1202 | *wUnicode |= 0xF000; |
| 1170 | 1203 | } |
| 1171 | 1204 | ++wUnicode; |
| 1172 | 1205 | } |
| 1173 | 1206 | |
| | @@ -1248,14 +1281,14 @@ |
| 1248 | 1281 | ** Like fopen() but always takes a UTF8 argument. |
| 1249 | 1282 | */ |
| 1250 | 1283 | FILE *fossil_fopen(const char *zName, const char *zMode){ |
| 1251 | 1284 | #ifdef _WIN32 |
| 1252 | 1285 | wchar_t *uMode = fossil_utf8_to_unicode(zMode); |
| 1253 | | - wchar_t *uName = fossil_utf8_to_unicode(zName); |
| 1286 | + wchar_t *uName = fossil_utf8_to_filename(zName); /* only the name gets the filename translation; the mode string uses the plain conversion */ |
| 1254 | 1287 | FILE *f = _wfopen(uName, uMode); |
| 1255 | 1288 | fossil_mbcs_free(uName); /* both converted strings are released once _wfopen() has returned */ |
| 1256 | 1289 | fossil_mbcs_free(uMode); |
| 1257 | 1290 | #else |
| 1258 | 1291 | FILE *f = fopen(zName, zMode); |
| 1259 | 1292 | #endif |
| 1260 | 1293 | return f; |
| 1261 | 1294 | } |
| 1262 | 1295 | |