Fossil SCM
Experimental fix for issue [d17d6e5b17]. <p>Should have a LOT more testing before merging it to trunk, because it is dangerous! <p>The method used is as described at: <br> [http://cygwin.com/cygwin-ug-net/using-specialnames.html] The only problematic characters left are ':' and '\', all other problematic characters are handled by translating them to characters in the range U+F000 to U+F0FF <p>Feedback welcome.
Commit
82ce90f91c6e50ab624cc08e49c7e2195dc2f0ea
Parent
3c1ad1def951372…
1 file changed
+43
-5
+43
-5
| --- src/file.c | ||
| +++ src/file.c | ||
| @@ -481,11 +481,11 @@ | ||
| 481 | 481 | ** a file in a repository. Valid filenames follow all of the |
| 482 | 482 | ** following rules: |
| 483 | 483 | ** |
| 484 | 484 | ** * Does not begin with "/" |
| 485 | 485 | ** * Does not contain any path element named "." or ".." |
| 486 | -** * Does not contain any of these characters in the path: "\*[]?" | |
| 486 | +** * Does not contain any of these characters in the path: "\:" | |
| 487 | 487 | ** * Does not end with "/". |
| 488 | 488 | ** * Does not contain two or more "/" characters in a row. |
| 489 | 489 | ** * Contains at least one character |
| 490 | 490 | */ |
| 491 | 491 | int file_is_simple_pathname(const char *z){ |
| @@ -495,11 +495,11 @@ | ||
| 495 | 495 | if( c=='.' ){ |
| 496 | 496 | if( z[1]=='/' || z[1]==0 ) return 0; |
| 497 | 497 | if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0; |
| 498 | 498 | } |
| 499 | 499 | for(i=0; (c=z[i])!=0; i++){ |
| 500 | - if( c=='\\' || c=='*' || c=='[' || c==']' || c=='?' ){ | |
| 500 | + if( c=='\\' || c==':' ){ | |
| 501 | 501 | return 0; |
| 502 | 502 | } |
| 503 | 503 | if( c=='/' ){ |
| 504 | 504 | if( z[i+1]=='/' ) return 0; |
| 505 | 505 | if( z[i+1]=='.' ){ |
| @@ -1089,15 +1089,33 @@ | ||
| 1089 | 1089 | |
| 1090 | 1090 | /* |
| 1091 | 1091 | ** Translate Unicode to UTF8. Return a pointer to the translated text. |
| 1092 | 1092 | ** Call fossil_mbcs_free() to deallocate any memory used to store the |
| 1093 | 1093 | ** returned pointer when done. |
| 1094 | +** | |
| 1095 | +** On Windows, characters in the range U+F001 to U+F07F (private use area) | |
| 1096 | +** are translated to ASCII characters in the range U+0001 - U+007F. The | |
| 1097 | +** only place they can come from is filenames using Cygwin's trick | |
| 1098 | +** to circumvent invalid characters in filenames. | |
| 1099 | +** See: <http://cygwin.com/cygwin-ug-net/using-specialnames.html> | |
| 1100 | +** This way, fossil will work nicely together with the cygwin shell | |
| 1101 | +** handling those filenames. On other shells, the generated filename | |
| 1102 | +** might not be as expected, but apart from that nothing goes wrong. | |
| 1094 | 1103 | */ |
| 1095 | -char *fossil_unicode_to_utf8(const void *zUnicode){ | |
| 1104 | +char *fossil_unicode_to_utf8(void *zUnicode){ | |
| 1096 | 1105 | #ifdef _WIN32 |
| 1097 | - int nByte = WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, 0, 0, 0, 0); | |
| 1098 | - char *zUtf = sqlite3_malloc( nByte ); | |
| 1106 | + int nByte = 0; | |
| 1107 | + char *zUtf; | |
| 1108 | + WCHAR *wUnicode = zUnicode; | |
| 1109 | + while( *wUnicode != 0 ){ | |
| 1110 | + if ( (*wUnicode > 0xF000) && (*wUnicode <= 0xF07F) ){ | |
| 1111 | + *wUnicode &= 0x7F; | |
| 1112 | + } | |
| 1113 | + ++wUnicode; | |
| 1114 | + } | |
| 1115 | + nByte = WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, 0, 0, 0, 0); | |
| 1116 | + zUtf = sqlite3_malloc( nByte ); | |
| 1099 | 1117 | if( zUtf==0 ){ |
| 1100 | 1118 | return 0; |
| 1101 | 1119 | } |
| 1102 | 1120 | WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, zUtf, nByte, 0, 0); |
| 1103 | 1121 | return zUtf; |
| @@ -1122,19 +1140,39 @@ | ||
| 1122 | 1140 | |
| 1123 | 1141 | /* |
| 1124 | 1142 | ** Translate UTF8 to unicode for use in system calls. Return a pointer to the |
| 1125 | 1143 | ** translated text.. Call fossil_mbcs_free() to deallocate any memory |
| 1126 | 1144 | ** used to store the returned pointer when done. |
| 1145 | +** | |
| 1146 | +** On Windows, characters in the range U+0001 to U+001F and the | |
| 1147 | +** characters '"', '*', ':', '<', '>', '?', '|' and '\\' are invalid | |
| 1148 | +** in filenames. Therefore, translate those (except ':' and '\\', which | |
| 1149 | +** are left as-is) to the private use area, in the range U+F001 - U+F07F, | |
| 1150 | +** so the translated characters never reach any Windows API. The filenames might | |
| 1151 | +** look strange in Windows explorer, but in the cygwin shell | |
| 1152 | +** everything looks as expected. | |
| 1153 | +** | |
| 1154 | +** See: <http://cygwin.com/cygwin-ug-net/using-specialnames.html> | |
| 1155 | +** | |
| 1127 | 1156 | */ |
| 1128 | 1157 | void *fossil_utf8_to_unicode(const char *zUtf8){ |
| 1129 | 1158 | #ifdef _WIN32 |
| 1130 | 1159 | int nByte = MultiByteToWideChar(CP_UTF8, 0, zUtf8, -1, 0, 0); |
| 1131 | 1160 | wchar_t *zUnicode = sqlite3_malloc( nByte * 2 ); |
| 1161 | + wchar_t *wUnicode; | |
| 1132 | 1162 | if( zUnicode==0 ){ |
| 1133 | 1163 | return 0; |
| 1134 | 1164 | } |
| 1135 | 1165 | MultiByteToWideChar(CP_UTF8, 0, zUtf8, -1, zUnicode, nByte); |
| 1166 | + wUnicode = zUnicode; | |
| 1167 | + while( --nByte > 0){ | |
| 1168 | + if ( (*wUnicode < 32) || wcschr(L"\"*<>?|", *wUnicode) ){ | |
| 1169 | + *wUnicode |= 0xF000; | |
| 1170 | + } | |
| 1171 | + ++wUnicode; | |
| 1172 | + } | |
| 1173 | + | |
| 1136 | 1174 | return zUnicode; |
| 1137 | 1175 | #else |
| 1138 | 1176 | return (void *)zUtf8; /* No-op on unix */ |
| 1139 | 1177 | #endif |
| 1140 | 1178 | } |
| 1141 | 1179 |
| --- src/file.c | |
| +++ src/file.c | |
| @@ -481,11 +481,11 @@ | |
| 481 | ** a file in a repository. Valid filenames follow all of the |
| 482 | ** following rules: |
| 483 | ** |
| 484 | ** * Does not begin with "/" |
| 485 | ** * Does not contain any path element named "." or ".." |
| 486 | ** * Does not contain any of these characters in the path: "\*[]?" |
| 487 | ** * Does not end with "/". |
| 488 | ** * Does not contain two or more "/" characters in a row. |
| 489 | ** * Contains at least one character |
| 490 | */ |
| 491 | int file_is_simple_pathname(const char *z){ |
| @@ -495,11 +495,11 @@ | |
| 495 | if( c=='.' ){ |
| 496 | if( z[1]=='/' || z[1]==0 ) return 0; |
| 497 | if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0; |
| 498 | } |
| 499 | for(i=0; (c=z[i])!=0; i++){ |
| 500 | if( c=='\\' || c=='*' || c=='[' || c==']' || c=='?' ){ |
| 501 | return 0; |
| 502 | } |
| 503 | if( c=='/' ){ |
| 504 | if( z[i+1]=='/' ) return 0; |
| 505 | if( z[i+1]=='.' ){ |
| @@ -1089,15 +1089,33 @@ | |
| 1089 | |
| 1090 | /* |
| 1091 | ** Translate Unicode to UTF8. Return a pointer to the translated text. |
| 1092 | ** Call fossil_mbcs_free() to deallocate any memory used to store the |
| 1093 | ** returned pointer when done. |
| 1094 | */ |
| 1095 | char *fossil_unicode_to_utf8(const void *zUnicode){ |
| 1096 | #ifdef _WIN32 |
| 1097 | int nByte = WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, 0, 0, 0, 0); |
| 1098 | char *zUtf = sqlite3_malloc( nByte ); |
| 1099 | if( zUtf==0 ){ |
| 1100 | return 0; |
| 1101 | } |
| 1102 | WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, zUtf, nByte, 0, 0); |
| 1103 | return zUtf; |
| @@ -1122,19 +1140,39 @@ | |
| 1122 | |
| 1123 | /* |
| 1124 | ** Translate UTF8 to unicode for use in system calls. Return a pointer to the |
| 1125 | ** translated text.. Call fossil_mbcs_free() to deallocate any memory |
| 1126 | ** used to store the returned pointer when done. |
| 1127 | */ |
| 1128 | void *fossil_utf8_to_unicode(const char *zUtf8){ |
| 1129 | #ifdef _WIN32 |
| 1130 | int nByte = MultiByteToWideChar(CP_UTF8, 0, zUtf8, -1, 0, 0); |
| 1131 | wchar_t *zUnicode = sqlite3_malloc( nByte * 2 ); |
| 1132 | if( zUnicode==0 ){ |
| 1133 | return 0; |
| 1134 | } |
| 1135 | MultiByteToWideChar(CP_UTF8, 0, zUtf8, -1, zUnicode, nByte); |
| 1136 | return zUnicode; |
| 1137 | #else |
| 1138 | return (void *)zUtf8; /* No-op on unix */ |
| 1139 | #endif |
| 1140 | } |
| 1141 |
| --- src/file.c | |
| +++ src/file.c | |
| @@ -481,11 +481,11 @@ | |
| 481 | ** a file in a repository. Valid filenames follow all of the |
| 482 | ** following rules: |
| 483 | ** |
| 484 | ** * Does not begin with "/" |
| 485 | ** * Does not contain any path element named "." or ".." |
| 486 | ** * Does not contain any of these characters in the path: "\:" |
| 487 | ** * Does not end with "/". |
| 488 | ** * Does not contain two or more "/" characters in a row. |
| 489 | ** * Contains at least one character |
| 490 | */ |
| 491 | int file_is_simple_pathname(const char *z){ |
| @@ -495,11 +495,11 @@ | |
| 495 | if( c=='.' ){ |
| 496 | if( z[1]=='/' || z[1]==0 ) return 0; |
| 497 | if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0; |
| 498 | } |
| 499 | for(i=0; (c=z[i])!=0; i++){ |
| 500 | if( c=='\\' || c==':' ){ |
| 501 | return 0; |
| 502 | } |
| 503 | if( c=='/' ){ |
| 504 | if( z[i+1]=='/' ) return 0; |
| 505 | if( z[i+1]=='.' ){ |
| @@ -1089,15 +1089,33 @@ | |
| 1089 | |
| 1090 | /* |
| 1091 | ** Translate Unicode to UTF8. Return a pointer to the translated text. |
| 1092 | ** Call fossil_mbcs_free() to deallocate any memory used to store the |
| 1093 | ** returned pointer when done. |
| 1094 | ** |
| 1095 | ** On Windows, characters in the range U+F001 to U+F07F (private use area) |
| 1096 | ** are translated to ASCII characters in the range U+0001 - U+007F. The |
| 1097 | ** only place they can come from is filenames using Cygwin's trick |
| 1098 | ** to circumvent invalid characters in filenames. |
| 1099 | ** See: <http://cygwin.com/cygwin-ug-net/using-specialnames.html> |
| 1100 | ** This way, fossil will work nicely together with the cygwin shell |
| 1101 | ** handling those filenames. On other shells, the generated filename |
| 1102 | ** might not be as expected, but apart from that nothing goes wrong. |
| 1103 | */ |
| 1104 | char *fossil_unicode_to_utf8(void *zUnicode){ |
| 1105 | #ifdef _WIN32 |
| 1106 | int nByte = 0; |
| 1107 | char *zUtf; |
| 1108 | WCHAR *wUnicode = zUnicode; |
| 1109 | while( *wUnicode != 0 ){ |
| 1110 | if ( (*wUnicode > 0xF000) && (*wUnicode <= 0xF07F) ){ |
| 1111 | *wUnicode &= 0x7F; |
| 1112 | } |
| 1113 | ++wUnicode; |
| 1114 | } |
| 1115 | nByte = WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, 0, 0, 0, 0); |
| 1116 | zUtf = sqlite3_malloc( nByte ); |
| 1117 | if( zUtf==0 ){ |
| 1118 | return 0; |
| 1119 | } |
| 1120 | WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, zUtf, nByte, 0, 0); |
| 1121 | return zUtf; |
| @@ -1122,19 +1140,39 @@ | |
| 1140 | |
| 1141 | /* |
| 1142 | ** Translate UTF8 to unicode for use in system calls. Return a pointer to the |
| 1143 | ** translated text.. Call fossil_mbcs_free() to deallocate any memory |
| 1144 | ** used to store the returned pointer when done. |
| 1145 | ** |
| 1146 | ** On Windows, characters in the range U+0001 to U+001F and the |
| 1147 | ** characters '"', '*', ':', '<', '>', '?', '|' and '\\' are invalid |
| 1148 | ** in filenames. Therefore, translate those (except ':' and '\\', which |
| 1149 | ** are left as-is) to the private use area, in the range U+F001 - U+F07F, |
| 1150 | ** so the translated characters never reach any Windows API. The filenames might |
| 1151 | ** look strange in Windows explorer, but in the cygwin shell |
| 1152 | ** everything looks as expected. |
| 1153 | ** |
| 1154 | ** See: <http://cygwin.com/cygwin-ug-net/using-specialnames.html> |
| 1155 | ** |
| 1156 | */ |
| 1157 | void *fossil_utf8_to_unicode(const char *zUtf8){ |
| 1158 | #ifdef _WIN32 |
| 1159 | int nByte = MultiByteToWideChar(CP_UTF8, 0, zUtf8, -1, 0, 0); |
| 1160 | wchar_t *zUnicode = sqlite3_malloc( nByte * 2 ); |
| 1161 | wchar_t *wUnicode; |
| 1162 | if( zUnicode==0 ){ |
| 1163 | return 0; |
| 1164 | } |
| 1165 | MultiByteToWideChar(CP_UTF8, 0, zUtf8, -1, zUnicode, nByte); |
| 1166 | wUnicode = zUnicode; |
| 1167 | while( --nByte > 0){ |
| 1168 | if ( (*wUnicode < 32) || wcschr(L"\"*<>?|", *wUnicode) ){ |
| 1169 | *wUnicode |= 0xF000; |
| 1170 | } |
| 1171 | ++wUnicode; |
| 1172 | } |
| 1173 | |
| 1174 | return zUnicode; |
| 1175 | #else |
| 1176 | return (void *)zUtf8; /* No-op on unix */ |
| 1177 | #endif |
| 1178 | } |
| 1179 |