Fossil SCM
Merge trunk. Restore fossil_utf8_to_filename signature by doing character conversions in utf-8 instead of unicode.
Commit
ae1b1f4b5d97289ab1d6ca811350618ea688659d
Parent
f1bb151e6f7a9e5…
2 files changed
+34
-26
+34
-26
+34
-26
| --- src/utf8.c | ||
| +++ src/utf8.c | ||
| @@ -104,32 +104,37 @@ | ||
| 104 | 104 | /* |
| 105 | 105 | ** Translate text from the filename character set into UTF-8. |
| 106 | 106 | ** Return a pointer to the translated text. |
| 107 | 107 | ** Call fossil_filename_free() to deallocate any memory used to store the |
| 108 | 108 | ** returned pointer when done. |
| 109 | +** | |
| 110 | +** On Windows, translate some characters in the range | |
| 111 | +** U+F001 - U+F07F (private use area) to ASCII. Cygwin sometimes | |
| 112 | +** generates such filenames. See: | |
| 113 | +** <http://cygwin.com/cygwin-ug-net/using-specialnames.html> | |
| 109 | 114 | */ |
| 110 | -char *fossil_filename_to_utf8(void *zFilename){ | |
| 115 | +char *fossil_filename_to_utf8(const void *zFilename){ | |
| 111 | 116 | #if defined(_WIN32) |
| 112 | - int nByte; | |
| 113 | - char *zUtf; | |
| 114 | - wchar_t *wUnicode = zFilename; | |
| 115 | - while( *wUnicode != 0 ){ | |
| 116 | - if ( (*wUnicode & 0xff80) == 0xf000 ){ | |
| 117 | - wchar_t converted = (*wUnicode & 0x7f); | |
| 118 | - /* Only really convert it when the resulting char is in the given range*/ | |
| 119 | - if ( (converted < 32) || wcschr(L"\"*<>?|:", converted) ){ | |
| 120 | - *wUnicode = converted; | |
| 121 | - } | |
| 122 | - } | |
| 123 | - ++wUnicode; | |
| 124 | - } | |
| 125 | - nByte = WideCharToMultiByte(CP_UTF8, 0, zFilename, -1, 0, 0, 0, 0); | |
| 126 | - zUtf = sqlite3_malloc( nByte ); | |
| 117 | + int nByte = WideCharToMultiByte(CP_UTF8, 0, zFilename, -1, 0, 0, 0, 0); | |
| 118 | + char *zUtf = sqlite3_malloc( nByte ); | |
| 119 | + char *pUtf, *qUtf; | |
| 127 | 120 | if( zUtf==0 ){ |
| 128 | 121 | return 0; |
| 129 | 122 | } |
| 130 | 123 | WideCharToMultiByte(CP_UTF8, 0, zFilename, -1, zUtf, nByte, 0, 0); |
| 124 | + pUtf = qUtf = zUtf; | |
| 125 | + while( *pUtf ) { | |
| 126 | + if( *pUtf == (char)0xef ){ | |
| 127 | + wchar_t c = ((pUtf[1]&0x3f)<<6)|(pUtf[2]&0x3f); | |
| 128 | + /* Only really convert it when the resulting char is in range. */ | |
| 129 | + if ( c && ((c <= ' ') || wcschr(L"\"*.:<>?|", c)) ){ | |
| 130 | + *qUtf++ = c; pUtf+=3; continue; | |
| 131 | + } | |
| 132 | + } | |
| 133 | + *qUtf++ = *pUtf++; | |
| 134 | + } | |
| 135 | + *qUtf = 0; | |
| 131 | 136 | return zUtf; |
| 132 | 137 | #elif defined(__CYGWIN__) |
| 133 | 138 | char *zOut; |
| 134 | 139 | zOut = fossil_strdup(zFilename); |
| 135 | 140 | return zOut; |
| @@ -168,12 +173,12 @@ | ||
| 168 | 173 | ** Return a pointer to the translated text. Call fossil_filename_free() |
| 169 | 174 | ** to deallocate any memory used to store the returned pointer when done. |
| 170 | 175 | ** |
| 171 | 176 | ** On Windows, characters in the range U+0001 to U+001F and the |
| 172 | 177 | ** characters '"', '*', ':', '<', '>', '?' and '|' are invalid |
| 173 | -** to be used. Therefore, translated those to characters in the | |
| 174 | -** (private use area), in the range U+F001 - U+F07F, so those | |
| 178 | +** to be used. Therefore, translate those to characters in the | |
| 179 | +** range U+F001 - U+F07F (private use area), so those | |
| 175 | 180 | ** characters never arrive in any Windows API. The filenames might |
| 176 | 181 | ** look strange in Windows explorer, but in the cygwin shell |
| 177 | 182 | ** everything looks as expected. |
| 178 | 183 | ** |
| 179 | 184 | ** See: <http://cygwin.com/cygwin-ug-net/using-specialnames.html> |
| @@ -193,11 +198,11 @@ | ||
| 193 | 198 | && (zUtf8[2]=='\\' || zUtf8[2]=='/')) { |
| 194 | 199 | zUnicode[2] = '\\'; |
| 195 | 200 | wUnicode += 3; |
| 196 | 201 | } |
| 197 | 202 | while( *wUnicode != '\0' ){ |
| 198 | - if ( (*wUnicode < 32) || wcschr(L"\"*<>?|:", *wUnicode) ){ | |
| 203 | + if ( (*wUnicode < 32) || wcschr(L"\"*:<>?|", *wUnicode) ){ | |
| 199 | 204 | *wUnicode |= 0xF000; |
| 200 | 205 | }else if( *wUnicode == '/' ){ |
| 201 | 206 | *wUnicode = '\\'; |
| 202 | 207 | } |
| 203 | 208 | ++wUnicode; |
| @@ -239,11 +244,11 @@ | ||
| 239 | 244 | ** to a file, -1 is returned and nothing is written |
| 240 | 245 | ** to the console. |
| 241 | 246 | */ |
| 242 | 247 | int fossil_utf8_to_console(const char *zUtf8, int nByte, int toStdErr){ |
| 243 | 248 | #ifdef _WIN32 |
| 244 | - int nChar; | |
| 249 | + int nChar, written = 0; | |
| 245 | 250 | wchar_t *zUnicode; /* Unicode version of zUtf8 */ |
| 246 | 251 | DWORD dummy; |
| 247 | 252 | |
| 248 | 253 | static int istty[2] = { -1, -1 }; |
| 249 | 254 | if( istty[toStdErr] == -1 ){ |
| @@ -258,17 +263,20 @@ | ||
| 258 | 263 | zUnicode = malloc( (nChar + 1) *sizeof(zUnicode[0]) ); |
| 259 | 264 | if( zUnicode==0 ){ |
| 260 | 265 | return 0; |
| 261 | 266 | } |
| 262 | 267 | nChar = MultiByteToWideChar(CP_UTF8, 0, zUtf8, nByte, zUnicode, nChar); |
| 263 | - if( nChar==0 ){ | |
| 264 | - free(zUnicode); | |
| 265 | - return 0; | |
| 268 | + /* Split WriteConsoleW call into multiple chunks, if necessary. See: | |
| 269 | + * <https://connect.microsoft.com/VisualStudio/feedback/details/635230> */ | |
| 270 | + while( written < nChar ){ | |
| 271 | + int size = nChar-written; | |
| 272 | + if (size > 26000) size = 26000; | |
| 273 | + WriteConsoleW(GetStdHandle(STD_OUTPUT_HANDLE - toStdErr), zUnicode+written, | |
| 274 | + size, &dummy, 0); | |
| 275 | + written += size; | |
| 266 | 276 | } |
| 267 | - zUnicode[nChar] = '\0'; | |
| 268 | - WriteConsoleW(GetStdHandle(STD_OUTPUT_HANDLE - toStdErr), zUnicode, nChar, | |
| 269 | - &dummy, 0); | |
| 277 | + free(zUnicode); | |
| 270 | 278 | return nChar; |
| 271 | 279 | #else |
| 272 | 280 | return -1; /* No-op on unix */ |
| 273 | 281 | #endif |
| 274 | 282 | } |
| 275 | 283 |
| --- src/utf8.c | |
| +++ src/utf8.c | |
| @@ -104,32 +104,37 @@ | |
| 104 | /* |
| 105 | ** Translate text from the filename character set into UTF-8. |
| 106 | ** Return a pointer to the translated text. |
| 107 | ** Call fossil_filename_free() to deallocate any memory used to store the |
| 108 | ** returned pointer when done. |
| 109 | */ |
| 110 | char *fossil_filename_to_utf8(void *zFilename){ |
| 111 | #if defined(_WIN32) |
| 112 | int nByte; |
| 113 | char *zUtf; |
| 114 | wchar_t *wUnicode = zFilename; |
| 115 | while( *wUnicode != 0 ){ |
| 116 | if ( (*wUnicode & 0xff80) == 0xf000 ){ |
| 117 | wchar_t converted = (*wUnicode & 0x7f); |
| 118 | /* Only really convert it when the resulting char is in the given range*/ |
| 119 | if ( (converted < 32) || wcschr(L"\"*<>?|:", converted) ){ |
| 120 | *wUnicode = converted; |
| 121 | } |
| 122 | } |
| 123 | ++wUnicode; |
| 124 | } |
| 125 | nByte = WideCharToMultiByte(CP_UTF8, 0, zFilename, -1, 0, 0, 0, 0); |
| 126 | zUtf = sqlite3_malloc( nByte ); |
| 127 | if( zUtf==0 ){ |
| 128 | return 0; |
| 129 | } |
| 130 | WideCharToMultiByte(CP_UTF8, 0, zFilename, -1, zUtf, nByte, 0, 0); |
| 131 | return zUtf; |
| 132 | #elif defined(__CYGWIN__) |
| 133 | char *zOut; |
| 134 | zOut = fossil_strdup(zFilename); |
| 135 | return zOut; |
| @@ -168,12 +173,12 @@ | |
| 168 | ** Return a pointer to the translated text. Call fossil_filename_free() |
| 169 | ** to deallocate any memory used to store the returned pointer when done. |
| 170 | ** |
| 171 | ** On Windows, characters in the range U+0001 to U+001F and the |
| 172 | ** characters '"', '*', ':', '<', '>', '?' and '|' are invalid |
| 173 | ** to be used. Therefore, translated those to characters in the |
| 174 | ** (private use area), in the range U+F001 - U+F07F, so those |
| 175 | ** characters never arrive in any Windows API. The filenames might |
| 176 | ** look strange in Windows explorer, but in the cygwin shell |
| 177 | ** everything looks as expected. |
| 178 | ** |
| 179 | ** See: <http://cygwin.com/cygwin-ug-net/using-specialnames.html> |
| @@ -193,11 +198,11 @@ | |
| 193 | && (zUtf8[2]=='\\' || zUtf8[2]=='/')) { |
| 194 | zUnicode[2] = '\\'; |
| 195 | wUnicode += 3; |
| 196 | } |
| 197 | while( *wUnicode != '\0' ){ |
| 198 | if ( (*wUnicode < 32) || wcschr(L"\"*<>?|:", *wUnicode) ){ |
| 199 | *wUnicode |= 0xF000; |
| 200 | }else if( *wUnicode == '/' ){ |
| 201 | *wUnicode = '\\'; |
| 202 | } |
| 203 | ++wUnicode; |
| @@ -239,11 +244,11 @@ | |
| 239 | ** to a file, -1 is returned and nothing is written |
| 240 | ** to the console. |
| 241 | */ |
| 242 | int fossil_utf8_to_console(const char *zUtf8, int nByte, int toStdErr){ |
| 243 | #ifdef _WIN32 |
| 244 | int nChar; |
| 245 | wchar_t *zUnicode; /* Unicode version of zUtf8 */ |
| 246 | DWORD dummy; |
| 247 | |
| 248 | static int istty[2] = { -1, -1 }; |
| 249 | if( istty[toStdErr] == -1 ){ |
| @@ -258,17 +263,20 @@ | |
| 258 | zUnicode = malloc( (nChar + 1) *sizeof(zUnicode[0]) ); |
| 259 | if( zUnicode==0 ){ |
| 260 | return 0; |
| 261 | } |
| 262 | nChar = MultiByteToWideChar(CP_UTF8, 0, zUtf8, nByte, zUnicode, nChar); |
| 263 | if( nChar==0 ){ |
| 264 | free(zUnicode); |
| 265 | return 0; |
| 266 | } |
| 267 | zUnicode[nChar] = '\0'; |
| 268 | WriteConsoleW(GetStdHandle(STD_OUTPUT_HANDLE - toStdErr), zUnicode, nChar, |
| 269 | &dummy, 0); |
| 270 | return nChar; |
| 271 | #else |
| 272 | return -1; /* No-op on unix */ |
| 273 | #endif |
| 274 | } |
| 275 |
| --- src/utf8.c | |
| +++ src/utf8.c | |
| @@ -104,32 +104,37 @@ | |
| 104 | /* |
| 105 | ** Translate text from the filename character set into UTF-8. |
| 106 | ** Return a pointer to the translated text. |
| 107 | ** Call fossil_filename_free() to deallocate any memory used to store the |
| 108 | ** returned pointer when done. |
| 109 | ** |
| 110 | ** On Windows, translate some characters in the range |
| 111 | ** U+F001 - U+F07F (private use area) to ASCII. Cygwin sometimes |
| 112 | ** generates such filenames. See: |
| 113 | ** <http://cygwin.com/cygwin-ug-net/using-specialnames.html> |
| 114 | */ |
| 115 | char *fossil_filename_to_utf8(const void *zFilename){ |
| 116 | #if defined(_WIN32) |
| 117 | int nByte = WideCharToMultiByte(CP_UTF8, 0, zFilename, -1, 0, 0, 0, 0); |
| 118 | char *zUtf = sqlite3_malloc( nByte ); |
| 119 | char *pUtf, *qUtf; |
| 120 | if( zUtf==0 ){ |
| 121 | return 0; |
| 122 | } |
| 123 | WideCharToMultiByte(CP_UTF8, 0, zFilename, -1, zUtf, nByte, 0, 0); |
| 124 | pUtf = qUtf = zUtf; |
| 125 | while( *pUtf ) { |
| 126 | if( *pUtf == (char)0xef ){ |
| 127 | wchar_t c = ((pUtf[1]&0x3f)<<6)|(pUtf[2]&0x3f); |
| 128 | /* Only really convert it when the resulting char is in range. */ |
| 129 | if ( c && ((c <= ' ') || wcschr(L"\"*.:<>?|", c)) ){ |
| 130 | *qUtf++ = c; pUtf+=3; continue; |
| 131 | } |
| 132 | } |
| 133 | *qUtf++ = *pUtf++; |
| 134 | } |
| 135 | *qUtf = 0; |
| 136 | return zUtf; |
| 137 | #elif defined(__CYGWIN__) |
| 138 | char *zOut; |
| 139 | zOut = fossil_strdup(zFilename); |
| 140 | return zOut; |
| @@ -168,12 +173,12 @@ | |
| 173 | ** Return a pointer to the translated text. Call fossil_filename_free() |
| 174 | ** to deallocate any memory used to store the returned pointer when done. |
| 175 | ** |
| 176 | ** On Windows, characters in the range U+0001 to U+001F and the |
| 177 | ** characters '"', '*', ':', '<', '>', '?' and '|' are invalid |
| 178 | ** to be used. Therefore, translate those to characters in the |
| 179 | ** range U+F001 - U+F07F (private use area), so those |
| 180 | ** characters never arrive in any Windows API. The filenames might |
| 181 | ** look strange in Windows explorer, but in the cygwin shell |
| 182 | ** everything looks as expected. |
| 183 | ** |
| 184 | ** See: <http://cygwin.com/cygwin-ug-net/using-specialnames.html> |
| @@ -193,11 +198,11 @@ | |
| 198 | && (zUtf8[2]=='\\' || zUtf8[2]=='/')) { |
| 199 | zUnicode[2] = '\\'; |
| 200 | wUnicode += 3; |
| 201 | } |
| 202 | while( *wUnicode != '\0' ){ |
| 203 | if ( (*wUnicode < 32) || wcschr(L"\"*:<>?|", *wUnicode) ){ |
| 204 | *wUnicode |= 0xF000; |
| 205 | }else if( *wUnicode == '/' ){ |
| 206 | *wUnicode = '\\'; |
| 207 | } |
| 208 | ++wUnicode; |
| @@ -239,11 +244,11 @@ | |
| 244 | ** to a file, -1 is returned and nothing is written |
| 245 | ** to the console. |
| 246 | */ |
| 247 | int fossil_utf8_to_console(const char *zUtf8, int nByte, int toStdErr){ |
| 248 | #ifdef _WIN32 |
| 249 | int nChar, written = 0; |
| 250 | wchar_t *zUnicode; /* Unicode version of zUtf8 */ |
| 251 | DWORD dummy; |
| 252 | |
| 253 | static int istty[2] = { -1, -1 }; |
| 254 | if( istty[toStdErr] == -1 ){ |
| @@ -258,17 +263,20 @@ | |
| 263 | zUnicode = malloc( (nChar + 1) *sizeof(zUnicode[0]) ); |
| 264 | if( zUnicode==0 ){ |
| 265 | return 0; |
| 266 | } |
| 267 | nChar = MultiByteToWideChar(CP_UTF8, 0, zUtf8, nByte, zUnicode, nChar); |
| 268 | /* Split WriteConsoleW call into multiple chunks, if necessary. See: |
| 269 | * <https://connect.microsoft.com/VisualStudio/feedback/details/635230> */ |
| 270 | while( written < nChar ){ |
| 271 | int size = nChar-written; |
| 272 | if (size > 26000) size = 26000; |
| 273 | WriteConsoleW(GetStdHandle(STD_OUTPUT_HANDLE - toStdErr), zUnicode+written, |
| 274 | size, &dummy, 0); |
| 275 | written += size; |
| 276 | } |
| 277 | free(zUnicode); |
| 278 | return nChar; |
| 279 | #else |
| 280 | return -1; /* No-op on unix */ |
| 281 | #endif |
| 282 | } |
| 283 |
+34
-26
| --- src/utf8.c | ||
| +++ src/utf8.c | ||
| @@ -104,32 +104,37 @@ | ||
| 104 | 104 | /* |
| 105 | 105 | ** Translate text from the filename character set into UTF-8. |
| 106 | 106 | ** Return a pointer to the translated text. |
| 107 | 107 | ** Call fossil_filename_free() to deallocate any memory used to store the |
| 108 | 108 | ** returned pointer when done. |
| 109 | +** | |
| 110 | +** On Windows, translate some characters in the range | |
| 111 | +** U+F001 - U+F07F (private use area) to ASCII. Cygwin sometimes | |
| 112 | +** generates such filenames. See: | |
| 113 | +** <http://cygwin.com/cygwin-ug-net/using-specialnames.html> | |
| 109 | 114 | */ |
| 110 | -char *fossil_filename_to_utf8(void *zFilename){ | |
| 115 | +char *fossil_filename_to_utf8(const void *zFilename){ | |
| 111 | 116 | #if defined(_WIN32) |
| 112 | - int nByte; | |
| 113 | - char *zUtf; | |
| 114 | - wchar_t *wUnicode = zFilename; | |
| 115 | - while( *wUnicode != 0 ){ | |
| 116 | - if ( (*wUnicode & 0xff80) == 0xf000 ){ | |
| 117 | - wchar_t converted = (*wUnicode & 0x7f); | |
| 118 | - /* Only really convert it when the resulting char is in the given range*/ | |
| 119 | - if ( (converted < 32) || wcschr(L"\"*<>?|:", converted) ){ | |
| 120 | - *wUnicode = converted; | |
| 121 | - } | |
| 122 | - } | |
| 123 | - ++wUnicode; | |
| 124 | - } | |
| 125 | - nByte = WideCharToMultiByte(CP_UTF8, 0, zFilename, -1, 0, 0, 0, 0); | |
| 126 | - zUtf = sqlite3_malloc( nByte ); | |
| 117 | + int nByte = WideCharToMultiByte(CP_UTF8, 0, zFilename, -1, 0, 0, 0, 0); | |
| 118 | + char *zUtf = sqlite3_malloc( nByte ); | |
| 119 | + char *pUtf, *qUtf; | |
| 127 | 120 | if( zUtf==0 ){ |
| 128 | 121 | return 0; |
| 129 | 122 | } |
| 130 | 123 | WideCharToMultiByte(CP_UTF8, 0, zFilename, -1, zUtf, nByte, 0, 0); |
| 124 | + pUtf = qUtf = zUtf; | |
| 125 | + while( *pUtf ) { | |
| 126 | + if( *pUtf == (char)0xef ){ | |
| 127 | + wchar_t c = ((pUtf[1]&0x3f)<<6)|(pUtf[2]&0x3f); | |
| 128 | + /* Only really convert it when the resulting char is in range. */ | |
| 129 | + if ( c && ((c <= ' ') || wcschr(L"\"*.:<>?|", c)) ){ | |
| 130 | + *qUtf++ = c; pUtf+=3; continue; | |
| 131 | + } | |
| 132 | + } | |
| 133 | + *qUtf++ = *pUtf++; | |
| 134 | + } | |
| 135 | + *qUtf = 0; | |
| 131 | 136 | return zUtf; |
| 132 | 137 | #elif defined(__CYGWIN__) |
| 133 | 138 | char *zOut; |
| 134 | 139 | zOut = fossil_strdup(zFilename); |
| 135 | 140 | return zOut; |
| @@ -168,12 +173,12 @@ | ||
| 168 | 173 | ** Return a pointer to the translated text. Call fossil_filename_free() |
| 169 | 174 | ** to deallocate any memory used to store the returned pointer when done. |
| 170 | 175 | ** |
| 171 | 176 | ** On Windows, characters in the range U+0001 to U+001F and the |
| 172 | 177 | ** characters '"', '*', ':', '<', '>', '?' and '|' are invalid |
| 173 | -** to be used. Therefore, translated those to characters in the | |
| 174 | -** (private use area), in the range U+F001 - U+F07F, so those | |
| 178 | +** to be used. Therefore, translate those to characters in the | |
| 179 | +** range U+F001 - U+F07F (private use area), so those | |
| 175 | 180 | ** characters never arrive in any Windows API. The filenames might |
| 176 | 181 | ** look strange in Windows explorer, but in the cygwin shell |
| 177 | 182 | ** everything looks as expected. |
| 178 | 183 | ** |
| 179 | 184 | ** See: <http://cygwin.com/cygwin-ug-net/using-specialnames.html> |
| @@ -193,11 +198,11 @@ | ||
| 193 | 198 | && (zUtf8[2]=='\\' || zUtf8[2]=='/')) { |
| 194 | 199 | zUnicode[2] = '\\'; |
| 195 | 200 | wUnicode += 3; |
| 196 | 201 | } |
| 197 | 202 | while( *wUnicode != '\0' ){ |
| 198 | - if ( (*wUnicode < 32) || wcschr(L"\"*<>?|:", *wUnicode) ){ | |
| 203 | + if ( (*wUnicode < 32) || wcschr(L"\"*:<>?|", *wUnicode) ){ | |
| 199 | 204 | *wUnicode |= 0xF000; |
| 200 | 205 | }else if( *wUnicode == '/' ){ |
| 201 | 206 | *wUnicode = '\\'; |
| 202 | 207 | } |
| 203 | 208 | ++wUnicode; |
| @@ -239,11 +244,11 @@ | ||
| 239 | 244 | ** to a file, -1 is returned and nothing is written |
| 240 | 245 | ** to the console. |
| 241 | 246 | */ |
| 242 | 247 | int fossil_utf8_to_console(const char *zUtf8, int nByte, int toStdErr){ |
| 243 | 248 | #ifdef _WIN32 |
| 244 | - int nChar; | |
| 249 | + int nChar, written = 0; | |
| 245 | 250 | wchar_t *zUnicode; /* Unicode version of zUtf8 */ |
| 246 | 251 | DWORD dummy; |
| 247 | 252 | |
| 248 | 253 | static int istty[2] = { -1, -1 }; |
| 249 | 254 | if( istty[toStdErr] == -1 ){ |
| @@ -258,17 +263,20 @@ | ||
| 258 | 263 | zUnicode = malloc( (nChar + 1) *sizeof(zUnicode[0]) ); |
| 259 | 264 | if( zUnicode==0 ){ |
| 260 | 265 | return 0; |
| 261 | 266 | } |
| 262 | 267 | nChar = MultiByteToWideChar(CP_UTF8, 0, zUtf8, nByte, zUnicode, nChar); |
| 263 | - if( nChar==0 ){ | |
| 264 | - free(zUnicode); | |
| 265 | - return 0; | |
| 268 | + /* Split WriteConsoleW call into multiple chunks, if necessary. See: | |
| 269 | + * <https://connect.microsoft.com/VisualStudio/feedback/details/635230> */ | |
| 270 | + while( written < nChar ){ | |
| 271 | + int size = nChar-written; | |
| 272 | + if (size > 26000) size = 26000; | |
| 273 | + WriteConsoleW(GetStdHandle(STD_OUTPUT_HANDLE - toStdErr), zUnicode+written, | |
| 274 | + size, &dummy, 0); | |
| 275 | + written += size; | |
| 266 | 276 | } |
| 267 | - zUnicode[nChar] = '\0'; | |
| 268 | - WriteConsoleW(GetStdHandle(STD_OUTPUT_HANDLE - toStdErr), zUnicode, nChar, | |
| 269 | - &dummy, 0); | |
| 277 | + free(zUnicode); | |
| 270 | 278 | return nChar; |
| 271 | 279 | #else |
| 272 | 280 | return -1; /* No-op on unix */ |
| 273 | 281 | #endif |
| 274 | 282 | } |
| 275 | 283 |
| --- src/utf8.c | |
| +++ src/utf8.c | |
| @@ -104,32 +104,37 @@ | |
| 104 | /* |
| 105 | ** Translate text from the filename character set into UTF-8. |
| 106 | ** Return a pointer to the translated text. |
| 107 | ** Call fossil_filename_free() to deallocate any memory used to store the |
| 108 | ** returned pointer when done. |
| 109 | */ |
| 110 | char *fossil_filename_to_utf8(void *zFilename){ |
| 111 | #if defined(_WIN32) |
| 112 | int nByte; |
| 113 | char *zUtf; |
| 114 | wchar_t *wUnicode = zFilename; |
| 115 | while( *wUnicode != 0 ){ |
| 116 | if ( (*wUnicode & 0xff80) == 0xf000 ){ |
| 117 | wchar_t converted = (*wUnicode & 0x7f); |
| 118 | /* Only really convert it when the resulting char is in the given range*/ |
| 119 | if ( (converted < 32) || wcschr(L"\"*<>?|:", converted) ){ |
| 120 | *wUnicode = converted; |
| 121 | } |
| 122 | } |
| 123 | ++wUnicode; |
| 124 | } |
| 125 | nByte = WideCharToMultiByte(CP_UTF8, 0, zFilename, -1, 0, 0, 0, 0); |
| 126 | zUtf = sqlite3_malloc( nByte ); |
| 127 | if( zUtf==0 ){ |
| 128 | return 0; |
| 129 | } |
| 130 | WideCharToMultiByte(CP_UTF8, 0, zFilename, -1, zUtf, nByte, 0, 0); |
| 131 | return zUtf; |
| 132 | #elif defined(__CYGWIN__) |
| 133 | char *zOut; |
| 134 | zOut = fossil_strdup(zFilename); |
| 135 | return zOut; |
| @@ -168,12 +173,12 @@ | |
| 168 | ** Return a pointer to the translated text. Call fossil_filename_free() |
| 169 | ** to deallocate any memory used to store the returned pointer when done. |
| 170 | ** |
| 171 | ** On Windows, characters in the range U+0001 to U+001F and the |
| 172 | ** characters '"', '*', ':', '<', '>', '?' and '|' are invalid |
| 173 | ** to be used. Therefore, translated those to characters in the |
| 174 | ** (private use area), in the range U+F001 - U+F07F, so those |
| 175 | ** characters never arrive in any Windows API. The filenames might |
| 176 | ** look strange in Windows explorer, but in the cygwin shell |
| 177 | ** everything looks as expected. |
| 178 | ** |
| 179 | ** See: <http://cygwin.com/cygwin-ug-net/using-specialnames.html> |
| @@ -193,11 +198,11 @@ | |
| 193 | && (zUtf8[2]=='\\' || zUtf8[2]=='/')) { |
| 194 | zUnicode[2] = '\\'; |
| 195 | wUnicode += 3; |
| 196 | } |
| 197 | while( *wUnicode != '\0' ){ |
| 198 | if ( (*wUnicode < 32) || wcschr(L"\"*<>?|:", *wUnicode) ){ |
| 199 | *wUnicode |= 0xF000; |
| 200 | }else if( *wUnicode == '/' ){ |
| 201 | *wUnicode = '\\'; |
| 202 | } |
| 203 | ++wUnicode; |
| @@ -239,11 +244,11 @@ | |
| 239 | ** to a file, -1 is returned and nothing is written |
| 240 | ** to the console. |
| 241 | */ |
| 242 | int fossil_utf8_to_console(const char *zUtf8, int nByte, int toStdErr){ |
| 243 | #ifdef _WIN32 |
| 244 | int nChar; |
| 245 | wchar_t *zUnicode; /* Unicode version of zUtf8 */ |
| 246 | DWORD dummy; |
| 247 | |
| 248 | static int istty[2] = { -1, -1 }; |
| 249 | if( istty[toStdErr] == -1 ){ |
| @@ -258,17 +263,20 @@ | |
| 258 | zUnicode = malloc( (nChar + 1) *sizeof(zUnicode[0]) ); |
| 259 | if( zUnicode==0 ){ |
| 260 | return 0; |
| 261 | } |
| 262 | nChar = MultiByteToWideChar(CP_UTF8, 0, zUtf8, nByte, zUnicode, nChar); |
| 263 | if( nChar==0 ){ |
| 264 | free(zUnicode); |
| 265 | return 0; |
| 266 | } |
| 267 | zUnicode[nChar] = '\0'; |
| 268 | WriteConsoleW(GetStdHandle(STD_OUTPUT_HANDLE - toStdErr), zUnicode, nChar, |
| 269 | &dummy, 0); |
| 270 | return nChar; |
| 271 | #else |
| 272 | return -1; /* No-op on unix */ |
| 273 | #endif |
| 274 | } |
| 275 |
| --- src/utf8.c | |
| +++ src/utf8.c | |
| @@ -104,32 +104,37 @@ | |
| 104 | /* |
| 105 | ** Translate text from the filename character set into UTF-8. |
| 106 | ** Return a pointer to the translated text. |
| 107 | ** Call fossil_filename_free() to deallocate any memory used to store the |
| 108 | ** returned pointer when done. |
| 109 | ** |
| 110 | ** On Windows, translate some characters in the range |
| 111 | ** U+F001 - U+F07F (private use area) to ASCII. Cygwin sometimes |
| 112 | ** generates such filenames. See: |
| 113 | ** <http://cygwin.com/cygwin-ug-net/using-specialnames.html> |
| 114 | */ |
| 115 | char *fossil_filename_to_utf8(const void *zFilename){ |
| 116 | #if defined(_WIN32) |
| 117 | int nByte = WideCharToMultiByte(CP_UTF8, 0, zFilename, -1, 0, 0, 0, 0); |
| 118 | char *zUtf = sqlite3_malloc( nByte ); |
| 119 | char *pUtf, *qUtf; |
| 120 | if( zUtf==0 ){ |
| 121 | return 0; |
| 122 | } |
| 123 | WideCharToMultiByte(CP_UTF8, 0, zFilename, -1, zUtf, nByte, 0, 0); |
| 124 | pUtf = qUtf = zUtf; |
| 125 | while( *pUtf ) { |
| 126 | if( *pUtf == (char)0xef ){ |
| 127 | wchar_t c = ((pUtf[1]&0x3f)<<6)|(pUtf[2]&0x3f); |
| 128 | /* Only really convert it when the resulting char is in range. */ |
| 129 | if ( c && ((c <= ' ') || wcschr(L"\"*.:<>?|", c)) ){ |
| 130 | *qUtf++ = c; pUtf+=3; continue; |
| 131 | } |
| 132 | } |
| 133 | *qUtf++ = *pUtf++; |
| 134 | } |
| 135 | *qUtf = 0; |
| 136 | return zUtf; |
| 137 | #elif defined(__CYGWIN__) |
| 138 | char *zOut; |
| 139 | zOut = fossil_strdup(zFilename); |
| 140 | return zOut; |
| @@ -168,12 +173,12 @@ | |
| 173 | ** Return a pointer to the translated text. Call fossil_filename_free() |
| 174 | ** to deallocate any memory used to store the returned pointer when done. |
| 175 | ** |
| 176 | ** On Windows, characters in the range U+0001 to U+001F and the |
| 177 | ** characters '"', '*', ':', '<', '>', '?' and '|' are invalid |
| 178 | ** to be used. Therefore, translate those to characters in the |
| 179 | ** range U+F001 - U+F07F (private use area), so those |
| 180 | ** characters never arrive in any Windows API. The filenames might |
| 181 | ** look strange in Windows explorer, but in the cygwin shell |
| 182 | ** everything looks as expected. |
| 183 | ** |
| 184 | ** See: <http://cygwin.com/cygwin-ug-net/using-specialnames.html> |
| @@ -193,11 +198,11 @@ | |
| 198 | && (zUtf8[2]=='\\' || zUtf8[2]=='/')) { |
| 199 | zUnicode[2] = '\\'; |
| 200 | wUnicode += 3; |
| 201 | } |
| 202 | while( *wUnicode != '\0' ){ |
| 203 | if ( (*wUnicode < 32) || wcschr(L"\"*:<>?|", *wUnicode) ){ |
| 204 | *wUnicode |= 0xF000; |
| 205 | }else if( *wUnicode == '/' ){ |
| 206 | *wUnicode = '\\'; |
| 207 | } |
| 208 | ++wUnicode; |
| @@ -239,11 +244,11 @@ | |
| 244 | ** to a file, -1 is returned and nothing is written |
| 245 | ** to the console. |
| 246 | */ |
| 247 | int fossil_utf8_to_console(const char *zUtf8, int nByte, int toStdErr){ |
| 248 | #ifdef _WIN32 |
| 249 | int nChar, written = 0; |
| 250 | wchar_t *zUnicode; /* Unicode version of zUtf8 */ |
| 251 | DWORD dummy; |
| 252 | |
| 253 | static int istty[2] = { -1, -1 }; |
| 254 | if( istty[toStdErr] == -1 ){ |
| @@ -258,17 +263,20 @@ | |
| 263 | zUnicode = malloc( (nChar + 1) *sizeof(zUnicode[0]) ); |
| 264 | if( zUnicode==0 ){ |
| 265 | return 0; |
| 266 | } |
| 267 | nChar = MultiByteToWideChar(CP_UTF8, 0, zUtf8, nByte, zUnicode, nChar); |
| 268 | /* Split WriteConsoleW call into multiple chunks, if necessary. See: |
| 269 | * <https://connect.microsoft.com/VisualStudio/feedback/details/635230> */ |
| 270 | while( written < nChar ){ |
| 271 | int size = nChar-written; |
| 272 | if (size > 26000) size = 26000; |
| 273 | WriteConsoleW(GetStdHandle(STD_OUTPUT_HANDLE - toStdErr), zUnicode+written, |
| 274 | size, &dummy, 0); |
| 275 | written += size; |
| 276 | } |
| 277 | free(zUnicode); |
| 278 | return nChar; |
| 279 | #else |
| 280 | return -1; /* No-op on unix */ |
| 281 | #endif |
| 282 | } |
| 283 |