Fossil SCM
Enhance the function to find case-preserved filenames on Windows to deal with non-ASCII filenames.
Commit
9919dfbbaa2019e7eb7bbfdb2f821058e25553478e27079f95122d672a6fbdff
Parent
2d5a23e919dff2f…
2 files changed
+4
+118
+4
| --- src/file.c | ||
| +++ src/file.c | ||
| @@ -1372,10 +1372,11 @@ | ||
| 1372 | 1372 | ** just fossil_strdup(). But for case-insensitive but "case preserving" |
| 1373 | 1373 | ** filesystems, such as on MacOS or Windows, we want the filename to be |
| 1374 | 1374 | ** in the preserved casing. That's what this routine does. |
| 1375 | 1375 | */ |
| 1376 | 1376 | char *file_case_preferred_name(const char *zDir, const char *zPath){ |
| 1377 | +#ifndef _WIN32 /* Call win32_file_case_preferred_name() on Windows. */ | |
| 1377 | 1378 | DIR *d; |
| 1378 | 1379 | int i; |
| 1379 | 1380 | char *zResult = 0; |
| 1380 | 1381 | void *zNative = 0; |
| 1381 | 1382 | |
| @@ -1407,10 +1408,13 @@ | ||
| 1407 | 1408 | closedir(d); |
| 1408 | 1409 | } |
| 1409 | 1410 | fossil_path_free(zNative); |
| 1410 | 1411 | if( zResult==0 ) zResult = fossil_strdup(zPath); |
| 1411 | 1412 | return zResult; |
| 1413 | +#else /* _WIN32 */ | |
| 1414 | + return win32_file_case_preferred_name(zDir,zPath); | |
| 1415 | +#endif /* _WIN32 */ | |
| 1412 | 1416 | } |
| 1413 | 1417 | |
| 1414 | 1418 | /* |
| 1415 | 1419 | ** COMMAND: test-case-filename |
| 1416 | 1420 | ** |
| 1417 | 1421 |
| --- src/file.c | |
| +++ src/file.c | |
| @@ -1372,10 +1372,11 @@ | |
| 1372 | ** just fossil_strdup(). But for case-insensitive but "case preserving" |
| 1373 | ** filesystems, such as on MacOS or Windows, we want the filename to be |
| 1374 | ** in the preserved casing. That's what this routine does. |
| 1375 | */ |
| 1376 | char *file_case_preferred_name(const char *zDir, const char *zPath){ |
| 1377 | DIR *d; |
| 1378 | int i; |
| 1379 | char *zResult = 0; |
| 1380 | void *zNative = 0; |
| 1381 | |
| @@ -1407,10 +1408,13 @@ | |
| 1407 | closedir(d); |
| 1408 | } |
| 1409 | fossil_path_free(zNative); |
| 1410 | if( zResult==0 ) zResult = fossil_strdup(zPath); |
| 1411 | return zResult; |
| 1412 | } |
| 1413 | |
| 1414 | /* |
| 1415 | ** COMMAND: test-case-filename |
| 1416 | ** |
| 1417 |
| --- src/file.c | |
| +++ src/file.c | |
| @@ -1372,10 +1372,11 @@ | |
| 1372 | ** just fossil_strdup(). But for case-insensitive but "case preserving" |
| 1373 | ** filesystems, such as on MacOS or Windows, we want the filename to be |
| 1374 | ** in the preserved casing. That's what this routine does. |
| 1375 | */ |
| 1376 | char *file_case_preferred_name(const char *zDir, const char *zPath){ |
| 1377 | #ifndef _WIN32 /* Call win32_file_case_preferred_name() on Windows. */ |
| 1378 | DIR *d; |
| 1379 | int i; |
| 1380 | char *zResult = 0; |
| 1381 | void *zNative = 0; |
| 1382 | |
| @@ -1407,10 +1408,13 @@ | |
| 1408 | closedir(d); |
| 1409 | } |
| 1410 | fossil_path_free(zNative); |
| 1411 | if( zResult==0 ) zResult = fossil_strdup(zPath); |
| 1412 | return zResult; |
| 1413 | #else /* _WIN32 */ |
| 1414 | return win32_file_case_preferred_name(zDir,zPath); |
| 1415 | #endif /* _WIN32 */ |
| 1416 | } |
| 1417 | |
| 1418 | /* |
| 1419 | ** COMMAND: test-case-filename |
| 1420 | ** |
| 1421 |
+118
| --- src/winfile.c | ||
| +++ src/winfile.c | ||
| @@ -290,6 +290,124 @@ | ||
| 290 | 290 | fossil_free(zWide); |
| 291 | 291 | for(i=0; zUtf8[i]; i++) if( zUtf8[i]=='\\' ) zUtf8[i] = '/'; |
| 292 | 292 | strncpy(zBuf, zUtf8, nBuf); |
| 293 | 293 | fossil_path_free(zUtf8); |
| 294 | 294 | } |
| 295 | + | |
| 296 | +/* Perform case-insensitive comparison of two UTF-16 file names. Try to load the | |
| 297 | +** CompareStringOrdinal() function on Windows Vista and newer, and resort to the | |
| 298 | +** lstrcmpiW() function on Windows XP. | |
| 299 | +*/ | |
| 300 | +int win32_compare_filenames_nocase( | |
| 301 | + const wchar_t *fn1, | |
| 302 | + const wchar_t *fn2 | |
| 303 | +){ | |
| 304 | + static FARPROC fnCompareStringOrdinal; | |
| 305 | + static int try_fnCompareStringOrdinal; | |
| 306 | + if( !try_fnCompareStringOrdinal ){ | |
| 307 | + fnCompareStringOrdinal = | |
| 308 | + GetProcAddress(GetModuleHandleA("kernel32"),"CompareStringOrdinal"); | |
| 309 | + try_fnCompareStringOrdinal = 1; | |
| 310 | + } | |
| 311 | + if( fnCompareStringOrdinal ){ | |
| 312 | + return -2 + fnCompareStringOrdinal(fn1,-1,fn2,-1,1); | |
| 313 | + }else{ | |
| 314 | + return lstrcmpiW(fn1,fn2); | |
| 315 | + } | |
| 316 | +} | |
| 317 | + | |
| 318 | +/* Helper macros to deal with directory separators. */ | |
| 319 | +#define IS_DIRSEP(s,i) ( s[i]=='/' || s[i]=='\\' ) | |
| 320 | +#define NEXT_DIRSEP(s,i) while( s[i] && s[i]!='/' && s[i]!='\\' ){i++;} | |
| 321 | + | |
| 322 | +/* The Win32 version of file_case_preferred_name() from file.c, which is able to | |
| 323 | +** find case-preserved file names containing non-ASCII characters. The result is | |
| 324 | +** allocated by fossil_malloc() and *should* be free'd by the caller. While this | |
| 325 | +** function usually gets canonicalized paths, it is able to handle any input and | |
| 326 | +** figure out more cases than the original: | |
| 327 | +** | |
| 328 | +** fossil test-case-filename C:/ .//..\WINDOWS\/.//.\SYSTEM32\.\NOTEPAD.EXE | |
| 329 | +** → Original: .//..\WINDOWS\/.//.\SYSTEM32\.\NOTEPAD.EXE | |
| 330 | +** → Modified: .//..\Windows\/.//.\System32\.\notepad.exe | |
| 331 | +** | |
| 332 | +** md ÄÖÜ | |
| 333 | +** fossil test-case-filename ./\ .\äöü\/[empty]\\/ | |
| 334 | +** → Original: ./äöü\/[empty]\\/ | |
| 335 | +** → Modified: .\ÄÖÜ\/[empty]\\/ | |
| 336 | +** | |
| 337 | +** The function preserves slashes and backslashes: only single file or directory | |
| 338 | +** components without directory separators ("basenames") are converted to UTF-16 | |
| 339 | +** using fossil_utf8_to_path(), so bypassing its slash ↔ backslash translations. | |
| 340 | +** Note that the original function doesn't preserve all slashes and backslashes, | |
| 341 | +** for example in the second example above. | |
| 342 | +** | |
| 343 | +** NOTE: As of Windows 10, version 1803, case sensitivity may be enabled on a | |
| 344 | +** per-directory basis, as returned by NtQueryInformationFile() with the file | |
| 345 | +** information class FILE_CASE_SENSITIVE_INFORMATION. So this function may be | |
| 346 | +** changed to act like fossil_strdup() for files located in such directories. | |
| 347 | +*/ | |
| 348 | +char *win32_file_case_preferred_name( | |
| 349 | + const char *zBase, | |
| 350 | + const char *zPath | |
| 351 | +){ | |
| 352 | + int cchBase = strlen(zBase); | |
| 353 | + int cchPath = strlen(zPath); | |
| 354 | + int cchBuf = cchBase + cchPath + 1; | |
| 355 | + int cchRes = cchPath + 1; | |
| 356 | + char *zBuf = fossil_malloc(cchBuf); | |
| 357 | + char *zRes = fossil_malloc(cchRes); | |
| 358 | + int i, j; | |
| 359 | + memcpy(zBuf,zBase,cchBase); | |
| 360 | + cchRes = 0; | |
| 361 | + if( !IS_DIRSEP(zBuf,cchBase-1) ){ | |
| 362 | + zBuf[cchBase++]=L'/'; | |
| 363 | + } | |
| 364 | + memcpy(zBuf+cchBase,zPath,cchPath+1); | |
| 365 | + i = j = cchBase; | |
| 366 | + while( 1 ){ | |
| 367 | + WIN32_FIND_DATAW fd; | |
| 368 | + HANDLE hFind; | |
| 369 | + wchar_t *wzBuf; | |
| 370 | + char *zCompBuf = 0; | |
| 371 | + char *zComp = &zBuf[i]; | |
| 372 | + int cchComp; | |
| 373 | + char chSep; | |
| 374 | + int fDone; | |
| 375 | + if( IS_DIRSEP(zBuf,i) ){ | |
| 376 | + zRes[cchRes++] = zBuf[i]; | |
| 377 | + i = j = i+1; | |
| 378 | + continue; | |
| 379 | + } | |
| 380 | + NEXT_DIRSEP(zBuf,j); | |
| 381 | + fDone = zBuf[j]==0; | |
| 382 | + chSep = zBuf[j]; | |
| 383 | + zBuf[j] = 0; /* Truncate working buffer. */ | |
| 384 | + wzBuf = fossil_utf8_to_path(zBuf,0); | |
| 385 | + hFind = FindFirstFileW(wzBuf,&fd); | |
| 386 | + if( hFind!= INVALID_HANDLE_VALUE ){ | |
| 387 | + wchar_t *wzComp = fossil_utf8_to_path(zComp,0); | |
| 388 | + FindClose(hFind); | |
| 389 | + /* Test fd.cFileName, not fd.cAlternateFileName (classic 8.3 format). */ | |
| 390 | + if( win32_compare_filenames_nocase(wzComp,fd.cFileName)==0 ){ | |
| 391 | + zCompBuf = fossil_path_to_utf8(fd.cFileName); | |
| 392 | + zComp = zCompBuf; | |
| 393 | + } | |
| 394 | + fossil_path_free(wzComp); | |
| 395 | + } | |
| 396 | + fossil_path_free(wzBuf); | |
| 397 | + cchComp = strlen(zComp); | |
| 398 | + memcpy(zRes+cchRes,zComp,cchComp); | |
| 399 | + cchRes += cchComp; | |
| 400 | + if( zCompBuf ){ | |
| 401 | + fossil_path_free(zCompBuf); | |
| 402 | + } | |
| 403 | + if( fDone ){ | |
| 404 | + zRes[cchRes] = 0; | |
| 405 | + break; | |
| 406 | + } | |
| 407 | + zBuf[j] = chSep; /* Undo working buffer truncation. */ | |
| 408 | + i = j; | |
| 409 | + } | |
| 410 | + fossil_free(zBuf); | |
| 411 | + return zRes; | |
| 412 | +} | |
| 295 | 413 | #endif /* _WIN32 -- This code is for win32 only */ |
| 296 | 414 |
| --- src/winfile.c | |
| +++ src/winfile.c | |
| @@ -290,6 +290,124 @@ | |
| 290 | fossil_free(zWide); |
| 291 | for(i=0; zUtf8[i]; i++) if( zUtf8[i]=='\\' ) zUtf8[i] = '/'; |
| 292 | strncpy(zBuf, zUtf8, nBuf); |
| 293 | fossil_path_free(zUtf8); |
| 294 | } |
| 295 | #endif /* _WIN32 -- This code is for win32 only */ |
| 296 |
| --- src/winfile.c | |
| +++ src/winfile.c | |
| @@ -290,6 +290,124 @@ | |
| 290 | fossil_free(zWide); |
| 291 | for(i=0; zUtf8[i]; i++) if( zUtf8[i]=='\\' ) zUtf8[i] = '/'; |
| 292 | strncpy(zBuf, zUtf8, nBuf); |
| 293 | fossil_path_free(zUtf8); |
| 294 | } |
| 295 | |
| 296 | /* Perform case-insensitive comparison of two UTF-16 file names. Try to load the |
| 297 | ** CompareStringOrdinal() function on Windows Vista and newer, and resort to the |
| 298 | ** lstrcmpiW() function on Windows XP. |
| 299 | */ |
| 300 | int win32_compare_filenames_nocase( |
| 301 | const wchar_t *fn1, |
| 302 | const wchar_t *fn2 |
| 303 | ){ |
| 304 | static FARPROC fnCompareStringOrdinal; |
| 305 | static int try_fnCompareStringOrdinal; |
| 306 | if( !try_fnCompareStringOrdinal ){ |
| 307 | fnCompareStringOrdinal = |
| 308 | GetProcAddress(GetModuleHandleA("kernel32"),"CompareStringOrdinal"); |
| 309 | try_fnCompareStringOrdinal = 1; |
| 310 | } |
| 311 | if( fnCompareStringOrdinal ){ |
| 312 | return -2 + fnCompareStringOrdinal(fn1,-1,fn2,-1,1); |
| 313 | }else{ |
| 314 | return lstrcmpiW(fn1,fn2); |
| 315 | } |
| 316 | } |
| 317 | |
/* Helper macros to deal with directory separators.
**
**   IS_DIRSEP(s,i)    True if character s[i] is a slash or a backslash.
**
**   NEXT_DIRSEP(s,i)  Advance the index i (which must be an lvalue) until
**                     s[i] is a directory separator or the terminating NUL.
**                     Expands to a complete while statement.
**
** Both macros evaluate their arguments more than once, so the arguments
** must be free of side effects.
*/
#define IS_DIRSEP(s,i) ( (s)[i]=='/' || (s)[i]=='\\' )
#define NEXT_DIRSEP(s,i) while( (s)[i] && !IS_DIRSEP(s,i) ){ (i)++; }
| 321 | |
| 322 | /* The Win32 version of file_case_preferred_name() from file.c, which is able to |
| 323 | ** find case-preserved file names containing non-ASCII characters. The result is |
| 324 | ** allocated by fossil_malloc() and *should* be free'd by tha caller. While this |
| 325 | ** function usually gets canonicalized paths, it is able to handle any input and |
| 326 | ** figure out more cases than the original: |
| 327 | ** |
| 328 | ** fossil test-case-filename C:/ .//..\WINDOWS\/.//.\SYSTEM32\.\NOTEPAD.EXE |
| 329 | ** → Original: .//..\WINDOWS\/.//.\SYSTEM32\.\NOTEPAD.EXE |
| 330 | ** → Modified: .//..\Windows\/.//.\System32\.\notepad.exe |
| 331 | ** |
| 332 | ** md ÄÖÜ |
| 333 | ** fossil test-case-filename ./\ .\äöü\/[empty]\\/ |
| 334 | ** → Original: ./äöü\/[empty]\\/ |
| 335 | ** → Modified: .\ÄÖÜ\/[empty]\\/ |
| 336 | ** |
| 337 | ** The function preserves slashes and backslashes: only single file or directory |
| 338 | ** components without directory separators ("basenames") are converted to UTF-16 |
| 339 | ** using fossil_utf8_to_path(), so bypassing its slash ↔ backslash translations. |
| 340 | ** Note that the original function doesn't preserve all slashes and backslashes, |
| 341 | ** for example in the second example above. |
| 342 | ** |
| 343 | ** NOTE: As of Windows 10, version 1803, case sensitivity may be enabled on a |
| 344 | ** per-directory basis, as returned by NtQueryInformationFile() with the file |
| 345 | ** information class FILE_CASE_SENSITIVE_INFORMATION. So this function may be |
| 346 | ** changed to act like fossil_strdup() for files located in such directories. |
| 347 | */ |
| 348 | char *win32_file_case_preferred_name( |
| 349 | const char *zBase, |
| 350 | const char *zPath |
| 351 | ){ |
| 352 | int cchBase = strlen(zBase); |
| 353 | int cchPath = strlen(zPath); |
| 354 | int cchBuf = cchBase + cchPath + 1; |
| 355 | int cchRes = cchPath + 1; |
| 356 | char *zBuf = fossil_malloc(cchBuf); |
| 357 | char *zRes = fossil_malloc(cchRes); |
| 358 | int i, j; |
| 359 | memcpy(zBuf,zBase,cchBase); |
| 360 | cchRes = 0; |
| 361 | if( !IS_DIRSEP(zBuf,cchBase-1) ){ |
| 362 | zBuf[cchBase++]=L'/'; |
| 363 | } |
| 364 | memcpy(zBuf+cchBase,zPath,cchPath+1); |
| 365 | i = j = cchBase; |
| 366 | while( 1 ){ |
| 367 | WIN32_FIND_DATAW fd; |
| 368 | HANDLE hFind; |
| 369 | wchar_t *wzBuf; |
| 370 | char *zCompBuf = 0; |
| 371 | char *zComp = &zBuf[i]; |
| 372 | int cchComp; |
| 373 | char chSep; |
| 374 | int fDone; |
| 375 | if( IS_DIRSEP(zBuf,i) ){ |
| 376 | zRes[cchRes++] = zBuf[i]; |
| 377 | i = j = i+1; |
| 378 | continue; |
| 379 | } |
| 380 | NEXT_DIRSEP(zBuf,j); |
| 381 | fDone = zBuf[j]==0; |
| 382 | chSep = zBuf[j]; |
| 383 | zBuf[j] = 0; /* Truncate working buffer. */ |
| 384 | wzBuf = fossil_utf8_to_path(zBuf,0); |
| 385 | hFind = FindFirstFileW(wzBuf,&fd); |
| 386 | if( hFind!= INVALID_HANDLE_VALUE ){ |
| 387 | wchar_t *wzComp = fossil_utf8_to_path(zComp,0); |
| 388 | FindClose(hFind); |
| 389 | /* Test fd.cFileName, not fd.cAlternateFileName (classic 8.3 format). */ |
| 390 | if( win32_compare_filenames_nocase(wzComp,fd.cFileName)==0 ){ |
| 391 | zCompBuf = fossil_path_to_utf8(fd.cFileName); |
| 392 | zComp = zCompBuf; |
| 393 | } |
| 394 | fossil_path_free(wzComp); |
| 395 | } |
| 396 | fossil_path_free(wzBuf); |
| 397 | cchComp = strlen(zComp); |
| 398 | memcpy(zRes+cchRes,zComp,cchComp); |
| 399 | cchRes += cchComp; |
| 400 | if( zCompBuf ){ |
| 401 | fossil_path_free(zCompBuf); |
| 402 | } |
| 403 | if( fDone ){ |
| 404 | zRes[cchRes] = 0; |
| 405 | break; |
| 406 | } |
| 407 | zBuf[j] = chSep; /* Undo working buffer truncation. */ |
| 408 | i = j; |
| 409 | } |
| 410 | fossil_free(zBuf); |
| 411 | return zRes; |
| 412 | } |
| 413 | #endif /* _WIN32 -- This code is for win32 only */ |
| 414 |