Fossil SCM

Enhance the function to find case-preserved filenames on Windows to deal with non-ASCII filenames.

florian 2024-10-15 05:17 trunk
Commit 9919dfbbaa2019e7eb7bbfdb2f821058e25553478e27079f95122d672a6fbdff
2 files changed +4 +118
+4
--- src/file.c
+++ src/file.c
@@ -1372,10 +1372,11 @@
13721372
** just fossil_strdup(). But for case-insensitive but "case preserving"
13731373
** filesystems, such as on MacOS or Windows, we want the filename to be
13741374
** in the preserved casing. That's what this routine does.
13751375
*/
13761376
char *file_case_preferred_name(const char *zDir, const char *zPath){
1377
+#ifndef _WIN32 /* Call win32_file_case_preferred_name() on Windows. */
13771378
DIR *d;
13781379
int i;
13791380
char *zResult = 0;
13801381
void *zNative = 0;
13811382
@@ -1407,10 +1408,13 @@
14071408
closedir(d);
14081409
}
14091410
fossil_path_free(zNative);
14101411
if( zResult==0 ) zResult = fossil_strdup(zPath);
14111412
return zResult;
1413
+#else /* _WIN32 */
1414
+ return win32_file_case_preferred_name(zDir,zPath);
1415
+#endif /* _WIN32 */
14121416
}
14131417
14141418
/*
14151419
** COMMAND: test-case-filename
14161420
**
14171421
--- src/file.c
+++ src/file.c
@@ -1372,10 +1372,11 @@
1372 ** just fossil_strdup(). But for case-insensitive but "case preserving"
1373 ** filesystems, such as on MacOS or Windows, we want the filename to be
1374 ** in the preserved casing. That's what this routine does.
1375 */
1376 char *file_case_preferred_name(const char *zDir, const char *zPath){
 
1377 DIR *d;
1378 int i;
1379 char *zResult = 0;
1380 void *zNative = 0;
1381
@@ -1407,10 +1408,13 @@
1407 closedir(d);
1408 }
1409 fossil_path_free(zNative);
1410 if( zResult==0 ) zResult = fossil_strdup(zPath);
1411 return zResult;
 
 
 
1412 }
1413
1414 /*
1415 ** COMMAND: test-case-filename
1416 **
1417
--- src/file.c
+++ src/file.c
@@ -1372,10 +1372,11 @@
1372 ** just fossil_strdup(). But for case-insensitive but "case preserving"
1373 ** filesystems, such as on MacOS or Windows, we want the filename to be
1374 ** in the preserved casing. That's what this routine does.
1375 */
1376 char *file_case_preferred_name(const char *zDir, const char *zPath){
1377 #ifndef _WIN32 /* Call win32_file_case_preferred_name() on Windows. */
1378 DIR *d;
1379 int i;
1380 char *zResult = 0;
1381 void *zNative = 0;
1382
@@ -1407,10 +1408,13 @@
1408 closedir(d);
1409 }
1410 fossil_path_free(zNative);
1411 if( zResult==0 ) zResult = fossil_strdup(zPath);
1412 return zResult;
1413 #else /* _WIN32 */
1414 return win32_file_case_preferred_name(zDir,zPath);
1415 #endif /* _WIN32 */
1416 }
1417
1418 /*
1419 ** COMMAND: test-case-filename
1420 **
1421
+118
--- src/winfile.c
+++ src/winfile.c
@@ -290,6 +290,124 @@
290290
fossil_free(zWide);
291291
for(i=0; zUtf8[i]; i++) if( zUtf8[i]=='\\' ) zUtf8[i] = '/';
292292
strncpy(zBuf, zUtf8, nBuf);
293293
fossil_path_free(zUtf8);
294294
}
295
+
296
+/* Perform case-insensitive comparison of two UTF-16 file names. Try to load the
297
+** CompareStringOrdinal() function on Windows Vista and newer, and resort to the
298
+** lstrcmpiW() function on Windows XP.
299
+*/
300
+int win32_compare_filenames_nocase(
301
+ const wchar_t *fn1,
302
+ const wchar_t *fn2
303
+){
304
+ static FARPROC fnCompareStringOrdinal;
305
+ static int try_fnCompareStringOrdinal;
306
+ if( !try_fnCompareStringOrdinal ){
307
+ fnCompareStringOrdinal =
308
+ GetProcAddress(GetModuleHandleA("kernel32"),"CompareStringOrdinal");
309
+ try_fnCompareStringOrdinal = 1;
310
+ }
311
+ if( fnCompareStringOrdinal ){
312
+ return -2 + fnCompareStringOrdinal(fn1,-1,fn2,-1,1);
313
+ }else{
314
+ return lstrcmpiW(fn1,fn2);
315
+ }
316
+}
317
+
318
+/* Helper macros to deal with directory separators. */
319
+#define IS_DIRSEP(s,i) ( s[i]=='/' || s[i]=='\\' )
320
+#define NEXT_DIRSEP(s,i) while( s[i] && s[i]!='/' && s[i]!='\\' ){i++;}
321
+
322
+/* The Win32 version of file_case_preferred_name() from file.c, which is able to
323
+** find case-preserved file names containing non-ASCII characters. The result is
324
+** allocated by fossil_malloc() and *should* be free'd by the caller. While this
325
+** function usually gets canonicalized paths, it is able to handle any input and
326
+** figure out more cases than the original:
327
+**
328
+** fossil test-case-filename C:/ .//..\WINDOWS\/.//.\SYSTEM32\.\NOTEPAD.EXE
329
+** → Original: .//..\WINDOWS\/.//.\SYSTEM32\.\NOTEPAD.EXE
330
+** → Modified: .//..\Windows\/.//.\System32\.\notepad.exe
331
+**
332
+** md ÄÖÜ
333
+** fossil test-case-filename ./\ .\äöü\/[empty]\\/
334
+** → Original: ./äöü\/[empty]\\/
335
+** → Modified: .\ÄÖÜ\/[empty]\\/
336
+**
337
+** The function preserves slashes and backslashes: only single file or directory
338
+** components without directory separators ("basenames") are converted to UTF-16
339
+** using fossil_utf8_to_path(), so bypassing its slash ↔ backslash translations.
340
+** Note that the original function doesn't preserve all slashes and backslashes,
341
+** for example in the second example above.
342
+**
343
+** NOTE: As of Windows 10, version 1803, case sensitivity may be enabled on a
344
+** per-directory basis, as returned by NtQueryInformationFile() with the file
345
+** information class FILE_CASE_SENSITIVE_INFORMATION. So this function may be
346
+** changed to act like fossil_strdup() for files located in such directories.
347
+*/
348
+char *win32_file_case_preferred_name(
349
+ const char *zBase,
350
+ const char *zPath
351
+){
352
+ int cchBase = strlen(zBase);
353
+ int cchPath = strlen(zPath);
354
+ int cchBuf = cchBase + cchPath + 1;
355
+ int cchRes = cchPath + 1;
356
+ char *zBuf = fossil_malloc(cchBuf);
357
+ char *zRes = fossil_malloc(cchRes);
358
+ int i, j;
359
+ memcpy(zBuf,zBase,cchBase);
360
+ cchRes = 0;
361
+ if( !IS_DIRSEP(zBuf,cchBase-1) ){
362
+ zBuf[cchBase++]=L'/';
363
+ }
364
+ memcpy(zBuf+cchBase,zPath,cchPath+1);
365
+ i = j = cchBase;
366
+ while( 1 ){
367
+ WIN32_FIND_DATAW fd;
368
+ HANDLE hFind;
369
+ wchar_t *wzBuf;
370
+ char *zCompBuf = 0;
371
+ char *zComp = &zBuf[i];
372
+ int cchComp;
373
+ char chSep;
374
+ int fDone;
375
+ if( IS_DIRSEP(zBuf,i) ){
376
+ zRes[cchRes++] = zBuf[i];
377
+ i = j = i+1;
378
+ continue;
379
+ }
380
+ NEXT_DIRSEP(zBuf,j);
381
+ fDone = zBuf[j]==0;
382
+ chSep = zBuf[j];
383
+ zBuf[j] = 0; /* Truncate working buffer. */
384
+ wzBuf = fossil_utf8_to_path(zBuf,0);
385
+ hFind = FindFirstFileW(wzBuf,&fd);
386
+ if( hFind!= INVALID_HANDLE_VALUE ){
387
+ wchar_t *wzComp = fossil_utf8_to_path(zComp,0);
388
+ FindClose(hFind);
389
+ /* Test fd.cFileName, not fd.cAlternateFileName (classic 8.3 format). */
390
+ if( win32_compare_filenames_nocase(wzComp,fd.cFileName)==0 ){
391
+ zCompBuf = fossil_path_to_utf8(fd.cFileName);
392
+ zComp = zCompBuf;
393
+ }
394
+ fossil_path_free(wzComp);
395
+ }
396
+ fossil_path_free(wzBuf);
397
+ cchComp = strlen(zComp);
398
+ memcpy(zRes+cchRes,zComp,cchComp);
399
+ cchRes += cchComp;
400
+ if( zCompBuf ){
401
+ fossil_path_free(zCompBuf);
402
+ }
403
+ if( fDone ){
404
+ zRes[cchRes] = 0;
405
+ break;
406
+ }
407
+ zBuf[j] = chSep; /* Undo working buffer truncation. */
408
+ i = j;
409
+ }
410
+ fossil_free(zBuf);
411
+ return zRes;
412
+}
295413
#endif /* _WIN32 -- This code is for win32 only */
296414
--- src/winfile.c
+++ src/winfile.c
@@ -290,6 +290,124 @@
290 fossil_free(zWide);
291 for(i=0; zUtf8[i]; i++) if( zUtf8[i]=='\\' ) zUtf8[i] = '/';
292 strncpy(zBuf, zUtf8, nBuf);
293 fossil_path_free(zUtf8);
294 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
295 #endif /* _WIN32 -- This code is for win32 only */
296
--- src/winfile.c
+++ src/winfile.c
@@ -290,6 +290,124 @@
290 fossil_free(zWide);
291 for(i=0; zUtf8[i]; i++) if( zUtf8[i]=='\\' ) zUtf8[i] = '/';
292 strncpy(zBuf, zUtf8, nBuf);
293 fossil_path_free(zUtf8);
294 }
295
296 /* Perform case-insensitive comparison of two UTF-16 file names. Try to load the
297 ** CompareStringOrdinal() function on Windows Vista and newer, and resort to the
298 ** lstrcmpiW() function on Windows XP.
299 */
300 int win32_compare_filenames_nocase(
301 const wchar_t *fn1,
302 const wchar_t *fn2
303 ){
304 static FARPROC fnCompareStringOrdinal;
305 static int try_fnCompareStringOrdinal;
306 if( !try_fnCompareStringOrdinal ){
307 fnCompareStringOrdinal =
308 GetProcAddress(GetModuleHandleA("kernel32"),"CompareStringOrdinal");
309 try_fnCompareStringOrdinal = 1;
310 }
311 if( fnCompareStringOrdinal ){
312 return -2 + fnCompareStringOrdinal(fn1,-1,fn2,-1,1);
313 }else{
314 return lstrcmpiW(fn1,fn2);
315 }
316 }
317
/* Helper macros to deal with directory separators.
**
** IS_DIRSEP(s,i)    True if character s[i] is a slash or a backslash.
** NEXT_DIRSEP(s,i)  Advance the index variable i (an lvalue) until s[i] is a
**                   directory separator or the terminating NUL.
**
** The arguments are parenthesized so that expressions such as "p+1" can be
** passed safely. Both macros evaluate their arguments more than once, so
** do not pass expressions with side effects.
*/
#define IS_DIRSEP(s,i) ( (s)[(i)]=='/' || (s)[(i)]=='\\' )
#define NEXT_DIRSEP(s,i) \
  while( (s)[(i)] && (s)[(i)]!='/' && (s)[(i)]!='\\' ){ (i)++; }
321
322 /* The Win32 version of file_case_preferred_name() from file.c, which is able to
323 ** find case-preserved file names containing non-ASCII characters. The result is
324 ** allocated by fossil_malloc() and *should* be free'd by tha caller. While this
325 ** function usually gets canonicalized paths, it is able to handle any input and
326 ** figure out more cases than the original:
327 **
328 ** fossil test-case-filename C:/ .//..\WINDOWS\/.//.\SYSTEM32\.\NOTEPAD.EXE
329 ** → Original: .//..\WINDOWS\/.//.\SYSTEM32\.\NOTEPAD.EXE
330 ** → Modified: .//..\Windows\/.//.\System32\.\notepad.exe
331 **
332 ** md ÄÖÜ
333 ** fossil test-case-filename ./\ .\äöü\/[empty]\\/
334 ** → Original: ./äöü\/[empty]\\/
335 ** → Modified: .\ÄÖÜ\/[empty]\\/
336 **
337 ** The function preserves slashes and backslashes: only single file or directory
338 ** components without directory separators ("basenames") are converted to UTF-16
339 ** using fossil_utf8_to_path(), so bypassing its slash ↔ backslash translations.
340 ** Note that the original function doesn't preserve all slashes and backslashes,
341 ** for example in the second example above.
342 **
343 ** NOTE: As of Windows 10, version 1803, case sensitivity may be enabled on a
344 ** per-directory basis, as returned by NtQueryInformationFile() with the file
345 ** information class FILE_CASE_SENSITIVE_INFORMATION. So this function may be
346 ** changed to act like fossil_strdup() for files located in such directories.
347 */
348 char *win32_file_case_preferred_name(
349 const char *zBase,
350 const char *zPath
351 ){
352 int cchBase = strlen(zBase);
353 int cchPath = strlen(zPath);
354 int cchBuf = cchBase + cchPath + 1;
355 int cchRes = cchPath + 1;
356 char *zBuf = fossil_malloc(cchBuf);
357 char *zRes = fossil_malloc(cchRes);
358 int i, j;
359 memcpy(zBuf,zBase,cchBase);
360 cchRes = 0;
361 if( !IS_DIRSEP(zBuf,cchBase-1) ){
362 zBuf[cchBase++]=L'/';
363 }
364 memcpy(zBuf+cchBase,zPath,cchPath+1);
365 i = j = cchBase;
366 while( 1 ){
367 WIN32_FIND_DATAW fd;
368 HANDLE hFind;
369 wchar_t *wzBuf;
370 char *zCompBuf = 0;
371 char *zComp = &zBuf[i];
372 int cchComp;
373 char chSep;
374 int fDone;
375 if( IS_DIRSEP(zBuf,i) ){
376 zRes[cchRes++] = zBuf[i];
377 i = j = i+1;
378 continue;
379 }
380 NEXT_DIRSEP(zBuf,j);
381 fDone = zBuf[j]==0;
382 chSep = zBuf[j];
383 zBuf[j] = 0; /* Truncate working buffer. */
384 wzBuf = fossil_utf8_to_path(zBuf,0);
385 hFind = FindFirstFileW(wzBuf,&fd);
386 if( hFind!= INVALID_HANDLE_VALUE ){
387 wchar_t *wzComp = fossil_utf8_to_path(zComp,0);
388 FindClose(hFind);
389 /* Test fd.cFileName, not fd.cAlternateFileName (classic 8.3 format). */
390 if( win32_compare_filenames_nocase(wzComp,fd.cFileName)==0 ){
391 zCompBuf = fossil_path_to_utf8(fd.cFileName);
392 zComp = zCompBuf;
393 }
394 fossil_path_free(wzComp);
395 }
396 fossil_path_free(wzBuf);
397 cchComp = strlen(zComp);
398 memcpy(zRes+cchRes,zComp,cchComp);
399 cchRes += cchComp;
400 if( zCompBuf ){
401 fossil_path_free(zCompBuf);
402 }
403 if( fDone ){
404 zRes[cchRes] = 0;
405 break;
406 }
407 zBuf[j] = chSep; /* Undo working buffer truncation. */
408 i = j;
409 }
410 fossil_free(zBuf);
411 return zRes;
412 }
413 #endif /* _WIN32 -- This code is for win32 only */
414

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button