Fossil SCM

Do not enforce the requirement that filenames in manifests be strict UTF-8. Some bug or another in a legacy version of Fossil allowed some Latin-1 Supplement characters to be encoded as a single byte (ex: 0xf3) instead of the correct two-byte encoding (ex: 0xc3 0xb3), so if we start enforcing strict UTF-8, some check-ins from those legacy versions of Fossil will be inaccessible.

drh 2012-12-12 22:52 trunk
Commit d48399bd3907820a8ebb75644c6e4784271fce9d
4 files changed +1 -1 +2 -2 +31 -24 +3 -3
+1 -1
--- src/add.c
+++ src/add.c
@@ -140,11 +140,11 @@
140140
const char *zPath, /* Tree-name of file to add. */
141141
int vid, /* Add to this VFILE */
142142
int caseSensitive /* True if filenames are case sensitive */
143143
){
144144
const char *zCollate = caseSensitive ? "binary" : "nocase";
145
- if( !file_is_simple_pathname(zPath) ){
145
+ if( !file_is_simple_pathname(zPath, 1) ){
146146
fossil_warning("filename contains illegal characters: %s", zPath);
147147
return 0;
148148
}
149149
if( db_exists("SELECT 1 FROM vfile"
150150
" WHERE pathname=%Q COLLATE %s", zPath, zCollate) ){
151151
--- src/add.c
+++ src/add.c
@@ -140,11 +140,11 @@
140 const char *zPath, /* Tree-name of file to add. */
141 int vid, /* Add to this VFILE */
142 int caseSensitive /* True if filenames are case sensitive */
143 ){
144 const char *zCollate = caseSensitive ? "binary" : "nocase";
145 if( !file_is_simple_pathname(zPath) ){
146 fossil_warning("filename contains illegal characters: %s", zPath);
147 return 0;
148 }
149 if( db_exists("SELECT 1 FROM vfile"
150 " WHERE pathname=%Q COLLATE %s", zPath, zCollate) ){
151
--- src/add.c
+++ src/add.c
@@ -140,11 +140,11 @@
140 const char *zPath, /* Tree-name of file to add. */
141 int vid, /* Add to this VFILE */
142 int caseSensitive /* True if filenames are case sensitive */
143 ){
144 const char *zCollate = caseSensitive ? "binary" : "nocase";
145 if( !file_is_simple_pathname(zPath, 1) ){
146 fossil_warning("filename contains illegal characters: %s", zPath);
147 return 0;
148 }
149 if( db_exists("SELECT 1 FROM vfile"
150 " WHERE pathname=%Q COLLATE %s", zPath, zCollate) ){
151
+2 -2
--- src/doc.c
+++ src/doc.c
@@ -380,15 +380,15 @@
380380
g.zPath = mprintf("%s/%s", g.zPath, zName);
381381
memcpy(zBaseline, zName, i);
382382
zBaseline[i] = 0;
383383
zName += i;
384384
while( zName[0]=='/' ){ zName++; }
385
- if( !file_is_simple_pathname(zName) ){
385
+ if( !file_is_simple_pathname(zName, 1) ){
386386
int n = strlen(zName);
387387
if( n>0 && zName[n-1]=='/' ){
388388
zName = mprintf("%sindex.html", zName);
389
- if( !file_is_simple_pathname(zName) ){
389
+ if( !file_is_simple_pathname(zName, 1) ){
390390
goto doc_not_found;
391391
}
392392
}else{
393393
goto doc_not_found;
394394
}
395395
--- src/doc.c
+++ src/doc.c
@@ -380,15 +380,15 @@
380 g.zPath = mprintf("%s/%s", g.zPath, zName);
381 memcpy(zBaseline, zName, i);
382 zBaseline[i] = 0;
383 zName += i;
384 while( zName[0]=='/' ){ zName++; }
385 if( !file_is_simple_pathname(zName) ){
386 int n = strlen(zName);
387 if( n>0 && zName[n-1]=='/' ){
388 zName = mprintf("%sindex.html", zName);
389 if( !file_is_simple_pathname(zName) ){
390 goto doc_not_found;
391 }
392 }else{
393 goto doc_not_found;
394 }
395
--- src/doc.c
+++ src/doc.c
@@ -380,15 +380,15 @@
380 g.zPath = mprintf("%s/%s", g.zPath, zName);
381 memcpy(zBaseline, zName, i);
382 zBaseline[i] = 0;
383 zName += i;
384 while( zName[0]=='/' ){ zName++; }
385 if( !file_is_simple_pathname(zName, 1) ){
386 int n = strlen(zName);
387 if( n>0 && zName[n-1]=='/' ){
388 zName = mprintf("%sindex.html", zName);
389 if( !file_is_simple_pathname(zName, 1) ){
390 goto doc_not_found;
391 }
392 }else{
393 goto doc_not_found;
394 }
395
+31 -24
--- src/file.c
+++ src/file.c
@@ -487,43 +487,50 @@
487487
** * Does not contain any path element named "." or ".."
488488
** * Does not contain any of these characters in the path: "\"
489489
** * Does not end with "/".
490490
** * Does not contain two or more "/" characters in a row.
491491
** * Contains at least one character
492
+**
493
+** Invalid UTF8 characters result in a false return if bStrictUtf8 is
494
+** true. If bStrictUtf8 is false, invalid UTF8 characters are silently
495
+** ignored.
492496
*/
493
-int file_is_simple_pathname(const char *z){
497
+int file_is_simple_pathname(const char *z, int bStrictUtf8){
494498
int i;
495499
char c = z[0];
500
+ char maskNonAscii = bStrictUtf8 ? 0x80 : 0x00;
496501
if( c=='/' || c==0 ) return 0;
497502
if( c=='.' ){
498503
if( z[1]=='/' || z[1]==0 ) return 0;
499504
if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0;
500505
}
501506
for(i=0; (c=z[i])!=0; i++){
502
- if( (c & 0xf0) == 0xf0 ) {
503
- /* Unicode characters > U+FFFF are not supported.
504
- * Windows XP and earlier cannot handle them.
505
- */
506
- return 0;
507
- }
508
- if( (c & 0xf0) == 0xe0 ) {
509
- /* This is a 3-byte UTF-8 character */
510
- if ( (c & 0xfe) == 0xee ){
511
- /* Range U+E000 - U+FFFF (Starting with 0xee or 0xef in UTF-8 ) */
512
- if ( (c & 1) && ((z[i+1] & 0xff) >= 0xa4) ){
513
- /* But exclude U+F900 - U+FFFF (0xef followed by byte >= 0xa4),
514
- * which contain valid characters. */
515
- continue;
516
- }
517
- /* Unicode character in the range U+E000 - U+F8FF are for
518
- * private use, they shouldn't occur in filenames. */
519
- return 0;
520
- }
521
- if( ((c & 0xff) == 0xed) && ((z[i+1] & 0xe0) == 0xa0) ){
522
- /* Unicode character in the range U+D800 - U+DFFF are for
523
- * surrogate pairs, they shouldn't occur in filenames. */
524
- return 0;
507
+ if( c & maskNonAscii ){
508
+ if( (c & 0xf0) == 0xf0 ) {
509
+ /* Unicode characters > U+FFFF are not supported.
510
+ * Windows XP and earlier cannot handle them.
511
+ */
512
+ return 0;
513
+ }
514
+ if( (c & 0xf0) == 0xe0 ) {
515
+ /* This is a 3-byte UTF-8 character */
516
+ if ( (c & 0xfe) == 0xee ){
517
+ /* Range U+E000 - U+FFFF (Starting with 0xee or 0xef in UTF-8 ) */
518
+ if ( (c & 1) && ((z[i+1] & 0xff) >= 0xa4) ){
519
+ /* But exclude U+F900 - U+FFFF (0xef followed by byte >= 0xa4),
520
+ * which contain valid characters. */
521
+ continue;
522
+ }
523
+ /* Unicode character in the range U+E000 - U+F8FF are for
524
+ * private use, they shouldn't occur in filenames. */
525
+ return 0;
526
+ }
527
+ if( ((c & 0xff) == 0xed) && ((z[i+1] & 0xe0) == 0xa0) ){
528
+ /* Unicode character in the range U+D800 - U+DFFF are for
529
+ * surrogate pairs, they shouldn't occur in filenames. */
530
+ return 0;
531
+ }
525532
}
526533
}
527534
if( c=='\\' ){
528535
return 0;
529536
}
530537
--- src/file.c
+++ src/file.c
@@ -487,43 +487,50 @@
487 ** * Does not contain any path element named "." or ".."
488 ** * Does not contain any of these characters in the path: "\"
489 ** * Does not end with "/".
490 ** * Does not contain two or more "/" characters in a row.
491 ** * Contains at least one character
 
 
 
 
492 */
493 int file_is_simple_pathname(const char *z){
494 int i;
495 char c = z[0];
 
496 if( c=='/' || c==0 ) return 0;
497 if( c=='.' ){
498 if( z[1]=='/' || z[1]==0 ) return 0;
499 if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0;
500 }
501 for(i=0; (c=z[i])!=0; i++){
502 if( (c & 0xf0) == 0xf0 ) {
503 /* Unicode characters > U+FFFF are not supported.
504 * Windows XP and earlier cannot handle them.
505 */
506 return 0;
507 }
508 if( (c & 0xf0) == 0xe0 ) {
509 /* This is a 3-byte UTF-8 character */
510 if ( (c & 0xfe) == 0xee ){
511 /* Range U+E000 - U+FFFF (Starting with 0xee or 0xef in UTF-8 ) */
512 if ( (c & 1) && ((z[i+1] & 0xff) >= 0xa4) ){
513 /* But exclude U+F900 - U+FFFF (0xef followed by byte >= 0xa4),
514 * which contain valid characters. */
515 continue;
516 }
517 /* Unicode character in the range U+E000 - U+F8FF are for
518 * private use, they shouldn't occur in filenames. */
519 return 0;
520 }
521 if( ((c & 0xff) == 0xed) && ((z[i+1] & 0xe0) == 0xa0) ){
522 /* Unicode character in the range U+D800 - U+DFFF are for
523 * surrogate pairs, they shouldn't occur in filenames. */
524 return 0;
 
 
525 }
526 }
527 if( c=='\\' ){
528 return 0;
529 }
530
--- src/file.c
+++ src/file.c
@@ -487,43 +487,50 @@
487 ** * Does not contain any path element named "." or ".."
488 ** * Does not contain any of these characters in the path: "\"
489 ** * Does not end with "/".
490 ** * Does not contain two or more "/" characters in a row.
491 ** * Contains at least one character
492 **
493 ** Invalid UTF8 characters result in a false return if bStrictUtf8 is
494 ** true. If bStrictUtf8 is false, invalid UTF8 characters are silently
495 ** ignored.
496 */
497 int file_is_simple_pathname(const char *z, int bStrictUtf8){
498 int i;
499 char c = z[0];
500 char maskNonAscii = bStrictUtf8 ? 0x80 : 0x00;
501 if( c=='/' || c==0 ) return 0;
502 if( c=='.' ){
503 if( z[1]=='/' || z[1]==0 ) return 0;
504 if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0;
505 }
506 for(i=0; (c=z[i])!=0; i++){
507 if( c & maskNonAscii ){
508 if( (c & 0xf0) == 0xf0 ) {
509 /* Unicode characters > U+FFFF are not supported.
510 * Windows XP and earlier cannot handle them.
511 */
512 return 0;
513 }
514 if( (c & 0xf0) == 0xe0 ) {
515 /* This is a 3-byte UTF-8 character */
516 if ( (c & 0xfe) == 0xee ){
517 /* Range U+E000 - U+FFFF (Starting with 0xee or 0xef in UTF-8 ) */
518 if ( (c & 1) && ((z[i+1] & 0xff) >= 0xa4) ){
519 /* But exclude U+F900 - U+FFFF (0xef followed by byte >= 0xa4),
520 * which contain valid characters. */
521 continue;
522 }
523 /* Unicode character in the range U+E000 - U+F8FF are for
524 * private use, they shouldn't occur in filenames. */
525 return 0;
526 }
527 if( ((c & 0xff) == 0xed) && ((z[i+1] & 0xe0) == 0xa0) ){
528 /* Unicode character in the range U+D800 - U+DFFF are for
529 * surrogate pairs, they shouldn't occur in filenames. */
530 return 0;
531 }
532 }
533 }
534 if( c=='\\' ){
535 return 0;
536 }
537
+3 -3
--- src/manifest.c
+++ src/manifest.c
@@ -430,11 +430,11 @@
430430
zTarget = next_token(&x, &nTarget);
431431
zSrc = next_token(&x, &nSrc);
432432
if( zName==0 || zTarget==0 ) goto manifest_syntax_error;
433433
if( p->zAttachName!=0 ) goto manifest_syntax_error;
434434
defossilize(zName);
435
- if( !file_is_simple_pathname(zName) ){
435
+ if( !file_is_simple_pathname(zName, 0) ){
436436
SYNTAX("invalid filename on A-card");
437437
}
438438
defossilize(zTarget);
439439
if( (nTarget!=UUID_SIZE || !validate16(zTarget, UUID_SIZE))
440440
&& !wiki_name_is_wellformed((const unsigned char *)zTarget) ){
@@ -524,11 +524,11 @@
524524
case 'F': {
525525
char *zName, *zPerm, *zPriorName;
526526
zName = next_token(&x,0);
527527
if( zName==0 ) SYNTAX("missing filename on F-card");
528528
defossilize(zName);
529
- if( !file_is_simple_pathname(zName) ){
529
+ if( !file_is_simple_pathname(zName, 0) ){
530530
SYNTAX("F-card filename is not a simple path");
531531
}
532532
zUuid = next_token(&x, &sz);
533533
if( p->zBaseline==0 || zUuid!=0 ){
534534
if( sz!=UUID_SIZE ) SYNTAX("F-card UUID is the wrong size");
@@ -536,11 +536,11 @@
536536
}
537537
zPerm = next_token(&x,0);
538538
zPriorName = next_token(&x,0);
539539
if( zPriorName ){
540540
defossilize(zPriorName);
541
- if( !file_is_simple_pathname(zPriorName) ){
541
+ if( !file_is_simple_pathname(zPriorName, 0) ){
542542
SYNTAX("F-card old filename is not a simple path");
543543
}
544544
}
545545
if( p->nFile>=p->nFileAlloc ){
546546
p->nFileAlloc = p->nFileAlloc*2 + 10;
547547
--- src/manifest.c
+++ src/manifest.c
@@ -430,11 +430,11 @@
430 zTarget = next_token(&x, &nTarget);
431 zSrc = next_token(&x, &nSrc);
432 if( zName==0 || zTarget==0 ) goto manifest_syntax_error;
433 if( p->zAttachName!=0 ) goto manifest_syntax_error;
434 defossilize(zName);
435 if( !file_is_simple_pathname(zName) ){
436 SYNTAX("invalid filename on A-card");
437 }
438 defossilize(zTarget);
439 if( (nTarget!=UUID_SIZE || !validate16(zTarget, UUID_SIZE))
440 && !wiki_name_is_wellformed((const unsigned char *)zTarget) ){
@@ -524,11 +524,11 @@
524 case 'F': {
525 char *zName, *zPerm, *zPriorName;
526 zName = next_token(&x,0);
527 if( zName==0 ) SYNTAX("missing filename on F-card");
528 defossilize(zName);
529 if( !file_is_simple_pathname(zName) ){
530 SYNTAX("F-card filename is not a simple path");
531 }
532 zUuid = next_token(&x, &sz);
533 if( p->zBaseline==0 || zUuid!=0 ){
534 if( sz!=UUID_SIZE ) SYNTAX("F-card UUID is the wrong size");
@@ -536,11 +536,11 @@
536 }
537 zPerm = next_token(&x,0);
538 zPriorName = next_token(&x,0);
539 if( zPriorName ){
540 defossilize(zPriorName);
541 if( !file_is_simple_pathname(zPriorName) ){
542 SYNTAX("F-card old filename is not a simple path");
543 }
544 }
545 if( p->nFile>=p->nFileAlloc ){
546 p->nFileAlloc = p->nFileAlloc*2 + 10;
547
--- src/manifest.c
+++ src/manifest.c
@@ -430,11 +430,11 @@
430 zTarget = next_token(&x, &nTarget);
431 zSrc = next_token(&x, &nSrc);
432 if( zName==0 || zTarget==0 ) goto manifest_syntax_error;
433 if( p->zAttachName!=0 ) goto manifest_syntax_error;
434 defossilize(zName);
435 if( !file_is_simple_pathname(zName, 0) ){
436 SYNTAX("invalid filename on A-card");
437 }
438 defossilize(zTarget);
439 if( (nTarget!=UUID_SIZE || !validate16(zTarget, UUID_SIZE))
440 && !wiki_name_is_wellformed((const unsigned char *)zTarget) ){
@@ -524,11 +524,11 @@
524 case 'F': {
525 char *zName, *zPerm, *zPriorName;
526 zName = next_token(&x,0);
527 if( zName==0 ) SYNTAX("missing filename on F-card");
528 defossilize(zName);
529 if( !file_is_simple_pathname(zName, 0) ){
530 SYNTAX("F-card filename is not a simple path");
531 }
532 zUuid = next_token(&x, &sz);
533 if( p->zBaseline==0 || zUuid!=0 ){
534 if( sz!=UUID_SIZE ) SYNTAX("F-card UUID is the wrong size");
@@ -536,11 +536,11 @@
536 }
537 zPerm = next_token(&x,0);
538 zPriorName = next_token(&x,0);
539 if( zPriorName ){
540 defossilize(zPriorName);
541 if( !file_is_simple_pathname(zPriorName, 0) ){
542 SYNTAX("F-card old filename is not a simple path");
543 }
544 }
545 if( p->nFile>=p->nFileAlloc ){
546 p->nFileAlloc = p->nFileAlloc*2 + 10;
547

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button