Fossil SCM

Improve filename shell escaping logic to deal better with UTF-8 characters.

drh 2021-06-24 15:35 trunk
Commit 19f195a838ba539c35e581e42d67144a72aca218e00388cdd241a4608a6f5b9b
1 file changed +137 -22
+137 -22
--- src/blob.c
+++ src/blob.c
@@ -1307,25 +1307,39 @@
13071307
** 6x ` a b c d e f g h i j k l m n o
13081308
** 7x p q r s t u v w x y z { | } ~ ^_
13091309
*/
13101310
13111311
/*
1312
-** Characters that need to be escaped are marked with 1.
1313
-** Illegal characters are marked with 2.
1312
+** Meanings for bytes in a filename:
1313
+**
1314
+** 0 Ordinary character. No encoding required
1315
+** 1 Needs to be escaped
1316
+** 2 Illegal character. Do not allow in a filename
1317
+** 3 First byte of a 2-byte UTF-8
1318
+** 4 First byte of a 3-byte UTF-8
1319
+** 5 First byte of a 4-byte UTF-8
13141320
*/
13151321
static const char aSafeChar[256] = {
13161322
#ifdef _WIN32
1323
+/* Windows
1324
+** Prohibit: all control characters, including tab, \r and \n
1325
+** Escape: (space) " # $ % & ' ( ) * ; < > ? [ ] ^ ` { | }
1326
+*/
13171327
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xa xb xc xd xe xf */
13181328
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0x */
13191329
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 1x */
1320
- 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, /* 2x */
1321
- 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, /* 3x */
1330
+ 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 2x */
1331
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, /* 3x */
13221332
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 4x */
13231333
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, /* 5x */
13241334
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 6x */
13251335
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, /* 7x */
13261336
#else
1337
+/* Unix
1338
+** Prohibit: all control characters, including tab, \r and \n
1339
+** Escape: (space) ! " # $ % & ' ( ) * ; < > ? [ \ ] ^ ` { | }
1340
+*/
13271341
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xa xb xc xd xe xf */
13281342
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0x */
13291343
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 1x */
13301344
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 2x */
13311345
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, /* 3x */
@@ -1332,16 +1346,26 @@
13321346
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 4x */
13331347
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, /* 5x */
13341348
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 6x */
13351349
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, /* 7x */
13361350
#endif
1337
- /* All the rest are zeros */
1351
+ /* all bytes 0x80 through 0xbf are unescaped, being secondary
1352
+ ** bytes to UTF8 characters. Bytes 0xc0 through 0xff are the
1353
+ ** first byte of a UTF8 character and do get escaped */
1354
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 8x */
1355
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 9x */
1356
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* ax */
1357
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* bx */
1358
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* cx */
1359
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* dx */
1360
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, /* ex */
1361
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 /* fx */
13381362
};
13391363
13401364
/*
13411365
** pBlob is a shell command under construction. This routine safely
1342
-** appends argument zIn.
1366
+** appends filename argument zIn.
13431367
**
13441368
** The argument is escaped if it contains white space or other characters
13451369
** that need to be escaped for the shell. If zIn contains characters
13461370
** that cannot be safely escaped, then throw a fatal error.
13471371
**
@@ -1355,40 +1379,46 @@
13551379
int i;
13561380
unsigned char c;
13571381
int needEscape = 0;
13581382
int n = blob_size(pBlob);
13591383
char *z = blob_buffer(pBlob);
1360
-#if defined(_WIN32)
1361
- const char *zNeedQuote = "\"^[];*? ";
1362
-#else
1363
- const char *zNeedQuote = "\"'\\*?$&|` ";
1364
-#endif
13651384
13661385
/* Any control character is illegal. This prevents \n and \r in an
13671386
** argument. */
13681387
for(i=0; (c = (unsigned char)zIn[i])!=0; i++){
13691388
if( aSafeChar[c] ){
1370
- if( aSafeChar[c]==2 ){
1389
+ unsigned char x = aSafeChar[c];
1390
+ needEscape = 1;
1391
+ if( x==2 ){
13711392
Blob bad;
13721393
blob_token(pBlob, &bad);
13731394
fossil_fatal("the [%s] argument to the \"%s\" command contains "
1374
- "a character (ascii 0x%02x) that is a security risk",
1395
+ "a character (ascii 0x%02x) that is not allowed in "
1396
+ "filename arguments",
13751397
zIn, blob_str(&bad), c);
1376
- }else{
1377
- needEscape = 1;
1398
+ }else if( x>2 ){
1399
+ if( (zIn[i+1]&0xc0)!=0x80
1400
+ || (x>=4 && (zIn[i+2]&0xc0)!=0x80)
1401
+ || (x==5 && (zIn[i+3]&0xc0)!=0x80)
1402
+ ){
1403
+ Blob bad;
1404
+ blob_token(pBlob, &bad);
1405
+ fossil_fatal("the [%s] argument to the \"%s\" command contains "
1406
+ "an illegal UTF-8 character",
1407
+ zIn, blob_str(&bad));
1408
+ }
1409
+ i += x-2;
13781410
}
1379
- break;
13801411
}
13811412
}
13821413
13831414
/* Separate from the previous argument by a space */
13841415
if( n>0 && !fossil_isspace(z[n-1]) ){
13851416
blob_append_char(pBlob, ' ');
13861417
}
13871418
13881419
/* Check for characters that need quoting */
1389
- needEscape = strpbrk(zIn, zNeedQuote)!=0;
13901420
if( !needEscape ){
13911421
if( zIn[0]=='-' ){
13921422
blob_append_char(pBlob, '.');
13931423
#if defined(_WIN32)
13941424
blob_append_char(pBlob, '\\');
@@ -1397,10 +1427,14 @@
13971427
#endif
13981428
}
13991429
blob_append(pBlob, zIn, -1);
14001430
}else{
14011431
#if defined(_WIN32)
1432
+ /* Quoting strategy for windows:
1433
+ ** Put the entire name inside of "...". Any " characters within
1434
+ ** the name get doubled.
1435
+ */
14021436
blob_append_char(pBlob, '"');
14031437
if( zIn[0]=='-' ){
14041438
blob_append_char(pBlob, '.');
14051439
blob_append_char(pBlob, '\\');
14061440
}else if( zIn[0]=='/' ){
@@ -1410,17 +1444,22 @@
14101444
blob_append_char(pBlob, (char)c);
14111445
if( c=='"' ) blob_append_char(pBlob, '"');
14121446
}
14131447
blob_append_char(pBlob, '"');
14141448
#else
1449
+ /* Quoting strategy for unix:
1450
+ ** If the name does not contain ', then surround the whole thing
1451
+ ** with '...'. If there is one or more ' characters within the
1452
+ ** name, then put \ before each special character.
1453
+ */
14151454
if( strchr(zIn,'\'') ){
14161455
if( zIn[0]=='-' ){
14171456
blob_append_char(pBlob, '.');
14181457
blob_append_char(pBlob, '/');
14191458
}
14201459
for(i=0; (c = (unsigned char)zIn[i])!=0; i++){
1421
- if( aSafeChar[c] ) blob_append_char(pBlob, '\\');
1460
+ if( aSafeChar[c] && aSafeChar[c]!=2 ) blob_append_char(pBlob, '\\');
14221461
blob_append_char(pBlob, (char)c);
14231462
}
14241463
}else{
14251464
blob_append_char(pBlob, '\'');
14261465
if( zIn[0]=='-' ){
@@ -1435,24 +1474,100 @@
14351474
}
14361475
14371476
/*
14381477
** COMMAND: test-escaped-arg
14391478
**
1440
-** Usage %fossil ARG ...
1479
+** Usage %fossil ARGS ...
14411480
**
14421481
** Run each argument through blob_append_escaped_arg() and show the
14431482
** result. Append each argument to "fossil test-echo" and run that
14441483
** using fossil_system() to verify that it really does get escaped
14451484
** correctly.
1485
+**
1486
+** Other options:
1487
+**
1488
+** --hex HEX Skip the --hex flag and instead decode HEX
1489
+** into ascii. This provides a way to insert
1490
+** unusual characters as an argument for testing.
1491
+**
1492
+** --compare HEX ASCII Verify that argument ASCII is identical to
1493
+** decoded HEX.
1494
+**
1495
+** --fuzz N Run N fuzz cases. Each case is a call
1496
+** to "fossil test-escaped-arg --compare HEX ARG"
1497
+** where HEX and ARG are the same argument.
1498
+** The argument is chosen at random.
14461499
*/
1447
-void test_escaped_arg__cmd(void){
1500
+void test_escaped_arg_command(void){
14481501
int i;
14491502
Blob x;
1503
+ const char *zArg;
1504
+ char zBuf[100];
14501505
blob_init(&x, 0, 0);
14511506
for(i=2; i<g.argc; i++){
1452
- fossil_print("%3d [%s]: ", i, g.argv[i]);
1453
- blob_appendf(&x, "fossil test-echo %$", g.argv[i]);
1507
+ zArg = g.argv[i];
1508
+ if( fossil_strcmp(zArg, "--hex")==0 && i+1<g.argc ){
1509
+ size_t n = strlen(g.argv[++i]);
1510
+ if( n>=(sizeof(zBuf)-1)*2 ){
1511
+ fossil_fatal("Argument to --hex is too big");
1512
+ }
1513
+ memset(zBuf, 0, sizeof(zBuf));
1514
+ decode16((const unsigned char*)g.argv[i], (unsigned char*)zBuf, (int)n);
1515
+ zArg = zBuf;
1516
+ }else if( fossil_strcmp(zArg, "--compare")==0 && i+2<g.argc ){
1517
+ size_t n = strlen(g.argv[++i]);
1518
+ if( n>=(sizeof(zBuf)-1)*2 ){
1519
+ fossil_fatal("HEX argument to --compare is too big");
1520
+ }
1521
+ memset(zBuf, 0, sizeof(zBuf));
1522
+ if( decode16((const unsigned char*)g.argv[i], (unsigned char*)zBuf,
1523
+ (int)n) ){
1524
+ fossil_fatal("HEX decode of %s failed", g.argv[i]);
1525
+ }
1526
+ zArg = g.argv[++i];
1527
+ if( zArg[0]=='-' ){
1528
+ fossil_fatal("filename argument \"%s\" begins with \"-\"", zArg);
1529
+ }
1530
+#ifdef _WIN32
1531
+ if( zBuf[0]=='-' && zArg[0]=='.' && zArg[1]=='\\' ) zArg += 2;
1532
+#else
1533
+ if( zBuf[0]=='-' && zArg[0]=='.' && zArg[1]=='/' ) zArg += 2;
1534
+#endif
1535
+ if( strcmp(zBuf, zArg)!=0 ){
1536
+ fossil_fatal("argument disagree: \"%s\" (%s) versus \"%s\"",
1537
+ zBuf, g.argv[i-1], zArg);
1538
+ }
1539
+ continue;
1540
+ }else if( fossil_strcmp(zArg, "--fuzz")==0 && i+1<g.argc ){
1541
+ int n = atoi(g.argv[++i]);
1542
+ int j;
1543
+ for(j=0; j<n; j++){
1544
+ unsigned char m, k;
1545
+ int rc;
1546
+ unsigned char zWord[100];
1547
+ sqlite3_randomness(sizeof(m), &m);
1548
+ m %= 50;
1549
+ m += 2;
1550
+ sqlite3_randomness(m, zWord);
1551
+ for(k=0; k<m; k++){
1552
+ unsigned char cx = zWord[k];
1553
+ if( cx<0x20 || cx>=0x7f ){
1554
+ zWord[k] = "abcdefghijklmnopqrstuvwxyz_"[cx%27];
1555
+ }
1556
+ }
1557
+ zWord[k] = 0;
1558
+ encode16(zWord, (unsigned char*)zBuf, (int)m);
1559
+ blob_appendf(&x, "%$ test-escaped-arg --compare %s %$",
1560
+ g.nameOfExe, zBuf,zWord);
1561
+ rc = fossil_system(blob_str(&x));
1562
+ if( rc ) fossil_fatal("failed test (%d): %s\n", rc, blob_str(&x));
1563
+ blob_reset(&x);
1564
+ }
1565
+ continue;
1566
+ }
1567
+ fossil_print("%3d [%s]: ", i, zArg);
1568
+ blob_appendf(&x, "%$ test-echo %$", g.nameOfExe, zArg);
14541569
fossil_print("%s\n", blob_str(&x));
14551570
fossil_system(blob_str(&x));
14561571
blob_reset(&x);
14571572
}
14581573
}
14591574
--- src/blob.c
+++ src/blob.c
@@ -1307,25 +1307,39 @@
1307 ** 6x ` a b c d e f g h i j k l m n o
1308 ** 7x p q r s t u v w x y z { | } ~ ^_
1309 */
1310
1311 /*
1312 ** Characters that need to be escaped are marked with 1.
1313 ** Illegal characters are marked with 2.
 
 
 
 
 
 
1314 */
1315 static const char aSafeChar[256] = {
1316 #ifdef _WIN32
 
 
 
 
1317 /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xa xb xc xd xe xf */
1318 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0x */
1319 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 1x */
1320 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, /* 2x */
1321 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, /* 3x */
1322 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 4x */
1323 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, /* 5x */
1324 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 6x */
1325 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, /* 7x */
1326 #else
 
 
 
 
1327 /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xa xb xc xd xe xf */
1328 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0x */
1329 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 1x */
1330 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 2x */
1331 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, /* 3x */
@@ -1332,16 +1346,26 @@
1332 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 4x */
1333 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, /* 5x */
1334 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 6x */
1335 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, /* 7x */
1336 #endif
1337 /* All the rest are zeros */
 
 
 
 
 
 
 
 
 
 
1338 };
1339
1340 /*
1341 ** pBlob is a shell command under construction. This routine safely
1342 ** appends argument zIn.
1343 **
1344 ** The argument is escaped if it contains white space or other characters
1345 ** that need to be escaped for the shell. If zIn contains characters
1346 ** that cannot be safely escaped, then throw a fatal error.
1347 **
@@ -1355,40 +1379,46 @@
1355 int i;
1356 unsigned char c;
1357 int needEscape = 0;
1358 int n = blob_size(pBlob);
1359 char *z = blob_buffer(pBlob);
1360 #if defined(_WIN32)
1361 const char *zNeedQuote = "\"^[];*? ";
1362 #else
1363 const char *zNeedQuote = "\"'\\*?$&|` ";
1364 #endif
1365
1366 /* Any control character is illegal. This prevents \n and \r in an
1367 ** argument. */
1368 for(i=0; (c = (unsigned char)zIn[i])!=0; i++){
1369 if( aSafeChar[c] ){
1370 if( aSafeChar[c]==2 ){
 
 
1371 Blob bad;
1372 blob_token(pBlob, &bad);
1373 fossil_fatal("the [%s] argument to the \"%s\" command contains "
1374 "a character (ascii 0x%02x) that is a security risk",
 
1375 zIn, blob_str(&bad), c);
1376 }else{
1377 needEscape = 1;
 
 
 
 
 
 
 
 
 
 
1378 }
1379 break;
1380 }
1381 }
1382
1383 /* Separate from the previous argument by a space */
1384 if( n>0 && !fossil_isspace(z[n-1]) ){
1385 blob_append_char(pBlob, ' ');
1386 }
1387
1388 /* Check for characters that need quoting */
1389 needEscape = strpbrk(zIn, zNeedQuote)!=0;
1390 if( !needEscape ){
1391 if( zIn[0]=='-' ){
1392 blob_append_char(pBlob, '.');
1393 #if defined(_WIN32)
1394 blob_append_char(pBlob, '\\');
@@ -1397,10 +1427,14 @@
1397 #endif
1398 }
1399 blob_append(pBlob, zIn, -1);
1400 }else{
1401 #if defined(_WIN32)
 
 
 
 
1402 blob_append_char(pBlob, '"');
1403 if( zIn[0]=='-' ){
1404 blob_append_char(pBlob, '.');
1405 blob_append_char(pBlob, '\\');
1406 }else if( zIn[0]=='/' ){
@@ -1410,17 +1444,22 @@
1410 blob_append_char(pBlob, (char)c);
1411 if( c=='"' ) blob_append_char(pBlob, '"');
1412 }
1413 blob_append_char(pBlob, '"');
1414 #else
 
 
 
 
 
1415 if( strchr(zIn,'\'') ){
1416 if( zIn[0]=='-' ){
1417 blob_append_char(pBlob, '.');
1418 blob_append_char(pBlob, '/');
1419 }
1420 for(i=0; (c = (unsigned char)zIn[i])!=0; i++){
1421 if( aSafeChar[c] ) blob_append_char(pBlob, '\\');
1422 blob_append_char(pBlob, (char)c);
1423 }
1424 }else{
1425 blob_append_char(pBlob, '\'');
1426 if( zIn[0]=='-' ){
@@ -1435,24 +1474,100 @@
1435 }
1436
1437 /*
1438 ** COMMAND: test-escaped-arg
1439 **
1440 ** Usage %fossil ARG ...
1441 **
1442 ** Run each argument through blob_append_escaped_arg() and show the
1443 ** result. Append each argument to "fossil test-echo" and run that
1444 ** using fossil_system() to verify that it really does get escaped
1445 ** correctly.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1446 */
1447 void test_escaped_arg__cmd(void){
1448 int i;
1449 Blob x;
 
 
1450 blob_init(&x, 0, 0);
1451 for(i=2; i<g.argc; i++){
1452 fossil_print("%3d [%s]: ", i, g.argv[i]);
1453 blob_appendf(&x, "fossil test-echo %$", g.argv[i]);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1454 fossil_print("%s\n", blob_str(&x));
1455 fossil_system(blob_str(&x));
1456 blob_reset(&x);
1457 }
1458 }
1459
--- src/blob.c
+++ src/blob.c
@@ -1307,25 +1307,39 @@
1307 ** 6x ` a b c d e f g h i j k l m n o
1308 ** 7x p q r s t u v w x y z { | } ~ ^_
1309 */
1310
1311 /*
1312 ** Meanings for bytes in a filename:
1313 **
1314 ** 0 Ordinary character. No encoding required
1315 ** 1 Needs to be escaped
1316 ** 2 Illegal character. Do not allow in a filename
1317 ** 3 First byte of a 2-byte UTF-8
1318 ** 4 First byte of a 3-byte UTF-8
1319 ** 5 First byte of a 4-byte UTF-8
1320 */
1321 static const char aSafeChar[256] = {
1322 #ifdef _WIN32
1323 /* Windows
1324 ** Prohibit: all control characters, including tab, \r and \n
1325 ** Escape: (space) " # $ % & ' ( ) * ; < > ? [ ] ^ ` { | }
1326 */
1327 /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xa xb xc xd xe xf */
1328 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0x */
1329 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 1x */
1330 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 2x */
1331 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, /* 3x */
1332 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 4x */
1333 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, /* 5x */
1334 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 6x */
1335 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, /* 7x */
1336 #else
1337 /* Unix
1338 ** Prohibit: all control characters, including tab, \r and \n
1339 ** Escape: (space) ! " # $ % & ' ( ) * ; < > ? [ \ ] ^ ` { | }
1340 */
1341 /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xa xb xc xd xe xf */
1342 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0x */
1343 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 1x */
1344 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 2x */
1345 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, /* 3x */
@@ -1332,16 +1346,26 @@
1346 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 4x */
1347 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, /* 5x */
1348 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 6x */
1349 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, /* 7x */
1350 #endif
1351 /* all bytes 0x80 through 0xbf are unescaped, being secondary
1352 ** bytes to UTF8 characters. Bytes 0xc0 through 0xff are the
1353 ** first byte of a UTF8 character and do get escaped */
1354 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 8x */
1355 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 9x */
1356 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* ax */
1357 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* bx */
1358 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* cx */
1359 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* dx */
1360 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, /* ex */
1361 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 /* fx */
1362 };
1363
1364 /*
1365 ** pBlob is a shell command under construction. This routine safely
1366 ** appends filename argument zIn.
1367 **
1368 ** The argument is escaped if it contains white space or other characters
1369 ** that need to be escaped for the shell. If zIn contains characters
1370 ** that cannot be safely escaped, then throw a fatal error.
1371 **
@@ -1355,40 +1379,46 @@
1379 int i;
1380 unsigned char c;
1381 int needEscape = 0;
1382 int n = blob_size(pBlob);
1383 char *z = blob_buffer(pBlob);
 
 
 
 
 
1384
1385 /* Any control character is illegal. This prevents \n and \r in an
1386 ** argument. */
1387 for(i=0; (c = (unsigned char)zIn[i])!=0; i++){
1388 if( aSafeChar[c] ){
1389 unsigned char x = aSafeChar[c];
1390 needEscape = 1;
1391 if( x==2 ){
1392 Blob bad;
1393 blob_token(pBlob, &bad);
1394 fossil_fatal("the [%s] argument to the \"%s\" command contains "
1395 "a character (ascii 0x%02x) that is not allowed in "
1396 "filename arguments",
1397 zIn, blob_str(&bad), c);
1398 }else if( x>2 ){
1399 if( (zIn[i+1]&0xc0)!=0x80
1400 || (x>=4 && (zIn[i+2]&0xc0)!=0x80)
1401 || (x==5 && (zIn[i+3]&0xc0)!=0x80)
1402 ){
1403 Blob bad;
1404 blob_token(pBlob, &bad);
1405 fossil_fatal("the [%s] argument to the \"%s\" command contains "
1406 "an illegal UTF-8 character",
1407 zIn, blob_str(&bad));
1408 }
1409 i += x-2;
1410 }
 
1411 }
1412 }
1413
1414 /* Separate from the previous argument by a space */
1415 if( n>0 && !fossil_isspace(z[n-1]) ){
1416 blob_append_char(pBlob, ' ');
1417 }
1418
1419 /* Check for characters that need quoting */
 
1420 if( !needEscape ){
1421 if( zIn[0]=='-' ){
1422 blob_append_char(pBlob, '.');
1423 #if defined(_WIN32)
1424 blob_append_char(pBlob, '\\');
@@ -1397,10 +1427,14 @@
1427 #endif
1428 }
1429 blob_append(pBlob, zIn, -1);
1430 }else{
1431 #if defined(_WIN32)
1432 /* Quoting strategy for windows:
1433 ** Put the entire name inside of "...". Any " characters within
1434 ** the name get doubled.
1435 */
1436 blob_append_char(pBlob, '"');
1437 if( zIn[0]=='-' ){
1438 blob_append_char(pBlob, '.');
1439 blob_append_char(pBlob, '\\');
1440 }else if( zIn[0]=='/' ){
@@ -1410,17 +1444,22 @@
1444 blob_append_char(pBlob, (char)c);
1445 if( c=='"' ) blob_append_char(pBlob, '"');
1446 }
1447 blob_append_char(pBlob, '"');
1448 #else
1449 /* Quoting strategy for unix:
1450 ** If the name does not contain ', then surround the whole thing
1451 ** with '...'. If there is one or more ' characters within the
1452 ** name, then put \ before each special character.
1453 */
1454 if( strchr(zIn,'\'') ){
1455 if( zIn[0]=='-' ){
1456 blob_append_char(pBlob, '.');
1457 blob_append_char(pBlob, '/');
1458 }
1459 for(i=0; (c = (unsigned char)zIn[i])!=0; i++){
1460 if( aSafeChar[c] && aSafeChar[c]!=2 ) blob_append_char(pBlob, '\\');
1461 blob_append_char(pBlob, (char)c);
1462 }
1463 }else{
1464 blob_append_char(pBlob, '\'');
1465 if( zIn[0]=='-' ){
@@ -1435,24 +1474,100 @@
1474 }
1475
1476 /*
1477 ** COMMAND: test-escaped-arg
1478 **
1479 ** Usage %fossil ARGS ...
1480 **
1481 ** Run each argument through blob_append_escaped_arg() and show the
1482 ** result. Append each argument to "fossil test-echo" and run that
1483 ** using fossil_system() to verify that it really does get escaped
1484 ** correctly.
1485 **
1486 ** Other options:
1487 **
1488 ** --hex HEX Skip the --hex flag and instead decode HEX
1489 ** into ascii. This provides a way to insert
1490 ** unusual characters as an argument for testing.
1491 **
1492 ** --compare HEX ASCII Verify that argument ASCII is identical to
1493 ** decoded HEX.
1494 **
1495 ** --fuzz N Run N fuzz cases. Each case is a call
1496 ** to "fossil test-escaped-arg --compare HEX ARG"
1497 ** where HEX and ARG are the same argument.
1498 ** The argument is chosen at random.
1499 */
1500 void test_escaped_arg_command(void){
1501 int i;
1502 Blob x;
1503 const char *zArg;
1504 char zBuf[100];
1505 blob_init(&x, 0, 0);
1506 for(i=2; i<g.argc; i++){
1507 zArg = g.argv[i];
1508 if( fossil_strcmp(zArg, "--hex")==0 && i+1<g.argc ){
1509 size_t n = strlen(g.argv[++i]);
1510 if( n>=(sizeof(zBuf)-1)*2 ){
1511 fossil_fatal("Argument to --hex is too big");
1512 }
1513 memset(zBuf, 0, sizeof(zBuf));
1514 decode16((const unsigned char*)g.argv[i], (unsigned char*)zBuf, (int)n);
1515 zArg = zBuf;
1516 }else if( fossil_strcmp(zArg, "--compare")==0 && i+2<g.argc ){
1517 size_t n = strlen(g.argv[++i]);
1518 if( n>=(sizeof(zBuf)-1)*2 ){
1519 fossil_fatal("HEX argument to --compare is too big");
1520 }
1521 memset(zBuf, 0, sizeof(zBuf));
1522 if( decode16((const unsigned char*)g.argv[i], (unsigned char*)zBuf,
1523 (int)n) ){
1524 fossil_fatal("HEX decode of %s failed", g.argv[i]);
1525 }
1526 zArg = g.argv[++i];
1527 if( zArg[0]=='-' ){
1528 fossil_fatal("filename argument \"%s\" begins with \"-\"", zArg);
1529 }
1530 #ifdef _WIN32
1531 if( zBuf[0]=='-' && zArg[0]=='.' && zArg[1]=='\\' ) zArg += 2;
1532 #else
1533 if( zBuf[0]=='-' && zArg[0]=='.' && zArg[1]=='/' ) zArg += 2;
1534 #endif
1535 if( strcmp(zBuf, zArg)!=0 ){
1536 fossil_fatal("argument disagree: \"%s\" (%s) versus \"%s\"",
1537 zBuf, g.argv[i-1], zArg);
1538 }
1539 continue;
1540 }else if( fossil_strcmp(zArg, "--fuzz")==0 && i+1<g.argc ){
1541 int n = atoi(g.argv[++i]);
1542 int j;
1543 for(j=0; j<n; j++){
1544 unsigned char m, k;
1545 int rc;
1546 unsigned char zWord[100];
1547 sqlite3_randomness(sizeof(m), &m);
1548 m %= 50;
1549 m += 2;
1550 sqlite3_randomness(m, zWord);
1551 for(k=0; k<m; k++){
1552 unsigned char cx = zWord[k];
1553 if( cx<0x20 || cx>=0x7f ){
1554 zWord[k] = "abcdefghijklmnopqrstuvwxyz_"[cx%27];
1555 }
1556 }
1557 zWord[k] = 0;
1558 encode16(zWord, (unsigned char*)zBuf, (int)m);
1559 blob_appendf(&x, "%$ test-escaped-arg --compare %s %$",
1560 g.nameOfExe, zBuf,zWord);
1561 rc = fossil_system(blob_str(&x));
1562 if( rc ) fossil_fatal("failed test (%d): %s\n", rc, blob_str(&x));
1563 blob_reset(&x);
1564 }
1565 continue;
1566 }
1567 fossil_print("%3d [%s]: ", i, zArg);
1568 blob_appendf(&x, "%$ test-echo %$", g.nameOfExe, zArg);
1569 fossil_print("%s\n", blob_str(&x));
1570 fossil_system(blob_str(&x));
1571 blob_reset(&x);
1572 }
1573 }
1574

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button