Fossil SCM
Improve filename shell escaping logic to deal better with UTF-8 characters.
Commit
19f195a838ba539c35e581e42d67144a72aca218e00388cdd241a4608a6f5b9b
Parent
1190034e377a370…
1 file changed
+137
-22
+137
-22
| --- src/blob.c | ||
| +++ src/blob.c | ||
| @@ -1307,25 +1307,39 @@ | ||
| 1307 | 1307 | ** 6x ` a b c d e f g h i j k l m n o |
| 1308 | 1308 | ** 7x p q r s t u v w x y z { | } ~ ^_ |
| 1309 | 1309 | */ |
| 1310 | 1310 | |
| 1311 | 1311 | /* |
| 1312 | -** Characters that need to be escaped are marked with 1. | |
| 1313 | -** Illegal characters are marked with 2. | |
| 1312 | +** Meanings for bytes in a filename: | |
| 1313 | +** | |
| 1314 | +** 0 Ordinary character. No encoding required | |
| 1315 | +** 1 Needs to be escaped | |
| 1316 | +** 2 Illegal character. Do not allow in a filename | |
| 1317 | +** 3 First byte of a 2-byte UTF-8 | |
| 1318 | +** 4 First byte of a 3-byte UTF-8 | |
| 1319 | +** 5 First byte of a 4-byte UTF-8 | |
| 1314 | 1320 | */ |
| 1315 | 1321 | static const char aSafeChar[256] = { |
| 1316 | 1322 | #ifdef _WIN32 |
| 1323 | +/* Windows | |
| 1324 | +** Prohibit: all control characters, including tab, \r and \n | |
| 1325 | +** Escape: (space) " # $ % & ' ( ) * ; < > ? [ ] ^ ` { | } | |
| 1326 | +*/ | |
| 1317 | 1327 | /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xa xb xc xd xe xf */ |
| 1318 | 1328 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0x */ |
| 1319 | 1329 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 1x */ |
| 1320 | - 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, /* 2x */ | |
| 1321 | - 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, /* 3x */ | |
| 1330 | + 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 2x */ | |
| 1331 | + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, /* 3x */ | |
| 1322 | 1332 | 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 4x */ |
| 1323 | 1333 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, /* 5x */ |
| 1324 | 1334 | 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 6x */ |
| 1325 | 1335 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, /* 7x */ |
| 1326 | 1336 | #else |
| 1337 | +/* Unix | |
| 1338 | +** Prohibit: all control characters, including tab, \r and \n | |
| 1339 | +** Escape: (space) ! " # $ % & ' ( ) * ; < > ? [ \ ] ^ ` { | } | |
| 1340 | +*/ | |
| 1327 | 1341 | /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xa xb xc xd xe xf */ |
| 1328 | 1342 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0x */ |
| 1329 | 1343 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 1x */ |
| 1330 | 1344 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 2x */ |
| 1331 | 1345 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, /* 3x */ |
| @@ -1332,16 +1346,26 @@ | ||
| 1332 | 1346 | 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 4x */ |
| 1333 | 1347 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, /* 5x */ |
| 1334 | 1348 | 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 6x */ |
| 1335 | 1349 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, /* 7x */ |
| 1336 | 1350 | #endif |
| 1337 | - /* All the rest are zeros */ | |
| 1351 | + /* Bytes 0x80 through 0xbf are secondary (continuation) bytes of | |
| 1352 | + ** UTF-8 characters: illegal on their own (2), never escaped. Bytes 0xc0 | |
| 1353 | + ** through 0xff are the first byte of a UTF-8 character and do get escaped */ | |
| 1354 | + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 8x */ | |
| 1355 | + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 9x */ | |
| 1356 | + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* ax */ | |
| 1357 | + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* bx */ | |
| 1358 | + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* cx */ | |
| 1359 | + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* dx */ | |
| 1360 | + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, /* ex */ | |
| 1361 | + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 /* fx */ | |
| 1338 | 1362 | }; |
| 1339 | 1363 | |
| 1340 | 1364 | /* |
| 1341 | 1365 | ** pBlob is a shell command under construction. This routine safely |
| 1342 | -** appends argument zIn. | |
| 1366 | +** appends filename argument zIn. | |
| 1343 | 1367 | ** |
| 1344 | 1368 | ** The argument is escaped if it contains white space or other characters |
| 1345 | 1369 | ** that need to be escaped for the shell. If zIn contains characters |
| 1346 | 1370 | ** that cannot be safely escaped, then throw a fatal error. |
| 1347 | 1371 | ** |
| @@ -1355,40 +1379,46 @@ | ||
| 1355 | 1379 | int i; |
| 1356 | 1380 | unsigned char c; |
| 1357 | 1381 | int needEscape = 0; |
| 1358 | 1382 | int n = blob_size(pBlob); |
| 1359 | 1383 | char *z = blob_buffer(pBlob); |
| 1360 | -#if defined(_WIN32) | |
| 1361 | - const char *zNeedQuote = "\"^[];*? "; | |
| 1362 | -#else | |
| 1363 | - const char *zNeedQuote = "\"'\\*?$&|` "; | |
| 1364 | -#endif | |
| 1365 | 1384 | |
| 1366 | 1385 | /* Any control character is illegal. This prevents \n and \r in an |
| 1367 | 1386 | ** argument. */ |
| 1368 | 1387 | for(i=0; (c = (unsigned char)zIn[i])!=0; i++){ |
| 1369 | 1388 | if( aSafeChar[c] ){ |
| 1370 | - if( aSafeChar[c]==2 ){ | |
| 1389 | + unsigned char x = aSafeChar[c]; | |
| 1390 | + needEscape = 1; | |
| 1391 | + if( x==2 ){ | |
| 1371 | 1392 | Blob bad; |
| 1372 | 1393 | blob_token(pBlob, &bad); |
| 1373 | 1394 | fossil_fatal("the [%s] argument to the \"%s\" command contains " |
| 1374 | - "a character (ascii 0x%02x) that is a security risk", | |
| 1395 | + "a character (ascii 0x%02x) that is not allowed in " | |
| 1396 | + "filename arguments", | |
| 1375 | 1397 | zIn, blob_str(&bad), c); |
| 1376 | - }else{ | |
| 1377 | - needEscape = 1; | |
| 1398 | + }else if( x>2 ){ | |
| 1399 | + if( (zIn[i+1]&0xc0)!=0x80 | |
| 1400 | + || (x>=4 && (zIn[i+2]&0xc0)!=0x80) | |
| 1401 | + || (x==5 && (zIn[i+3]&0xc0)!=0x80) | |
| 1402 | + ){ | |
| 1403 | + Blob bad; | |
| 1404 | + blob_token(pBlob, &bad); | |
| 1405 | + fossil_fatal("the [%s] argument to the \"%s\" command contains " | |
| 1406 | + "an illegal UTF-8 character", | |
| 1407 | + zIn, blob_str(&bad)); | |
| 1408 | + } | |
| 1409 | + i += x-2; | |
| 1378 | 1410 | } |
| 1379 | - break; | |
| 1380 | 1411 | } |
| 1381 | 1412 | } |
| 1382 | 1413 | |
| 1383 | 1414 | /* Separate from the previous argument by a space */ |
| 1384 | 1415 | if( n>0 && !fossil_isspace(z[n-1]) ){ |
| 1385 | 1416 | blob_append_char(pBlob, ' '); |
| 1386 | 1417 | } |
| 1387 | 1418 | |
| 1388 | 1419 | /* Check for characters that need quoting */ |
| 1389 | - needEscape = strpbrk(zIn, zNeedQuote)!=0; | |
| 1390 | 1420 | if( !needEscape ){ |
| 1391 | 1421 | if( zIn[0]=='-' ){ |
| 1392 | 1422 | blob_append_char(pBlob, '.'); |
| 1393 | 1423 | #if defined(_WIN32) |
| 1394 | 1424 | blob_append_char(pBlob, '\\'); |
| @@ -1397,10 +1427,14 @@ | ||
| 1397 | 1427 | #endif |
| 1398 | 1428 | } |
| 1399 | 1429 | blob_append(pBlob, zIn, -1); |
| 1400 | 1430 | }else{ |
| 1401 | 1431 | #if defined(_WIN32) |
| 1432 | + /* Quoting strategy for windows: | |
| 1433 | + ** Put the entire name inside of "...". Any " characters within | |
| 1434 | + ** the name get doubled. | |
| 1435 | + */ | |
| 1402 | 1436 | blob_append_char(pBlob, '"'); |
| 1403 | 1437 | if( zIn[0]=='-' ){ |
| 1404 | 1438 | blob_append_char(pBlob, '.'); |
| 1405 | 1439 | blob_append_char(pBlob, '\\'); |
| 1406 | 1440 | }else if( zIn[0]=='/' ){ |
| @@ -1410,17 +1444,22 @@ | ||
| 1410 | 1444 | blob_append_char(pBlob, (char)c); |
| 1411 | 1445 | if( c=='"' ) blob_append_char(pBlob, '"'); |
| 1412 | 1446 | } |
| 1413 | 1447 | blob_append_char(pBlob, '"'); |
| 1414 | 1448 | #else |
| 1449 | + /* Quoting strategy for unix: | |
| 1450 | + ** If the name does not contain ', then surround the whole thing | |
| 1451 | + ** with '...'. If there is one or more ' characters within the | |
| 1452 | + ** name, then put \ before each special character. | |
| 1453 | + */ | |
| 1415 | 1454 | if( strchr(zIn,'\'') ){ |
| 1416 | 1455 | if( zIn[0]=='-' ){ |
| 1417 | 1456 | blob_append_char(pBlob, '.'); |
| 1418 | 1457 | blob_append_char(pBlob, '/'); |
| 1419 | 1458 | } |
| 1420 | 1459 | for(i=0; (c = (unsigned char)zIn[i])!=0; i++){ |
| 1421 | - if( aSafeChar[c] ) blob_append_char(pBlob, '\\'); | |
| 1460 | + if( aSafeChar[c] && aSafeChar[c]!=2 ) blob_append_char(pBlob, '\\'); | |
| 1422 | 1461 | blob_append_char(pBlob, (char)c); |
| 1423 | 1462 | } |
| 1424 | 1463 | }else{ |
| 1425 | 1464 | blob_append_char(pBlob, '\''); |
| 1426 | 1465 | if( zIn[0]=='-' ){ |
| @@ -1435,24 +1474,100 @@ | ||
| 1435 | 1474 | } |
| 1436 | 1475 | |
| 1437 | 1476 | /* |
| 1438 | 1477 | ** COMMAND: test-escaped-arg |
| 1439 | 1478 | ** |
| 1440 | -** Usage %fossil ARG ... | |
| 1479 | +** Usage %fossil ARGS ... | |
| 1441 | 1480 | ** |
| 1442 | 1481 | ** Run each argument through blob_append_escaped_arg() and show the |
| 1443 | 1482 | ** result. Append each argument to "fossil test-echo" and run that |
| 1444 | 1483 | ** using fossil_system() to verify that it really does get escaped |
| 1445 | 1484 | ** correctly. |
| 1485 | +** | |
| 1486 | +** Other options: | |
| 1487 | +** | |
| 1488 | +** --hex HEX Skip the --hex flag and instead decode HEX | |
| 1489 | +** into ascii. This provides a way to insert | |
| 1490 | +** unusual characters as an argument for testing. | |
| 1491 | +** | |
| 1492 | +** --compare HEX ASCII Verify that argument ASCII is identical to | |
| 1493 | +** decoded HEX. | |
| 1494 | +** | |
| 1495 | +** --fuzz N Run N fuzz cases. Each case is a call | |
| 1496 | +** to "fossil test-escaped-arg --compare HEX ARG" | |
| 1497 | +** where HEX and ARG are the same argument. | |
| 1498 | +** The argument is chosen at random. | |
| 1446 | 1499 | */ |
| 1447 | -void test_escaped_arg__cmd(void){ | |
| 1500 | +void test_escaped_arg_command(void){ | |
| 1448 | 1501 | int i; |
| 1449 | 1502 | Blob x; |
| 1503 | + const char *zArg; | |
| 1504 | + char zBuf[100]; | |
| 1450 | 1505 | blob_init(&x, 0, 0); |
| 1451 | 1506 | for(i=2; i<g.argc; i++){ |
| 1452 | - fossil_print("%3d [%s]: ", i, g.argv[i]); | |
| 1453 | - blob_appendf(&x, "fossil test-echo %$", g.argv[i]); | |
| 1507 | + zArg = g.argv[i]; | |
| 1508 | + if( fossil_strcmp(zArg, "--hex")==0 && i+1<g.argc ){ | |
| 1509 | + size_t n = strlen(g.argv[++i]); | |
| 1510 | + if( n>=(sizeof(zBuf)-1)*2 ){ | |
| 1511 | + fossil_fatal("Argument to --hex is too big"); | |
| 1512 | + } | |
| 1513 | + memset(zBuf, 0, sizeof(zBuf)); | |
| 1514 | + decode16((const unsigned char*)g.argv[i], (unsigned char*)zBuf, (int)n); | |
| 1515 | + zArg = zBuf; | |
| 1516 | + }else if( fossil_strcmp(zArg, "--compare")==0 && i+2<g.argc ){ | |
| 1517 | + size_t n = strlen(g.argv[++i]); | |
| 1518 | + if( n>=(sizeof(zBuf)-1)*2 ){ | |
| 1519 | + fossil_fatal("HEX argument to --compare is too big"); | |
| 1520 | + } | |
| 1521 | + memset(zBuf, 0, sizeof(zBuf)); | |
| 1522 | + if( decode16((const unsigned char*)g.argv[i], (unsigned char*)zBuf, | |
| 1523 | + (int)n) ){ | |
| 1524 | + fossil_fatal("HEX decode of %s failed", g.argv[i]); | |
| 1525 | + } | |
| 1526 | + zArg = g.argv[++i]; | |
| 1527 | + if( zArg[0]=='-' ){ | |
| 1528 | + fossil_fatal("filename argument \"%s\" begins with \"-\"", zArg); | |
| 1529 | + } | |
| 1530 | +#ifdef _WIN32 | |
| 1531 | + if( zBuf[0]=='-' && zArg[0]=='.' && zArg[1]=='\\' ) zArg += 2; | |
| 1532 | +#else | |
| 1533 | + if( zBuf[0]=='-' && zArg[0]=='.' && zArg[1]=='/' ) zArg += 2; | |
| 1534 | +#endif | |
| 1535 | + if( strcmp(zBuf, zArg)!=0 ){ | |
| 1536 | + fossil_fatal("argument disagree: \"%s\" (%s) versus \"%s\"", | |
| 1537 | + zBuf, g.argv[i-1], zArg); | |
| 1538 | + } | |
| 1539 | + continue; | |
| 1540 | + }else if( fossil_strcmp(zArg, "--fuzz")==0 && i+1<g.argc ){ | |
| 1541 | + int n = atoi(g.argv[++i]); | |
| 1542 | + int j; | |
| 1543 | + for(j=0; j<n; j++){ | |
| 1544 | + unsigned char m, k; | |
| 1545 | + int rc; | |
| 1546 | + unsigned char zWord[100]; | |
| 1547 | + sqlite3_randomness(sizeof(m), &m); | |
| 1548 | + m %= 50; | |
| 1549 | + m += 2; | |
| 1550 | + sqlite3_randomness(m, zWord); | |
| 1551 | + for(k=0; k<m; k++){ | |
| 1552 | + unsigned char cx = zWord[k]; | |
| 1553 | + if( cx<0x20 || cx>=0x7f ){ | |
| 1554 | + zWord[k] = "abcdefghijklmnopqrstuvwxyz_"[cx%27]; | |
| 1555 | + } | |
| 1556 | + } | |
| 1557 | + zWord[k] = 0; | |
| 1558 | + encode16(zWord, (unsigned char*)zBuf, (int)m); | |
| 1559 | + blob_appendf(&x, "%$ test-escaped-arg --compare %s %$", | |
| 1560 | + g.nameOfExe, zBuf,zWord); | |
| 1561 | + rc = fossil_system(blob_str(&x)); | |
| 1562 | + if( rc ) fossil_fatal("failed test (%d): %s\n", rc, blob_str(&x)); | |
| 1563 | + blob_reset(&x); | |
| 1564 | + } | |
| 1565 | + continue; | |
| 1566 | + } | |
| 1567 | + fossil_print("%3d [%s]: ", i, zArg); | |
| 1568 | + blob_appendf(&x, "%$ test-echo %$", g.nameOfExe, zArg); | |
| 1454 | 1569 | fossil_print("%s\n", blob_str(&x)); |
| 1455 | 1570 | fossil_system(blob_str(&x)); |
| 1456 | 1571 | blob_reset(&x); |
| 1457 | 1572 | } |
| 1458 | 1573 | } |
| 1459 | 1574 |
| --- src/blob.c | |
| +++ src/blob.c | |
| @@ -1307,25 +1307,39 @@ | |
| 1307 | ** 6x ` a b c d e f g h i j k l m n o |
| 1308 | ** 7x p q r s t u v w x y z { | } ~ ^_ |
| 1309 | */ |
| 1310 | |
| 1311 | /* |
| 1312 | ** Characters that need to be escaped are marked with 1. |
| 1313 | ** Illegal characters are marked with 2. |
| 1314 | */ |
| 1315 | static const char aSafeChar[256] = { |
| 1316 | #ifdef _WIN32 |
| 1317 | /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xa xb xc xd xe xf */ |
| 1318 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0x */ |
| 1319 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 1x */ |
| 1320 | 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, /* 2x */ |
| 1321 | 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, /* 3x */ |
| 1322 | 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 4x */ |
| 1323 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, /* 5x */ |
| 1324 | 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 6x */ |
| 1325 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, /* 7x */ |
| 1326 | #else |
| 1327 | /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xa xb xc xd xe xf */ |
| 1328 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0x */ |
| 1329 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 1x */ |
| 1330 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 2x */ |
| 1331 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, /* 3x */ |
| @@ -1332,16 +1346,26 @@ | |
| 1332 | 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 4x */ |
| 1333 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, /* 5x */ |
| 1334 | 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 6x */ |
| 1335 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, /* 7x */ |
| 1336 | #endif |
| 1337 | /* All the rest are zeros */ |
| 1338 | }; |
| 1339 | |
| 1340 | /* |
| 1341 | ** pBlob is a shell command under construction. This routine safely |
| 1342 | ** appends argument zIn. |
| 1343 | ** |
| 1344 | ** The argument is escaped if it contains white space or other characters |
| 1345 | ** that need to be escaped for the shell. If zIn contains characters |
| 1346 | ** that cannot be safely escaped, then throw a fatal error. |
| 1347 | ** |
| @@ -1355,40 +1379,46 @@ | |
| 1355 | int i; |
| 1356 | unsigned char c; |
| 1357 | int needEscape = 0; |
| 1358 | int n = blob_size(pBlob); |
| 1359 | char *z = blob_buffer(pBlob); |
| 1360 | #if defined(_WIN32) |
| 1361 | const char *zNeedQuote = "\"^[];*? "; |
| 1362 | #else |
| 1363 | const char *zNeedQuote = "\"'\\*?$&|` "; |
| 1364 | #endif |
| 1365 | |
| 1366 | /* Any control character is illegal. This prevents \n and \r in an |
| 1367 | ** argument. */ |
| 1368 | for(i=0; (c = (unsigned char)zIn[i])!=0; i++){ |
| 1369 | if( aSafeChar[c] ){ |
| 1370 | if( aSafeChar[c]==2 ){ |
| 1371 | Blob bad; |
| 1372 | blob_token(pBlob, &bad); |
| 1373 | fossil_fatal("the [%s] argument to the \"%s\" command contains " |
| 1374 | "a character (ascii 0x%02x) that is a security risk", |
| 1375 | zIn, blob_str(&bad), c); |
| 1376 | }else{ |
| 1377 | needEscape = 1; |
| 1378 | } |
| 1379 | break; |
| 1380 | } |
| 1381 | } |
| 1382 | |
| 1383 | /* Separate from the previous argument by a space */ |
| 1384 | if( n>0 && !fossil_isspace(z[n-1]) ){ |
| 1385 | blob_append_char(pBlob, ' '); |
| 1386 | } |
| 1387 | |
| 1388 | /* Check for characters that need quoting */ |
| 1389 | needEscape = strpbrk(zIn, zNeedQuote)!=0; |
| 1390 | if( !needEscape ){ |
| 1391 | if( zIn[0]=='-' ){ |
| 1392 | blob_append_char(pBlob, '.'); |
| 1393 | #if defined(_WIN32) |
| 1394 | blob_append_char(pBlob, '\\'); |
| @@ -1397,10 +1427,14 @@ | |
| 1397 | #endif |
| 1398 | } |
| 1399 | blob_append(pBlob, zIn, -1); |
| 1400 | }else{ |
| 1401 | #if defined(_WIN32) |
| 1402 | blob_append_char(pBlob, '"'); |
| 1403 | if( zIn[0]=='-' ){ |
| 1404 | blob_append_char(pBlob, '.'); |
| 1405 | blob_append_char(pBlob, '\\'); |
| 1406 | }else if( zIn[0]=='/' ){ |
| @@ -1410,17 +1444,22 @@ | |
| 1410 | blob_append_char(pBlob, (char)c); |
| 1411 | if( c=='"' ) blob_append_char(pBlob, '"'); |
| 1412 | } |
| 1413 | blob_append_char(pBlob, '"'); |
| 1414 | #else |
| 1415 | if( strchr(zIn,'\'') ){ |
| 1416 | if( zIn[0]=='-' ){ |
| 1417 | blob_append_char(pBlob, '.'); |
| 1418 | blob_append_char(pBlob, '/'); |
| 1419 | } |
| 1420 | for(i=0; (c = (unsigned char)zIn[i])!=0; i++){ |
| 1421 | if( aSafeChar[c] ) blob_append_char(pBlob, '\\'); |
| 1422 | blob_append_char(pBlob, (char)c); |
| 1423 | } |
| 1424 | }else{ |
| 1425 | blob_append_char(pBlob, '\''); |
| 1426 | if( zIn[0]=='-' ){ |
| @@ -1435,24 +1474,100 @@ | |
| 1435 | } |
| 1436 | |
| 1437 | /* |
| 1438 | ** COMMAND: test-escaped-arg |
| 1439 | ** |
| 1440 | ** Usage %fossil ARG ... |
| 1441 | ** |
| 1442 | ** Run each argument through blob_append_escaped_arg() and show the |
| 1443 | ** result. Append each argument to "fossil test-echo" and run that |
| 1444 | ** using fossil_system() to verify that it really does get escaped |
| 1445 | ** correctly. |
| 1446 | */ |
| 1447 | void test_escaped_arg__cmd(void){ |
| 1448 | int i; |
| 1449 | Blob x; |
| 1450 | blob_init(&x, 0, 0); |
| 1451 | for(i=2; i<g.argc; i++){ |
| 1452 | fossil_print("%3d [%s]: ", i, g.argv[i]); |
| 1453 | blob_appendf(&x, "fossil test-echo %$", g.argv[i]); |
| 1454 | fossil_print("%s\n", blob_str(&x)); |
| 1455 | fossil_system(blob_str(&x)); |
| 1456 | blob_reset(&x); |
| 1457 | } |
| 1458 | } |
| 1459 |
| --- src/blob.c | |
| +++ src/blob.c | |
| @@ -1307,25 +1307,39 @@ | |
| 1307 | ** 6x ` a b c d e f g h i j k l m n o |
| 1308 | ** 7x p q r s t u v w x y z { | } ~ ^_ |
| 1309 | */ |
| 1310 | |
| 1311 | /* |
| 1312 | ** Meanings for bytes in a filename: |
| 1313 | ** |
| 1314 | ** 0 Ordinary character. No encoding required |
| 1315 | ** 1 Needs to be escaped |
| 1316 | ** 2 Illegal character. Do not allow in a filename |
| 1317 | ** 3 First byte of a 2-byte UTF-8 |
| 1318 | ** 4 First byte of a 3-byte UTF-8 |
| 1319 | ** 5 First byte of a 4-byte UTF-8 |
| 1320 | */ |
| 1321 | static const char aSafeChar[256] = { |
| 1322 | #ifdef _WIN32 |
| 1323 | /* Windows |
| 1324 | ** Prohibit: all control characters, including tab, \r and \n |
| 1325 | ** Escape: (space) " # $ % & ' ( ) * ; < > ? [ ] ^ ` { | } |
| 1326 | */ |
| 1327 | /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xa xb xc xd xe xf */ |
| 1328 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0x */ |
| 1329 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 1x */ |
| 1330 | 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 2x */ |
| 1331 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, /* 3x */ |
| 1332 | 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 4x */ |
| 1333 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, /* 5x */ |
| 1334 | 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 6x */ |
| 1335 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, /* 7x */ |
| 1336 | #else |
| 1337 | /* Unix |
| 1338 | ** Prohibit: all control characters, including tab, \r and \n |
| 1339 | ** Escape: (space) ! " # $ % & ' ( ) * ; < > ? [ \ ] ^ ` { | } |
| 1340 | */ |
| 1341 | /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xa xb xc xd xe xf */ |
| 1342 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0x */ |
| 1343 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 1x */ |
| 1344 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 2x */ |
| 1345 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, /* 3x */ |
| @@ -1332,16 +1346,26 @@ | |
| 1346 | 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 4x */ |
| 1347 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, /* 5x */ |
| 1348 | 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 6x */ |
| 1349 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, /* 7x */ |
| 1350 | #endif |
| 1351 | /* Bytes 0x80 through 0xbf are secondary (continuation) bytes of |
| 1352 | ** UTF-8 characters: illegal on their own (2), never escaped. Bytes 0xc0 |
| 1353 | ** through 0xff are the first byte of a UTF-8 character and do get escaped */ |
| 1354 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 8x */ |
| 1355 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 9x */ |
| 1356 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* ax */ |
| 1357 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* bx */ |
| 1358 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* cx */ |
| 1359 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* dx */ |
| 1360 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, /* ex */ |
| 1361 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 /* fx */ |
| 1362 | }; |
| 1363 | |
| 1364 | /* |
| 1365 | ** pBlob is a shell command under construction. This routine safely |
| 1366 | ** appends filename argument zIn. |
| 1367 | ** |
| 1368 | ** The argument is escaped if it contains white space or other characters |
| 1369 | ** that need to be escaped for the shell. If zIn contains characters |
| 1370 | ** that cannot be safely escaped, then throw a fatal error. |
| 1371 | ** |
| @@ -1355,40 +1379,46 @@ | |
| 1379 | int i; |
| 1380 | unsigned char c; |
| 1381 | int needEscape = 0; |
| 1382 | int n = blob_size(pBlob); |
| 1383 | char *z = blob_buffer(pBlob); |
| 1384 | |
| 1385 | /* Any control character is illegal. This prevents \n and \r in an |
| 1386 | ** argument. */ |
| 1387 | for(i=0; (c = (unsigned char)zIn[i])!=0; i++){ |
| 1388 | if( aSafeChar[c] ){ |
| 1389 | unsigned char x = aSafeChar[c]; |
| 1390 | needEscape = 1; |
| 1391 | if( x==2 ){ |
| 1392 | Blob bad; |
| 1393 | blob_token(pBlob, &bad); |
| 1394 | fossil_fatal("the [%s] argument to the \"%s\" command contains " |
| 1395 | "a character (ascii 0x%02x) that is not allowed in " |
| 1396 | "filename arguments", |
| 1397 | zIn, blob_str(&bad), c); |
| 1398 | }else if( x>2 ){ |
| 1399 | if( (zIn[i+1]&0xc0)!=0x80 |
| 1400 | || (x>=4 && (zIn[i+2]&0xc0)!=0x80) |
| 1401 | || (x==5 && (zIn[i+3]&0xc0)!=0x80) |
| 1402 | ){ |
| 1403 | Blob bad; |
| 1404 | blob_token(pBlob, &bad); |
| 1405 | fossil_fatal("the [%s] argument to the \"%s\" command contains " |
| 1406 | "an illegal UTF-8 character", |
| 1407 | zIn, blob_str(&bad)); |
| 1408 | } |
| 1409 | i += x-2; |
| 1410 | } |
| 1411 | } |
| 1412 | } |
| 1413 | |
| 1414 | /* Separate from the previous argument by a space */ |
| 1415 | if( n>0 && !fossil_isspace(z[n-1]) ){ |
| 1416 | blob_append_char(pBlob, ' '); |
| 1417 | } |
| 1418 | |
| 1419 | /* Check for characters that need quoting */ |
| 1420 | if( !needEscape ){ |
| 1421 | if( zIn[0]=='-' ){ |
| 1422 | blob_append_char(pBlob, '.'); |
| 1423 | #if defined(_WIN32) |
| 1424 | blob_append_char(pBlob, '\\'); |
| @@ -1397,10 +1427,14 @@ | |
| 1427 | #endif |
| 1428 | } |
| 1429 | blob_append(pBlob, zIn, -1); |
| 1430 | }else{ |
| 1431 | #if defined(_WIN32) |
| 1432 | /* Quoting strategy for windows: |
| 1433 | ** Put the entire name inside of "...". Any " characters within |
| 1434 | ** the name get doubled. |
| 1435 | */ |
| 1436 | blob_append_char(pBlob, '"'); |
| 1437 | if( zIn[0]=='-' ){ |
| 1438 | blob_append_char(pBlob, '.'); |
| 1439 | blob_append_char(pBlob, '\\'); |
| 1440 | }else if( zIn[0]=='/' ){ |
| @@ -1410,17 +1444,22 @@ | |
| 1444 | blob_append_char(pBlob, (char)c); |
| 1445 | if( c=='"' ) blob_append_char(pBlob, '"'); |
| 1446 | } |
| 1447 | blob_append_char(pBlob, '"'); |
| 1448 | #else |
| 1449 | /* Quoting strategy for unix: |
| 1450 | ** If the name does not contain ', then surround the whole thing |
| 1451 | ** with '...'. If there is one or more ' characters within the |
| 1452 | ** name, then put \ before each special character. |
| 1453 | */ |
| 1454 | if( strchr(zIn,'\'') ){ |
| 1455 | if( zIn[0]=='-' ){ |
| 1456 | blob_append_char(pBlob, '.'); |
| 1457 | blob_append_char(pBlob, '/'); |
| 1458 | } |
| 1459 | for(i=0; (c = (unsigned char)zIn[i])!=0; i++){ |
| 1460 | if( aSafeChar[c] && aSafeChar[c]!=2 ) blob_append_char(pBlob, '\\'); |
| 1461 | blob_append_char(pBlob, (char)c); |
| 1462 | } |
| 1463 | }else{ |
| 1464 | blob_append_char(pBlob, '\''); |
| 1465 | if( zIn[0]=='-' ){ |
| @@ -1435,24 +1474,100 @@ | |
| 1474 | } |
| 1475 | |
| 1476 | /* |
| 1477 | ** COMMAND: test-escaped-arg |
| 1478 | ** |
| 1479 | ** Usage %fossil ARGS ... |
| 1480 | ** |
| 1481 | ** Run each argument through blob_append_escaped_arg() and show the |
| 1482 | ** result. Append each argument to "fossil test-echo" and run that |
| 1483 | ** using fossil_system() to verify that it really does get escaped |
| 1484 | ** correctly. |
| 1485 | ** |
| 1486 | ** Other options: |
| 1487 | ** |
| 1488 | ** --hex HEX Skip the --hex flag and instead decode HEX |
| 1489 | ** into ascii. This provides a way to insert |
| 1490 | ** unusual characters as an argument for testing. |
| 1491 | ** |
| 1492 | ** --compare HEX ASCII Verify that argument ASCII is identical to |
| 1493 | ** decoded HEX. |
| 1494 | ** |
| 1495 | ** --fuzz N Run N fuzz cases. Each case is a call |
| 1496 | ** to "fossil test-escaped-arg --compare HEX ARG" |
| 1497 | ** where HEX and ARG are the same argument. |
| 1498 | ** The argument is chosen at random. |
| 1499 | */ |
| 1500 | void test_escaped_arg_command(void){ |
| 1501 | int i; |
| 1502 | Blob x; |
| 1503 | const char *zArg; |
| 1504 | char zBuf[100]; |
| 1505 | blob_init(&x, 0, 0); |
| 1506 | for(i=2; i<g.argc; i++){ |
| 1507 | zArg = g.argv[i]; |
| 1508 | if( fossil_strcmp(zArg, "--hex")==0 && i+1<g.argc ){ |
| 1509 | size_t n = strlen(g.argv[++i]); |
| 1510 | if( n>=(sizeof(zBuf)-1)*2 ){ |
| 1511 | fossil_fatal("Argument to --hex is too big"); |
| 1512 | } |
| 1513 | memset(zBuf, 0, sizeof(zBuf)); |
| 1514 | decode16((const unsigned char*)g.argv[i], (unsigned char*)zBuf, (int)n); |
| 1515 | zArg = zBuf; |
| 1516 | }else if( fossil_strcmp(zArg, "--compare")==0 && i+2<g.argc ){ |
| 1517 | size_t n = strlen(g.argv[++i]); |
| 1518 | if( n>=(sizeof(zBuf)-1)*2 ){ |
| 1519 | fossil_fatal("HEX argument to --compare is too big"); |
| 1520 | } |
| 1521 | memset(zBuf, 0, sizeof(zBuf)); |
| 1522 | if( decode16((const unsigned char*)g.argv[i], (unsigned char*)zBuf, |
| 1523 | (int)n) ){ |
| 1524 | fossil_fatal("HEX decode of %s failed", g.argv[i]); |
| 1525 | } |
| 1526 | zArg = g.argv[++i]; |
| 1527 | if( zArg[0]=='-' ){ |
| 1528 | fossil_fatal("filename argument \"%s\" begins with \"-\"", zArg); |
| 1529 | } |
| 1530 | #ifdef _WIN32 |
| 1531 | if( zBuf[0]=='-' && zArg[0]=='.' && zArg[1]=='\\' ) zArg += 2; |
| 1532 | #else |
| 1533 | if( zBuf[0]=='-' && zArg[0]=='.' && zArg[1]=='/' ) zArg += 2; |
| 1534 | #endif |
| 1535 | if( strcmp(zBuf, zArg)!=0 ){ |
| 1536 | fossil_fatal("argument disagree: \"%s\" (%s) versus \"%s\"", |
| 1537 | zBuf, g.argv[i-1], zArg); |
| 1538 | } |
| 1539 | continue; |
| 1540 | }else if( fossil_strcmp(zArg, "--fuzz")==0 && i+1<g.argc ){ |
| 1541 | int n = atoi(g.argv[++i]); |
| 1542 | int j; |
| 1543 | for(j=0; j<n; j++){ |
| 1544 | unsigned char m, k; |
| 1545 | int rc; |
| 1546 | unsigned char zWord[100]; |
| 1547 | sqlite3_randomness(sizeof(m), &m); |
| 1548 | m %= 50; |
| 1549 | m += 2; |
| 1550 | sqlite3_randomness(m, zWord); |
| 1551 | for(k=0; k<m; k++){ |
| 1552 | unsigned char cx = zWord[k]; |
| 1553 | if( cx<0x20 || cx>=0x7f ){ |
| 1554 | zWord[k] = "abcdefghijklmnopqrstuvwxyz_"[cx%27]; |
| 1555 | } |
| 1556 | } |
| 1557 | zWord[k] = 0; |
| 1558 | encode16(zWord, (unsigned char*)zBuf, (int)m); |
| 1559 | blob_appendf(&x, "%$ test-escaped-arg --compare %s %$", |
| 1560 | g.nameOfExe, zBuf,zWord); |
| 1561 | rc = fossil_system(blob_str(&x)); |
| 1562 | if( rc ) fossil_fatal("failed test (%d): %s\n", rc, blob_str(&x)); |
| 1563 | blob_reset(&x); |
| 1564 | } |
| 1565 | continue; |
| 1566 | } |
| 1567 | fossil_print("%3d [%s]: ", i, zArg); |
| 1568 | blob_appendf(&x, "%$ test-echo %$", g.nameOfExe, zArg); |
| 1569 | fossil_print("%s\n", blob_str(&x)); |
| 1570 | fossil_system(blob_str(&x)); |
| 1571 | blob_reset(&x); |
| 1572 | } |
| 1573 | } |
| 1574 |