Fossil SCM
Improve the quoted path parsing and support more escape sequences.
Commit
404d1a2554d0868149b57f029e93f63daceacb9c
Parent
2844434ef548a13…
1 file changed
+48
-51
+48
-51
| --- src/import.c | ||
| +++ src/import.c | ||
| @@ -366,38 +366,54 @@ | ||
| 366 | 366 | ** *pzIn point to the first character past the end of the zero |
| 367 | 367 | ** terminator, or at the zero-terminator at EOL. |
| 368 | 368 | */ |
| 369 | 369 | static char *next_token(char **pzIn){ |
| 370 | 370 | char *z = *pzIn; |
| 371 | - int i; | |
| 372 | - if( z[0]==0 ) return z; | |
| 373 | - for(i=0; z[i] && z[i]!=' ' && z[i]!='\n'; i++){} | |
| 374 | - if( z[i] ){ | |
| 375 | - z[i] = 0; | |
| 376 | - *pzIn = &z[i+1]; | |
| 377 | - }else{ | |
| 378 | - *pzIn = &z[i]; | |
| 379 | - } | |
| 380 | - return z; | |
| 381 | -} | |
| 382 | - | |
| 383 | -/* | |
| 384 | -** Return a token that is all text up to (but omitting) the next \n | |
| 385 | -** or \r\n. | |
| 386 | -*/ | |
| 387 | -static char *rest_of_line(char **pzIn){ | |
| 388 | - char *z = *pzIn; | |
| 389 | - int i; | |
| 390 | - if( z[0]==0 ) return z; | |
| 391 | - for(i=0; z[i] && z[i]!='\r' && z[i]!='\n'; i++){} | |
| 392 | - if( z[i] ){ | |
| 393 | - if( z[i]=='\r' && z[i+1]=='\n' ){ | |
| 394 | - z[i] = 0; | |
| 395 | - i++; | |
| 396 | - }else{ | |
| 397 | - z[i] = 0; | |
| 398 | - } | |
| 371 | + int i, j; | |
| 372 | + if( z[0]==0 ) return z; | |
| 373 | + if( z[0]=='"' ){ | |
| 374 | + /* Quoted path name */ | |
| 375 | + z++; | |
| 376 | + for(i=0, j=0; z[i] && z[i]!='"' && z[i]!='\n'; i++, j++){ | |
| 377 | + if( z[i]=='\\' && z[i+1] ){ | |
| 378 | + char v, c = z[++i]; | |
| 379 | + switch( c ){ | |
| 380 | + case 0: | |
| 381 | + case '"': c = '"'; break; | |
| 382 | + case '\\': c = '\\'; break; | |
| 383 | + case 'a': c = '\a'; break; | |
| 384 | + case 'b': c = '\b'; break; | |
| 385 | + case 'f': c = '\f'; break; | |
| 386 | + case 'n': c = '\n'; break; | |
| 387 | + case 'r': c = '\r'; break; | |
| 388 | + case 't': c = '\t'; break; | |
| 389 | + case 'v': c = '\v'; break; | |
| 390 | + case '0': case '1': case '2': case '3': | |
| 391 | + v = (c - '0') << 6; | |
| 392 | + c = z[++i]; | |
| 393 | + if( c < '0' || c > '7' ) | |
| 394 | + fossil_fatal("Invalid octal digit '%c' in sequence", c); | |
| 395 | + v |= (c - '0') << 3; | |
| 396 | + c = z[++i]; | |
| 397 | + if( c < '0' || c > '7' ) | |
| 398 | + fossil_fatal("Invalid octal digit '%c' in sequence", c); | |
| 399 | + v |= (c - '0'); | |
| 400 | + c = v; | |
| 401 | + break; | |
| 402 | + default: | |
| 403 | + fossil_fatal("Unrecognized escape sequence \"\\%c\"", c); | |
| 404 | + } | |
| 405 | + z[j] = c; | |
| 406 | + } | |
| 407 | + } | |
| 408 | + if( z[i]=='"' ) z[i++] = 0; | |
| 409 | + }else{ | |
| 410 | + /* Unquoted path name or generic token */ | |
| 411 | + for(i=0; z[i] && z[i]!=' ' && z[i]!='\n'; i++){} | |
| 412 | + } | |
| 413 | + if( z[i] ){ | |
| 414 | + z[i] = 0; | |
| 399 | 415 | *pzIn = &z[i+1]; |
| 400 | 416 | }else{ |
| 401 | 417 | *pzIn = &z[i]; |
| 402 | 418 | } |
| 403 | 419 | return z; |
| @@ -516,27 +532,10 @@ | ||
| 516 | 532 | i++; |
| 517 | 533 | } |
| 518 | 534 | return 0; |
| 519 | 535 | } |
| 520 | 536 | |
| 521 | -/* | |
| 522 | -** Dequote a fast-export filename. Filenames are normally unquoted. But | |
| 523 | -** if they contain some obscure special characters, quotes might be added. | |
| 524 | -*/ | |
| 525 | -static void dequote_git_filename(char *zName){ | |
| 526 | - int n, i, j; | |
| 527 | - if( zName==0 || zName[0]!='"' ) return; | |
| 528 | - n = (int)strlen(zName); | |
| 529 | - if( zName[n-1]!='"' ) return; | |
| 530 | - for(i=0, j=1; j<n-1; j++){ | |
| 531 | - char c = zName[j]; | |
| 532 | - if( c=='\\' ) c = zName[++j]; | |
| 533 | - zName[i++] = c; | |
| 534 | - } | |
| 535 | - zName[i] = 0; | |
| 536 | -} | |
| 537 | - | |
| 538 | 537 | |
| 539 | 538 | /* |
| 540 | 539 | ** Read the git-fast-import format from pIn and insert the corresponding |
| 541 | 540 | ** content into the database. |
| 542 | 541 | */ |
| @@ -676,12 +675,11 @@ | ||
| 676 | 675 | if( memcmp(zLine, "M ", 2)==0 ){ |
| 677 | 676 | import_prior_files(); |
| 678 | 677 | z = &zLine[2]; |
| 679 | 678 | zPerm = next_token(&z); |
| 680 | 679 | zUuid = next_token(&z); |
| 681 | - zName = rest_of_line(&z); | |
| 682 | - dequote_git_filename(zName); | |
| 680 | + zName = next_token(&z); | |
| 683 | 681 | i = 0; |
| 684 | 682 | pFile = import_find_file(zName, &i, gg.nFile); |
| 685 | 683 | if( pFile==0 ){ |
| 686 | 684 | pFile = import_add_file(); |
| 687 | 685 | pFile->zName = fossil_strdup(zName); |
| @@ -700,12 +698,11 @@ | ||
| 700 | 698 | pFile->hasChanged = 1; |
| 701 | 699 | }else |
| 702 | 700 | if( memcmp(zLine, "D ", 2)==0 ){ |
| 703 | 701 | import_prior_files(); |
| 704 | 702 | z = &zLine[2]; |
| 705 | - zName = rest_of_line(&z); | |
| 706 | - dequote_git_filename(zName); | |
| 703 | + zName = next_token(&z); | |
| 707 | 704 | i = 0; |
| 708 | 705 | pFile = import_find_file(zName, &i, gg.nFile); |
| 709 | 706 | if( pFile!=0 ){ |
| 710 | 707 | /* Do not remove the item from gg.aFile, just mark as deleted */ |
| 711 | 708 | fossil_free(pFile->zUuid); |
| @@ -717,11 +714,11 @@ | ||
| 717 | 714 | }else |
| 718 | 715 | if( memcmp(zLine, "C ", 2)==0 ){ |
| 719 | 716 | import_prior_files(); |
| 720 | 717 | z = &zLine[2]; |
| 721 | 718 | zFrom = next_token(&z); |
| 722 | - zTo = rest_of_line(&z); | |
| 719 | + zTo = next_token(&z); | |
| 723 | 720 | i = 0; |
| 724 | 721 | pFile = import_find_file(zFrom, &i, gg.nFile); |
| 725 | 722 | if( pFile!=0 ){ |
| 726 | 723 | int j = 0; |
| 727 | 724 | pNew = import_find_file(zTo, &j, gg.nFile); |
| @@ -742,11 +739,11 @@ | ||
| 742 | 739 | }else |
| 743 | 740 | if( memcmp(zLine, "R ", 2)==0 ){ |
| 744 | 741 | import_prior_files(); |
| 745 | 742 | z = &zLine[2]; |
| 746 | 743 | zFrom = next_token(&z); |
| 747 | - zTo = rest_of_line(&z); | |
| 744 | + zTo = next_token(&z); | |
| 748 | 745 | i = 0; |
| 749 | 746 | pFile = import_find_file(zFrom, &i, gg.nFile); |
| 750 | 747 | if( pFile!=0 ){ |
| 751 | 748 | /* |
| 752 | 749 | ** File renames in delta manifests require two "F" cards: one to |
| 753 | 750 |
| --- src/import.c | |
| +++ src/import.c | |
| @@ -366,38 +366,54 @@ | |
| 366 | ** *pzIn point to the first character past the end of the zero |
| 367 | ** terminator, or at the zero-terminator at EOL. |
| 368 | */ |
| 369 | static char *next_token(char **pzIn){ |
| 370 | char *z = *pzIn; |
| 371 | int i; |
| 372 | if( z[0]==0 ) return z; |
| 373 | for(i=0; z[i] && z[i]!=' ' && z[i]!='\n'; i++){} |
| 374 | if( z[i] ){ |
| 375 | z[i] = 0; |
| 376 | *pzIn = &z[i+1]; |
| 377 | }else{ |
| 378 | *pzIn = &z[i]; |
| 379 | } |
| 380 | return z; |
| 381 | } |
| 382 | |
| 383 | /* |
| 384 | ** Return a token that is all text up to (but omitting) the next \n |
| 385 | ** or \r\n. |
| 386 | */ |
| 387 | static char *rest_of_line(char **pzIn){ |
| 388 | char *z = *pzIn; |
| 389 | int i; |
| 390 | if( z[0]==0 ) return z; |
| 391 | for(i=0; z[i] && z[i]!='\r' && z[i]!='\n'; i++){} |
| 392 | if( z[i] ){ |
| 393 | if( z[i]=='\r' && z[i+1]=='\n' ){ |
| 394 | z[i] = 0; |
| 395 | i++; |
| 396 | }else{ |
| 397 | z[i] = 0; |
| 398 | } |
| 399 | *pzIn = &z[i+1]; |
| 400 | }else{ |
| 401 | *pzIn = &z[i]; |
| 402 | } |
| 403 | return z; |
| @@ -516,27 +532,10 @@ | |
| 516 | i++; |
| 517 | } |
| 518 | return 0; |
| 519 | } |
| 520 | |
| 521 | /* |
| 522 | ** Dequote a fast-export filename. Filenames are normally unquoted. But |
| 523 | ** if they contain some obscure special characters, quotes might be added. |
| 524 | */ |
| 525 | static void dequote_git_filename(char *zName){ |
| 526 | int n, i, j; |
| 527 | if( zName==0 || zName[0]!='"' ) return; |
| 528 | n = (int)strlen(zName); |
| 529 | if( zName[n-1]!='"' ) return; |
| 530 | for(i=0, j=1; j<n-1; j++){ |
| 531 | char c = zName[j]; |
| 532 | if( c=='\\' ) c = zName[++j]; |
| 533 | zName[i++] = c; |
| 534 | } |
| 535 | zName[i] = 0; |
| 536 | } |
| 537 | |
| 538 | |
| 539 | /* |
| 540 | ** Read the git-fast-import format from pIn and insert the corresponding |
| 541 | ** content into the database. |
| 542 | */ |
| @@ -676,12 +675,11 @@ | |
| 676 | if( memcmp(zLine, "M ", 2)==0 ){ |
| 677 | import_prior_files(); |
| 678 | z = &zLine[2]; |
| 679 | zPerm = next_token(&z); |
| 680 | zUuid = next_token(&z); |
| 681 | zName = rest_of_line(&z); |
| 682 | dequote_git_filename(zName); |
| 683 | i = 0; |
| 684 | pFile = import_find_file(zName, &i, gg.nFile); |
| 685 | if( pFile==0 ){ |
| 686 | pFile = import_add_file(); |
| 687 | pFile->zName = fossil_strdup(zName); |
| @@ -700,12 +698,11 @@ | |
| 700 | pFile->hasChanged = 1; |
| 701 | }else |
| 702 | if( memcmp(zLine, "D ", 2)==0 ){ |
| 703 | import_prior_files(); |
| 704 | z = &zLine[2]; |
| 705 | zName = rest_of_line(&z); |
| 706 | dequote_git_filename(zName); |
| 707 | i = 0; |
| 708 | pFile = import_find_file(zName, &i, gg.nFile); |
| 709 | if( pFile!=0 ){ |
| 710 | /* Do not remove the item from gg.aFile, just mark as deleted */ |
| 711 | fossil_free(pFile->zUuid); |
| @@ -717,11 +714,11 @@ | |
| 717 | }else |
| 718 | if( memcmp(zLine, "C ", 2)==0 ){ |
| 719 | import_prior_files(); |
| 720 | z = &zLine[2]; |
| 721 | zFrom = next_token(&z); |
| 722 | zTo = rest_of_line(&z); |
| 723 | i = 0; |
| 724 | pFile = import_find_file(zFrom, &i, gg.nFile); |
| 725 | if( pFile!=0 ){ |
| 726 | int j = 0; |
| 727 | pNew = import_find_file(zTo, &j, gg.nFile); |
| @@ -742,11 +739,11 @@ | |
| 742 | }else |
| 743 | if( memcmp(zLine, "R ", 2)==0 ){ |
| 744 | import_prior_files(); |
| 745 | z = &zLine[2]; |
| 746 | zFrom = next_token(&z); |
| 747 | zTo = rest_of_line(&z); |
| 748 | i = 0; |
| 749 | pFile = import_find_file(zFrom, &i, gg.nFile); |
| 750 | if( pFile!=0 ){ |
| 751 | /* |
| 752 | ** File renames in delta manifests require two "F" cards: one to |
| 753 |
| --- src/import.c | |
| +++ src/import.c | |
| @@ -366,38 +366,54 @@ | |
| 366 | ** *pzIn point to the first character past the end of the zero |
| 367 | ** terminator, or at the zero-terminator at EOL. |
| 368 | */ |
| 369 | static char *next_token(char **pzIn){ |
| 370 | char *z = *pzIn; |
| 371 | int i, j; |
| 372 | if( z[0]==0 ) return z; |
| 373 | if( z[0]=='"' ){ |
| 374 | /* Quoted path name */ |
| 375 | z++; |
| 376 | for(i=0, j=0; z[i] && z[i]!='"' && z[i]!='\n'; i++, j++){ |
| 377 | if( z[i]=='\\' && z[i+1] ){ |
| 378 | char v, c = z[++i]; |
| 379 | switch( c ){ |
| 380 | case 0: |
| 381 | case '"': c = '"'; break; |
| 382 | case '\\': c = '\\'; break; |
| 383 | case 'a': c = '\a'; break; |
| 384 | case 'b': c = '\b'; break; |
| 385 | case 'f': c = '\f'; break; |
| 386 | case 'n': c = '\n'; break; |
| 387 | case 'r': c = '\r'; break; |
| 388 | case 't': c = '\t'; break; |
| 389 | case 'v': c = '\v'; break; |
| 390 | case '0': case '1': case '2': case '3': |
| 391 | v = (c - '0') << 6; |
| 392 | c = z[++i]; |
| 393 | if( c < '0' || c > '7' ) |
| 394 | fossil_fatal("Invalid octal digit '%c' in sequence", c); |
| 395 | v |= (c - '0') << 3; |
| 396 | c = z[++i]; |
| 397 | if( c < '0' || c > '7' ) |
| 398 | fossil_fatal("Invalid octal digit '%c' in sequence", c); |
| 399 | v |= (c - '0'); |
| 400 | c = v; |
| 401 | break; |
| 402 | default: |
| 403 | fossil_fatal("Unrecognized escape sequence \"\\%c\"", c); |
| 404 | } |
| 405 | z[j] = c; |
| 406 | } |
| 407 | } |
| 408 | if( z[i]=='"' ) z[i++] = 0; |
| 409 | }else{ |
| 410 | /* Unquoted path name or generic token */ |
| 411 | for(i=0; z[i] && z[i]!=' ' && z[i]!='\n'; i++){} |
| 412 | } |
| 413 | if( z[i] ){ |
| 414 | z[i] = 0; |
| 415 | *pzIn = &z[i+1]; |
| 416 | }else{ |
| 417 | *pzIn = &z[i]; |
| 418 | } |
| 419 | return z; |
| @@ -516,27 +532,10 @@ | |
| 532 | i++; |
| 533 | } |
| 534 | return 0; |
| 535 | } |
| 536 | |
| 537 | |
| 538 | /* |
| 539 | ** Read the git-fast-import format from pIn and insert the corresponding |
| 540 | ** content into the database. |
| 541 | */ |
| @@ -676,12 +675,11 @@ | |
| 675 | if( memcmp(zLine, "M ", 2)==0 ){ |
| 676 | import_prior_files(); |
| 677 | z = &zLine[2]; |
| 678 | zPerm = next_token(&z); |
| 679 | zUuid = next_token(&z); |
| 680 | zName = next_token(&z); |
| 681 | i = 0; |
| 682 | pFile = import_find_file(zName, &i, gg.nFile); |
| 683 | if( pFile==0 ){ |
| 684 | pFile = import_add_file(); |
| 685 | pFile->zName = fossil_strdup(zName); |
| @@ -700,12 +698,11 @@ | |
| 698 | pFile->hasChanged = 1; |
| 699 | }else |
| 700 | if( memcmp(zLine, "D ", 2)==0 ){ |
| 701 | import_prior_files(); |
| 702 | z = &zLine[2]; |
| 703 | zName = next_token(&z); |
| 704 | i = 0; |
| 705 | pFile = import_find_file(zName, &i, gg.nFile); |
| 706 | if( pFile!=0 ){ |
| 707 | /* Do not remove the item from gg.aFile, just mark as deleted */ |
| 708 | fossil_free(pFile->zUuid); |
| @@ -717,11 +714,11 @@ | |
| 714 | }else |
| 715 | if( memcmp(zLine, "C ", 2)==0 ){ |
| 716 | import_prior_files(); |
| 717 | z = &zLine[2]; |
| 718 | zFrom = next_token(&z); |
| 719 | zTo = next_token(&z); |
| 720 | i = 0; |
| 721 | pFile = import_find_file(zFrom, &i, gg.nFile); |
| 722 | if( pFile!=0 ){ |
| 723 | int j = 0; |
| 724 | pNew = import_find_file(zTo, &j, gg.nFile); |
| @@ -742,11 +739,11 @@ | |
| 739 | }else |
| 740 | if( memcmp(zLine, "R ", 2)==0 ){ |
| 741 | import_prior_files(); |
| 742 | z = &zLine[2]; |
| 743 | zFrom = next_token(&z); |
| 744 | zTo = next_token(&z); |
| 745 | i = 0; |
| 746 | pFile = import_find_file(zFrom, &i, gg.nFile); |
| 747 | if( pFile!=0 ){ |
| 748 | /* |
| 749 | ** File renames in delta manifests require two "F" cards: one to |
| 750 |