Fossil SCM

Improve the quoted path parsing and support more escape sequences.

isaac.jurado 2013-07-14 22:21 git-better-import
Commit 404d1a2554d0868149b57f029e93f63daceacb9c
1 file changed +48 -51
+48 -51
--- src/import.c
+++ src/import.c
@@ -366,38 +366,54 @@
366366
** *pzIn point to the first character past the end of the zero
367367
** terminator, or at the zero-terminator at EOL.
368368
*/
369369
static char *next_token(char **pzIn){
370370
char *z = *pzIn;
371
- int i;
372
- if( z[0]==0 ) return z;
373
- for(i=0; z[i] && z[i]!=' ' && z[i]!='\n'; i++){}
374
- if( z[i] ){
375
- z[i] = 0;
376
- *pzIn = &z[i+1];
377
- }else{
378
- *pzIn = &z[i];
379
- }
380
- return z;
381
-}
382
-
383
-/*
384
-** Return a token that is all text up to (but omitting) the next \n
385
-** or \r\n.
386
-*/
387
-static char *rest_of_line(char **pzIn){
388
- char *z = *pzIn;
389
- int i;
390
- if( z[0]==0 ) return z;
391
- for(i=0; z[i] && z[i]!='\r' && z[i]!='\n'; i++){}
392
- if( z[i] ){
393
- if( z[i]=='\r' && z[i+1]=='\n' ){
394
- z[i] = 0;
395
- i++;
396
- }else{
397
- z[i] = 0;
398
- }
371
+ int i, j;
372
+ if( z[0]==0 ) return z;
373
+ if( z[0]=='"' ){
374
+ /* Quoted path name */
375
+ z++;
376
+ for(i=0, j=0; z[i] && z[i]!='"' && z[i]!='\n'; i++, j++){
377
+ if( z[i]=='\\' && z[i+1] ){
378
+ char v, c = z[++i];
379
+ switch( c ){
380
+ case 0:
381
+ case '"': c = '"'; break;
382
+ case '\\': c = '\\'; break;
383
+ case 'a': c = '\a'; break;
384
+ case 'b': c = '\b'; break;
385
+ case 'f': c = '\f'; break;
386
+ case 'n': c = '\n'; break;
387
+ case 'r': c = '\r'; break;
388
+ case 't': c = '\t'; break;
389
+ case 'v': c = '\v'; break;
390
+ case '0': case '1': case '2': case '3':
391
+ v = (c - '0') << 6;
392
+ c = z[++i];
393
+ if( c < '0' || c > '7' )
394
+ fossil_fatal("Invalid octal digit '%c' in sequence", c);
395
+ v |= (c - '0') << 3;
396
+ c = z[++i];
397
+ if( c < '0' || c > '7' )
398
+ fossil_fatal("Invalid octal digit '%c' in sequence", c);
399
+ v |= (c - '0');
400
+ c = v;
401
+ break;
402
+ default:
403
+ fossil_fatal("Unrecognized escape sequence \"\\%c\"", c);
404
+ }
405
+ z[j] = c;
406
+ }
407
+ }
408
+ if( z[i]=='"' ) z[i++] = 0;
409
+ }else{
410
+ /* Unquoted path name or generic token */
411
+ for(i=0; z[i] && z[i]!=' ' && z[i]!='\n'; i++){}
412
+ }
413
+ if( z[i] ){
414
+ z[i] = 0;
399415
*pzIn = &z[i+1];
400416
}else{
401417
*pzIn = &z[i];
402418
}
403419
return z;
@@ -516,27 +532,10 @@
516532
i++;
517533
}
518534
return 0;
519535
}
520536
521
-/*
522
-** Dequote a fast-export filename. Filenames are normally unquoted. But
523
-** if the contain some obscure special characters, quotes might be added.
524
-*/
525
-static void dequote_git_filename(char *zName){
526
- int n, i, j;
527
- if( zName==0 || zName[0]!='"' ) return;
528
- n = (int)strlen(zName);
529
- if( zName[n-1]!='"' ) return;
530
- for(i=0, j=1; j<n-1; j++){
531
- char c = zName[j];
532
- if( c=='\\' ) c = zName[++j];
533
- zName[i++] = c;
534
- }
535
- zName[i] = 0;
536
-}
537
-
538537
539538
/*
540539
** Read the git-fast-import format from pIn and insert the corresponding
541540
** content into the database.
542541
*/
@@ -676,12 +675,11 @@
676675
if( memcmp(zLine, "M ", 2)==0 ){
677676
import_prior_files();
678677
z = &zLine[2];
679678
zPerm = next_token(&z);
680679
zUuid = next_token(&z);
681
- zName = rest_of_line(&z);
682
- dequote_git_filename(zName);
680
+ zName = next_token(&z);
683681
i = 0;
684682
pFile = import_find_file(zName, &i, gg.nFile);
685683
if( pFile==0 ){
686684
pFile = import_add_file();
687685
pFile->zName = fossil_strdup(zName);
@@ -700,12 +698,11 @@
700698
pFile->hasChanged = 1;
701699
}else
702700
if( memcmp(zLine, "D ", 2)==0 ){
703701
import_prior_files();
704702
z = &zLine[2];
705
- zName = rest_of_line(&z);
706
- dequote_git_filename(zName);
703
+ zName = next_token(&z);
707704
i = 0;
708705
pFile = import_find_file(zName, &i, gg.nFile);
709706
if( pFile!=0 ){
710707
/* Do not remove the item from gg.aFile, just mark as deleted */
711708
fossil_free(pFile->zUuid);
@@ -717,11 +714,11 @@
717714
}else
718715
if( memcmp(zLine, "C ", 2)==0 ){
719716
import_prior_files();
720717
z = &zLine[2];
721718
zFrom = next_token(&z);
722
- zTo = rest_of_line(&z);
719
+ zTo = next_token(&z);
723720
i = 0;
724721
pFile = import_find_file(zFrom, &i, gg.nFile);
725722
if( pFile!=0 ){
726723
int j = 0;
727724
pNew = import_find_file(zTo, &j, gg.nFile);
@@ -742,11 +739,11 @@
742739
}else
743740
if( memcmp(zLine, "R ", 2)==0 ){
744741
import_prior_files();
745742
z = &zLine[2];
746743
zFrom = next_token(&z);
747
- zTo = rest_of_line(&z);
744
+ zTo = next_token(&z);
748745
i = 0;
749746
pFile = import_find_file(zFrom, &i, gg.nFile);
750747
if( pFile!=0 ){
751748
/*
752749
** File renames in delta manifests require two "F" cards: one to
753750
--- src/import.c
+++ src/import.c
@@ -366,38 +366,54 @@
366 ** *pzIn point to the first character past the end of the zero
367 ** terminator, or at the zero-terminator at EOL.
368 */
369 static char *next_token(char **pzIn){
370 char *z = *pzIn;
371 int i;
372 if( z[0]==0 ) return z;
373 for(i=0; z[i] && z[i]!=' ' && z[i]!='\n'; i++){}
374 if( z[i] ){
375 z[i] = 0;
376 *pzIn = &z[i+1];
377 }else{
378 *pzIn = &z[i];
379 }
380 return z;
381 }
382
383 /*
384 ** Return a token that is all text up to (but omitting) the next \n
385 ** or \r\n.
386 */
387 static char *rest_of_line(char **pzIn){
388 char *z = *pzIn;
389 int i;
390 if( z[0]==0 ) return z;
391 for(i=0; z[i] && z[i]!='\r' && z[i]!='\n'; i++){}
392 if( z[i] ){
393 if( z[i]=='\r' && z[i+1]=='\n' ){
394 z[i] = 0;
395 i++;
396 }else{
397 z[i] = 0;
398 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399 *pzIn = &z[i+1];
400 }else{
401 *pzIn = &z[i];
402 }
403 return z;
@@ -516,27 +532,10 @@
516 i++;
517 }
518 return 0;
519 }
520
521 /*
522 ** Dequote a fast-export filename. Filenames are normally unquoted. But
523 ** if the contain some obscure special characters, quotes might be added.
524 */
525 static void dequote_git_filename(char *zName){
526 int n, i, j;
527 if( zName==0 || zName[0]!='"' ) return;
528 n = (int)strlen(zName);
529 if( zName[n-1]!='"' ) return;
530 for(i=0, j=1; j<n-1; j++){
531 char c = zName[j];
532 if( c=='\\' ) c = zName[++j];
533 zName[i++] = c;
534 }
535 zName[i] = 0;
536 }
537
538
539 /*
540 ** Read the git-fast-import format from pIn and insert the corresponding
541 ** content into the database.
542 */
@@ -676,12 +675,11 @@
676 if( memcmp(zLine, "M ", 2)==0 ){
677 import_prior_files();
678 z = &zLine[2];
679 zPerm = next_token(&z);
680 zUuid = next_token(&z);
681 zName = rest_of_line(&z);
682 dequote_git_filename(zName);
683 i = 0;
684 pFile = import_find_file(zName, &i, gg.nFile);
685 if( pFile==0 ){
686 pFile = import_add_file();
687 pFile->zName = fossil_strdup(zName);
@@ -700,12 +698,11 @@
700 pFile->hasChanged = 1;
701 }else
702 if( memcmp(zLine, "D ", 2)==0 ){
703 import_prior_files();
704 z = &zLine[2];
705 zName = rest_of_line(&z);
706 dequote_git_filename(zName);
707 i = 0;
708 pFile = import_find_file(zName, &i, gg.nFile);
709 if( pFile!=0 ){
710 /* Do not remove the item from gg.aFile, just mark as deleted */
711 fossil_free(pFile->zUuid);
@@ -717,11 +714,11 @@
717 }else
718 if( memcmp(zLine, "C ", 2)==0 ){
719 import_prior_files();
720 z = &zLine[2];
721 zFrom = next_token(&z);
722 zTo = rest_of_line(&z);
723 i = 0;
724 pFile = import_find_file(zFrom, &i, gg.nFile);
725 if( pFile!=0 ){
726 int j = 0;
727 pNew = import_find_file(zTo, &j, gg.nFile);
@@ -742,11 +739,11 @@
742 }else
743 if( memcmp(zLine, "R ", 2)==0 ){
744 import_prior_files();
745 z = &zLine[2];
746 zFrom = next_token(&z);
747 zTo = rest_of_line(&z);
748 i = 0;
749 pFile = import_find_file(zFrom, &i, gg.nFile);
750 if( pFile!=0 ){
751 /*
752 ** File renames in delta manifests require two "F" cards: one to
753
--- src/import.c
+++ src/import.c
@@ -366,38 +366,54 @@
366 ** *pzIn point to the first character past the end of the zero
367 ** terminator, or at the zero-terminator at EOL.
368 */
369 static char *next_token(char **pzIn){
370 char *z = *pzIn;
371 int i, j;
372 if( z[0]==0 ) return z;
373 if( z[0]=='"' ){
374 /* Quoted path name */
375 z++;
376 for(i=0, j=0; z[i] && z[i]!='"' && z[i]!='\n'; i++, j++){
377 if( z[i]=='\\' && z[i+1] ){
378 char v, c = z[++i];
379 switch( c ){
380 case 0:
381 case '"': c = '"'; break;
382 case '\\': c = '\\'; break;
383 case 'a': c = '\a'; break;
384 case 'b': c = '\b'; break;
385 case 'f': c = '\f'; break;
386 case 'n': c = '\n'; break;
387 case 'r': c = '\r'; break;
388 case 't': c = '\t'; break;
389 case 'v': c = '\v'; break;
390 case '0': case '1': case '2': case '3':
391 v = (c - '0') << 6;
392 c = z[++i];
393 if( c < '0' || c > '7' )
394 fossil_fatal("Invalid octal digit '%c' in sequence", c);
395 v |= (c - '0') << 3;
396 c = z[++i];
397 if( c < '0' || c > '7' )
398 fossil_fatal("Invalid octal digit '%c' in sequence", c);
399 v |= (c - '0');
400 c = v;
401 break;
402 default:
403 fossil_fatal("Unrecognized escape sequence \"\\%c\"", c);
404 }
405 z[j] = c;
406 }
407 }
408 if( z[i]=='"' ) z[i++] = 0;
409 }else{
410 /* Unquoted path name or generic token */
411 for(i=0; z[i] && z[i]!=' ' && z[i]!='\n'; i++){}
412 }
413 if( z[i] ){
414 z[i] = 0;
415 *pzIn = &z[i+1];
416 }else{
417 *pzIn = &z[i];
418 }
419 return z;
@@ -516,27 +532,10 @@
532 i++;
533 }
534 return 0;
535 }
536
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
537
538 /*
539 ** Read the git-fast-import format from pIn and insert the corresponding
540 ** content into the database.
541 */
@@ -676,12 +675,11 @@
675 if( memcmp(zLine, "M ", 2)==0 ){
676 import_prior_files();
677 z = &zLine[2];
678 zPerm = next_token(&z);
679 zUuid = next_token(&z);
680 zName = next_token(&z);
 
681 i = 0;
682 pFile = import_find_file(zName, &i, gg.nFile);
683 if( pFile==0 ){
684 pFile = import_add_file();
685 pFile->zName = fossil_strdup(zName);
@@ -700,12 +698,11 @@
698 pFile->hasChanged = 1;
699 }else
700 if( memcmp(zLine, "D ", 2)==0 ){
701 import_prior_files();
702 z = &zLine[2];
703 zName = next_token(&z);
 
704 i = 0;
705 pFile = import_find_file(zName, &i, gg.nFile);
706 if( pFile!=0 ){
707 /* Do not remove the item from gg.aFile, just mark as deleted */
708 fossil_free(pFile->zUuid);
@@ -717,11 +714,11 @@
714 }else
715 if( memcmp(zLine, "C ", 2)==0 ){
716 import_prior_files();
717 z = &zLine[2];
718 zFrom = next_token(&z);
719 zTo = next_token(&z);
720 i = 0;
721 pFile = import_find_file(zFrom, &i, gg.nFile);
722 if( pFile!=0 ){
723 int j = 0;
724 pNew = import_find_file(zTo, &j, gg.nFile);
@@ -742,11 +739,11 @@
739 }else
740 if( memcmp(zLine, "R ", 2)==0 ){
741 import_prior_files();
742 z = &zLine[2];
743 zFrom = next_token(&z);
744 zTo = next_token(&z);
745 i = 0;
746 pFile = import_find_file(zFrom, &i, gg.nFile);
747 if( pFile!=0 ){
748 /*
749 ** File renames in delta manifests require two "F" cards: one to
750

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button