Fossil SCM

Improve the quoted path parsing and support more escape sequences.

isaac.jurado 2013-07-14 22:21 git-better-import

Commit 404d1a2554d0868149b57f029e93f63daceacb9c

Parent 2844434ef548a13…

1 file changed +48 -51

M src/import.c

+48 -51

		--- src/import.c
		+++ src/import.c
		@@ -366,38 +366,54 @@
366	366	** *pzIn point to the first character past the end of the zero
367	367	** terminator, or at the zero-terminator at EOL.
368	368	*/
369	369	static char *next_token(char **pzIn){
370	370	char *z = *pzIn;
371		- int i;
372		- if( z[0]==0 ) return z;
373		- for(i=0; z[i] && z[i]!=' ' && z[i]!='\n'; i++){}
374		- if( z[i] ){
375		- z[i] = 0;
376		- *pzIn = &z[i+1];
377		- }else{
378		- *pzIn = &z[i];
379		- }
380		- return z;
381		-}
382		-
383		-/*
384		-** Return a token that is all text up to (but omitting) the next \n
385		-** or \r\n.
386		-*/
387		- static char *rest_of_line(char **pzIn){
388		- char *z = *pzIn;
389		- int i;
390		- if( z[0]==0 ) return z;
391		- for(i=0; z[i] && z[i]!='\r' && z[i]!='\n'; i++){}
392		- if( z[i] ){
393		- if( z[i]=='\r' && z[i+1]=='\n' ){
394		- z[i] = 0;
395		- i++;
396		- }else{
397		- z[i] = 0;
398		- }
	371	+ int i, j;
	372	+ if( z[0]==0 ) return z;
	373	+ if( z[0]=='"' ){
	374	+ /* Quoted path name */
	375	+ z++;
	376	+ for(i=0, j=0; z[i] && z[i]!='"' && z[i]!='\n'; i++, j++){
	377	+ if( z[i]=='\\' && z[i+1] ){
	378	+ char v, c = z[++i];
	379	+ switch( c ){
	380	+ case 0:
	381	+ case '"': c = '"'; break;
	382	+ case '\\': c = '\\'; break;
	383	+ case 'a': c = '\a'; break;
	384	+ case 'b': c = '\b'; break;
	385	+ case 'f': c = '\f'; break;
	386	+ case 'n': c = '\n'; break;
	387	+ case 'r': c = '\r'; break;
	388	+ case 't': c = '\t'; break;
	389	+ case 'v': c = '\v'; break;
	390	+ case '0': case '1': case '2': case '3':
	391	+ v = (c - '0') << 6;
	392	+ c = z[++i];
	393	+ if( c < '0' || c > '7' )
	394	+ fossil_fatal("Invalid octal digit '%c' in sequence", c);
	395	+ v |= (c - '0') << 3;
	396	+ c = z[++i];
	397	+ if( c < '0' || c > '7' )
	398	+ fossil_fatal("Invalid octal digit '%c' in sequence", c);
	399	+ v |= (c - '0');
	400	+ c = v;
	401	+ break;
	402	+ default:
	403	+ fossil_fatal("Unrecognized escape sequence \"\\%c\"", c);
	404	+ }
	405	+ z[j] = c;
	406	+ }
	407	+ }
	408	+ if( z[i]=='"' ) z[i++] = 0;
	409	+ }else{
	410	+ /* Unquoted path name or generic token */
	411	+ for(i=0; z[i] && z[i]!=' ' && z[i]!='\n'; i++){}
	412	+ }
	413	+ if( z[i] ){
	414	+ z[i] = 0;
399	415	*pzIn = &z[i+1];
400	416	}else{
401	417	*pzIn = &z[i];
402	418	}
403	419	return z;
		@@ -516,27 +532,10 @@
516	532	i++;
517	533	}
518	534	return 0;
519	535	}
520	536
521		-/*
522		-** Dequote a fast-export filename. Filenames are normally unquoted. But
523		- ** if they contain some obscure special characters, quotes might be added.
524		-*/
525		-static void dequote_git_filename(char *zName){
526		- int n, i, j;
527		- if( zName==0 || zName[0]!='"' ) return;
528		- n = (int)strlen(zName);
529		- if( zName[n-1]!='"' ) return;
530		- for(i=0, j=1; j<n-1; j++){
531		- char c = zName[j];
532		- if( c=='\\' ) c = zName[++j];
533		- zName[i++] = c;
534		- }
535		- zName[i] = 0;
536		-}
537		-
538	537
539	538	/*
540	539	** Read the git-fast-import format from pIn and insert the corresponding
541	540	** content into the database.
542	541	*/
		@@ -676,12 +675,11 @@
676	675	if( memcmp(zLine, "M ", 2)==0 ){
677	676	import_prior_files();
678	677	z = &zLine[2];
679	678	zPerm = next_token(&z);
680	679	zUuid = next_token(&z);
681		- zName = rest_of_line(&z);
682		- dequote_git_filename(zName);
	680	+ zName = next_token(&z);
683	681	i = 0;
684	682	pFile = import_find_file(zName, &i, gg.nFile);
685	683	if( pFile==0 ){
686	684	pFile = import_add_file();
687	685	pFile->zName = fossil_strdup(zName);
		@@ -700,12 +698,11 @@
700	698	pFile->hasChanged = 1;
701	699	}else
702	700	if( memcmp(zLine, "D ", 2)==0 ){
703	701	import_prior_files();
704	702	z = &zLine[2];
705		- zName = rest_of_line(&z);
706		- dequote_git_filename(zName);
	703	+ zName = next_token(&z);
707	704	i = 0;
708	705	pFile = import_find_file(zName, &i, gg.nFile);
709	706	if( pFile!=0 ){
710	707	/* Do not remove the item from gg.aFile, just mark as deleted */
711	708	fossil_free(pFile->zUuid);
		@@ -717,11 +714,11 @@
717	714	}else
718	715	if( memcmp(zLine, "C ", 2)==0 ){
719	716	import_prior_files();
720	717	z = &zLine[2];
721	718	zFrom = next_token(&z);
722		- zTo = rest_of_line(&z);
	719	+ zTo = next_token(&z);
723	720	i = 0;
724	721	pFile = import_find_file(zFrom, &i, gg.nFile);
725	722	if( pFile!=0 ){
726	723	int j = 0;
727	724	pNew = import_find_file(zTo, &j, gg.nFile);
		@@ -742,11 +739,11 @@
742	739	}else
743	740	if( memcmp(zLine, "R ", 2)==0 ){
744	741	import_prior_files();
745	742	z = &zLine[2];
746	743	zFrom = next_token(&z);
747		- zTo = rest_of_line(&z);
	744	+ zTo = next_token(&z);
748	745	i = 0;
749	746	pFile = import_find_file(zFrom, &i, gg.nFile);
750	747	if( pFile!=0 ){
751	748	/*
752	749	** File renames in delta manifests require two "F" cards: one to
753	750

	--- src/import.c
	+++ src/import.c
	@@ -366,38 +366,54 @@
366	** *pzIn point to the first character past the end of the zero
367	** terminator, or at the zero-terminator at EOL.
368	*/
369	static char *next_token(char **pzIn){
370	char *z = *pzIn;
371	int i;
372	if( z[0]==0 ) return z;
373	for(i=0; z[i] && z[i]!=' ' && z[i]!='\n'; i++){}
374	if( z[i] ){
375	z[i] = 0;
376	*pzIn = &z[i+1];
377	}else{
378	*pzIn = &z[i];
379	}
380	return z;
381	}
382
383	/*
384	** Return a token that is all text up to (but omitting) the next \n
385	** or \r\n.
386	*/
387	static char *rest_of_line(char **pzIn){
388	char *z = *pzIn;
389	int i;
390	if( z[0]==0 ) return z;
391	for(i=0; z[i] && z[i]!='\r' && z[i]!='\n'; i++){}
392	if( z[i] ){
393	if( z[i]=='\r' && z[i+1]=='\n' ){
394	z[i] = 0;
395	i++;
396	}else{
397	z[i] = 0;
398	}
















399	*pzIn = &z[i+1];
400	}else{
401	*pzIn = &z[i];
402	}
403	return z;
	@@ -516,27 +532,10 @@
516	i++;
517	}
518	return 0;
519	}
520
521	/*
522	** Dequote a fast-export filename. Filenames are normally unquoted. But
523	** if they contain some obscure special characters, quotes might be added.
524	*/
525	static void dequote_git_filename(char *zName){
526	int n, i, j;
527	if( zName==0 || zName[0]!='"' ) return;
528	n = (int)strlen(zName);
529	if( zName[n-1]!='"' ) return;
530	for(i=0, j=1; j<n-1; j++){
531	char c = zName[j];
532	if( c=='\\' ) c = zName[++j];
533	zName[i++] = c;
534	}
535	zName[i] = 0;
536	}
537
538
539	/*
540	** Read the git-fast-import format from pIn and insert the corresponding
541	** content into the database.
542	*/
	@@ -676,12 +675,11 @@
676	if( memcmp(zLine, "M ", 2)==0 ){
677	import_prior_files();
678	z = &zLine[2];
679	zPerm = next_token(&z);
680	zUuid = next_token(&z);
681	zName = rest_of_line(&z);
682	dequote_git_filename(zName);
683	i = 0;
684	pFile = import_find_file(zName, &i, gg.nFile);
685	if( pFile==0 ){
686	pFile = import_add_file();
687	pFile->zName = fossil_strdup(zName);
	@@ -700,12 +698,11 @@
700	pFile->hasChanged = 1;
701	}else
702	if( memcmp(zLine, "D ", 2)==0 ){
703	import_prior_files();
704	z = &zLine[2];
705	zName = rest_of_line(&z);
706	dequote_git_filename(zName);
707	i = 0;
708	pFile = import_find_file(zName, &i, gg.nFile);
709	if( pFile!=0 ){
710	/* Do not remove the item from gg.aFile, just mark as deleted */
711	fossil_free(pFile->zUuid);
	@@ -717,11 +714,11 @@
717	}else
718	if( memcmp(zLine, "C ", 2)==0 ){
719	import_prior_files();
720	z = &zLine[2];
721	zFrom = next_token(&z);
722	zTo = rest_of_line(&z);
723	i = 0;
724	pFile = import_find_file(zFrom, &i, gg.nFile);
725	if( pFile!=0 ){
726	int j = 0;
727	pNew = import_find_file(zTo, &j, gg.nFile);
	@@ -742,11 +739,11 @@
742	}else
743	if( memcmp(zLine, "R ", 2)==0 ){
744	import_prior_files();
745	z = &zLine[2];
746	zFrom = next_token(&z);
747	zTo = rest_of_line(&z);
748	i = 0;
749	pFile = import_find_file(zFrom, &i, gg.nFile);
750	if( pFile!=0 ){
751	/*
752	** File renames in delta manifests require two "F" cards: one to
753

	--- src/import.c
	+++ src/import.c
	@@ -366,38 +366,54 @@
366	** *pzIn point to the first character past the end of the zero
367	** terminator, or at the zero-terminator at EOL.
368	*/
369	static char *next_token(char **pzIn){
370	char *z = *pzIn;
371	int i, j;
372	if( z[0]==0 ) return z;
373	if( z[0]=='"' ){
374	/* Quoted path name */
375	z++;
376	for(i=0, j=0; z[i] && z[i]!='"' && z[i]!='\n'; i++, j++){
377	if( z[i]=='\\' && z[i+1] ){
378	char v, c = z[++i];
379	switch( c ){
380	case 0:
381	case '"': c = '"'; break;
382	case '\\': c = '\\'; break;
383	case 'a': c = '\a'; break;
384	case 'b': c = '\b'; break;
385	case 'f': c = '\f'; break;
386	case 'n': c = '\n'; break;
387	case 'r': c = '\r'; break;
388	case 't': c = '\t'; break;
389	case 'v': c = '\v'; break;
390	case '0': case '1': case '2': case '3':
391	v = (c - '0') << 6;
392	c = z[++i];
393	if( c < '0' || c > '7' )
394	fossil_fatal("Invalid octal digit '%c' in sequence", c);
395	v |= (c - '0') << 3;
396	c = z[++i];
397	if( c < '0' || c > '7' )
398	fossil_fatal("Invalid octal digit '%c' in sequence", c);
399	v |= (c - '0');
400	c = v;
401	break;
402	default:
403	fossil_fatal("Unrecognized escape sequence \"\\%c\"", c);
404	}
405	z[j] = c;
406	}
407	}
408	if( z[i]=='"' ) z[i++] = 0;
409	}else{
410	/* Unquoted path name or generic token */
411	for(i=0; z[i] && z[i]!=' ' && z[i]!='\n'; i++){}
412	}
413	if( z[i] ){
414	z[i] = 0;
415	*pzIn = &z[i+1];
416	}else{
417	*pzIn = &z[i];
418	}
419	return z;
	@@ -516,27 +532,10 @@
532	i++;
533	}
534	return 0;
535	}
536

















537
538	/*
539	** Read the git-fast-import format from pIn and insert the corresponding
540	** content into the database.
541	*/
	@@ -676,12 +675,11 @@
675	if( memcmp(zLine, "M ", 2)==0 ){
676	import_prior_files();
677	z = &zLine[2];
678	zPerm = next_token(&z);
679	zUuid = next_token(&z);
680	zName = next_token(&z);

681	i = 0;
682	pFile = import_find_file(zName, &i, gg.nFile);
683	if( pFile==0 ){
684	pFile = import_add_file();
685	pFile->zName = fossil_strdup(zName);
	@@ -700,12 +698,11 @@
698	pFile->hasChanged = 1;
699	}else
700	if( memcmp(zLine, "D ", 2)==0 ){
701	import_prior_files();
702	z = &zLine[2];
703	zName = next_token(&z);

704	i = 0;
705	pFile = import_find_file(zName, &i, gg.nFile);
706	if( pFile!=0 ){
707	/* Do not remove the item from gg.aFile, just mark as deleted */
708	fossil_free(pFile->zUuid);
	@@ -717,11 +714,11 @@
714	}else
715	if( memcmp(zLine, "C ", 2)==0 ){
716	import_prior_files();
717	z = &zLine[2];
718	zFrom = next_token(&z);
719	zTo = next_token(&z);
720	i = 0;
721	pFile = import_find_file(zFrom, &i, gg.nFile);
722	if( pFile!=0 ){
723	int j = 0;
724	pNew = import_find_file(zTo, &j, gg.nFile);
	@@ -742,11 +739,11 @@
739	}else
740	if( memcmp(zLine, "R ", 2)==0 ){
741	import_prior_files();
742	z = &zLine[2];
743	zFrom = next_token(&z);
744	zTo = next_token(&z);
745	i = 0;
746	pFile = import_find_file(zFrom, &i, gg.nFile);
747	if( pFile!=0 ){
748	/*
749	** File renames in delta manifests require two "F" cards: one to
750

Fossil SCM

Keyboard Shortcuts