Fossil SCM

Improved diff alignment following an indentation change. The objective of this change is to improve the diff output for the [a36dd09d17f3057f] check-in.

drh 2022-02-18 15:23 trunk

Commit 868d1608382a9bb11c594bd88f4de8c41e9f63c45fb46e05942c35fc9cfe457a

Parent e2aed163ac6e7a8…

1 file changed +68 -9

~ src/diff.c

M src/diff.c

+68 -9

		--- src/diff.c
		+++ src/diff.c
		@@ -2618,33 +2618,92 @@
2618	2618	** ALGORITHM (subject to change and refinement):
2619	2619	**
2620	2620	** 1. If the subsequence is larger than 1/7th of the original span,
2621	2621	** then consider it valid. --> return 1
2622	2622	**
2623		-** 2. If the subsequence contains any charaters other than '}', '{",
2624		-** or whitespace, then consider it valid. --> return 1
	2623	+** 2. If no lines of the subsequence contains more than one
	2624	+** non-whitespace character, --> return 0
	2625	+**
	2626	+** 3. If any line of the subsequence contains more than one non-whitespace
	2627	+** character and is unique across the entire sequence after ignoring
	2628	+** leading and trailing whitespace --> return 1
2625	2629	**
2626		-** 3. Otherwise, it is potentially an artifact of an indentation
	2630	+** 4. Otherwise, it is potentially an artifact of an indentation
2627	2631	** change. --> return 0
2628	2632	*/
2629	2633	static int likelyNotIndentChngArtifact(
2630	2634	DContext *p, /* The complete diff context */
2631	2635	int iS1, /* Start of the main segment */
2632	2636	int iSX, /* Start of the subsequence */
2633	2637	int iEX, /* First row past the end of the subsequence */
2634	2638	int iE1 /* First row past the end of the main segment */
2635	2639	){
2636		- int i, j;
	2640	+ int i, j, n;
	2641	+
	2642	+ /* Rule (1) */
2637	2643	if( (iEX-iSX)*7 >= (iE1-iS1) ) return 1;
	2644	+
	2645	+ /* Compute DLine.indent and DLine.nw for all lines of the subsequence.
	2646	+ ** If no lines contain more than one non-whitespace character return
	2647	+ ** 0 because the subsequence could be due to an indentation change.
	2648	+ ** Rule (2).
	2649	+ */
	2650	+ n = 0;
	2651	+ for(i=iSX; i<iEX; i++){
	2652	+ DLine *pA = &p->aFrom[i];
	2653	+ if( pA->nw==0 && pA->n ){
	2654	+ const char *zA = pA->z;
	2655	+ const int nn = pA->n;
	2656	+ int ii, jj;
	2657	+ for(ii=0; ii<nn && diff_isspace(zA[ii]); ii++){}
	2658	+ pA->indent = ii;
	2659	+ for(jj=nn-1; jj>ii && diff_isspace(zA[jj]); jj--){}
	2660	+ pA->nw = jj - ii + 1;
	2661	+ }
	2662	+ if( pA->nw>1 ) n++;
	2663	+ }
	2664	+ if( n==0 ) return 0;
	2665	+
	2666	+ /* Compute DLine.indent and DLine.nw for the entire sequence */
	2667	+ for(i=iS1; i<iE1; i++){
	2668	+ DLine *pA;
	2669	+ if( i==iSX ){
	2670	+ i = iEX;
	2671	+ if( i>=iE1 ) break;
	2672	+ }
	2673	+ pA = &p->aFrom[i];
	2674	+ if( pA->nw==0 && pA->n ){
	2675	+ const char *zA = pA->z;
	2676	+ const int nn = pA->n;
	2677	+ int ii, jj;
	2678	+ for(ii=0; ii<nn && diff_isspace(zA[ii]); ii++){}
	2679	+ pA->indent = ii;
	2680	+ for(jj=nn-1; jj>ii && diff_isspace(zA[jj]); jj--){}
	2681	+ pA->nw = jj - ii + 1;
	2682	+ }
	2683	+ }
	2684	+
	2685	+ /* Check to see if any subsequence line that has more than one
	2686	+ ** non-whitespace character is unique across the entire sequence.
	2687	+ ** Rule (3)
	2688	+ */
2638	2689	for(i=iSX; i<iEX; i++){
2639		- const char *z = p->aFrom[i].z;
2640		- for(j=p->aFrom[i].n-1; j>=0; j--){
2641		- char c = z[j];
2642		- if( c!='}' && c!='{' && !diff_isspace(c) ) return 1;
	2690	+ const char *z = p->aFrom[i].z + p->aFrom[i].indent;
	2691	+ const int nw = p->aFrom[i].nw;
	2692	+ if( nw<=1 ) continue;
	2693	+ for(j=iS1; j<iSX; j++){
	2694	+ if( p->aFrom[j].nw!=nw ) continue;
	2695	+ if( memcmp(p->aFrom[j].z+p->aFrom[j].indent,z,nw)==0 ) break;
	2696	+ }
	2697	+ if( j<iSX ) continue;
	2698	+ for(j=iEX; j<iE1; j++){
	2699	+ if( p->aFrom[j].nw!=nw ) continue;
	2700	+ if( memcmp(p->aFrom[j].z+p->aFrom[j].indent,z,nw)==0 ) break;
2643	2701	}
	2702	+ if( j>=iE1 ) break;
2644	2703	}
2645		- return 0;
	2704	+ return i<iEX;
2646	2705	}
2647	2706
2648	2707
2649	2708	/*
2650	2709	** Do a single step in the difference. Compute a sequence of
2651	2710

	--- src/diff.c
	+++ src/diff.c
	@@ -2618,33 +2618,92 @@
2618	** ALGORITHM (subject to change and refinement):
2619	**
2620	** 1. If the subsequence is larger than 1/7th of the original span,
2621	** then consider it valid. --> return 1
2622	**
2623	** 2. If the subsequence contains any charaters other than '}', '{",
2624	** or whitespace, then consider it valid. --> return 1




2625	**
2626	** 3. Otherwise, it is potentially an artifact of an indentation
2627	** change. --> return 0
2628	*/
2629	static int likelyNotIndentChngArtifact(
2630	DContext *p, /* The complete diff context */
2631	int iS1, /* Start of the main segment */
2632	int iSX, /* Start of the subsequence */
2633	int iEX, /* First row past the end of the subsequence */
2634	int iE1 /* First row past the end of the main segment */
2635	){
2636	int i, j;


2637	if( (iEX-iSX)*7 >= (iE1-iS1) ) return 1;













































2638	for(i=iSX; i<iEX; i++){
2639	const char *z = p->aFrom[i].z;
2640	for(j=p->aFrom[i].n-1; j>=0; j--){
2641	char c = z[j];
2642	if( c!='}' && c!='{' && !diff_isspace(c) ) return 1;







2643	}

2644	}
2645	return 0;
2646	}
2647
2648
2649	/*
2650	** Do a single step in the difference. Compute a sequence of
2651

	--- src/diff.c
	+++ src/diff.c
	@@ -2618,33 +2618,92 @@
2618	** ALGORITHM (subject to change and refinement):
2619	**
2620	** 1. If the subsequence is larger than 1/7th of the original span,
2621	** then consider it valid. --> return 1
2622	**
2623	** 2. If no lines of the subsequence contains more than one
2624	** non-whitespace character, --> return 0
2625	**
2626	** 3. If any line of the subsequence contains more than one non-whitespace
2627	** character and is unique across the entire sequence after ignoring
2628	** leading and trailing whitespace --> return 1
2629	**
2630	** 4. Otherwise, it is potentially an artifact of an indentation
2631	** change. --> return 0
2632	*/
2633	static int likelyNotIndentChngArtifact(
2634	DContext *p, /* The complete diff context */
2635	int iS1, /* Start of the main segment */
2636	int iSX, /* Start of the subsequence */
2637	int iEX, /* First row past the end of the subsequence */
2638	int iE1 /* First row past the end of the main segment */
2639	){
2640	int i, j, n;
2641
2642	/* Rule (1) */
2643	if( (iEX-iSX)*7 >= (iE1-iS1) ) return 1;
2644
2645	/* Compute DLine.indent and DLine.nw for all lines of the subsequence.
2646	** If no lines contain more than one non-whitespace character return
2647	** 0 because the subsequence could be due to an indentation change.
2648	** Rule (2).
2649	*/
2650	n = 0;
2651	for(i=iSX; i<iEX; i++){
2652	DLine *pA = &p->aFrom[i];
2653	if( pA->nw==0 && pA->n ){
2654	const char *zA = pA->z;
2655	const int nn = pA->n;
2656	int ii, jj;
2657	for(ii=0; ii<nn && diff_isspace(zA[ii]); ii++){}
2658	pA->indent = ii;
2659	for(jj=nn-1; jj>ii && diff_isspace(zA[jj]); jj--){}
2660	pA->nw = jj - ii + 1;
2661	}
2662	if( pA->nw>1 ) n++;
2663	}
2664	if( n==0 ) return 0;
2665
2666	/* Compute DLine.indent and DLine.nw for the entire sequence */
2667	for(i=iS1; i<iE1; i++){
2668	DLine *pA;
2669	if( i==iSX ){
2670	i = iEX;
2671	if( i>=iE1 ) break;
2672	}
2673	pA = &p->aFrom[i];
2674	if( pA->nw==0 && pA->n ){
2675	const char *zA = pA->z;
2676	const int nn = pA->n;
2677	int ii, jj;
2678	for(ii=0; ii<nn && diff_isspace(zA[ii]); ii++){}
2679	pA->indent = ii;
2680	for(jj=nn-1; jj>ii && diff_isspace(zA[jj]); jj--){}
2681	pA->nw = jj - ii + 1;
2682	}
2683	}
2684
2685	/* Check to see if any subsequence line that has more than one
2686	** non-whitespace character is unique across the entire sequence.
2687	** Rule (3)
2688	*/
2689	for(i=iSX; i<iEX; i++){
2690	const char *z = p->aFrom[i].z + p->aFrom[i].indent;
2691	const int nw = p->aFrom[i].nw;
2692	if( nw<=1 ) continue;
2693	for(j=iS1; j<iSX; j++){
2694	if( p->aFrom[j].nw!=nw ) continue;
2695	if( memcmp(p->aFrom[j].z+p->aFrom[j].indent,z,nw)==0 ) break;
2696	}
2697	if( j<iSX ) continue;
2698	for(j=iEX; j<iE1; j++){
2699	if( p->aFrom[j].nw!=nw ) continue;
2700	if( memcmp(p->aFrom[j].z+p->aFrom[j].indent,z,nw)==0 ) break;
2701	}
2702	if( j>=iE1 ) break;
2703	}
2704	return i<iEX;
2705	}
2706
2707
2708	/*
2709	** Do a single step in the difference. Compute a sequence of
2710

Fossil SCM

Keyboard Shortcuts