Fossil SCM

Improved diff alignment following an indentation change. The objective of this change is to improve the diff output of the [a36dd09d17f3057f] check-in.

drh 2022-02-18 15:23 trunk
Commit 868d1608382a9bb11c594bd88f4de8c41e9f63c45fb46e05942c35fc9cfe457a
1 file changed +68 -9
+68 -9
--- src/diff.c
+++ src/diff.c
@@ -2618,33 +2618,92 @@
26182618
** ALGORITHM (subject to change and refinement):
26192619
**
26202620
** 1. If the subsequence is larger than 1/7th of the original span,
26212621
** then consider it valid. --> return 1
26222622
**
2623
-** 2. If the subsequence contains any charaters other than '}', '{",
2624
-** or whitespace, then consider it valid. --> return 1
2623
+** 2. If no lines of the subsequence contains more than one
2624
+** non-whitespace character, --> return 0
2625
+**
2626
+** 3. If any line of the subsequence contains more than one non-whitespace
2627
+** character and is unique across the entire sequence after ignoring
2628
+** leading and trailing whitespace --> return 1
26252629
**
2626
-** 3. Otherwise, it is potentially an artifact of an indentation
2630
+** 4. Otherwise, it is potentially an artifact of an indentation
26272631
** change. --> return 0
26282632
*/
26292633
static int likelyNotIndentChngArtifact(
26302634
DContext *p, /* The complete diff context */
26312635
int iS1, /* Start of the main segment */
26322636
int iSX, /* Start of the subsequence */
26332637
int iEX, /* First row past the end of the subsequence */
26342638
int iE1 /* First row past the end of the main segment */
26352639
){
2636
- int i, j;
2640
+ int i, j, n;
2641
+
2642
+ /* Rule (1) */
26372643
if( (iEX-iSX)*7 >= (iE1-iS1) ) return 1;
2644
+
2645
+ /* Compute DLine.indent and DLine.nw for all lines of the subsequence.
2646
+ ** If no lines contain more than one non-whitespace character return
2647
+ ** 0 because the subsequence could be due to an indentation change.
2648
+ ** Rule (2).
2649
+ */
2650
+ n = 0;
2651
+ for(i=iSX; i<iEX; i++){
2652
+ DLine *pA = &p->aFrom[i];
2653
+ if( pA->nw==0 && pA->n ){
2654
+ const char *zA = pA->z;
2655
+ const int nn = pA->n;
2656
+ int ii, jj;
2657
+ for(ii=0; ii<nn && diff_isspace(zA[ii]); ii++){}
2658
+ pA->indent = ii;
2659
+ for(jj=nn-1; jj>ii && diff_isspace(zA[jj]); jj--){}
2660
+ pA->nw = jj - ii + 1;
2661
+ }
2662
+ if( pA->nw>1 ) n++;
2663
+ }
2664
+ if( n==0 ) return 0;
2665
+
2666
+ /* Compute DLine.indent and DLine.nw for the entire sequence */
2667
+ for(i=iS1; i<iE1; i++){
2668
+ DLine *pA;
2669
+ if( i==iSX ){
2670
+ i = iEX;
2671
+ if( i>=iE1 ) break;
2672
+ }
2673
+ pA = &p->aFrom[i];
2674
+ if( pA->nw==0 && pA->n ){
2675
+ const char *zA = pA->z;
2676
+ const int nn = pA->n;
2677
+ int ii, jj;
2678
+ for(ii=0; ii<nn && diff_isspace(zA[ii]); ii++){}
2679
+ pA->indent = ii;
2680
+ for(jj=nn-1; jj>ii && diff_isspace(zA[jj]); jj--){}
2681
+ pA->nw = jj - ii + 1;
2682
+ }
2683
+ }
2684
+
2685
+ /* Check to see if any subsequence line that has more than one
2686
+ ** non-whitespace character is unique across the entire sequence.
2687
+ ** Rule (3)
2688
+ */
26382689
for(i=iSX; i<iEX; i++){
2639
- const char *z = p->aFrom[i].z;
2640
- for(j=p->aFrom[i].n-1; j>=0; j--){
2641
- char c = z[j];
2642
- if( c!='}' && c!='{' && !diff_isspace(c) ) return 1;
2690
+ const char *z = p->aFrom[i].z + p->aFrom[i].indent;
2691
+ const int nw = p->aFrom[i].nw;
2692
+ if( nw<=1 ) continue;
2693
+ for(j=iS1; j<iSX; j++){
2694
+ if( p->aFrom[j].nw!=nw ) continue;
2695
+ if( memcmp(p->aFrom[j].z+p->aFrom[j].indent,z,nw)==0 ) break;
2696
+ }
2697
+ if( j<iSX ) continue;
2698
+ for(j=iEX; j<iE1; j++){
2699
+ if( p->aFrom[j].nw!=nw ) continue;
2700
+ if( memcmp(p->aFrom[j].z+p->aFrom[j].indent,z,nw)==0 ) break;
26432701
}
2702
+ if( j>=iE1 ) break;
26442703
}
2645
- return 0;
2704
+ return i<iEX;
26462705
}
26472706
26482707
26492708
/*
26502709
** Do a single step in the difference. Compute a sequence of
26512710
--- src/diff.c
+++ src/diff.c
@@ -2618,33 +2618,92 @@
2618 ** ALGORITHM (subject to change and refinement):
2619 **
2620 ** 1. If the subsequence is larger than 1/7th of the original span,
2621 ** then consider it valid. --> return 1
2622 **
2623 ** 2. If the subsequence contains any charaters other than '}', '{",
2624 ** or whitespace, then consider it valid. --> return 1
 
 
 
 
2625 **
2626 ** 3. Otherwise, it is potentially an artifact of an indentation
2627 ** change. --> return 0
2628 */
2629 static int likelyNotIndentChngArtifact(
2630 DContext *p, /* The complete diff context */
2631 int iS1, /* Start of the main segment */
2632 int iSX, /* Start of the subsequence */
2633 int iEX, /* First row past the end of the subsequence */
2634 int iE1 /* First row past the end of the main segment */
2635 ){
2636 int i, j;
 
 
2637 if( (iEX-iSX)*7 >= (iE1-iS1) ) return 1;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2638 for(i=iSX; i<iEX; i++){
2639 const char *z = p->aFrom[i].z;
2640 for(j=p->aFrom[i].n-1; j>=0; j--){
2641 char c = z[j];
2642 if( c!='}' && c!='{' && !diff_isspace(c) ) return 1;
 
 
 
 
 
 
 
2643 }
 
2644 }
2645 return 0;
2646 }
2647
2648
2649 /*
2650 ** Do a single step in the difference. Compute a sequence of
2651
--- src/diff.c
+++ src/diff.c
@@ -2618,33 +2618,92 @@
2618 ** ALGORITHM (subject to change and refinement):
2619 **
2620 ** 1. If the subsequence is larger than 1/7th of the original span,
2621 ** then consider it valid. --> return 1
2622 **
2623 ** 2. If no lines of the subsequence contains more than one
2624 ** non-whitespace character, --> return 0
2625 **
2626 ** 3. If any line of the subsequence contains more than one non-whitespace
2627 ** character and is unique across the entire sequence after ignoring
2628 ** leading and trailing whitespace --> return 1
2629 **
2630 ** 4. Otherwise, it is potentially an artifact of an indentation
2631 ** change. --> return 0
2632 */
2633 static int likelyNotIndentChngArtifact(
2634 DContext *p, /* The complete diff context */
2635 int iS1, /* Start of the main segment */
2636 int iSX, /* Start of the subsequence */
2637 int iEX, /* First row past the end of the subsequence */
2638 int iE1 /* First row past the end of the main segment */
2639 ){
2640 int i, j, n;
2641
2642 /* Rule (1) */
2643 if( (iEX-iSX)*7 >= (iE1-iS1) ) return 1;
2644
2645 /* Compute DLine.indent and DLine.nw for all lines of the subsequence.
2646 ** If no lines contain more than one non-whitespace character return
2647 ** 0 because the subsequence could be due to an indentation change.
2648 ** Rule (2).
2649 */
2650 n = 0;
2651 for(i=iSX; i<iEX; i++){
2652 DLine *pA = &p->aFrom[i];
2653 if( pA->nw==0 && pA->n ){
2654 const char *zA = pA->z;
2655 const int nn = pA->n;
2656 int ii, jj;
2657 for(ii=0; ii<nn && diff_isspace(zA[ii]); ii++){}
2658 pA->indent = ii;
2659 for(jj=nn-1; jj>ii && diff_isspace(zA[jj]); jj--){}
2660 pA->nw = jj - ii + 1;
2661 }
2662 if( pA->nw>1 ) n++;
2663 }
2664 if( n==0 ) return 0;
2665
2666 /* Compute DLine.indent and DLine.nw for the entire sequence */
2667 for(i=iS1; i<iE1; i++){
2668 DLine *pA;
2669 if( i==iSX ){
2670 i = iEX;
2671 if( i>=iE1 ) break;
2672 }
2673 pA = &p->aFrom[i];
2674 if( pA->nw==0 && pA->n ){
2675 const char *zA = pA->z;
2676 const int nn = pA->n;
2677 int ii, jj;
2678 for(ii=0; ii<nn && diff_isspace(zA[ii]); ii++){}
2679 pA->indent = ii;
2680 for(jj=nn-1; jj>ii && diff_isspace(zA[jj]); jj--){}
2681 pA->nw = jj - ii + 1;
2682 }
2683 }
2684
2685 /* Check to see if any subsequence line that has more than one
2686 ** non-whitespace character is unique across the entire sequence.
2687 ** Rule (3)
2688 */
2689 for(i=iSX; i<iEX; i++){
2690 const char *z = p->aFrom[i].z + p->aFrom[i].indent;
2691 const int nw = p->aFrom[i].nw;
2692 if( nw<=1 ) continue;
2693 for(j=iS1; j<iSX; j++){
2694 if( p->aFrom[j].nw!=nw ) continue;
2695 if( memcmp(p->aFrom[j].z+p->aFrom[j].indent,z,nw)==0 ) break;
2696 }
2697 if( j<iSX ) continue;
2698 for(j=iEX; j<iE1; j++){
2699 if( p->aFrom[j].nw!=nw ) continue;
2700 if( memcmp(p->aFrom[j].z+p->aFrom[j].indent,z,nw)==0 ) break;
2701 }
2702 if( j>=iE1 ) break;
2703 }
2704 return i<iEX;
2705 }
2706
2707
2708 /*
2709 ** Do a single step in the difference. Compute a sequence of
2710

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button