Fossil SCM
Improved diff alignment following an indentation change. The objective of this change is to improve the diff output for the [a36dd09d17f3057f] check-in.
Commit
868d1608382a9bb11c594bd88f4de8c41e9f63c45fb46e05942c35fc9cfe457a
Parent
e2aed163ac6e7a8…
1 file changed
+68
-9
+68
-9
| --- src/diff.c | ||
| +++ src/diff.c | ||
| @@ -2618,33 +2618,92 @@ | ||
| 2618 | 2618 | ** ALGORITHM (subject to change and refinement): |
| 2619 | 2619 | ** |
| 2620 | 2620 | ** 1. If the subsequence is larger than 1/7th of the original span, |
| 2621 | 2621 | ** then consider it valid. --> return 1 |
| 2622 | 2622 | ** |
| 2623 | -** 2. If the subsequence contains any charaters other than '}', '{", | |
| 2624 | -** or whitespace, then consider it valid. --> return 1 | |
| 2623 | +** 2. If no lines of the subsequence contains more than one | |
| 2624 | +** non-whitespace character, --> return 0 | |
| 2625 | +** | |
| 2626 | +** 3. If any line of the subsequence contains more than one non-whitespace | |
| 2627 | +** character and is unique across the entire sequence after ignoring | |
| 2628 | +** leading and trailing whitespace --> return 1 | |
| 2625 | 2629 | ** |
| 2626 | -** 3. Otherwise, it is potentially an artifact of an indentation | |
| 2630 | +** 4. Otherwise, it is potentially an artifact of an indentation | |
| 2627 | 2631 | ** change. --> return 0 |
| 2628 | 2632 | */ |
| 2629 | 2633 | static int likelyNotIndentChngArtifact( |
| 2630 | 2634 | DContext *p, /* The complete diff context */ |
| 2631 | 2635 | int iS1, /* Start of the main segment */ |
| 2632 | 2636 | int iSX, /* Start of the subsequence */ |
| 2633 | 2637 | int iEX, /* First row past the end of the subsequence */ |
| 2634 | 2638 | int iE1 /* First row past the end of the main segment */ |
| 2635 | 2639 | ){ |
| 2636 | - int i, j; | |
| 2640 | + int i, j, n; | |
| 2641 | + | |
| 2642 | + /* Rule (1) */ | |
| 2637 | 2643 | if( (iEX-iSX)*7 >= (iE1-iS1) ) return 1; |
| 2644 | + | |
| 2645 | + /* Compute DLine.indent and DLine.nw for all lines of the subsequence. | |
| 2646 | + ** If no lines contain more than one non-whitespace character return | |
| 2647 | + ** 0 because the subsequence could be due to an indentation change. | |
| 2648 | + ** Rule (2). | |
| 2649 | + */ | |
| 2650 | + n = 0; | |
| 2651 | + for(i=iSX; i<iEX; i++){ | |
| 2652 | + DLine *pA = &p->aFrom[i]; | |
| 2653 | + if( pA->nw==0 && pA->n ){ | |
| 2654 | + const char *zA = pA->z; | |
| 2655 | + const int nn = pA->n; | |
| 2656 | + int ii, jj; | |
| 2657 | + for(ii=0; ii<nn && diff_isspace(zA[ii]); ii++){} | |
| 2658 | + pA->indent = ii; | |
| 2659 | + for(jj=nn-1; jj>ii && diff_isspace(zA[jj]); jj--){} | |
| 2660 | + pA->nw = jj - ii + 1; | |
| 2661 | + } | |
| 2662 | + if( pA->nw>1 ) n++; | |
| 2663 | + } | |
| 2664 | + if( n==0 ) return 0; | |
| 2665 | + | |
| 2666 | + /* Compute DLine.indent and DLine.nw for the entire sequence */ | |
| 2667 | + for(i=iS1; i<iE1; i++){ | |
| 2668 | + DLine *pA; | |
| 2669 | + if( i==iSX ){ | |
| 2670 | + i = iEX; | |
| 2671 | + if( i>=iE1 ) break; | |
| 2672 | + } | |
| 2673 | + pA = &p->aFrom[i]; | |
| 2674 | + if( pA->nw==0 && pA->n ){ | |
| 2675 | + const char *zA = pA->z; | |
| 2676 | + const int nn = pA->n; | |
| 2677 | + int ii, jj; | |
| 2678 | + for(ii=0; ii<nn && diff_isspace(zA[ii]); ii++){} | |
| 2679 | + pA->indent = ii; | |
| 2680 | + for(jj=nn-1; jj>ii && diff_isspace(zA[jj]); jj--){} | |
| 2681 | + pA->nw = jj - ii + 1; | |
| 2682 | + } | |
| 2683 | + } | |
| 2684 | + | |
| 2685 | + /* Check to see if any subsequence line that has more than one | |
| 2686 | + ** non-whitespace character is unique across the entire sequence. | |
| 2687 | + ** Rule (3) | |
| 2688 | + */ | |
| 2638 | 2689 | for(i=iSX; i<iEX; i++){ |
| 2639 | - const char *z = p->aFrom[i].z; | |
| 2640 | - for(j=p->aFrom[i].n-1; j>=0; j--){ | |
| 2641 | - char c = z[j]; | |
| 2642 | - if( c!='}' && c!='{' && !diff_isspace(c) ) return 1; | |
| 2690 | + const char *z = p->aFrom[i].z + p->aFrom[i].indent; | |
| 2691 | + const int nw = p->aFrom[i].nw; | |
| 2692 | + if( nw<=1 ) continue; | |
| 2693 | + for(j=iS1; j<iSX; j++){ | |
| 2694 | + if( p->aFrom[j].nw!=nw ) continue; | |
| 2695 | + if( memcmp(p->aFrom[j].z+p->aFrom[j].indent,z,nw)==0 ) break; | |
| 2696 | + } | |
| 2697 | + if( j<iSX ) continue; | |
| 2698 | + for(j=iEX; j<iE1; j++){ | |
| 2699 | + if( p->aFrom[j].nw!=nw ) continue; | |
| 2700 | + if( memcmp(p->aFrom[j].z+p->aFrom[j].indent,z,nw)==0 ) break; | |
| 2643 | 2701 | } |
| 2702 | + if( j>=iE1 ) break; | |
| 2644 | 2703 | } |
| 2645 | - return 0; | |
| 2704 | + return i<iEX; | |
| 2646 | 2705 | } |
| 2647 | 2706 | |
| 2648 | 2707 | |
| 2649 | 2708 | /* |
| 2650 | 2709 | ** Do a single step in the difference. Compute a sequence of |
| 2651 | 2710 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -2618,33 +2618,92 @@ | |
| 2618 | ** ALGORITHM (subject to change and refinement): |
| 2619 | ** |
| 2620 | ** 1. If the subsequence is larger than 1/7th of the original span, |
| 2621 | ** then consider it valid. --> return 1 |
| 2622 | ** |
| 2623 | ** 2. If the subsequence contains any charaters other than '}', '{", |
| 2624 | ** or whitespace, then consider it valid. --> return 1 |
| 2625 | ** |
| 2626 | ** 3. Otherwise, it is potentially an artifact of an indentation |
| 2627 | ** change. --> return 0 |
| 2628 | */ |
| 2629 | static int likelyNotIndentChngArtifact( |
| 2630 | DContext *p, /* The complete diff context */ |
| 2631 | int iS1, /* Start of the main segment */ |
| 2632 | int iSX, /* Start of the subsequence */ |
| 2633 | int iEX, /* First row past the end of the subsequence */ |
| 2634 | int iE1 /* First row past the end of the main segment */ |
| 2635 | ){ |
| 2636 | int i, j; |
| 2637 | if( (iEX-iSX)*7 >= (iE1-iS1) ) return 1; |
| 2638 | for(i=iSX; i<iEX; i++){ |
| 2639 | const char *z = p->aFrom[i].z; |
| 2640 | for(j=p->aFrom[i].n-1; j>=0; j--){ |
| 2641 | char c = z[j]; |
| 2642 | if( c!='}' && c!='{' && !diff_isspace(c) ) return 1; |
| 2643 | } |
| 2644 | } |
| 2645 | return 0; |
| 2646 | } |
| 2647 | |
| 2648 | |
| 2649 | /* |
| 2650 | ** Do a single step in the difference. Compute a sequence of |
| 2651 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -2618,33 +2618,92 @@ | |
| 2618 | ** ALGORITHM (subject to change and refinement): |
| 2619 | ** |
| 2620 | ** 1. If the subsequence is larger than 1/7th of the original span, |
| 2621 | ** then consider it valid. --> return 1 |
| 2622 | ** |
| 2623 | ** 2. If no lines of the subsequence contains more than one |
| 2624 | ** non-whitespace character, --> return 0 |
| 2625 | ** |
| 2626 | ** 3. If any line of the subsequence contains more than one non-whitespace |
| 2627 | ** character and is unique across the entire sequence after ignoring |
| 2628 | ** leading and trailing whitespace --> return 1 |
| 2629 | ** |
| 2630 | ** 4. Otherwise, it is potentially an artifact of an indentation |
| 2631 | ** change. --> return 0 |
| 2632 | */ |
| 2633 | static int likelyNotIndentChngArtifact( |
| 2634 | DContext *p, /* The complete diff context */ |
| 2635 | int iS1, /* Start of the main segment */ |
| 2636 | int iSX, /* Start of the subsequence */ |
| 2637 | int iEX, /* First row past the end of the subsequence */ |
| 2638 | int iE1 /* First row past the end of the main segment */ |
| 2639 | ){ |
| 2640 | int i, j, n; |
| 2641 | |
| 2642 | /* Rule (1) */ |
| 2643 | if( (iEX-iSX)*7 >= (iE1-iS1) ) return 1; |
| 2644 | |
| 2645 | /* Compute DLine.indent and DLine.nw for all lines of the subsequence. |
| 2646 | ** If no lines contain more than one non-whitespace character return |
| 2647 | ** 0 because the subsequence could be due to an indentation change. |
| 2648 | ** Rule (2). |
| 2649 | */ |
| 2650 | n = 0; |
| 2651 | for(i=iSX; i<iEX; i++){ |
| 2652 | DLine *pA = &p->aFrom[i]; |
| 2653 | if( pA->nw==0 && pA->n ){ |
| 2654 | const char *zA = pA->z; |
| 2655 | const int nn = pA->n; |
| 2656 | int ii, jj; |
| 2657 | for(ii=0; ii<nn && diff_isspace(zA[ii]); ii++){} |
| 2658 | pA->indent = ii; |
| 2659 | for(jj=nn-1; jj>ii && diff_isspace(zA[jj]); jj--){} |
| 2660 | pA->nw = jj - ii + 1; |
| 2661 | } |
| 2662 | if( pA->nw>1 ) n++; |
| 2663 | } |
| 2664 | if( n==0 ) return 0; |
| 2665 | |
| 2666 | /* Compute DLine.indent and DLine.nw for the entire sequence */ |
| 2667 | for(i=iS1; i<iE1; i++){ |
| 2668 | DLine *pA; |
| 2669 | if( i==iSX ){ |
| 2670 | i = iEX; |
| 2671 | if( i>=iE1 ) break; |
| 2672 | } |
| 2673 | pA = &p->aFrom[i]; |
| 2674 | if( pA->nw==0 && pA->n ){ |
| 2675 | const char *zA = pA->z; |
| 2676 | const int nn = pA->n; |
| 2677 | int ii, jj; |
| 2678 | for(ii=0; ii<nn && diff_isspace(zA[ii]); ii++){} |
| 2679 | pA->indent = ii; |
| 2680 | for(jj=nn-1; jj>ii && diff_isspace(zA[jj]); jj--){} |
| 2681 | pA->nw = jj - ii + 1; |
| 2682 | } |
| 2683 | } |
| 2684 | |
| 2685 | /* Check to see if any subsequence line that has more than one |
| 2686 | ** non-whitespace character is unique across the entire sequence. |
| 2687 | ** Rule (3) |
| 2688 | */ |
| 2689 | for(i=iSX; i<iEX; i++){ |
| 2690 | const char *z = p->aFrom[i].z + p->aFrom[i].indent; |
| 2691 | const int nw = p->aFrom[i].nw; |
| 2692 | if( nw<=1 ) continue; |
| 2693 | for(j=iS1; j<iSX; j++){ |
| 2694 | if( p->aFrom[j].nw!=nw ) continue; |
| 2695 | if( memcmp(p->aFrom[j].z+p->aFrom[j].indent,z,nw)==0 ) break; |
| 2696 | } |
| 2697 | if( j<iSX ) continue; |
| 2698 | for(j=iEX; j<iE1; j++){ |
| 2699 | if( p->aFrom[j].nw!=nw ) continue; |
| 2700 | if( memcmp(p->aFrom[j].z+p->aFrom[j].indent,z,nw)==0 ) break; |
| 2701 | } |
| 2702 | if( j>=iE1 ) break; |
| 2703 | } |
| 2704 | return i<iEX; |
| 2705 | } |
| 2706 | |
| 2707 | |
| 2708 | /* |
| 2709 | ** Do a single step in the difference. Compute a sequence of |
| 2710 |