Fossil SCM

A new, faster algorithm for alignment of rows in a change block.

drh 2021-09-05 00:31 diff-color-enhancements
Commit 71759ef5bf2a7f1afc12fc2130d02d927574afa3a25be57b49e6e8b1bbab14b6
1 file changed +289 -237
+289 -237
--- src/diff.c
+++ src/diff.c
@@ -835,245 +835,10 @@
835835
/*
836836
** Minimum of two values
837837
*/
838838
static int minInt(int a, int b){ return a<b ? a : b; }
839839
840
-/*
841
-** Return the number between 0 and 100 that is smaller the closer pA and
842
-** pB match. Return 0 for a perfect match. Return 100 if pA and pB are
843
-** completely different.
844
-**
845
-** The current algorithm is as follows:
846
-**
847
-** (1) Remove leading and trailing whitespace.
848
-** (2) Truncate both strings to at most 250 characters
849
-** (3) Find the length of the longest common subsequence
850
-** (4) Longer common subsequences yield lower scores.
851
-*/
852
-static int match_dline(const DLine *pA, const DLine *pB){
853
- const char *zA; /* Left string */
854
- const char *zB; /* right string */
855
- int nA; /* Bytes in zA[] */
856
- int nB; /* Bytes in zB[] */
857
- int avg; /* Average length of A and B */
858
- int i, j, k; /* Loop counters */
859
- int best = 0; /* Longest match found so far */
860
- int score; /* Final score. 0..100 */
861
- unsigned char c; /* Character being examined */
862
- unsigned char aFirst[256]; /* aFirst[X] = index in zB[] of first char X */
863
- unsigned char aNext[252]; /* aNext[i] = index in zB[] of next zB[i] char */
864
-
865
- zA = pA->z;
866
- zB = pB->z;
867
- nA = pA->n;
868
- nB = pB->n;
869
- while( nA>0 && fossil_isspace(zA[0]) ){ nA--; zA++; }
870
- while( nA>0 && fossil_isspace(zA[nA-1]) ){ nA--; }
871
- while( nB>0 && fossil_isspace(zB[0]) ){ nB--; zB++; }
872
- while( nB>0 && fossil_isspace(zB[nB-1]) ){ nB--; }
873
- if( nA>250 ) nA = 250;
874
- if( nB>250 ) nB = 250;
875
- avg = (nA+nB)/2;
876
- if( avg==0 ) return 0;
877
- if( nA==nB && memcmp(zA, zB, nA)==0 ) return 0;
878
- memset(aFirst, 0xff, sizeof(aFirst));
879
- zA--; zB--; /* Make both zA[] and zB[] 1-indexed */
880
- for(i=nB; i>0; i--){
881
- c = (unsigned char)zB[i];
882
- aNext[i] = aFirst[c];
883
- aFirst[c] = i;
884
- }
885
- best = 0;
886
- for(i=1; i<=nA-best; i++){
887
- c = (unsigned char)zA[i];
888
- for(j=aFirst[c]; j<nB-best && memcmp(&zA[i],&zB[j],best)==0; j = aNext[j]){
889
- int limit = minInt(nA-i, nB-j);
890
- for(k=best; k<=limit && zA[k+i]==zB[k+j]; k++){}
891
- if( k>best ) best = k;
892
- }
893
- }
894
- score = (best>avg) ? 0 : (avg - best)*100/avg;
895
-
896
-#if 0
897
- fprintf(stderr, "A: [%.*s]\nB: [%.*s]\nbest=%d avg=%d score=%d\n",
898
- nA, zA+1, nB, zB+1, best, avg, score);
899
-#endif
900
-
901
- /* Return the result */
902
- return score;
903
-}
904
-
905
-/*
906
-** COMMAND: test-line-match
907
-** Usage: %fossil test-line-match STRING1 STRING2
908
-**
909
-** Return a score from 0 to 100 that is how similar STRING1 is to
910
-** STRING2. Smaller numbers mean more similar. 0 is an exact match.
911
-**
912
-** This command is used to test to match_dline() function in the
913
-** internal Fossil diff logic.
914
-*/
915
-void test_dline_match(void){
916
- DLine a, b;
917
- int x;
918
- if( g.argc!=4 ) usage("STRING1 STRING2");
919
- a.z = g.argv[2];
920
- a.n = (int)strlen(a.z);
921
- b.z = g.argv[3];
922
- b.n = (int)strlen(b.z);
923
- x = match_dline(&a, &b);
924
- fossil_print("%d\n", x);
925
-}
926
-
927
-/*
928
-** There is a change block in which nLeft lines of text on the left are
929
-** converted into nRight lines of text on the right. This routine computes
930
-** how the lines on the left line up with the lines on the right.
931
-**
932
-** The return value is a buffer of unsigned characters, obtained from
933
-** fossil_malloc(). (The caller needs to free the return value using
934
-** fossil_free().) Entries in the returned array have values as follows:
935
-**
936
-** 1. Delete the next line of pLeft.
937
-** 2. Insert the next line of pRight.
938
-** 3. The next line of pLeft changes into the next line of pRight.
939
-** 4. Delete one line from pLeft and add one line to pRight.
940
-**
941
-** The (4) case only happens when we cannot get a reasonable alignment between
942
-** the two blocks. If any return value is (4), then all return values will
943
-** be (4).
944
-**
945
-** Algorithm: Wagner's minimum edit-distance algorithm, modified by
946
-** adding a cost to each match based on how well the two rows match
947
-** each other. Insertion and deletion costs are 50. Match costs
948
-** are between 0 and 100 where 0 is a perfect match 100 is a complete
949
-** mismatch.
950
-*/
951
-static unsigned char *diffBlockAlignment(
952
- const DLine *aLeft, int nLeft, /* Text on the left */
953
- const DLine *aRight, int nRight, /* Text on the right */
954
- u64 diffFlags /* Flags passed into the original diff */
955
-){
956
- int i, j, k; /* Loop counters */
957
- int *a; /* One row of the Wagner matrix */
958
- int *pToFree; /* Space that needs to be freed */
959
- unsigned char *aM; /* Wagner result matrix */
960
- int nMatch, iMatch; /* Number of matching lines and match score */
961
- int mnLen; /* minInt(nLeft, nRight) */
962
- int mxLen; /* MAX(nLeft, nRight) */
963
- int aBuf[100]; /* Stack space for a[] if nRight not to big */
964
-
965
- if( nLeft==0 ){
966
- aM = fossil_malloc( nLeft + nRight + 2 );
967
- memset(aM, 2, nRight);
968
- return aM;
969
- }
970
- if( nRight==0 ){
971
- aM = fossil_malloc( nLeft + nRight + 2 );
972
- memset(aM, 1, nLeft);
973
- return aM;
974
- }
975
-
976
- /* This algorithm is O(N**2). So if N is too big, bail out with a
977
- ** simple (but stupid and ugly) result that doesn't take too long. */
978
- mnLen = nLeft<nRight ? nLeft : nRight;
979
- if( nLeft*nRight>100000 && (diffFlags & DIFF_SLOW_SBS)==0 ){
980
- aM = fossil_malloc( nLeft + nRight + 2 );
981
- memset(aM, 4, mnLen);
982
- if( nLeft>mnLen ) memset(aM+mnLen, 1, nLeft-mnLen);
983
- if( nRight>mnLen ) memset(aM+mnLen, 2, nRight-mnLen);
984
- return aM;
985
- }
986
-
987
- if( nRight < count(aBuf)-1 ){
988
- pToFree = 0;
989
- a = aBuf;
990
- }else{
991
- a = pToFree = fossil_malloc( sizeof(a[0])*(nRight+1) );
992
- }
993
- aM = fossil_malloc( (nLeft+1)*(nRight+1) );
994
-
995
-
996
- /* Compute the best alignment */
997
- for(i=0; i<=nRight; i++){
998
- aM[i] = 2;
999
- a[i] = i*50;
1000
- }
1001
- aM[0] = 0;
1002
- for(j=1; j<=nLeft; j++){
1003
- int p = a[0];
1004
- a[0] = p+50;
1005
- aM[j*(nRight+1)] = 1;
1006
- for(i=1; i<=nRight; i++){
1007
- int m = a[i-1]+50;
1008
- int d = 2;
1009
- if( m>a[i]+50 ){
1010
- m = a[i]+50;
1011
- d = 1;
1012
- }
1013
- if( m>p ){
1014
- int score = match_dline(&aLeft[j-1], &aRight[i-1]);
1015
- if( (score<=63 || (i<j+1 && i>j-1)) && m>p+score ){
1016
- m = p+score;
1017
- d = 3 | score*4;
1018
- }
1019
- }
1020
- p = a[i];
1021
- a[i] = m;
1022
- aM[j*(nRight+1)+i] = d;
1023
- }
1024
- }
1025
-
1026
- /* Compute the lowest-cost path back through the matrix */
1027
- i = nRight;
1028
- j = nLeft;
1029
- k = (nRight+1)*(nLeft+1)-1;
1030
- nMatch = iMatch = 0;
1031
- while( i+j>0 ){
1032
- unsigned char c = aM[k];
1033
- if( c>=3 ){
1034
- assert( i>0 && j>0 );
1035
- i--;
1036
- j--;
1037
- nMatch++;
1038
- iMatch += (c>>2);
1039
- aM[k] = 3;
1040
- }else if( c==2 ){
1041
- assert( i>0 );
1042
- i--;
1043
- }else{
1044
- assert( j>0 );
1045
- j--;
1046
- }
1047
- k--;
1048
- aM[k] = aM[j*(nRight+1)+i];
1049
- }
1050
- k++;
1051
- i = (nRight+1)*(nLeft+1) - k;
1052
- memmove(aM, &aM[k], i);
1053
-
1054
- /* If:
1055
- ** (1) the alignment is more than 25% longer than the longest side, and
1056
- ** (2) the average match cost exceeds 15
1057
- ** Then this is probably an alignment that will be difficult for humans
1058
- ** to read. So instead, just show all of the right side inserted followed
1059
- ** by all of the left side deleted.
1060
- **
1061
- ** The coefficients for conditions (1) and (2) above are determined by
1062
- ** experimentation.
1063
- */
1064
- mxLen = nLeft>nRight ? nLeft : nRight;
1065
- if( i*4>mxLen*5 && (nMatch==0 || iMatch/nMatch>15) ){
1066
- memset(aM, 4, mnLen);
1067
- if( nLeft>mnLen ) memset(aM+mnLen, 1, nLeft-mnLen);
1068
- if( nRight>mnLen ) memset(aM+mnLen, 2, nRight-mnLen);
1069
- }
1070
-
1071
- /* Return the result */
1072
- fossil_free(pToFree);
1073
- return aM;
1074
-}
1075840
1076841
1077842
/*
1078843
** This is an abstract superclass for an object that accepts difference
1079844
** lines and formats them for display. Subclasses of this object format
@@ -1881,10 +1646,294 @@
18811646
p->width = diff_width(diffFlags);
18821647
p->pOut = pOut;
18831648
return p;
18841649
}
18851650
/****************************************************************************/
1651
+/*
1652
+** Return the number between 0 and 100 that is smaller the closer pA and
1653
+** pB match. Return 0 for a perfect match. Return 100 if pA and pB are
1654
+** completely different.
1655
+**
1656
+** The current algorithm is as follows:
1657
+**
1658
+** (1) Remove leading and trailing whitespace.
1659
+** (2) Truncate both strings to at most 250 characters
1660
+** (3) Find the length of the longest common subsequence
1661
+** (4) Longer common subsequences yield lower scores.
1662
+*/
1663
+static int match_dline(const DLine *pA, const DLine *pB){
1664
+ const char *zA; /* Left string */
1665
+ const char *zB; /* right string */
1666
+ int nA; /* Bytes in zA[] */
1667
+ int nB; /* Bytes in zB[] */
1668
+ int avg; /* Average length of A and B */
1669
+ int i, j, k; /* Loop counters */
1670
+ int best = 0; /* Longest match found so far */
1671
+ int score; /* Final score. 0..100 */
1672
+ unsigned char c; /* Character being examined */
1673
+ unsigned char aFirst[256]; /* aFirst[X] = index in zB[] of first char X */
1674
+ unsigned char aNext[252]; /* aNext[i] = index in zB[] of next zB[i] char */
1675
+
1676
+ zA = pA->z;
1677
+ zB = pB->z;
1678
+ nA = pA->n;
1679
+ nB = pB->n;
1680
+ while( nA>0 && fossil_isspace(zA[0]) ){ nA--; zA++; }
1681
+ while( nA>0 && fossil_isspace(zA[nA-1]) ){ nA--; }
1682
+ while( nB>0 && fossil_isspace(zB[0]) ){ nB--; zB++; }
1683
+ while( nB>0 && fossil_isspace(zB[nB-1]) ){ nB--; }
1684
+ if( nA>250 ) nA = 250;
1685
+ if( nB>250 ) nB = 250;
1686
+ avg = (nA+nB)/2;
1687
+ if( avg==0 ) return 0;
1688
+ if( nA==nB && memcmp(zA, zB, nA)==0 ) return 0;
1689
+ memset(aFirst, 0xff, sizeof(aFirst));
1690
+ zA--; zB--; /* Make both zA[] and zB[] 1-indexed */
1691
+ for(i=nB; i>0; i--){
1692
+ c = (unsigned char)zB[i];
1693
+ aNext[i] = aFirst[c];
1694
+ aFirst[c] = i;
1695
+ }
1696
+ best = 0;
1697
+ for(i=1; i<=nA-best; i++){
1698
+ c = (unsigned char)zA[i];
1699
+ for(j=aFirst[c]; j<nB-best && memcmp(&zA[i],&zB[j],best)==0; j = aNext[j]){
1700
+ int limit = minInt(nA-i, nB-j);
1701
+ for(k=best; k<=limit && zA[k+i]==zB[k+j]; k++){}
1702
+ if( k>best ) best = k;
1703
+ }
1704
+ }
1705
+ score = (best>avg) ? 0 : (avg - best)*100/avg;
1706
+
1707
+#if 0
1708
+ fprintf(stderr, "A: [%.*s]\nB: [%.*s]\nbest=%d avg=%d score=%d\n",
1709
+ nA, zA+1, nB, zB+1, best, avg, score);
1710
+#endif
1711
+
1712
+ /* Return the result */
1713
+ return score;
1714
+}
1715
+
1716
+/*
1717
+** COMMAND: test-line-match
1718
+** Usage: %fossil test-line-match STRING1 STRING2
1719
+**
1720
+** Return a score from 0 to 100 that is how similar STRING1 is to
1721
+** STRING2. Smaller numbers mean more similar. 0 is an exact match.
1722
+**
1723
+** This command is used to test to match_dline() function in the
1724
+** internal Fossil diff logic.
1725
+*/
1726
+void test_dline_match(void){
1727
+ DLine a, b;
1728
+ int x;
1729
+ if( g.argc!=4 ) usage("STRING1 STRING2");
1730
+ a.z = g.argv[2];
1731
+ a.n = (int)strlen(a.z);
1732
+ b.z = g.argv[3];
1733
+ b.n = (int)strlen(b.z);
1734
+ x = match_dline(&a, &b);
1735
+ fossil_print("%d\n", x);
1736
+}
1737
+
1738
+/*
1739
+** There is a change block in which nLeft lines of text on the left are
1740
+** converted into nRight lines of text on the right. This routine computes
1741
+** how the lines on the left line up with the lines on the right.
1742
+**
1743
+** The return value is a buffer of unsigned characters, obtained from
1744
+** fossil_malloc(). (The caller needs to free the return value using
1745
+** fossil_free().) Entries in the returned array have values as follows:
1746
+**
1747
+** 1. Delete the next line of pLeft.
1748
+** 2. Insert the next line of pRight.
1749
+** 3. The next line of pLeft changes into the next line of pRight.
1750
+** 4. Delete one line from pLeft and add one line to pRight.
1751
+**
1752
+** The length of the returned array will be at most nLeft+nRight bytes.
1753
+** If the first bytes is 4, that means we could not compute reasonable
1754
+** alignment between the two blocks.
1755
+**
1756
+** Algorithm: Wagner's minimum edit-distance algorithm, modified by
1757
+** adding a cost to each match based on how well the two rows match
1758
+** each other. Insertion and deletion costs are 50. Match costs
1759
+** are between 0 and 100 where 0 is a perfect match 100 is a complete
1760
+** mismatch.
1761
+*/
1762
+static unsigned char *diffBlockAlignment(
1763
+ const DLine *aLeft, int nLeft, /* Text on the left */
1764
+ const DLine *aRight, int nRight, /* Text on the right */
1765
+ u64 diffFlags, /* Flags passed into the original diff */
1766
+ int *pNResult /* OUTPUT: Bytes of result */
1767
+){
1768
+ int i, j, k; /* Loop counters */
1769
+ int *a; /* One row of the Wagner matrix */
1770
+ int *pToFree; /* Space that needs to be freed */
1771
+ unsigned char *aM; /* Wagner result matrix */
1772
+ int nMatch, iMatch; /* Number of matching lines and match score */
1773
+ int mnLen; /* minInt(nLeft, nRight) */
1774
+ int mxLen; /* MAX(nLeft, nRight) */
1775
+ int aBuf[100]; /* Stack space for a[] if nRight not to big */
1776
+
1777
+ if( nLeft==0 ){
1778
+ aM = fossil_malloc( nRight + 2 );
1779
+ memset(aM, 2, nRight);
1780
+ *pNResult = nRight;
1781
+ return aM;
1782
+ }
1783
+ if( nRight==0 ){
1784
+ aM = fossil_malloc( nLeft + 2 );
1785
+ memset(aM, 1, nLeft);
1786
+ *pNResult = nLeft;
1787
+ return aM;
1788
+ }
1789
+
1790
+ /* For large alignments, use a divide and conquer algorithm that is
1791
+ ** O(NlogN). The result is not as precise, but this whole thing is an
1792
+ ** approximation anyhow, and the faster response time is an acceptable
1793
+ ** trade-off for reduced precision.
1794
+ */
1795
+ mnLen = nLeft<nRight ? nLeft : nRight;
1796
+ if( nLeft*nRight>1000 && (diffFlags & DIFF_SLOW_SBS)==0 ){
1797
+ const DLine *aSmall; /* The smaller of aLeft and aRight */
1798
+ const DLine *aBig; /* The larger of aLeft and aRight */
1799
+ int nSmall, nBig; /* Size of aSmall and aBig. nSmall<=nBig */
1800
+ int iDivSmall, iDivBig; /* Divider point for aSmall and aBig */
1801
+ int iDivLeft, iDivRight; /* Divider point for aLeft and aRight */
1802
+ unsigned char *a1, *a2; /* Results of the alignments on two halves */
1803
+ int n1, n2; /* Number of entries in a1 and a2 */
1804
+ int score, bestScore; /* Score and best score seen so far */
1805
+ if( nLeft>nRight ){
1806
+ aSmall = aRight;
1807
+ nSmall = nRight;
1808
+ aBig = aLeft;
1809
+ nBig = nLeft;
1810
+ }else{
1811
+ aSmall = aLeft;
1812
+ nSmall = nLeft;
1813
+ aBig = aRight;
1814
+ nBig = nRight;
1815
+ }
1816
+ iDivBig = nBig/2;
1817
+ bestScore = 10000;
1818
+ for(i=0; i<nSmall; i++){
1819
+ score = match_dline(aBig+iDivBig, aSmall+i) + abs(i-nSmall/2)*2;
1820
+ if( score<bestScore ){
1821
+ bestScore = score;
1822
+ iDivSmall = i;
1823
+ }
1824
+ }
1825
+ if( aSmall==aRight ){
1826
+ iDivRight = iDivSmall;
1827
+ iDivLeft = iDivBig;
1828
+ }else{
1829
+ iDivRight = iDivBig;
1830
+ iDivLeft = iDivSmall;
1831
+ }
1832
+ a1 = diffBlockAlignment(aLeft,iDivLeft,aRight,iDivRight,diffFlags,&n1);
1833
+ a2 = diffBlockAlignment(aLeft+iDivLeft, nLeft-iDivLeft,
1834
+ aRight+iDivRight, nRight-iDivRight,
1835
+ diffFlags, &n2);
1836
+ a1 = fossil_realloc(a1, n1+n2 );
1837
+ memcpy(a1+n1,a2,n2);
1838
+ fossil_free(a2);
1839
+ *pNResult = n1+n2;
1840
+ return a1;
1841
+ }
1842
+
1843
+ /* If we reach this point, we will be doing an O(N*N) Wagner minimum
1844
+ ** edit distance to compute the alignment.
1845
+ */
1846
+ if( nRight < count(aBuf)-1 ){
1847
+ pToFree = 0;
1848
+ a = aBuf;
1849
+ }else{
1850
+ a = pToFree = fossil_malloc( sizeof(a[0])*(nRight+1) );
1851
+ }
1852
+ aM = fossil_malloc( (nLeft+1)*(nRight+1) );
1853
+
1854
+
1855
+ /* Compute the best alignment */
1856
+ for(i=0; i<=nRight; i++){
1857
+ aM[i] = 2;
1858
+ a[i] = i*50;
1859
+ }
1860
+ aM[0] = 0;
1861
+ for(j=1; j<=nLeft; j++){
1862
+ int p = a[0];
1863
+ a[0] = p+50;
1864
+ aM[j*(nRight+1)] = 1;
1865
+ for(i=1; i<=nRight; i++){
1866
+ int m = a[i-1]+50;
1867
+ int d = 2;
1868
+ if( m>a[i]+50 ){
1869
+ m = a[i]+50;
1870
+ d = 1;
1871
+ }
1872
+ if( m>p ){
1873
+ int score = match_dline(&aLeft[j-1], &aRight[i-1]);
1874
+ if( (score<=63 || (i<j+1 && i>j-1)) && m>p+score ){
1875
+ m = p+score;
1876
+ d = 3 | score*4;
1877
+ }
1878
+ }
1879
+ p = a[i];
1880
+ a[i] = m;
1881
+ aM[j*(nRight+1)+i] = d;
1882
+ }
1883
+ }
1884
+
1885
+ /* Compute the lowest-cost path back through the matrix */
1886
+ i = nRight;
1887
+ j = nLeft;
1888
+ k = (nRight+1)*(nLeft+1)-1;
1889
+ nMatch = iMatch = 0;
1890
+ while( i+j>0 ){
1891
+ unsigned char c = aM[k];
1892
+ if( c>=3 ){
1893
+ assert( i>0 && j>0 );
1894
+ i--;
1895
+ j--;
1896
+ nMatch++;
1897
+ iMatch += (c>>2);
1898
+ aM[k] = 3;
1899
+ }else if( c==2 ){
1900
+ assert( i>0 );
1901
+ i--;
1902
+ }else{
1903
+ assert( j>0 );
1904
+ j--;
1905
+ }
1906
+ k--;
1907
+ aM[k] = aM[j*(nRight+1)+i];
1908
+ }
1909
+ k++;
1910
+ i = (nRight+1)*(nLeft+1) - k;
1911
+ memmove(aM, &aM[k], i);
1912
+ *pNResult = i;
1913
+
1914
+ /* If:
1915
+ ** (1) the alignment is more than 25% longer than the longest side, and
1916
+ ** (2) the average match cost exceeds 15
1917
+ ** Then this is probably an alignment that will be difficult for humans
1918
+ ** to read. So instead, just show all of the right side inserted followed
1919
+ ** by all of the left side deleted.
1920
+ **
1921
+ ** The coefficients for conditions (1) and (2) above are determined by
1922
+ ** experimentation.
1923
+ */
1924
+ mxLen = nLeft>nRight ? nLeft : nRight;
1925
+ if( i*4>mxLen*5 && (nMatch==0 || iMatch/nMatch>15) ){
1926
+ memset(aM, 4, mnLen); *pNResult = mnLen;
1927
+ if( nLeft>mnLen ){ memset(aM+mnLen, 1, nLeft-mnLen); *pNResult = nLeft; }
1928
+ if( nRight>mnLen ){ memset(aM+mnLen, 2, nRight-mnLen); *pNResult = nRight; }
1929
+ }
1930
+
1931
+ /* Return the result */
1932
+ fossil_free(pToFree);
1933
+ return aM;
1934
+}
18861935
18871936
/*
18881937
** R[] is an array of six integer, two COPY/DELETE/INSERT triples for a
18891938
** pair of adjacent differences. Return true if the gap between these
18901939
** two differences is so small that they should be rendered as a single
@@ -1993,16 +2042,17 @@
19932042
a += m;
19942043
b += m;
19952044
19962045
/* Show the differences */
19972046
for(i=0; i<nr; i++){
2047
+ int nAlign;
19982048
unsigned char *alignment;
19992049
ma = R[r+i*3+1]; /* Lines on left but not on right */
20002050
mb = R[r+i*3+2]; /* Lines on right but not on left */
20012051
20022052
/* Try to find an alignment for the lines within this one block */
2003
- alignment = diffBlockAlignment(&A[a], ma, &B[b], mb, diffFlags);
2053
+ alignment = diffBlockAlignment(&A[a], ma, &B[b], mb, diffFlags, &nAlign);
20042054
20052055
/* If we could not get a good alignment, try merging the current
20062056
** block with subsequent blocks, if the subsequent blocks are
20072057
** nearby */
20082058
while( alignment[0]==4 && i<nr-1 && smallGap(&R[r+i*3]) ){
@@ -2009,14 +2059,15 @@
20092059
i++;
20102060
m = R[r+i*3];
20112061
ma += R[r+i*3+1] + m;
20122062
mb += R[r+i*3+2] + m;
20132063
fossil_free(alignment);
2014
- alignment = diffBlockAlignment(&A[a], ma, &B[b], mb, diffFlags);
2064
+ alignment = diffBlockAlignment(&A[a], ma, &B[b], mb, diffFlags,&nAlign);
20152065
}
20162066
20172067
for(j=0; ma+mb>0; j++){
2068
+ assert( j<nAlign );
20182069
switch( alignment[j] ){
20192070
case 1: {
20202071
/* Delete one line from the left */
20212072
pBuilder->xDelete(pBuilder, &A[a]);
20222073
ma--;
@@ -2050,10 +2101,11 @@
20502101
b++;
20512102
break;
20522103
}
20532104
}
20542105
}
2106
+ assert( nAlign==j );
20552107
fossil_free(alignment);
20562108
if( i<nr-1 ){
20572109
m = R[r+i*3+3];
20582110
for(j=0; j<m; j++){
20592111
pBuilder->xCommon(pBuilder, &A[a+j]);
20602112
--- src/diff.c
+++ src/diff.c
@@ -835,245 +835,10 @@
835 /*
836 ** Minimum of two values
837 */
838 static int minInt(int a, int b){ return a<b ? a : b; }
839
840 /*
841 ** Return the number between 0 and 100 that is smaller the closer pA and
842 ** pB match. Return 0 for a perfect match. Return 100 if pA and pB are
843 ** completely different.
844 **
845 ** The current algorithm is as follows:
846 **
847 ** (1) Remove leading and trailing whitespace.
848 ** (2) Truncate both strings to at most 250 characters
849 ** (3) Find the length of the longest common subsequence
850 ** (4) Longer common subsequences yield lower scores.
851 */
852 static int match_dline(const DLine *pA, const DLine *pB){
853 const char *zA; /* Left string */
854 const char *zB; /* right string */
855 int nA; /* Bytes in zA[] */
856 int nB; /* Bytes in zB[] */
857 int avg; /* Average length of A and B */
858 int i, j, k; /* Loop counters */
859 int best = 0; /* Longest match found so far */
860 int score; /* Final score. 0..100 */
861 unsigned char c; /* Character being examined */
862 unsigned char aFirst[256]; /* aFirst[X] = index in zB[] of first char X */
863 unsigned char aNext[252]; /* aNext[i] = index in zB[] of next zB[i] char */
864
865 zA = pA->z;
866 zB = pB->z;
867 nA = pA->n;
868 nB = pB->n;
869 while( nA>0 && fossil_isspace(zA[0]) ){ nA--; zA++; }
870 while( nA>0 && fossil_isspace(zA[nA-1]) ){ nA--; }
871 while( nB>0 && fossil_isspace(zB[0]) ){ nB--; zB++; }
872 while( nB>0 && fossil_isspace(zB[nB-1]) ){ nB--; }
873 if( nA>250 ) nA = 250;
874 if( nB>250 ) nB = 250;
875 avg = (nA+nB)/2;
876 if( avg==0 ) return 0;
877 if( nA==nB && memcmp(zA, zB, nA)==0 ) return 0;
878 memset(aFirst, 0xff, sizeof(aFirst));
879 zA--; zB--; /* Make both zA[] and zB[] 1-indexed */
880 for(i=nB; i>0; i--){
881 c = (unsigned char)zB[i];
882 aNext[i] = aFirst[c];
883 aFirst[c] = i;
884 }
885 best = 0;
886 for(i=1; i<=nA-best; i++){
887 c = (unsigned char)zA[i];
888 for(j=aFirst[c]; j<nB-best && memcmp(&zA[i],&zB[j],best)==0; j = aNext[j]){
889 int limit = minInt(nA-i, nB-j);
890 for(k=best; k<=limit && zA[k+i]==zB[k+j]; k++){}
891 if( k>best ) best = k;
892 }
893 }
894 score = (best>avg) ? 0 : (avg - best)*100/avg;
895
896 #if 0
897 fprintf(stderr, "A: [%.*s]\nB: [%.*s]\nbest=%d avg=%d score=%d\n",
898 nA, zA+1, nB, zB+1, best, avg, score);
899 #endif
900
901 /* Return the result */
902 return score;
903 }
904
905 /*
906 ** COMMAND: test-line-match
907 ** Usage: %fossil test-line-match STRING1 STRING2
908 **
909 ** Return a score from 0 to 100 that is how similar STRING1 is to
910 ** STRING2. Smaller numbers mean more similar. 0 is an exact match.
911 **
912 ** This command is used to test to match_dline() function in the
913 ** internal Fossil diff logic.
914 */
915 void test_dline_match(void){
916 DLine a, b;
917 int x;
918 if( g.argc!=4 ) usage("STRING1 STRING2");
919 a.z = g.argv[2];
920 a.n = (int)strlen(a.z);
921 b.z = g.argv[3];
922 b.n = (int)strlen(b.z);
923 x = match_dline(&a, &b);
924 fossil_print("%d\n", x);
925 }
926
927 /*
928 ** There is a change block in which nLeft lines of text on the left are
929 ** converted into nRight lines of text on the right. This routine computes
930 ** how the lines on the left line up with the lines on the right.
931 **
932 ** The return value is a buffer of unsigned characters, obtained from
933 ** fossil_malloc(). (The caller needs to free the return value using
934 ** fossil_free().) Entries in the returned array have values as follows:
935 **
936 ** 1. Delete the next line of pLeft.
937 ** 2. Insert the next line of pRight.
938 ** 3. The next line of pLeft changes into the next line of pRight.
939 ** 4. Delete one line from pLeft and add one line to pRight.
940 **
941 ** The (4) case only happens when we cannot get a reasonable alignment between
942 ** the two blocks. If any return value is (4), then all return values will
943 ** be (4).
944 **
945 ** Algorithm: Wagner's minimum edit-distance algorithm, modified by
946 ** adding a cost to each match based on how well the two rows match
947 ** each other. Insertion and deletion costs are 50. Match costs
948 ** are between 0 and 100 where 0 is a perfect match 100 is a complete
949 ** mismatch.
950 */
951 static unsigned char *diffBlockAlignment(
952 const DLine *aLeft, int nLeft, /* Text on the left */
953 const DLine *aRight, int nRight, /* Text on the right */
954 u64 diffFlags /* Flags passed into the original diff */
955 ){
956 int i, j, k; /* Loop counters */
957 int *a; /* One row of the Wagner matrix */
958 int *pToFree; /* Space that needs to be freed */
959 unsigned char *aM; /* Wagner result matrix */
960 int nMatch, iMatch; /* Number of matching lines and match score */
961 int mnLen; /* minInt(nLeft, nRight) */
962 int mxLen; /* MAX(nLeft, nRight) */
963 int aBuf[100]; /* Stack space for a[] if nRight not to big */
964
965 if( nLeft==0 ){
966 aM = fossil_malloc( nLeft + nRight + 2 );
967 memset(aM, 2, nRight);
968 return aM;
969 }
970 if( nRight==0 ){
971 aM = fossil_malloc( nLeft + nRight + 2 );
972 memset(aM, 1, nLeft);
973 return aM;
974 }
975
976 /* This algorithm is O(N**2). So if N is too big, bail out with a
977 ** simple (but stupid and ugly) result that doesn't take too long. */
978 mnLen = nLeft<nRight ? nLeft : nRight;
979 if( nLeft*nRight>100000 && (diffFlags & DIFF_SLOW_SBS)==0 ){
980 aM = fossil_malloc( nLeft + nRight + 2 );
981 memset(aM, 4, mnLen);
982 if( nLeft>mnLen ) memset(aM+mnLen, 1, nLeft-mnLen);
983 if( nRight>mnLen ) memset(aM+mnLen, 2, nRight-mnLen);
984 return aM;
985 }
986
987 if( nRight < count(aBuf)-1 ){
988 pToFree = 0;
989 a = aBuf;
990 }else{
991 a = pToFree = fossil_malloc( sizeof(a[0])*(nRight+1) );
992 }
993 aM = fossil_malloc( (nLeft+1)*(nRight+1) );
994
995
996 /* Compute the best alignment */
997 for(i=0; i<=nRight; i++){
998 aM[i] = 2;
999 a[i] = i*50;
1000 }
1001 aM[0] = 0;
1002 for(j=1; j<=nLeft; j++){
1003 int p = a[0];
1004 a[0] = p+50;
1005 aM[j*(nRight+1)] = 1;
1006 for(i=1; i<=nRight; i++){
1007 int m = a[i-1]+50;
1008 int d = 2;
1009 if( m>a[i]+50 ){
1010 m = a[i]+50;
1011 d = 1;
1012 }
1013 if( m>p ){
1014 int score = match_dline(&aLeft[j-1], &aRight[i-1]);
1015 if( (score<=63 || (i<j+1 && i>j-1)) && m>p+score ){
1016 m = p+score;
1017 d = 3 | score*4;
1018 }
1019 }
1020 p = a[i];
1021 a[i] = m;
1022 aM[j*(nRight+1)+i] = d;
1023 }
1024 }
1025
1026 /* Compute the lowest-cost path back through the matrix */
1027 i = nRight;
1028 j = nLeft;
1029 k = (nRight+1)*(nLeft+1)-1;
1030 nMatch = iMatch = 0;
1031 while( i+j>0 ){
1032 unsigned char c = aM[k];
1033 if( c>=3 ){
1034 assert( i>0 && j>0 );
1035 i--;
1036 j--;
1037 nMatch++;
1038 iMatch += (c>>2);
1039 aM[k] = 3;
1040 }else if( c==2 ){
1041 assert( i>0 );
1042 i--;
1043 }else{
1044 assert( j>0 );
1045 j--;
1046 }
1047 k--;
1048 aM[k] = aM[j*(nRight+1)+i];
1049 }
1050 k++;
1051 i = (nRight+1)*(nLeft+1) - k;
1052 memmove(aM, &aM[k], i);
1053
1054 /* If:
1055 ** (1) the alignment is more than 25% longer than the longest side, and
1056 ** (2) the average match cost exceeds 15
1057 ** Then this is probably an alignment that will be difficult for humans
1058 ** to read. So instead, just show all of the right side inserted followed
1059 ** by all of the left side deleted.
1060 **
1061 ** The coefficients for conditions (1) and (2) above are determined by
1062 ** experimentation.
1063 */
1064 mxLen = nLeft>nRight ? nLeft : nRight;
1065 if( i*4>mxLen*5 && (nMatch==0 || iMatch/nMatch>15) ){
1066 memset(aM, 4, mnLen);
1067 if( nLeft>mnLen ) memset(aM+mnLen, 1, nLeft-mnLen);
1068 if( nRight>mnLen ) memset(aM+mnLen, 2, nRight-mnLen);
1069 }
1070
1071 /* Return the result */
1072 fossil_free(pToFree);
1073 return aM;
1074 }
1075
1076
1077 /*
1078 ** This is an abstract superclass for an object that accepts difference
1079 ** lines and formats them for display. Subclasses of this object format
@@ -1881,10 +1646,294 @@
1881 p->width = diff_width(diffFlags);
1882 p->pOut = pOut;
1883 return p;
1884 }
1885 /****************************************************************************/
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1886
1887 /*
1888 ** R[] is an array of six integer, two COPY/DELETE/INSERT triples for a
1889 ** pair of adjacent differences. Return true if the gap between these
1890 ** two differences is so small that they should be rendered as a single
@@ -1993,16 +2042,17 @@
1993 a += m;
1994 b += m;
1995
1996 /* Show the differences */
1997 for(i=0; i<nr; i++){
 
1998 unsigned char *alignment;
1999 ma = R[r+i*3+1]; /* Lines on left but not on right */
2000 mb = R[r+i*3+2]; /* Lines on right but not on left */
2001
2002 /* Try to find an alignment for the lines within this one block */
2003 alignment = diffBlockAlignment(&A[a], ma, &B[b], mb, diffFlags);
2004
2005 /* If we could not get a good alignment, try merging the current
2006 ** block with subsequent blocks, if the subsequent blocks are
2007 ** nearby */
2008 while( alignment[0]==4 && i<nr-1 && smallGap(&R[r+i*3]) ){
@@ -2009,14 +2059,15 @@
2009 i++;
2010 m = R[r+i*3];
2011 ma += R[r+i*3+1] + m;
2012 mb += R[r+i*3+2] + m;
2013 fossil_free(alignment);
2014 alignment = diffBlockAlignment(&A[a], ma, &B[b], mb, diffFlags);
2015 }
2016
2017 for(j=0; ma+mb>0; j++){
 
2018 switch( alignment[j] ){
2019 case 1: {
2020 /* Delete one line from the left */
2021 pBuilder->xDelete(pBuilder, &A[a]);
2022 ma--;
@@ -2050,10 +2101,11 @@
2050 b++;
2051 break;
2052 }
2053 }
2054 }
 
2055 fossil_free(alignment);
2056 if( i<nr-1 ){
2057 m = R[r+i*3+3];
2058 for(j=0; j<m; j++){
2059 pBuilder->xCommon(pBuilder, &A[a+j]);
2060
--- src/diff.c
+++ src/diff.c
@@ -835,245 +835,10 @@
835 /*
836 ** Minimum of two values
837 */
838 static int minInt(int a, int b){ return a<b ? a : b; }
839
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
840
841
842 /*
843 ** This is an abstract superclass for an object that accepts difference
844 ** lines and formats them for display. Subclasses of this object format
@@ -1881,10 +1646,294 @@
1646 p->width = diff_width(diffFlags);
1647 p->pOut = pOut;
1648 return p;
1649 }
1650 /****************************************************************************/
1651 /*
1652 ** Return the number between 0 and 100 that is smaller the closer pA and
1653 ** pB match. Return 0 for a perfect match. Return 100 if pA and pB are
1654 ** completely different.
1655 **
1656 ** The current algorithm is as follows:
1657 **
1658 ** (1) Remove leading and trailing whitespace.
1659 ** (2) Truncate both strings to at most 250 characters
1660 ** (3) Find the length of the longest common subsequence
1661 ** (4) Longer common subsequences yield lower scores.
1662 */
1663 static int match_dline(const DLine *pA, const DLine *pB){
1664 const char *zA; /* Left string */
1665 const char *zB; /* right string */
1666 int nA; /* Bytes in zA[] */
1667 int nB; /* Bytes in zB[] */
1668 int avg; /* Average length of A and B */
1669 int i, j, k; /* Loop counters */
1670 int best = 0; /* Longest match found so far */
1671 int score; /* Final score. 0..100 */
1672 unsigned char c; /* Character being examined */
1673 unsigned char aFirst[256]; /* aFirst[X] = index in zB[] of first char X */
1674 unsigned char aNext[252]; /* aNext[i] = index in zB[] of next zB[i] char */
1675
1676 zA = pA->z;
1677 zB = pB->z;
1678 nA = pA->n;
1679 nB = pB->n;
1680 while( nA>0 && fossil_isspace(zA[0]) ){ nA--; zA++; }
1681 while( nA>0 && fossil_isspace(zA[nA-1]) ){ nA--; }
1682 while( nB>0 && fossil_isspace(zB[0]) ){ nB--; zB++; }
1683 while( nB>0 && fossil_isspace(zB[nB-1]) ){ nB--; }
1684 if( nA>250 ) nA = 250;
1685 if( nB>250 ) nB = 250;
1686 avg = (nA+nB)/2;
1687 if( avg==0 ) return 0;
1688 if( nA==nB && memcmp(zA, zB, nA)==0 ) return 0;
1689 memset(aFirst, 0xff, sizeof(aFirst));
1690 zA--; zB--; /* Make both zA[] and zB[] 1-indexed */
1691 for(i=nB; i>0; i--){
1692 c = (unsigned char)zB[i];
1693 aNext[i] = aFirst[c];
1694 aFirst[c] = i;
1695 }
1696 best = 0;
1697 for(i=1; i<=nA-best; i++){
1698 c = (unsigned char)zA[i];
1699 for(j=aFirst[c]; j<nB-best && memcmp(&zA[i],&zB[j],best)==0; j = aNext[j]){
1700 int limit = minInt(nA-i, nB-j);
1701 for(k=best; k<=limit && zA[k+i]==zB[k+j]; k++){}
1702 if( k>best ) best = k;
1703 }
1704 }
1705 score = (best>avg) ? 0 : (avg - best)*100/avg;
1706
1707 #if 0
1708 fprintf(stderr, "A: [%.*s]\nB: [%.*s]\nbest=%d avg=%d score=%d\n",
1709 nA, zA+1, nB, zB+1, best, avg, score);
1710 #endif
1711
1712 /* Return the result */
1713 return score;
1714 }
1715
1716 /*
1717 ** COMMAND: test-line-match
1718 ** Usage: %fossil test-line-match STRING1 STRING2
1719 **
1720 ** Return a score from 0 to 100 that is how similar STRING1 is to
1721 ** STRING2. Smaller numbers mean more similar. 0 is an exact match.
1722 **
1723 ** This command is used to test to match_dline() function in the
1724 ** internal Fossil diff logic.
1725 */
1726 void test_dline_match(void){
1727 DLine a, b;
1728 int x;
1729 if( g.argc!=4 ) usage("STRING1 STRING2");
1730 a.z = g.argv[2];
1731 a.n = (int)strlen(a.z);
1732 b.z = g.argv[3];
1733 b.n = (int)strlen(b.z);
1734 x = match_dline(&a, &b);
1735 fossil_print("%d\n", x);
1736 }
1737
1738 /*
1739 ** There is a change block in which nLeft lines of text on the left are
1740 ** converted into nRight lines of text on the right. This routine computes
1741 ** how the lines on the left line up with the lines on the right.
1742 **
1743 ** The return value is a buffer of unsigned characters, obtained from
1744 ** fossil_malloc(). (The caller needs to free the return value using
1745 ** fossil_free().) Entries in the returned array have values as follows:
1746 **
1747 ** 1. Delete the next line of pLeft.
1748 ** 2. Insert the next line of pRight.
1749 ** 3. The next line of pLeft changes into the next line of pRight.
1750 ** 4. Delete one line from pLeft and add one line to pRight.
1751 **
1752 ** The length of the returned array will be at most nLeft+nRight bytes.
1753 ** If the first bytes is 4, that means we could not compute reasonable
1754 ** alignment between the two blocks.
1755 **
1756 ** Algorithm: Wagner's minimum edit-distance algorithm, modified by
1757 ** adding a cost to each match based on how well the two rows match
1758 ** each other. Insertion and deletion costs are 50. Match costs
1759 ** are between 0 and 100 where 0 is a perfect match 100 is a complete
1760 ** mismatch.
1761 */
1762 static unsigned char *diffBlockAlignment(
1763 const DLine *aLeft, int nLeft, /* Text on the left */
1764 const DLine *aRight, int nRight, /* Text on the right */
1765 u64 diffFlags, /* Flags passed into the original diff */
1766 int *pNResult /* OUTPUT: Bytes of result */
1767 ){
1768 int i, j, k; /* Loop counters */
1769 int *a; /* One row of the Wagner matrix */
1770 int *pToFree; /* Space that needs to be freed */
1771 unsigned char *aM; /* Wagner result matrix */
1772 int nMatch, iMatch; /* Number of matching lines and match score */
1773 int mnLen; /* minInt(nLeft, nRight) */
1774 int mxLen; /* MAX(nLeft, nRight) */
1775 int aBuf[100]; /* Stack space for a[] if nRight not to big */
1776
1777 if( nLeft==0 ){
1778 aM = fossil_malloc( nRight + 2 );
1779 memset(aM, 2, nRight);
1780 *pNResult = nRight;
1781 return aM;
1782 }
1783 if( nRight==0 ){
1784 aM = fossil_malloc( nLeft + 2 );
1785 memset(aM, 1, nLeft);
1786 *pNResult = nLeft;
1787 return aM;
1788 }
1789
1790 /* For large alignments, use a divide and conquer algorithm that is
1791 ** O(NlogN). The result is not as precise, but this whole thing is an
1792 ** approximation anyhow, and the faster response time is an acceptable
1793 ** trade-off for reduced precision.
1794 */
1795 mnLen = nLeft<nRight ? nLeft : nRight;
1796 if( nLeft*nRight>1000 && (diffFlags & DIFF_SLOW_SBS)==0 ){
1797 const DLine *aSmall; /* The smaller of aLeft and aRight */
1798 const DLine *aBig; /* The larger of aLeft and aRight */
1799 int nSmall, nBig; /* Size of aSmall and aBig. nSmall<=nBig */
1800 int iDivSmall, iDivBig; /* Divider point for aSmall and aBig */
1801 int iDivLeft, iDivRight; /* Divider point for aLeft and aRight */
1802 unsigned char *a1, *a2; /* Results of the alignments on two halves */
1803 int n1, n2; /* Number of entries in a1 and a2 */
1804 int score, bestScore; /* Score and best score seen so far */
1805 if( nLeft>nRight ){
1806 aSmall = aRight;
1807 nSmall = nRight;
1808 aBig = aLeft;
1809 nBig = nLeft;
1810 }else{
1811 aSmall = aLeft;
1812 nSmall = nLeft;
1813 aBig = aRight;
1814 nBig = nRight;
1815 }
1816 iDivBig = nBig/2;
1817 bestScore = 10000;
1818 for(i=0; i<nSmall; i++){
1819 score = match_dline(aBig+iDivBig, aSmall+i) + abs(i-nSmall/2)*2;
1820 if( score<bestScore ){
1821 bestScore = score;
1822 iDivSmall = i;
1823 }
1824 }
1825 if( aSmall==aRight ){
1826 iDivRight = iDivSmall;
1827 iDivLeft = iDivBig;
1828 }else{
1829 iDivRight = iDivBig;
1830 iDivLeft = iDivSmall;
1831 }
1832 a1 = diffBlockAlignment(aLeft,iDivLeft,aRight,iDivRight,diffFlags,&n1);
1833 a2 = diffBlockAlignment(aLeft+iDivLeft, nLeft-iDivLeft,
1834 aRight+iDivRight, nRight-iDivRight,
1835 diffFlags, &n2);
1836 a1 = fossil_realloc(a1, n1+n2 );
1837 memcpy(a1+n1,a2,n2);
1838 fossil_free(a2);
1839 *pNResult = n1+n2;
1840 return a1;
1841 }
1842
1843 /* If we reach this point, we will be doing an O(N*N) Wagner minimum
1844 ** edit distance to compute the alignment.
1845 */
1846 if( nRight < count(aBuf)-1 ){
1847 pToFree = 0;
1848 a = aBuf;
1849 }else{
1850 a = pToFree = fossil_malloc( sizeof(a[0])*(nRight+1) );
1851 }
1852 aM = fossil_malloc( (nLeft+1)*(nRight+1) );
1853
1854
1855 /* Compute the best alignment */
1856 for(i=0; i<=nRight; i++){
1857 aM[i] = 2;
1858 a[i] = i*50;
1859 }
1860 aM[0] = 0;
1861 for(j=1; j<=nLeft; j++){
1862 int p = a[0];
1863 a[0] = p+50;
1864 aM[j*(nRight+1)] = 1;
1865 for(i=1; i<=nRight; i++){
1866 int m = a[i-1]+50;
1867 int d = 2;
1868 if( m>a[i]+50 ){
1869 m = a[i]+50;
1870 d = 1;
1871 }
1872 if( m>p ){
1873 int score = match_dline(&aLeft[j-1], &aRight[i-1]);
1874 if( (score<=63 || (i<j+1 && i>j-1)) && m>p+score ){
1875 m = p+score;
1876 d = 3 | score*4;
1877 }
1878 }
1879 p = a[i];
1880 a[i] = m;
1881 aM[j*(nRight+1)+i] = d;
1882 }
1883 }
1884
1885 /* Compute the lowest-cost path back through the matrix */
1886 i = nRight;
1887 j = nLeft;
1888 k = (nRight+1)*(nLeft+1)-1;
1889 nMatch = iMatch = 0;
1890 while( i+j>0 ){
1891 unsigned char c = aM[k];
1892 if( c>=3 ){
1893 assert( i>0 && j>0 );
1894 i--;
1895 j--;
1896 nMatch++;
1897 iMatch += (c>>2);
1898 aM[k] = 3;
1899 }else if( c==2 ){
1900 assert( i>0 );
1901 i--;
1902 }else{
1903 assert( j>0 );
1904 j--;
1905 }
1906 k--;
1907 aM[k] = aM[j*(nRight+1)+i];
1908 }
1909 k++;
1910 i = (nRight+1)*(nLeft+1) - k;
1911 memmove(aM, &aM[k], i);
1912 *pNResult = i;
1913
1914 /* If:
1915 ** (1) the alignment is more than 25% longer than the longest side, and
1916 ** (2) the average match cost exceeds 15
1917 ** Then this is probably an alignment that will be difficult for humans
1918 ** to read. So instead, just show all of the right side inserted followed
1919 ** by all of the left side deleted.
1920 **
1921 ** The coefficients for conditions (1) and (2) above are determined by
1922 ** experimentation.
1923 */
1924 mxLen = nLeft>nRight ? nLeft : nRight;
1925 if( i*4>mxLen*5 && (nMatch==0 || iMatch/nMatch>15) ){
1926 memset(aM, 4, mnLen); *pNResult = mnLen;
1927 if( nLeft>mnLen ){ memset(aM+mnLen, 1, nLeft-mnLen); *pNResult = nLeft; }
1928 if( nRight>mnLen ){ memset(aM+mnLen, 2, nRight-mnLen); *pNResult = nRight; }
1929 }
1930
1931 /* Return the result */
1932 fossil_free(pToFree);
1933 return aM;
1934 }
1935
1936 /*
1937 ** R[] is an array of six integer, two COPY/DELETE/INSERT triples for a
1938 ** pair of adjacent differences. Return true if the gap between these
1939 ** two differences is so small that they should be rendered as a single
@@ -1993,16 +2042,17 @@
2042 a += m;
2043 b += m;
2044
2045 /* Show the differences */
2046 for(i=0; i<nr; i++){
2047 int nAlign;
2048 unsigned char *alignment;
2049 ma = R[r+i*3+1]; /* Lines on left but not on right */
2050 mb = R[r+i*3+2]; /* Lines on right but not on left */
2051
2052 /* Try to find an alignment for the lines within this one block */
2053 alignment = diffBlockAlignment(&A[a], ma, &B[b], mb, diffFlags, &nAlign);
2054
2055 /* If we could not get a good alignment, try merging the current
2056 ** block with subsequent blocks, if the subsequent blocks are
2057 ** nearby */
2058 while( alignment[0]==4 && i<nr-1 && smallGap(&R[r+i*3]) ){
@@ -2009,14 +2059,15 @@
2059 i++;
2060 m = R[r+i*3];
2061 ma += R[r+i*3+1] + m;
2062 mb += R[r+i*3+2] + m;
2063 fossil_free(alignment);
2064 alignment = diffBlockAlignment(&A[a], ma, &B[b], mb, diffFlags,&nAlign);
2065 }
2066
2067 for(j=0; ma+mb>0; j++){
2068 assert( j<nAlign );
2069 switch( alignment[j] ){
2070 case 1: {
2071 /* Delete one line from the left */
2072 pBuilder->xDelete(pBuilder, &A[a]);
2073 ma--;
@@ -2050,10 +2101,11 @@
2101 b++;
2102 break;
2103 }
2104 }
2105 }
2106 assert( nAlign==j );
2107 fossil_free(alignment);
2108 if( i<nr-1 ){
2109 m = R[r+i*3+3];
2110 for(j=0; j<m; j++){
2111 pBuilder->xCommon(pBuilder, &A[a+j]);
2112

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button