Fossil SCM

Further improvements to diff alignment.

drh 2021-09-05 20:54 trunk
Commit e5b1c70e2a6e0434eff9caa76cc8b268b8fb698b1a34efc15c2b2116aef1261a
1 file changed +13 -31
+13 -31
--- src/diff.c
+++ src/diff.c
@@ -1775,12 +1775,10 @@
17751775
int i, j, k; /* Loop counters */
17761776
int *a; /* One row of the Wagner matrix */
17771777
int *pToFree; /* Space that needs to be freed */
17781778
unsigned char *aM; /* Wagner result matrix */
17791779
int nMatch, iMatch; /* Number of matching lines and match score */
1780
- int mnLen; /* minInt(nLeft, nRight) */
1781
- int mxLen; /* MAX(nLeft, nRight) */
17821780
int aBuf[100]; /* Stack space for a[] if nRight not to big */
17831781
17841782
if( nLeft==0 ){
17851783
aM = fossil_malloc( nRight + 2 );
17861784
memset(aM, 2, nRight);
@@ -1797,11 +1795,10 @@
17971795
/* For large alignments, use a divide and conquer algorithm that is
17981796
** O(NlogN). The result is not as precise, but this whole thing is an
17991797
** approximation anyhow, and the faster response time is an acceptable
18001798
** trade-off for reduced precision.
18011799
*/
1802
- mnLen = nLeft<nRight ? nLeft : nRight;
18031800
if( nLeft*nRight>DIFF_ALIGN_MX && (diffFlags & DIFF_SLOW_SBS)==0 ){
18041801
const DLine *aSmall; /* The smaller of aLeft and aRight */
18051802
const DLine *aBig; /* The larger of aLeft and aRight */
18061803
int nSmall, nBig; /* Size of aSmall and aBig. nSmall<=nBig */
18071804
int iDivSmall, iDivBig; /* Divider point for aSmall and aBig */
@@ -1917,27 +1914,10 @@
19171914
k++;
19181915
i = (nRight+1)*(nLeft+1) - k;
19191916
memmove(aM, &aM[k], i);
19201917
*pNResult = i;
19211918
1922
- /* If:
1923
- ** (1) the alignment is more than 25% longer than the longest side, and
1924
- ** (2) the average match cost exceeds 15
1925
- ** Then this is probably an alignment that will be difficult for humans
1926
- ** to read. So instead, just show all of the right side inserted followed
1927
- ** by all of the left side deleted.
1928
- **
1929
- ** The coefficients for conditions (1) and (2) above are determined by
1930
- ** experimentation.
1931
- */
1932
- mxLen = nLeft>nRight ? nLeft : nRight;
1933
- if( i*4>mxLen*5 && (nMatch==0 || iMatch/nMatch>15) ){
1934
- memset(aM, 4, mnLen); *pNResult = mnLen;
1935
- if( nLeft>mnLen ){ memset(aM+mnLen, 1, nLeft-mnLen); *pNResult = nLeft; }
1936
- if( nRight>mnLen ){ memset(aM+mnLen, 2, nRight-mnLen); *pNResult = nRight; }
1937
- }
1938
-
19391919
/* Return the result */
19401920
fossil_free(pToFree);
19411921
return aM;
19421922
}
19431923
@@ -1945,12 +1925,16 @@
19451925
** R[] is an array of six integer, two COPY/DELETE/INSERT triples for a
19461926
** pair of adjacent differences. Return true if the gap between these
19471927
** two differences is so small that they should be rendered as a single
19481928
** edit.
19491929
*/
1950
-static int smallGap(const int *R){
1951
- return R[3]<=2 || R[3]<=(R[1]+R[2]+R[4]+R[5])/8;
1930
+static int smallGap(const int *R, int ma, int mb){
1931
+ int m = R[3];
1932
+ ma += R[4] + m;
1933
+ mb += R[5] + m;
1934
+ if( ma*mb>DIFF_ALIGN_MX ) return 0;
1935
+ return m<=2 || m<=(R[1]+R[2]+R[4]+R[5])/8;
19521936
}
19531937
19541938
/*
19551939
** Format a diff using a DiffBuilder object
19561940
*/
@@ -2055,25 +2039,23 @@
20552039
int nAlign;
20562040
unsigned char *alignment;
20572041
ma = R[r+i*3+1]; /* Lines on left but not on right */
20582042
mb = R[r+i*3+2]; /* Lines on right but not on left */
20592043
2060
- /* Try to find an alignment for the lines within this one block */
2061
- alignment = diffBlockAlignment(&A[a], ma, &B[b], mb, diffFlags, &nAlign);
2062
-
2063
- /* If we could not get a good alignment, try merging the current
2064
- ** block with subsequent blocks, if the subsequent blocks are
2065
- ** nearby */
2066
- while( alignment[0]==4 && i<nr-1 && smallGap(&R[r+i*3]) ){
2044
+ /* Try merging the current block with subsequent blocks, if the
2045
+ ** subsequent blocks are nearby and there result isn't too big.
2046
+ */
2047
+ while( i<nr-1 && smallGap(&R[r+i*3],ma,mb) ){
20672048
i++;
20682049
m = R[r+i*3];
20692050
ma += R[r+i*3+1] + m;
20702051
mb += R[r+i*3+2] + m;
2071
- fossil_free(alignment);
2072
- alignment = diffBlockAlignment(&A[a], ma, &B[b], mb, diffFlags,&nAlign);
20732052
}
20742053
2054
+ /* Try to find an alignment for the lines within this one block */
2055
+ alignment = diffBlockAlignment(&A[a], ma, &B[b], mb, diffFlags, &nAlign);
2056
+
20752057
for(j=0; ma+mb>0; j++){
20762058
assert( j<nAlign );
20772059
switch( alignment[j] ){
20782060
case 1: {
20792061
/* Delete one line from the left */
20802062
--- src/diff.c
+++ src/diff.c
@@ -1775,12 +1775,10 @@
1775 int i, j, k; /* Loop counters */
1776 int *a; /* One row of the Wagner matrix */
1777 int *pToFree; /* Space that needs to be freed */
1778 unsigned char *aM; /* Wagner result matrix */
1779 int nMatch, iMatch; /* Number of matching lines and match score */
1780 int mnLen; /* minInt(nLeft, nRight) */
1781 int mxLen; /* MAX(nLeft, nRight) */
1782 int aBuf[100]; /* Stack space for a[] if nRight not to big */
1783
1784 if( nLeft==0 ){
1785 aM = fossil_malloc( nRight + 2 );
1786 memset(aM, 2, nRight);
@@ -1797,11 +1795,10 @@
1797 /* For large alignments, use a divide and conquer algorithm that is
1798 ** O(NlogN). The result is not as precise, but this whole thing is an
1799 ** approximation anyhow, and the faster response time is an acceptable
1800 ** trade-off for reduced precision.
1801 */
1802 mnLen = nLeft<nRight ? nLeft : nRight;
1803 if( nLeft*nRight>DIFF_ALIGN_MX && (diffFlags & DIFF_SLOW_SBS)==0 ){
1804 const DLine *aSmall; /* The smaller of aLeft and aRight */
1805 const DLine *aBig; /* The larger of aLeft and aRight */
1806 int nSmall, nBig; /* Size of aSmall and aBig. nSmall<=nBig */
1807 int iDivSmall, iDivBig; /* Divider point for aSmall and aBig */
@@ -1917,27 +1914,10 @@
1917 k++;
1918 i = (nRight+1)*(nLeft+1) - k;
1919 memmove(aM, &aM[k], i);
1920 *pNResult = i;
1921
1922 /* If:
1923 ** (1) the alignment is more than 25% longer than the longest side, and
1924 ** (2) the average match cost exceeds 15
1925 ** Then this is probably an alignment that will be difficult for humans
1926 ** to read. So instead, just show all of the right side inserted followed
1927 ** by all of the left side deleted.
1928 **
1929 ** The coefficients for conditions (1) and (2) above are determined by
1930 ** experimentation.
1931 */
1932 mxLen = nLeft>nRight ? nLeft : nRight;
1933 if( i*4>mxLen*5 && (nMatch==0 || iMatch/nMatch>15) ){
1934 memset(aM, 4, mnLen); *pNResult = mnLen;
1935 if( nLeft>mnLen ){ memset(aM+mnLen, 1, nLeft-mnLen); *pNResult = nLeft; }
1936 if( nRight>mnLen ){ memset(aM+mnLen, 2, nRight-mnLen); *pNResult = nRight; }
1937 }
1938
1939 /* Return the result */
1940 fossil_free(pToFree);
1941 return aM;
1942 }
1943
@@ -1945,12 +1925,16 @@
1945 ** R[] is an array of six integer, two COPY/DELETE/INSERT triples for a
1946 ** pair of adjacent differences. Return true if the gap between these
1947 ** two differences is so small that they should be rendered as a single
1948 ** edit.
1949 */
1950 static int smallGap(const int *R){
1951 return R[3]<=2 || R[3]<=(R[1]+R[2]+R[4]+R[5])/8;
 
 
 
 
1952 }
1953
1954 /*
1955 ** Format a diff using a DiffBuilder object
1956 */
@@ -2055,25 +2039,23 @@
2055 int nAlign;
2056 unsigned char *alignment;
2057 ma = R[r+i*3+1]; /* Lines on left but not on right */
2058 mb = R[r+i*3+2]; /* Lines on right but not on left */
2059
2060 /* Try to find an alignment for the lines within this one block */
2061 alignment = diffBlockAlignment(&A[a], ma, &B[b], mb, diffFlags, &nAlign);
2062
2063 /* If we could not get a good alignment, try merging the current
2064 ** block with subsequent blocks, if the subsequent blocks are
2065 ** nearby */
2066 while( alignment[0]==4 && i<nr-1 && smallGap(&R[r+i*3]) ){
2067 i++;
2068 m = R[r+i*3];
2069 ma += R[r+i*3+1] + m;
2070 mb += R[r+i*3+2] + m;
2071 fossil_free(alignment);
2072 alignment = diffBlockAlignment(&A[a], ma, &B[b], mb, diffFlags,&nAlign);
2073 }
2074
 
 
 
2075 for(j=0; ma+mb>0; j++){
2076 assert( j<nAlign );
2077 switch( alignment[j] ){
2078 case 1: {
2079 /* Delete one line from the left */
2080
--- src/diff.c
+++ src/diff.c
@@ -1775,12 +1775,10 @@
1775 int i, j, k; /* Loop counters */
1776 int *a; /* One row of the Wagner matrix */
1777 int *pToFree; /* Space that needs to be freed */
1778 unsigned char *aM; /* Wagner result matrix */
1779 int nMatch, iMatch; /* Number of matching lines and match score */
 
 
1780 int aBuf[100]; /* Stack space for a[] if nRight not to big */
1781
1782 if( nLeft==0 ){
1783 aM = fossil_malloc( nRight + 2 );
1784 memset(aM, 2, nRight);
@@ -1797,11 +1795,10 @@
1795 /* For large alignments, use a divide and conquer algorithm that is
1796 ** O(NlogN). The result is not as precise, but this whole thing is an
1797 ** approximation anyhow, and the faster response time is an acceptable
1798 ** trade-off for reduced precision.
1799 */
 
1800 if( nLeft*nRight>DIFF_ALIGN_MX && (diffFlags & DIFF_SLOW_SBS)==0 ){
1801 const DLine *aSmall; /* The smaller of aLeft and aRight */
1802 const DLine *aBig; /* The larger of aLeft and aRight */
1803 int nSmall, nBig; /* Size of aSmall and aBig. nSmall<=nBig */
1804 int iDivSmall, iDivBig; /* Divider point for aSmall and aBig */
@@ -1917,27 +1914,10 @@
1914 k++;
1915 i = (nRight+1)*(nLeft+1) - k;
1916 memmove(aM, &aM[k], i);
1917 *pNResult = i;
1918
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1919 /* Return the result */
1920 fossil_free(pToFree);
1921 return aM;
1922 }
1923
@@ -1945,12 +1925,16 @@
1925 ** R[] is an array of six integer, two COPY/DELETE/INSERT triples for a
1926 ** pair of adjacent differences. Return true if the gap between these
1927 ** two differences is so small that they should be rendered as a single
1928 ** edit.
1929 */
1930 static int smallGap(const int *R, int ma, int mb){
1931 int m = R[3];
1932 ma += R[4] + m;
1933 mb += R[5] + m;
1934 if( ma*mb>DIFF_ALIGN_MX ) return 0;
1935 return m<=2 || m<=(R[1]+R[2]+R[4]+R[5])/8;
1936 }
1937
1938 /*
1939 ** Format a diff using a DiffBuilder object
1940 */
@@ -2055,25 +2039,23 @@
2039 int nAlign;
2040 unsigned char *alignment;
2041 ma = R[r+i*3+1]; /* Lines on left but not on right */
2042 mb = R[r+i*3+2]; /* Lines on right but not on left */
2043
2044 /* Try merging the current block with subsequent blocks, if the
2045 ** subsequent blocks are nearby and there result isn't too big.
2046 */
2047 while( i<nr-1 && smallGap(&R[r+i*3],ma,mb) ){
 
 
 
2048 i++;
2049 m = R[r+i*3];
2050 ma += R[r+i*3+1] + m;
2051 mb += R[r+i*3+2] + m;
 
 
2052 }
2053
2054 /* Try to find an alignment for the lines within this one block */
2055 alignment = diffBlockAlignment(&A[a], ma, &B[b], mb, diffFlags, &nAlign);
2056
2057 for(j=0; ma+mb>0; j++){
2058 assert( j<nAlign );
2059 switch( alignment[j] ){
2060 case 1: {
2061 /* Delete one line from the left */
2062

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button