Fossil SCM
Further improvements to diff alignment.
Commit
e5b1c70e2a6e0434eff9caa76cc8b268b8fb698b1a34efc15c2b2116aef1261a
Parent
f25a987bae16900…
1 file changed
+13
-31
+13
-31
| --- src/diff.c | ||
| +++ src/diff.c | ||
| @@ -1775,12 +1775,10 @@ | ||
| 1775 | 1775 | int i, j, k; /* Loop counters */ |
| 1776 | 1776 | int *a; /* One row of the Wagner matrix */ |
| 1777 | 1777 | int *pToFree; /* Space that needs to be freed */ |
| 1778 | 1778 | unsigned char *aM; /* Wagner result matrix */ |
| 1779 | 1779 | int nMatch, iMatch; /* Number of matching lines and match score */ |
| 1780 | - int mnLen; /* minInt(nLeft, nRight) */ | |
| 1781 | - int mxLen; /* MAX(nLeft, nRight) */ | |
| 1782 | 1780 | int aBuf[100]; /* Stack space for a[] if nRight not to big */ |
| 1783 | 1781 | |
| 1784 | 1782 | if( nLeft==0 ){ |
| 1785 | 1783 | aM = fossil_malloc( nRight + 2 ); |
| 1786 | 1784 | memset(aM, 2, nRight); |
| @@ -1797,11 +1795,10 @@ | ||
| 1797 | 1795 | /* For large alignments, use a divide and conquer algorithm that is |
| 1798 | 1796 | ** O(NlogN). The result is not as precise, but this whole thing is an |
| 1799 | 1797 | ** approximation anyhow, and the faster response time is an acceptable |
| 1800 | 1798 | ** trade-off for reduced precision. |
| 1801 | 1799 | */ |
| 1802 | - mnLen = nLeft<nRight ? nLeft : nRight; | |
| 1803 | 1800 | if( nLeft*nRight>DIFF_ALIGN_MX && (diffFlags & DIFF_SLOW_SBS)==0 ){ |
| 1804 | 1801 | const DLine *aSmall; /* The smaller of aLeft and aRight */ |
| 1805 | 1802 | const DLine *aBig; /* The larger of aLeft and aRight */ |
| 1806 | 1803 | int nSmall, nBig; /* Size of aSmall and aBig. nSmall<=nBig */ |
| 1807 | 1804 | int iDivSmall, iDivBig; /* Divider point for aSmall and aBig */ |
| @@ -1917,27 +1914,10 @@ | ||
| 1917 | 1914 | k++; |
| 1918 | 1915 | i = (nRight+1)*(nLeft+1) - k; |
| 1919 | 1916 | memmove(aM, &aM[k], i); |
| 1920 | 1917 | *pNResult = i; |
| 1921 | 1918 | |
| 1922 | - /* If: | |
| 1923 | - ** (1) the alignment is more than 25% longer than the longest side, and | |
| 1924 | - ** (2) the average match cost exceeds 15 | |
| 1925 | - ** Then this is probably an alignment that will be difficult for humans | |
| 1926 | - ** to read. So instead, just show all of the right side inserted followed | |
| 1927 | - ** by all of the left side deleted. | |
| 1928 | - ** | |
| 1929 | - ** The coefficients for conditions (1) and (2) above are determined by | |
| 1930 | - ** experimentation. | |
| 1931 | - */ | |
| 1932 | - mxLen = nLeft>nRight ? nLeft : nRight; | |
| 1933 | - if( i*4>mxLen*5 && (nMatch==0 || iMatch/nMatch>15) ){ | |
| 1934 | - memset(aM, 4, mnLen); *pNResult = mnLen; | |
| 1935 | - if( nLeft>mnLen ){ memset(aM+mnLen, 1, nLeft-mnLen); *pNResult = nLeft; } | |
| 1936 | - if( nRight>mnLen ){ memset(aM+mnLen, 2, nRight-mnLen); *pNResult = nRight; } | |
| 1937 | - } | |
| 1938 | - | |
| 1939 | 1919 | /* Return the result */ |
| 1940 | 1920 | fossil_free(pToFree); |
| 1941 | 1921 | return aM; |
| 1942 | 1922 | } |
| 1943 | 1923 | |
| @@ -1945,12 +1925,16 @@ | ||
| 1945 | 1925 | ** R[] is an array of six integer, two COPY/DELETE/INSERT triples for a |
| 1946 | 1926 | ** pair of adjacent differences. Return true if the gap between these |
| 1947 | 1927 | ** two differences is so small that they should be rendered as a single |
| 1948 | 1928 | ** edit. |
| 1949 | 1929 | */ |
| 1950 | -static int smallGap(const int *R){ | |
| 1951 | - return R[3]<=2 || R[3]<=(R[1]+R[2]+R[4]+R[5])/8; | |
| 1930 | +static int smallGap(const int *R, int ma, int mb){ | |
| 1931 | + int m = R[3]; | |
| 1932 | + ma += R[4] + m; | |
| 1933 | + mb += R[5] + m; | |
| 1934 | + if( ma*mb>DIFF_ALIGN_MX ) return 0; | |
| 1935 | + return m<=2 || m<=(R[1]+R[2]+R[4]+R[5])/8; | |
| 1952 | 1936 | } |
| 1953 | 1937 | |
| 1954 | 1938 | /* |
| 1955 | 1939 | ** Format a diff using a DiffBuilder object |
| 1956 | 1940 | */ |
| @@ -2055,25 +2039,23 @@ | ||
| 2055 | 2039 | int nAlign; |
| 2056 | 2040 | unsigned char *alignment; |
| 2057 | 2041 | ma = R[r+i*3+1]; /* Lines on left but not on right */ |
| 2058 | 2042 | mb = R[r+i*3+2]; /* Lines on right but not on left */ |
| 2059 | 2043 | |
| 2060 | - /* Try to find an alignment for the lines within this one block */ | |
| 2061 | - alignment = diffBlockAlignment(&A[a], ma, &B[b], mb, diffFlags, &nAlign); | |
| 2062 | - | |
| 2063 | - /* If we could not get a good alignment, try merging the current | |
| 2064 | - ** block with subsequent blocks, if the subsequent blocks are | |
| 2065 | - ** nearby */ | |
| 2066 | - while( alignment[0]==4 && i<nr-1 && smallGap(&R[r+i*3]) ){ | |
| 2044 | + /* Try merging the current block with subsequent blocks, if the | |
| 2045 | + ** subsequent blocks are nearby and there result isn't too big. | |
| 2046 | + */ | |
| 2047 | + while( i<nr-1 && smallGap(&R[r+i*3],ma,mb) ){ | |
| 2067 | 2048 | i++; |
| 2068 | 2049 | m = R[r+i*3]; |
| 2069 | 2050 | ma += R[r+i*3+1] + m; |
| 2070 | 2051 | mb += R[r+i*3+2] + m; |
| 2071 | - fossil_free(alignment); | |
| 2072 | - alignment = diffBlockAlignment(&A[a], ma, &B[b], mb, diffFlags,&nAlign); | |
| 2073 | 2052 | } |
| 2074 | 2053 | |
| 2054 | + /* Try to find an alignment for the lines within this one block */ | |
| 2055 | + alignment = diffBlockAlignment(&A[a], ma, &B[b], mb, diffFlags, &nAlign); | |
| 2056 | + | |
| 2075 | 2057 | for(j=0; ma+mb>0; j++){ |
| 2076 | 2058 | assert( j<nAlign ); |
| 2077 | 2059 | switch( alignment[j] ){ |
| 2078 | 2060 | case 1: { |
| 2079 | 2061 | /* Delete one line from the left */ |
| 2080 | 2062 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -1775,12 +1775,10 @@ | |
| 1775 | int i, j, k; /* Loop counters */ |
| 1776 | int *a; /* One row of the Wagner matrix */ |
| 1777 | int *pToFree; /* Space that needs to be freed */ |
| 1778 | unsigned char *aM; /* Wagner result matrix */ |
| 1779 | int nMatch, iMatch; /* Number of matching lines and match score */ |
| 1780 | int mnLen; /* minInt(nLeft, nRight) */ |
| 1781 | int mxLen; /* MAX(nLeft, nRight) */ |
| 1782 | int aBuf[100]; /* Stack space for a[] if nRight not to big */ |
| 1783 | |
| 1784 | if( nLeft==0 ){ |
| 1785 | aM = fossil_malloc( nRight + 2 ); |
| 1786 | memset(aM, 2, nRight); |
| @@ -1797,11 +1795,10 @@ | |
| 1797 | /* For large alignments, use a divide and conquer algorithm that is |
| 1798 | ** O(NlogN). The result is not as precise, but this whole thing is an |
| 1799 | ** approximation anyhow, and the faster response time is an acceptable |
| 1800 | ** trade-off for reduced precision. |
| 1801 | */ |
| 1802 | mnLen = nLeft<nRight ? nLeft : nRight; |
| 1803 | if( nLeft*nRight>DIFF_ALIGN_MX && (diffFlags & DIFF_SLOW_SBS)==0 ){ |
| 1804 | const DLine *aSmall; /* The smaller of aLeft and aRight */ |
| 1805 | const DLine *aBig; /* The larger of aLeft and aRight */ |
| 1806 | int nSmall, nBig; /* Size of aSmall and aBig. nSmall<=nBig */ |
| 1807 | int iDivSmall, iDivBig; /* Divider point for aSmall and aBig */ |
| @@ -1917,27 +1914,10 @@ | |
| 1917 | k++; |
| 1918 | i = (nRight+1)*(nLeft+1) - k; |
| 1919 | memmove(aM, &aM[k], i); |
| 1920 | *pNResult = i; |
| 1921 | |
| 1922 | /* If: |
| 1923 | ** (1) the alignment is more than 25% longer than the longest side, and |
| 1924 | ** (2) the average match cost exceeds 15 |
| 1925 | ** Then this is probably an alignment that will be difficult for humans |
| 1926 | ** to read. So instead, just show all of the right side inserted followed |
| 1927 | ** by all of the left side deleted. |
| 1928 | ** |
| 1929 | ** The coefficients for conditions (1) and (2) above are determined by |
| 1930 | ** experimentation. |
| 1931 | */ |
| 1932 | mxLen = nLeft>nRight ? nLeft : nRight; |
| 1933 | if( i*4>mxLen*5 && (nMatch==0 || iMatch/nMatch>15) ){ |
| 1934 | memset(aM, 4, mnLen); *pNResult = mnLen; |
| 1935 | if( nLeft>mnLen ){ memset(aM+mnLen, 1, nLeft-mnLen); *pNResult = nLeft; } |
| 1936 | if( nRight>mnLen ){ memset(aM+mnLen, 2, nRight-mnLen); *pNResult = nRight; } |
| 1937 | } |
| 1938 | |
| 1939 | /* Return the result */ |
| 1940 | fossil_free(pToFree); |
| 1941 | return aM; |
| 1942 | } |
| 1943 | |
| @@ -1945,12 +1925,16 @@ | |
| 1945 | ** R[] is an array of six integer, two COPY/DELETE/INSERT triples for a |
| 1946 | ** pair of adjacent differences. Return true if the gap between these |
| 1947 | ** two differences is so small that they should be rendered as a single |
| 1948 | ** edit. |
| 1949 | */ |
| 1950 | static int smallGap(const int *R){ |
| 1951 | return R[3]<=2 || R[3]<=(R[1]+R[2]+R[4]+R[5])/8; |
| 1952 | } |
| 1953 | |
| 1954 | /* |
| 1955 | ** Format a diff using a DiffBuilder object |
| 1956 | */ |
| @@ -2055,25 +2039,23 @@ | |
| 2055 | int nAlign; |
| 2056 | unsigned char *alignment; |
| 2057 | ma = R[r+i*3+1]; /* Lines on left but not on right */ |
| 2058 | mb = R[r+i*3+2]; /* Lines on right but not on left */ |
| 2059 | |
| 2060 | /* Try to find an alignment for the lines within this one block */ |
| 2061 | alignment = diffBlockAlignment(&A[a], ma, &B[b], mb, diffFlags, &nAlign); |
| 2062 | |
| 2063 | /* If we could not get a good alignment, try merging the current |
| 2064 | ** block with subsequent blocks, if the subsequent blocks are |
| 2065 | ** nearby */ |
| 2066 | while( alignment[0]==4 && i<nr-1 && smallGap(&R[r+i*3]) ){ |
| 2067 | i++; |
| 2068 | m = R[r+i*3]; |
| 2069 | ma += R[r+i*3+1] + m; |
| 2070 | mb += R[r+i*3+2] + m; |
| 2071 | fossil_free(alignment); |
| 2072 | alignment = diffBlockAlignment(&A[a], ma, &B[b], mb, diffFlags,&nAlign); |
| 2073 | } |
| 2074 | |
| 2075 | for(j=0; ma+mb>0; j++){ |
| 2076 | assert( j<nAlign ); |
| 2077 | switch( alignment[j] ){ |
| 2078 | case 1: { |
| 2079 | /* Delete one line from the left */ |
| 2080 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -1775,12 +1775,10 @@ | |
| 1775 | int i, j, k; /* Loop counters */ |
| 1776 | int *a; /* One row of the Wagner matrix */ |
| 1777 | int *pToFree; /* Space that needs to be freed */ |
| 1778 | unsigned char *aM; /* Wagner result matrix */ |
| 1779 | int nMatch, iMatch; /* Number of matching lines and match score */ |
| 1780 | int aBuf[100]; /* Stack space for a[] if nRight not to big */ |
| 1781 | |
| 1782 | if( nLeft==0 ){ |
| 1783 | aM = fossil_malloc( nRight + 2 ); |
| 1784 | memset(aM, 2, nRight); |
| @@ -1797,11 +1795,10 @@ | |
| 1795 | /* For large alignments, use a divide and conquer algorithm that is |
| 1796 | ** O(NlogN). The result is not as precise, but this whole thing is an |
| 1797 | ** approximation anyhow, and the faster response time is an acceptable |
| 1798 | ** trade-off for reduced precision. |
| 1799 | */ |
| 1800 | if( nLeft*nRight>DIFF_ALIGN_MX && (diffFlags & DIFF_SLOW_SBS)==0 ){ |
| 1801 | const DLine *aSmall; /* The smaller of aLeft and aRight */ |
| 1802 | const DLine *aBig; /* The larger of aLeft and aRight */ |
| 1803 | int nSmall, nBig; /* Size of aSmall and aBig. nSmall<=nBig */ |
| 1804 | int iDivSmall, iDivBig; /* Divider point for aSmall and aBig */ |
| @@ -1917,27 +1914,10 @@ | |
| 1914 | k++; |
| 1915 | i = (nRight+1)*(nLeft+1) - k; |
| 1916 | memmove(aM, &aM[k], i); |
| 1917 | *pNResult = i; |
| 1918 | |
| 1919 | /* Return the result */ |
| 1920 | fossil_free(pToFree); |
| 1921 | return aM; |
| 1922 | } |
| 1923 | |
| @@ -1945,12 +1925,16 @@ | |
| 1925 | ** R[] is an array of six integer, two COPY/DELETE/INSERT triples for a |
| 1926 | ** pair of adjacent differences. Return true if the gap between these |
| 1927 | ** two differences is so small that they should be rendered as a single |
| 1928 | ** edit. |
| 1929 | */ |
| 1930 | static int smallGap(const int *R, int ma, int mb){ |
| 1931 | int m = R[3]; |
| 1932 | ma += R[4] + m; |
| 1933 | mb += R[5] + m; |
| 1934 | if( ma*mb>DIFF_ALIGN_MX ) return 0; |
| 1935 | return m<=2 || m<=(R[1]+R[2]+R[4]+R[5])/8; |
| 1936 | } |
| 1937 | |
| 1938 | /* |
| 1939 | ** Format a diff using a DiffBuilder object |
| 1940 | */ |
| @@ -2055,25 +2039,23 @@ | |
| 2039 | int nAlign; |
| 2040 | unsigned char *alignment; |
| 2041 | ma = R[r+i*3+1]; /* Lines on left but not on right */ |
| 2042 | mb = R[r+i*3+2]; /* Lines on right but not on left */ |
| 2043 | |
| 2044 | /* Try merging the current block with subsequent blocks, if the |
| 2045 | ** subsequent blocks are nearby and there result isn't too big. |
| 2046 | */ |
| 2047 | while( i<nr-1 && smallGap(&R[r+i*3],ma,mb) ){ |
| 2048 | i++; |
| 2049 | m = R[r+i*3]; |
| 2050 | ma += R[r+i*3+1] + m; |
| 2051 | mb += R[r+i*3+2] + m; |
| 2052 | } |
| 2053 | |
| 2054 | /* Try to find an alignment for the lines within this one block */ |
| 2055 | alignment = diffBlockAlignment(&A[a], ma, &B[b], mb, diffFlags, &nAlign); |
| 2056 | |
| 2057 | for(j=0; ma+mb>0; j++){ |
| 2058 | assert( j<nAlign ); |
| 2059 | switch( alignment[j] ){ |
| 2060 | case 1: { |
| 2061 | /* Delete one line from the left */ |
| 2062 |