Fossil SCM
When trying to do an alignment of large blocks, first try an LCS on the same block using an ignore-whitespace comparison. If a large LCS is found, use that to subdivide the problem. Otherwise, continue with the usual divide-and-conquer technique.
Commit
c311efef078c87339dca5b919167d9fa9d94682c77279fa9cfdd97ec9177f2c2
Parent
fbdbc09b402b19d…
1 file changed
+169
-49
+169
-49
| --- src/diff.c | ||
| +++ src/diff.c | ||
| @@ -1854,10 +1854,169 @@ | ||
| 1854 | 1854 | b.z = g.argv[3]; |
| 1855 | 1855 | b.n = (int)strlen(b.z); |
| 1856 | 1856 | x = match_dline(&a, &b); |
| 1857 | 1857 | fossil_print("%d\n", x); |
| 1858 | 1858 | } |
| 1859 | + | |
| 1860 | +/* Forward declarations for recursion */ | |
| 1861 | +static unsigned char *diffBlockAlignment( | |
| 1862 | + DLine *aLeft, int nLeft, /* Text on the left */ | |
| 1863 | + DLine *aRight, int nRight, /* Text on the right */ | |
| 1864 | + DiffConfig *pCfg, /* Configuration options */ | |
| 1865 | + int *pNResult /* OUTPUT: Bytes of result */ | |
| 1866 | +); | |
| 1867 | +static void longestCommonSequence( | |
| 1868 | + DContext *p, /* Two files being compared */ | |
| 1869 | + int iS1, int iE1, /* Range of lines in p->aFrom[] */ | |
| 1870 | + int iS2, int iE2, /* Range of lines in p->aTo[] */ | |
| 1871 | + int *piSX, int *piEX, /* Write p->aFrom[] common segment here */ | |
| 1872 | + int *piSY, int *piEY /* Write p->aTo[] common segment here */ | |
| 1873 | +); | |
| 1874 | + | |
| 1875 | +/* | |
| 1876 | +** Make a copy of a list of nLine DLine objects from one array to | |
| 1877 | +** another. Hash the new array to ignore whitespace. | |
| 1878 | +*/ | |
| 1879 | +static void diffDLineXfer( | |
| 1880 | + DLine *aTo, | |
| 1881 | + const DLine *aFrom, | |
| 1882 | + int nLine | |
| 1883 | +){ | |
| 1884 | + int i, j, k; | |
| 1885 | + u64 h, h2; | |
| 1886 | + for(i=0; i<nLine; i++) aTo[i].iHash = 0; | |
| 1887 | + for(i=0; i<nLine; i++){ | |
| 1888 | + const char *z = aFrom[i].z; | |
| 1889 | + int n = aFrom[i].n; | |
| 1890 | + for(j=0; j<n && diff_isspace(z[j]); j++){} | |
| 1891 | + aTo[i].z = &z[j]; | |
| 1892 | + for(k=aFrom[i].n; k>j && diff_isspace(z[k-1]); k--){} | |
| 1893 | + aTo[i].n = n = k-j; | |
| 1894 | + aTo[i].indent = 0; | |
| 1895 | + aTo[i].nw = 0; | |
| 1896 | + for(h=0; j<k; j++){ | |
| 1897 | + char c = z[j]; | |
| 1898 | + if( !diff_isspace(c) ){ | |
| 1899 | + h = (h^c)*9000000000000000041LL; | |
| 1900 | + } | |
| 1901 | + } | |
| 1902 | + aTo[i].h = h = ((h%281474976710597LL)<<LENGTH_MASK_SZ) | n; | |
| 1903 | + h2 = h % nLine; | |
| 1904 | + aTo[i].iNext = aTo[h2].iHash; | |
| 1905 | + aTo[h2].iHash = i+1; | |
| 1906 | + } | |
| 1907 | +} | |
| 1908 | + | |
| 1909 | + | |
| 1910 | +/* | |
| 1911 | +** For a difficult diff-block alignment that was originally for | |
| 1912 | +** the default consider-all-whitespace algorithm, try to find the | |
| 1913 | +** longest common subsequence between the two blocks that involves | |
| 1914 | +** only whitespace changes. | |
| 1915 | +*/ | |
| 1916 | +static unsigned char *diffBlockAlignmentIgnoreSpace( | |
| 1917 | + DLine *aLeft, int nLeft, /* Text on the left */ | |
| 1918 | + DLine *aRight, int nRight, /* Text on the right */ | |
| 1919 | + DiffConfig *pCfg, /* Configuration options */ | |
| 1920 | + int *pNResult /* OUTPUT: Bytes of result */ | |
| 1921 | +){ | |
| 1922 | + DContext dc; | |
| 1923 | + int iSX, iEX; /* Start and end of LCS on the left */ | |
| 1924 | + int iSY, iEY; /* Start and end of the LCS on the right */ | |
| 1925 | + unsigned char *a1, *a2; | |
| 1926 | + int n1, n2, nLCS; | |
| 1927 | + | |
| 1928 | + dc.aEdit = 0; | |
| 1929 | + dc.nEdit = 0; | |
| 1930 | + dc.nEditAlloc = 0; | |
| 1931 | + dc.nFrom = nLeft; | |
| 1932 | + dc.nTo = nRight; | |
| 1933 | + dc.xDiffer = compare_dline_ignore_allws; | |
| 1934 | + dc.aFrom = fossil_malloc( sizeof(DLine)*(nLeft+nRight) ); | |
| 1935 | + dc.aTo = &dc.aFrom[dc.nFrom]; | |
| 1936 | + diffDLineXfer(dc.aFrom, aLeft, nLeft); | |
| 1937 | + diffDLineXfer(dc.aTo, aRight, nRight); | |
| 1938 | + longestCommonSequence(&dc,0,nLeft,0,nRight,&iSX,&iEX,&iSY,&iEY); | |
| 1939 | + fossil_free(dc.aFrom); | |
| 1940 | + nLCS = iEX - iSX; | |
| 1941 | + if( nLCS<5 ) return 0; /* No good LCS was found */ | |
| 1942 | + | |
| 1943 | + a1 = diffBlockAlignment(aLeft,iSX,aRight,iSY,pCfg,&n1); | |
| 1944 | + a2 = diffBlockAlignment(aLeft+iEX, nLeft-iEX, | |
| 1945 | + aRight+iEY, nRight-iEY, | |
| 1946 | + pCfg, &n2); | |
| 1947 | + a1 = fossil_realloc(a1, n1+nLCS+n2); | |
| 1948 | + memcpy(a1+n1+nLCS,a2,n2); | |
| 1949 | + memset(a1+n1,3,nLCS); | |
| 1950 | + fossil_free(a2); | |
| 1951 | + *pNResult = n1+n2+nLCS; | |
| 1952 | + return a1; | |
| 1953 | +} | |
| 1954 | + | |
| 1955 | + | |
| 1956 | +/* | |
| 1957 | +** This is a helper route for diffBlockAlignment(). In this case, | |
| 1958 | +** a very large block is encountered that might be too expensive to | |
| 1959 | +** use the O(N*N) Wagner edit distance algorithm. So instead, this | |
| 1960 | +** block implements a less-precise but faster O(N*logN) divide-and-conquer | |
| 1961 | +** approach. | |
| 1962 | +*/ | |
| 1963 | +static unsigned char *diffBlockAlignmentDivideAndConquer( | |
| 1964 | + DLine *aLeft, int nLeft, /* Text on the left */ | |
| 1965 | + DLine *aRight, int nRight, /* Text on the right */ | |
| 1966 | + DiffConfig *pCfg, /* Configuration options */ | |
| 1967 | + int *pNResult /* OUTPUT: Bytes of result */ | |
| 1968 | +){ | |
| 1969 | + DLine *aSmall; /* The smaller of aLeft and aRight */ | |
| 1970 | + DLine *aBig; /* The larger of aLeft and aRight */ | |
| 1971 | + int nSmall, nBig; /* Size of aSmall and aBig. nSmall<=nBig */ | |
| 1972 | + int iDivSmall, iDivBig; /* Divider point for aSmall and aBig */ | |
| 1973 | + int iDivLeft, iDivRight; /* Divider point for aLeft and aRight */ | |
| 1974 | + unsigned char *a1, *a2; /* Results of the alignments on two halves */ | |
| 1975 | + int n1, n2; /* Number of entries in a1 and a2 */ | |
| 1976 | + int score, bestScore; /* Score and best score seen so far */ | |
| 1977 | + int i; /* Loop counter */ | |
| 1978 | + | |
| 1979 | + if( nLeft>nRight ){ | |
| 1980 | + aSmall = aRight; | |
| 1981 | + nSmall = nRight; | |
| 1982 | + aBig = aLeft; | |
| 1983 | + nBig = nLeft; | |
| 1984 | + }else{ | |
| 1985 | + aSmall = aLeft; | |
| 1986 | + nSmall = nLeft; | |
| 1987 | + aBig = aRight; | |
| 1988 | + nBig = nRight; | |
| 1989 | + } | |
| 1990 | + iDivBig = nBig/2; | |
| 1991 | + iDivSmall = nSmall/2; | |
| 1992 | + bestScore = 10000; | |
| 1993 | + for(i=0; i<nSmall; i++){ | |
| 1994 | + score = match_dline(aBig+iDivBig, aSmall+i) + abs(i-nSmall/2)*2; | |
| 1995 | + if( score<bestScore ){ | |
| 1996 | + bestScore = score; | |
| 1997 | + iDivSmall = i; | |
| 1998 | + } | |
| 1999 | + } | |
| 2000 | + if( aSmall==aRight ){ | |
| 2001 | + iDivRight = iDivSmall; | |
| 2002 | + iDivLeft = iDivBig; | |
| 2003 | + }else{ | |
| 2004 | + iDivRight = iDivBig; | |
| 2005 | + iDivLeft = iDivSmall; | |
| 2006 | + } | |
| 2007 | + a1 = diffBlockAlignment(aLeft,iDivLeft,aRight,iDivRight,pCfg,&n1); | |
| 2008 | + a2 = diffBlockAlignment(aLeft+iDivLeft, nLeft-iDivLeft, | |
| 2009 | + aRight+iDivRight, nRight-iDivRight, | |
| 2010 | + pCfg, &n2); | |
| 2011 | + a1 = fossil_realloc(a1, n1+n2 ); | |
| 2012 | + memcpy(a1+n1,a2,n2); | |
| 2013 | + fossil_free(a2); | |
| 2014 | + *pNResult = n1+n2; | |
| 2015 | + return a1; | |
| 2016 | +} | |
| 2017 | + | |
| 1859 | 2018 | |
| 1860 | 2019 | /* |
| 1861 | 2020 | ** The threshold at which diffBlockAlignment transitions from the |
| 1862 | 2021 | ** O(N*N) Wagner minimum-edit-distance algorithm to a less process |
| 1863 | 2022 | ** O(NlogN) divide-and-conquer approach. |
| @@ -1912,61 +2071,22 @@ | ||
| 1912 | 2071 | memset(aM, 1, nLeft); |
| 1913 | 2072 | *pNResult = nLeft; |
| 1914 | 2073 | return aM; |
| 1915 | 2074 | } |
| 1916 | 2075 | |
| 1917 | - /* For large alignments, use a divide and conquer algorithm that is | |
| 1918 | - ** O(NlogN). The result is not as precise, but this whole thing is an | |
| 1919 | - ** approximation anyhow, and the faster response time is an acceptable | |
| 1920 | - ** trade-off for reduced precision. | |
| 2076 | + /* For large alignments, try to use alternative algorithms that are | |
| 2077 | + ** faster than the O(N*N) Wagner edit distance. | |
| 1921 | 2078 | */ |
| 1922 | 2079 | if( nLeft*nRight>DIFF_ALIGN_MX && (pCfg->diffFlags & DIFF_SLOW_SBS)==0 ){ |
| 1923 | - DLine *aSmall; /* The smaller of aLeft and aRight */ | |
| 1924 | - DLine *aBig; /* The larger of aLeft and aRight */ | |
| 1925 | - int nSmall, nBig; /* Size of aSmall and aBig. nSmall<=nBig */ | |
| 1926 | - int iDivSmall, iDivBig; /* Divider point for aSmall and aBig */ | |
| 1927 | - int iDivLeft, iDivRight; /* Divider point for aLeft and aRight */ | |
| 1928 | - unsigned char *a1, *a2; /* Results of the alignments on two halves */ | |
| 1929 | - int n1, n2; /* Number of entries in a1 and a2 */ | |
| 1930 | - int score, bestScore; /* Score and best score seen so far */ | |
| 1931 | - if( nLeft>nRight ){ | |
| 1932 | - aSmall = aRight; | |
| 1933 | - nSmall = nRight; | |
| 1934 | - aBig = aLeft; | |
| 1935 | - nBig = nLeft; | |
| 1936 | - }else{ | |
| 1937 | - aSmall = aLeft; | |
| 1938 | - nSmall = nLeft; | |
| 1939 | - aBig = aRight; | |
| 1940 | - nBig = nRight; | |
| 1941 | - } | |
| 1942 | - iDivBig = nBig/2; | |
| 1943 | - iDivSmall = nSmall/2; | |
| 1944 | - bestScore = 10000; | |
| 1945 | - for(i=0; i<nSmall; i++){ | |
| 1946 | - score = match_dline(aBig+iDivBig, aSmall+i) + abs(i-nSmall/2)*2; | |
| 1947 | - if( score<bestScore ){ | |
| 1948 | - bestScore = score; | |
| 1949 | - iDivSmall = i; | |
| 1950 | - } | |
| 1951 | - } | |
| 1952 | - if( aSmall==aRight ){ | |
| 1953 | - iDivRight = iDivSmall; | |
| 1954 | - iDivLeft = iDivBig; | |
| 1955 | - }else{ | |
| 1956 | - iDivRight = iDivBig; | |
| 1957 | - iDivLeft = iDivSmall; | |
| 1958 | - } | |
| 1959 | - a1 = diffBlockAlignment(aLeft,iDivLeft,aRight,iDivRight,pCfg,&n1); | |
| 1960 | - a2 = diffBlockAlignment(aLeft+iDivLeft, nLeft-iDivLeft, | |
| 1961 | - aRight+iDivRight, nRight-iDivRight, | |
| 1962 | - pCfg, &n2); | |
| 1963 | - a1 = fossil_realloc(a1, n1+n2 ); | |
| 1964 | - memcpy(a1+n1,a2,n2); | |
| 1965 | - fossil_free(a2); | |
| 1966 | - *pNResult = n1+n2; | |
| 1967 | - return a1; | |
| 2080 | + if( (pCfg->diffFlags & DIFF_IGNORE_ALLWS)==0 ){ | |
| 2081 | + unsigned char *aRes; | |
| 2082 | + aRes = diffBlockAlignmentIgnoreSpace( | |
| 2083 | + aLeft, nLeft,aRight, nRight,pCfg,pNResult); | |
| 2084 | + if( aRes ) return aRes; | |
| 2085 | + } | |
| 2086 | + return diffBlockAlignmentDivideAndConquer( | |
| 2087 | + aLeft, nLeft,aRight, nRight,pCfg,pNResult); | |
| 1968 | 2088 | } |
| 1969 | 2089 | |
| 1970 | 2090 | /* If we reach this point, we will be doing an O(N*N) Wagner minimum |
| 1971 | 2091 | ** edit distance to compute the alignment. |
| 1972 | 2092 | */ |
| 1973 | 2093 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -1854,10 +1854,169 @@ | |
| 1854 | b.z = g.argv[3]; |
| 1855 | b.n = (int)strlen(b.z); |
| 1856 | x = match_dline(&a, &b); |
| 1857 | fossil_print("%d\n", x); |
| 1858 | } |
| 1859 | |
| 1860 | /* |
| 1861 | ** The threshold at which diffBlockAlignment transitions from the |
| 1862 | ** O(N*N) Wagner minimum-edit-distance algorithm to a less process |
| 1863 | ** O(NlogN) divide-and-conquer approach. |
| @@ -1912,61 +2071,22 @@ | |
| 1912 | memset(aM, 1, nLeft); |
| 1913 | *pNResult = nLeft; |
| 1914 | return aM; |
| 1915 | } |
| 1916 | |
| 1917 | /* For large alignments, use a divide and conquer algorithm that is |
| 1918 | ** O(NlogN). The result is not as precise, but this whole thing is an |
| 1919 | ** approximation anyhow, and the faster response time is an acceptable |
| 1920 | ** trade-off for reduced precision. |
| 1921 | */ |
| 1922 | if( nLeft*nRight>DIFF_ALIGN_MX && (pCfg->diffFlags & DIFF_SLOW_SBS)==0 ){ |
| 1923 | DLine *aSmall; /* The smaller of aLeft and aRight */ |
| 1924 | DLine *aBig; /* The larger of aLeft and aRight */ |
| 1925 | int nSmall, nBig; /* Size of aSmall and aBig. nSmall<=nBig */ |
| 1926 | int iDivSmall, iDivBig; /* Divider point for aSmall and aBig */ |
| 1927 | int iDivLeft, iDivRight; /* Divider point for aLeft and aRight */ |
| 1928 | unsigned char *a1, *a2; /* Results of the alignments on two halves */ |
| 1929 | int n1, n2; /* Number of entries in a1 and a2 */ |
| 1930 | int score, bestScore; /* Score and best score seen so far */ |
| 1931 | if( nLeft>nRight ){ |
| 1932 | aSmall = aRight; |
| 1933 | nSmall = nRight; |
| 1934 | aBig = aLeft; |
| 1935 | nBig = nLeft; |
| 1936 | }else{ |
| 1937 | aSmall = aLeft; |
| 1938 | nSmall = nLeft; |
| 1939 | aBig = aRight; |
| 1940 | nBig = nRight; |
| 1941 | } |
| 1942 | iDivBig = nBig/2; |
| 1943 | iDivSmall = nSmall/2; |
| 1944 | bestScore = 10000; |
| 1945 | for(i=0; i<nSmall; i++){ |
| 1946 | score = match_dline(aBig+iDivBig, aSmall+i) + abs(i-nSmall/2)*2; |
| 1947 | if( score<bestScore ){ |
| 1948 | bestScore = score; |
| 1949 | iDivSmall = i; |
| 1950 | } |
| 1951 | } |
| 1952 | if( aSmall==aRight ){ |
| 1953 | iDivRight = iDivSmall; |
| 1954 | iDivLeft = iDivBig; |
| 1955 | }else{ |
| 1956 | iDivRight = iDivBig; |
| 1957 | iDivLeft = iDivSmall; |
| 1958 | } |
| 1959 | a1 = diffBlockAlignment(aLeft,iDivLeft,aRight,iDivRight,pCfg,&n1); |
| 1960 | a2 = diffBlockAlignment(aLeft+iDivLeft, nLeft-iDivLeft, |
| 1961 | aRight+iDivRight, nRight-iDivRight, |
| 1962 | pCfg, &n2); |
| 1963 | a1 = fossil_realloc(a1, n1+n2 ); |
| 1964 | memcpy(a1+n1,a2,n2); |
| 1965 | fossil_free(a2); |
| 1966 | *pNResult = n1+n2; |
| 1967 | return a1; |
| 1968 | } |
| 1969 | |
| 1970 | /* If we reach this point, we will be doing an O(N*N) Wagner minimum |
| 1971 | ** edit distance to compute the alignment. |
| 1972 | */ |
| 1973 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -1854,10 +1854,169 @@ | |
| 1854 | b.z = g.argv[3]; |
| 1855 | b.n = (int)strlen(b.z); |
| 1856 | x = match_dline(&a, &b); |
| 1857 | fossil_print("%d\n", x); |
| 1858 | } |
| 1859 | |
| 1860 | /* Forward declarations for recursion */ |
| 1861 | static unsigned char *diffBlockAlignment( |
| 1862 | DLine *aLeft, int nLeft, /* Text on the left */ |
| 1863 | DLine *aRight, int nRight, /* Text on the right */ |
| 1864 | DiffConfig *pCfg, /* Configuration options */ |
| 1865 | int *pNResult /* OUTPUT: Bytes of result */ |
| 1866 | ); |
| 1867 | static void longestCommonSequence( |
| 1868 | DContext *p, /* Two files being compared */ |
| 1869 | int iS1, int iE1, /* Range of lines in p->aFrom[] */ |
| 1870 | int iS2, int iE2, /* Range of lines in p->aTo[] */ |
| 1871 | int *piSX, int *piEX, /* Write p->aFrom[] common segment here */ |
| 1872 | int *piSY, int *piEY /* Write p->aTo[] common segment here */ |
| 1873 | ); |
| 1874 | |
| 1875 | /* |
| 1876 | ** Make a copy of a list of nLine DLine objects from one array to |
| 1877 | ** another. Hash the new array to ignore whitespace. |
| 1878 | */ |
| 1879 | static void diffDLineXfer( |
| 1880 | DLine *aTo, |
| 1881 | const DLine *aFrom, |
| 1882 | int nLine |
| 1883 | ){ |
| 1884 | int i, j, k; |
| 1885 | u64 h, h2; |
| 1886 | for(i=0; i<nLine; i++) aTo[i].iHash = 0; |
| 1887 | for(i=0; i<nLine; i++){ |
| 1888 | const char *z = aFrom[i].z; |
| 1889 | int n = aFrom[i].n; |
| 1890 | for(j=0; j<n && diff_isspace(z[j]); j++){} |
| 1891 | aTo[i].z = &z[j]; |
| 1892 | for(k=aFrom[i].n; k>j && diff_isspace(z[k-1]); k--){} |
| 1893 | aTo[i].n = n = k-j; |
| 1894 | aTo[i].indent = 0; |
| 1895 | aTo[i].nw = 0; |
| 1896 | for(h=0; j<k; j++){ |
| 1897 | char c = z[j]; |
| 1898 | if( !diff_isspace(c) ){ |
| 1899 | h = (h^c)*9000000000000000041LL; |
| 1900 | } |
| 1901 | } |
| 1902 | aTo[i].h = h = ((h%281474976710597LL)<<LENGTH_MASK_SZ) | n; |
| 1903 | h2 = h % nLine; |
| 1904 | aTo[i].iNext = aTo[h2].iHash; |
| 1905 | aTo[h2].iHash = i+1; |
| 1906 | } |
| 1907 | } |
| 1908 | |
| 1909 | |
| 1910 | /* |
| 1911 | ** For a difficult diff-block alignment that was originally for |
| 1912 | ** the default consider-all-whitespace algorithm, try to find the |
| 1913 | ** longest common subsequence between the two blocks that involves |
| 1914 | ** only whitespace changes. |
| 1915 | */ |
| 1916 | static unsigned char *diffBlockAlignmentIgnoreSpace( |
| 1917 | DLine *aLeft, int nLeft, /* Text on the left */ |
| 1918 | DLine *aRight, int nRight, /* Text on the right */ |
| 1919 | DiffConfig *pCfg, /* Configuration options */ |
| 1920 | int *pNResult /* OUTPUT: Bytes of result */ |
| 1921 | ){ |
| 1922 | DContext dc; |
| 1923 | int iSX, iEX; /* Start and end of LCS on the left */ |
| 1924 | int iSY, iEY; /* Start and end of the LCS on the right */ |
| 1925 | unsigned char *a1, *a2; |
| 1926 | int n1, n2, nLCS; |
| 1927 | |
| 1928 | dc.aEdit = 0; |
| 1929 | dc.nEdit = 0; |
| 1930 | dc.nEditAlloc = 0; |
| 1931 | dc.nFrom = nLeft; |
| 1932 | dc.nTo = nRight; |
| 1933 | dc.xDiffer = compare_dline_ignore_allws; |
| 1934 | dc.aFrom = fossil_malloc( sizeof(DLine)*(nLeft+nRight) ); |
| 1935 | dc.aTo = &dc.aFrom[dc.nFrom]; |
| 1936 | diffDLineXfer(dc.aFrom, aLeft, nLeft); |
| 1937 | diffDLineXfer(dc.aTo, aRight, nRight); |
| 1938 | longestCommonSequence(&dc,0,nLeft,0,nRight,&iSX,&iEX,&iSY,&iEY); |
| 1939 | fossil_free(dc.aFrom); |
| 1940 | nLCS = iEX - iSX; |
| 1941 | if( nLCS<5 ) return 0; /* No good LCS was found */ |
| 1942 | |
| 1943 | a1 = diffBlockAlignment(aLeft,iSX,aRight,iSY,pCfg,&n1); |
| 1944 | a2 = diffBlockAlignment(aLeft+iEX, nLeft-iEX, |
| 1945 | aRight+iEY, nRight-iEY, |
| 1946 | pCfg, &n2); |
| 1947 | a1 = fossil_realloc(a1, n1+nLCS+n2); |
| 1948 | memcpy(a1+n1+nLCS,a2,n2); |
| 1949 | memset(a1+n1,3,nLCS); |
| 1950 | fossil_free(a2); |
| 1951 | *pNResult = n1+n2+nLCS; |
| 1952 | return a1; |
| 1953 | } |
| 1954 | |
| 1955 | |
| 1956 | /* |
| 1957 | ** This is a helper route for diffBlockAlignment(). In this case, |
| 1958 | ** a very large block is encountered that might be too expensive to |
| 1959 | ** use the O(N*N) Wagner edit distance algorithm. So instead, this |
| 1960 | ** block implements a less-precise but faster O(N*logN) divide-and-conquer |
| 1961 | ** approach. |
| 1962 | */ |
| 1963 | static unsigned char *diffBlockAlignmentDivideAndConquer( |
| 1964 | DLine *aLeft, int nLeft, /* Text on the left */ |
| 1965 | DLine *aRight, int nRight, /* Text on the right */ |
| 1966 | DiffConfig *pCfg, /* Configuration options */ |
| 1967 | int *pNResult /* OUTPUT: Bytes of result */ |
| 1968 | ){ |
| 1969 | DLine *aSmall; /* The smaller of aLeft and aRight */ |
| 1970 | DLine *aBig; /* The larger of aLeft and aRight */ |
| 1971 | int nSmall, nBig; /* Size of aSmall and aBig. nSmall<=nBig */ |
| 1972 | int iDivSmall, iDivBig; /* Divider point for aSmall and aBig */ |
| 1973 | int iDivLeft, iDivRight; /* Divider point for aLeft and aRight */ |
| 1974 | unsigned char *a1, *a2; /* Results of the alignments on two halves */ |
| 1975 | int n1, n2; /* Number of entries in a1 and a2 */ |
| 1976 | int score, bestScore; /* Score and best score seen so far */ |
| 1977 | int i; /* Loop counter */ |
| 1978 | |
| 1979 | if( nLeft>nRight ){ |
| 1980 | aSmall = aRight; |
| 1981 | nSmall = nRight; |
| 1982 | aBig = aLeft; |
| 1983 | nBig = nLeft; |
| 1984 | }else{ |
| 1985 | aSmall = aLeft; |
| 1986 | nSmall = nLeft; |
| 1987 | aBig = aRight; |
| 1988 | nBig = nRight; |
| 1989 | } |
| 1990 | iDivBig = nBig/2; |
| 1991 | iDivSmall = nSmall/2; |
| 1992 | bestScore = 10000; |
| 1993 | for(i=0; i<nSmall; i++){ |
| 1994 | score = match_dline(aBig+iDivBig, aSmall+i) + abs(i-nSmall/2)*2; |
| 1995 | if( score<bestScore ){ |
| 1996 | bestScore = score; |
| 1997 | iDivSmall = i; |
| 1998 | } |
| 1999 | } |
| 2000 | if( aSmall==aRight ){ |
| 2001 | iDivRight = iDivSmall; |
| 2002 | iDivLeft = iDivBig; |
| 2003 | }else{ |
| 2004 | iDivRight = iDivBig; |
| 2005 | iDivLeft = iDivSmall; |
| 2006 | } |
| 2007 | a1 = diffBlockAlignment(aLeft,iDivLeft,aRight,iDivRight,pCfg,&n1); |
| 2008 | a2 = diffBlockAlignment(aLeft+iDivLeft, nLeft-iDivLeft, |
| 2009 | aRight+iDivRight, nRight-iDivRight, |
| 2010 | pCfg, &n2); |
| 2011 | a1 = fossil_realloc(a1, n1+n2 ); |
| 2012 | memcpy(a1+n1,a2,n2); |
| 2013 | fossil_free(a2); |
| 2014 | *pNResult = n1+n2; |
| 2015 | return a1; |
| 2016 | } |
| 2017 | |
| 2018 | |
| 2019 | /* |
| 2020 | ** The threshold at which diffBlockAlignment transitions from the |
| 2021 | ** O(N*N) Wagner minimum-edit-distance algorithm to a less process |
| 2022 | ** O(NlogN) divide-and-conquer approach. |
| @@ -1912,61 +2071,22 @@ | |
| 2071 | memset(aM, 1, nLeft); |
| 2072 | *pNResult = nLeft; |
| 2073 | return aM; |
| 2074 | } |
| 2075 | |
| 2076 | /* For large alignments, try to use alternative algorithms that are |
| 2077 | ** faster than the O(N*N) Wagner edit distance. |
| 2078 | */ |
| 2079 | if( nLeft*nRight>DIFF_ALIGN_MX && (pCfg->diffFlags & DIFF_SLOW_SBS)==0 ){ |
| 2080 | if( (pCfg->diffFlags & DIFF_IGNORE_ALLWS)==0 ){ |
| 2081 | unsigned char *aRes; |
| 2082 | aRes = diffBlockAlignmentIgnoreSpace( |
| 2083 | aLeft, nLeft,aRight, nRight,pCfg,pNResult); |
| 2084 | if( aRes ) return aRes; |
| 2085 | } |
| 2086 | return diffBlockAlignmentDivideAndConquer( |
| 2087 | aLeft, nLeft,aRight, nRight,pCfg,pNResult); |
| 2088 | } |
| 2089 | |
| 2090 | /* If we reach this point, we will be doing an O(N*N) Wagner minimum |
| 2091 | ** edit distance to compute the alignment. |
| 2092 | */ |
| 2093 |