Fossil SCM
Improvements to the alignment algorithm for block changes in side-by-side diff.
Commit
a484cfc2f2e66ae5a9dff121bef231bf6b96edfb
Parent
357d26bc36f40c8…
1 file changed
+175
-101
+175
-101
| --- src/diff.c | ||
| +++ src/diff.c | ||
| @@ -162,82 +162,10 @@ | ||
| 162 | 162 | ** Return true if two DLine elements are identical. |
| 163 | 163 | */ |
| 164 | 164 | static int same_dline(DLine *pA, DLine *pB){ |
| 165 | 165 | return pA->h==pB->h && memcmp(pA->z,pB->z,pA->h & LENGTH_MASK)==0; |
| 166 | 166 | } |
| 167 | - | |
| 168 | -/* | |
| 169 | -** Return the number which is larger the closer pA and pB match. | |
| 170 | -** | |
| 171 | -** The number returned of characters that match in pA and pB. The | |
| 172 | -** number if artifically reduced if neither pA nor pB match very well. | |
| 173 | -*/ | |
| 174 | -static int match_dline(DLine *pA, DLine *pB){ | |
| 175 | - int *pToFree; | |
| 176 | - int *a; | |
| 177 | - const char *zA; | |
| 178 | - const char *zB; | |
| 179 | - int nA; | |
| 180 | - int nB; | |
| 181 | - int minDist; | |
| 182 | - int i, j, dist; | |
| 183 | - int aStatic[200]; | |
| 184 | - | |
| 185 | - zA = pA->z; | |
| 186 | - zB = pB->z; | |
| 187 | - nA = pA->h & LENGTH_MASK; | |
| 188 | - nB = pB->h & LENGTH_MASK; | |
| 189 | - minDist = nA; | |
| 190 | - if( minDist>nB ) minDist = nB; | |
| 191 | - minDist = (minDist)/2; | |
| 192 | - dist = 0; | |
| 193 | - | |
| 194 | - /* Remove any common prefix and suffix */ | |
| 195 | - while( nA && nB && zA[0]==zB[0] ){ | |
| 196 | - nA--; | |
| 197 | - nB--; | |
| 198 | - zA++; | |
| 199 | - zB++; | |
| 200 | - dist++; | |
| 201 | - } | |
| 202 | - while( nA && nB && zA[nA-1]==zB[nB-1] ){ | |
| 203 | - nA--; | |
| 204 | - nB--; | |
| 205 | - dist++; | |
| 206 | - } | |
| 207 | - | |
| 208 | - /* Early out if one or the other string is empty */ | |
| 209 | - if( nA==0 ) return dist; | |
| 210 | - if( nB==0 ) return dist; | |
| 211 | - | |
| 212 | - /* Allocate space of the dynamic programming array */ | |
| 213 | - if( nB<sizeof(aStatic)/sizeof(aStatic[0]) ){ | |
| 214 | - pToFree = 0; | |
| 215 | - a = aStatic; | |
| 216 | - }else{ | |
| 217 | - pToFree = a = fossil_malloc( (nB+1)*sizeof(a[0]) ); | |
| 218 | - } | |
| 219 | - | |
| 220 | - /* Compute the length best sequence of matching characters */ | |
| 221 | - for(i=0; i<=nB; i++) a[i] = 0; | |
| 222 | - for(j=0; j<nA; j++){ | |
| 223 | - int p = 0; | |
| 224 | - for(i=0; i<nB; i++){ | |
| 225 | - int m = a[i]; | |
| 226 | - if( m<a[i+1] ) m = a[i+1]; | |
| 227 | - if( m<p+1 && zA[j]==zB[i] ) m = p+1; | |
| 228 | - p = a[i+1]; | |
| 229 | - a[i+1] = m; | |
| 230 | - } | |
| 231 | - } | |
| 232 | - dist += a[nB]; | |
| 233 | - | |
| 234 | - /* Return the result */ | |
| 235 | - fossil_free(pToFree); | |
| 236 | - return dist>minDist ? dist : 0; | |
| 237 | -} | |
| 238 | - | |
| 239 | 167 | |
| 240 | 168 | /* |
| 241 | 169 | ** Append a single line of context-diff output to pOut. |
| 242 | 170 | */ |
| 243 | 171 | static void appendDiffLine( |
| @@ -602,10 +530,152 @@ | ||
| 602 | 530 | p->iEnd = nRight - nSuffix; |
| 603 | 531 | sbsWriteText(p, pRight, SBS_NEWLINE); |
| 604 | 532 | } |
| 605 | 533 | } |
| 606 | 534 | |
| 535 | +/* | |
| 536 | +** Return the number between 0 and 100 that is smaller the closer pA and | |
| 537 | +** pB match. Return 0 for a perfect match. Return 100 if pA and pB are | |
| 538 | +** completely different. | |
| 539 | +*/ | |
| 540 | +static int match_dline(DLine *pA, DLine *pB){ | |
| 541 | + const char *zA; | |
| 542 | + const char *zB; | |
| 543 | + int nA; | |
| 544 | + int nB; | |
| 545 | + int minDist; | |
| 546 | + int i; | |
| 547 | + int nMatch; | |
| 548 | + int score; | |
| 549 | + | |
| 550 | + zA = pA->z; | |
| 551 | + zB = pB->z; | |
| 552 | + nA = pA->h & LENGTH_MASK; | |
| 553 | + nB = pB->h & LENGTH_MASK; | |
| 554 | + while( nA>0 && fossil_isspace(zA[0]) ){ nA--; zA++; } | |
| 555 | + while( nA>0 && fossil_isspace(zA[nA-1]) ){ nA--; } | |
| 556 | + while( nB>0 && fossil_isspace(zB[0]) ){ nB--; zB++; } | |
| 557 | + while( nB>0 && fossil_isspace(zB[nB-1]) ){ nB--; } | |
| 558 | + minDist = nA; | |
| 559 | + if( minDist>nB ) minDist = nB; | |
| 560 | + if( minDist==0 ){ | |
| 561 | + score = 100; | |
| 562 | + }else{ | |
| 563 | + for(i=0; i<nA && i<nB && zA[i]==zB[i]; i++){} | |
| 564 | + nMatch = i; | |
| 565 | + for(i=1; i<=nA && i<=nB && zA[nA-i]==zB[nB-i]; i++){} | |
| 566 | + nMatch += i-1; | |
| 567 | + score = (nMatch >= minDist) ? 0 : ((minDist - nMatch)*100)/minDist; | |
| 568 | + } | |
| 569 | + return score; | |
| 570 | +} | |
| 571 | + | |
| 572 | +/* | |
| 573 | +** There is a change block in which nLeft lines of text on the left are | |
| 574 | +** converted into nRight lines of text on the right. This routine computes | |
| 575 | +** how the lines on the left line up with the lines on the right. | |
| 576 | +** | |
| 577 | +** The return value is a buffer of unsigned characters, obtained from | |
| 578 | +** fossil_malloc(). (The caller needs to free the return value using | |
| 579 | +** fossil_free().) Entries in the returned array have values as follows: | |
| 580 | +** | |
| 581 | +** 1. Delete the next line of pLeft. | |
| 582 | +** 2. The next line of pLeft changes into the next line of pRight. | |
| 583 | +** 3. Insert the next line of pRight. | |
| 584 | +** | |
| 585 | +** The length of the returned array will be just large enough to cause | |
| 586 | +** all elements of pLeft and pRight to be consumed. | |
| 587 | +** | |
| 588 | +** Algorithm: Wagner's minimum edit-distance algorithm, modified by | |
| 589 | +** adding a cost to each match based on how well the two rows match | |
| 590 | +** each other. Insertion and deletion costs are 50. Match costs | |
| 591 | +** are between 0 and 100 where 0 is a perfect match 100 is a complete | |
| 592 | +** mismatch. | |
| 593 | +*/ | |
| 594 | +static unsigned char *sbsAlignment( | |
| 595 | + DLine *aLeft, int nLeft, /* Text on the left */ | |
| 596 | + DLine *aRight, int nRight /* Text on the right */ | |
| 597 | +){ | |
| 598 | + int i, j, k; /* Loop counters */ | |
| 599 | + int *a; /* One row of the Wagner matrix */ | |
| 600 | + int *pToFree; /* Space that needs to be freed */ | |
| 601 | + unsigned char *aM; /* Wagner result matrix */ | |
| 602 | + int aBuf[100]; /* Stack space for a[] if nRight not to big */ | |
| 603 | + | |
| 604 | + aM = fossil_malloc( (nLeft+1)*(nRight+1) ); | |
| 605 | + if( nLeft==0 ){ | |
| 606 | + memset(aM, 3, nRight); | |
| 607 | + return aM; | |
| 608 | + } | |
| 609 | + if( nRight==0 ){ | |
| 610 | + memset(aM, 1, nLeft); | |
| 611 | + return aM; | |
| 612 | + } | |
| 613 | + if( nRight < (sizeof(aBuf)/sizeof(aBuf[0]))-1 ){ | |
| 614 | + pToFree = 0; | |
| 615 | + a = aBuf; | |
| 616 | + }else{ | |
| 617 | + a = pToFree = fossil_malloc( sizeof(a[0])*(nRight+1) ); | |
| 618 | + } | |
| 619 | + | |
| 620 | + /* Compute the best alignment */ | |
| 621 | + for(i=0; i<=nRight; i++){ | |
| 622 | + aM[i] = 3; | |
| 623 | + a[i] = i*50; | |
| 624 | + } | |
| 625 | + aM[0] = 0; | |
| 626 | + for(j=1; j<=nLeft; j++){ | |
| 627 | + int p = a[0]; | |
| 628 | + a[0] = p+50; | |
| 629 | + aM[j*(nRight+1)] = 1; | |
| 630 | + for(i=1; i<=nRight; i++){ | |
| 631 | + int m = a[i-1]+50; | |
| 632 | + int d = 3; | |
| 633 | + if( m>a[i]+50 ){ | |
| 634 | + m = a[i]+50; | |
| 635 | + d = 1; | |
| 636 | + } | |
| 637 | + if( m>p ){ | |
| 638 | + int score = match_dline(&aLeft[j-1], &aRight[i-1]); | |
| 639 | + if( score<50 && m>p+score ){ | |
| 640 | + m = p+score; | |
| 641 | + d = 2; | |
| 642 | + } | |
| 643 | + } | |
| 644 | + p = a[i]; | |
| 645 | + a[i] = m; | |
| 646 | + aM[j*(nRight+1)+i] = d; | |
| 647 | + } | |
| 648 | + } | |
| 649 | + | |
| 650 | + /* Compute the lowest-cost path back through the matrix */ | |
| 651 | + i = nRight; | |
| 652 | + j = nLeft; | |
| 653 | + k = (nRight+1)*(nLeft+1)-1; | |
| 654 | + while( i+j>0 ){ | |
| 655 | + unsigned char c = aM[k--]; | |
| 656 | + if( c==2 ){ | |
| 657 | + assert( i>0 && j>0 ); | |
| 658 | + i--; | |
| 659 | + j--; | |
| 660 | + }else if( c==3 ){ | |
| 661 | + assert( i>0 ); | |
| 662 | + i--; | |
| 663 | + }else{ | |
| 664 | + assert( j>0 ); | |
| 665 | + j--; | |
| 666 | + } | |
| 667 | + aM[k] = aM[j*(nRight+1)+i]; | |
| 668 | + } | |
| 669 | + k++; | |
| 670 | + i = (nRight+1)*(nLeft+1) - k; | |
| 671 | + memmove(aM, &aM[k], i); | |
| 672 | + | |
| 673 | + /* Return the result */ | |
| 674 | + fossil_free(pToFree); | |
| 675 | + return aM; | |
| 676 | +} | |
| 607 | 677 | |
| 608 | 678 | /* |
| 609 | 679 | ** Given a diff context in which the aEdit[] array has been filled |
| 610 | 680 | ** in, compute a side-by-side diff into pOut. |
| 611 | 681 | */ |
| @@ -699,48 +769,52 @@ | ||
| 699 | 769 | a += m; |
| 700 | 770 | b += m; |
| 701 | 771 | |
| 702 | 772 | /* Show the differences */ |
| 703 | 773 | for(i=0; i<nr; i++){ |
| 774 | + unsigned char *alignment; | |
| 704 | 775 | ma = R[r+i*3+1]; /* Lines on left but not on right */ |
| 705 | 776 | mb = R[r+i*3+2]; /* Lines on right but not on left */ |
| 706 | - while( ma+mb>0 ){ | |
| 707 | - if( ma<mb && (ma==0 || | |
| 708 | - match_dline(&A[a],&B[b]) < match_dline(&A[a],&B[b+1]) ) ){ | |
| 777 | + alignment = sbsAlignment(&A[a], ma, &B[b], mb); | |
| 778 | + for(j=0; ma+mb>0; j++){ | |
| 779 | + if( alignment[j]==1 ){ | |
| 780 | + s.n = 0; | |
| 781 | + sbsWriteLineno(&s, a); | |
| 782 | + s.iStart = 0; | |
| 783 | + s.zStart = "<span class=\"diffrm\">"; | |
| 784 | + s.iEnd = s.width; | |
| 785 | + sbsWriteText(&s, &A[a], SBS_PAD); | |
| 786 | + sbsWrite(&s, " <\n", 3); | |
| 787 | + blob_append(pOut, s.zLine, s.n); | |
| 788 | + assert( ma>0 ); | |
| 789 | + ma--; | |
| 790 | + a++; | |
| 791 | + }else if( alignment[j]==2 ){ | |
| 792 | + s.n = 0; | |
| 793 | + sbsWriteLineChange(&s, &A[a], a, &B[b], b); | |
| 794 | + blob_append(pOut, s.zLine, s.n); | |
| 795 | + assert( ma>0 && mb>0 ); | |
| 796 | + ma--; | |
| 797 | + mb--; | |
| 798 | + a++; | |
| 799 | + b++; | |
| 800 | + }else{ | |
| 709 | 801 | s.n = 0; |
| 710 | 802 | sbsWriteSpace(&s, width + 7); |
| 711 | 803 | sbsWrite(&s, " > ", 3); |
| 712 | 804 | sbsWriteLineno(&s, b); |
| 713 | 805 | s.iStart = 0; |
| 714 | 806 | s.zStart = "<span class=\"diffadd\">"; |
| 715 | 807 | s.iEnd = s.width; |
| 716 | 808 | sbsWriteText(&s, &B[b], SBS_NEWLINE); |
| 717 | 809 | blob_append(pOut, s.zLine, s.n); |
| 718 | - mb--; | |
| 719 | - b++; | |
| 720 | - }else if( ma>mb && (mb==0 || | |
| 721 | - match_dline(&A[a],&B[b]) < match_dline(&A[a+1],&B[b])) ){ | |
| 722 | - s.n = 0; | |
| 723 | - sbsWriteLineno(&s, a); | |
| 724 | - s.iStart = 0; | |
| 725 | - s.zStart = "<span class=\"diffrm\">"; | |
| 726 | - s.iEnd = s.width; | |
| 727 | - sbsWriteText(&s, &A[a], SBS_PAD); | |
| 728 | - sbsWrite(&s, " <\n", 3); | |
| 729 | - blob_append(pOut, s.zLine, s.n); | |
| 730 | - ma--; | |
| 731 | - a++; | |
| 732 | - }else{ | |
| 733 | - s.n = 0; | |
| 734 | - sbsWriteLineChange(&s, &A[a], a, &B[b], b); | |
| 735 | - blob_append(pOut, s.zLine, s.n); | |
| 736 | - ma--; | |
| 737 | - mb--; | |
| 738 | - a++; | |
| 739 | - b++; | |
| 740 | - } | |
| 741 | - } | |
| 810 | + assert( mb>0 ); | |
| 811 | + mb--; | |
| 812 | + b++; | |
| 813 | + } | |
| 814 | + } | |
| 815 | + fossil_free(alignment); | |
| 742 | 816 | if( i<nr-1 ){ |
| 743 | 817 | m = R[r+i*3+3]; |
| 744 | 818 | for(j=0; j<m; j++){ |
| 745 | 819 | s.n = 0; |
| 746 | 820 | sbsWriteLineno(&s, a+j); |
| @@ -1107,11 +1181,11 @@ | ||
| 1107 | 1181 | p->aEdit[r+3]++; |
| 1108 | 1182 | cpy--; |
| 1109 | 1183 | } |
| 1110 | 1184 | |
| 1111 | 1185 | /* Shift insertions toward the end of the file */ |
| 1112 | - while( p->aEdit[r+3]>0 && del==0 && ins>0 ){ | |
| 1186 | + while( r+3<p->nEdit && p->aEdit[r+3]>0 && del==0 && ins>0 ){ | |
| 1113 | 1187 | DLine *pTop = &p->aTo[lnTo]; /* First line inserted */ |
| 1114 | 1188 | DLine *pBtm = &p->aTo[lnTo+ins]; /* First line past end of insert */ |
| 1115 | 1189 | if( same_dline(pTop, pBtm)==0 ) break; |
| 1116 | 1190 | if( LENGTH(pTop)+LENGTH(pBtm-1)<=LENGTH(pTop+1)+LENGTH(pBtm) ) break; |
| 1117 | 1191 | lnFrom++; |
| @@ -1133,11 +1207,11 @@ | ||
| 1133 | 1207 | p->aEdit[r+3]++; |
| 1134 | 1208 | cpy--; |
| 1135 | 1209 | } |
| 1136 | 1210 | |
| 1137 | 1211 | /* Shift deletions toward the end of the file */ |
| 1138 | - while( p->aEdit[r+3]>0 && del>0 && ins==0 ){ | |
| 1212 | + while( r+3<p->nEdit && p->aEdit[r+3]>0 && del>0 && ins==0 ){ | |
| 1139 | 1213 | DLine *pTop = &p->aFrom[lnFrom]; /* First line deleted */ |
| 1140 | 1214 | DLine *pBtm = &p->aFrom[lnFrom+del]; /* First line past end of delete */ |
| 1141 | 1215 | if( same_dline(pTop, pBtm)==0 ) break; |
| 1142 | 1216 | if( LENGTH(pTop)+LENGTH(pBtm-1)<=LENGTH(pTop)+LENGTH(pBtm) ) break; |
| 1143 | 1217 | lnFrom++; |
| 1144 | 1218 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -162,82 +162,10 @@ | |
| 162 | ** Return true if two DLine elements are identical. |
| 163 | */ |
| 164 | static int same_dline(DLine *pA, DLine *pB){ |
| 165 | return pA->h==pB->h && memcmp(pA->z,pB->z,pA->h & LENGTH_MASK)==0; |
| 166 | } |
| 167 | |
| 168 | /* |
| 169 | ** Return the number which is larger the closer pA and pB match. |
| 170 | ** |
| 171 | ** The number returned of characters that match in pA and pB. The |
| 172 | ** number if artifically reduced if neither pA nor pB match very well. |
| 173 | */ |
| 174 | static int match_dline(DLine *pA, DLine *pB){ |
| 175 | int *pToFree; |
| 176 | int *a; |
| 177 | const char *zA; |
| 178 | const char *zB; |
| 179 | int nA; |
| 180 | int nB; |
| 181 | int minDist; |
| 182 | int i, j, dist; |
| 183 | int aStatic[200]; |
| 184 | |
| 185 | zA = pA->z; |
| 186 | zB = pB->z; |
| 187 | nA = pA->h & LENGTH_MASK; |
| 188 | nB = pB->h & LENGTH_MASK; |
| 189 | minDist = nA; |
| 190 | if( minDist>nB ) minDist = nB; |
| 191 | minDist = (minDist)/2; |
| 192 | dist = 0; |
| 193 | |
| 194 | /* Remove any common prefix and suffix */ |
| 195 | while( nA && nB && zA[0]==zB[0] ){ |
| 196 | nA--; |
| 197 | nB--; |
| 198 | zA++; |
| 199 | zB++; |
| 200 | dist++; |
| 201 | } |
| 202 | while( nA && nB && zA[nA-1]==zB[nB-1] ){ |
| 203 | nA--; |
| 204 | nB--; |
| 205 | dist++; |
| 206 | } |
| 207 | |
| 208 | /* Early out if one or the other string is empty */ |
| 209 | if( nA==0 ) return dist; |
| 210 | if( nB==0 ) return dist; |
| 211 | |
| 212 | /* Allocate space of the dynamic programming array */ |
| 213 | if( nB<sizeof(aStatic)/sizeof(aStatic[0]) ){ |
| 214 | pToFree = 0; |
| 215 | a = aStatic; |
| 216 | }else{ |
| 217 | pToFree = a = fossil_malloc( (nB+1)*sizeof(a[0]) ); |
| 218 | } |
| 219 | |
| 220 | /* Compute the length best sequence of matching characters */ |
| 221 | for(i=0; i<=nB; i++) a[i] = 0; |
| 222 | for(j=0; j<nA; j++){ |
| 223 | int p = 0; |
| 224 | for(i=0; i<nB; i++){ |
| 225 | int m = a[i]; |
| 226 | if( m<a[i+1] ) m = a[i+1]; |
| 227 | if( m<p+1 && zA[j]==zB[i] ) m = p+1; |
| 228 | p = a[i+1]; |
| 229 | a[i+1] = m; |
| 230 | } |
| 231 | } |
| 232 | dist += a[nB]; |
| 233 | |
| 234 | /* Return the result */ |
| 235 | fossil_free(pToFree); |
| 236 | return dist>minDist ? dist : 0; |
| 237 | } |
| 238 | |
| 239 | |
| 240 | /* |
| 241 | ** Append a single line of context-diff output to pOut. |
| 242 | */ |
| 243 | static void appendDiffLine( |
| @@ -602,10 +530,152 @@ | |
| 602 | p->iEnd = nRight - nSuffix; |
| 603 | sbsWriteText(p, pRight, SBS_NEWLINE); |
| 604 | } |
| 605 | } |
| 606 | |
| 607 | |
| 608 | /* |
| 609 | ** Given a diff context in which the aEdit[] array has been filled |
| 610 | ** in, compute a side-by-side diff into pOut. |
| 611 | */ |
| @@ -699,48 +769,52 @@ | |
| 699 | a += m; |
| 700 | b += m; |
| 701 | |
| 702 | /* Show the differences */ |
| 703 | for(i=0; i<nr; i++){ |
| 704 | ma = R[r+i*3+1]; /* Lines on left but not on right */ |
| 705 | mb = R[r+i*3+2]; /* Lines on right but not on left */ |
| 706 | while( ma+mb>0 ){ |
| 707 | if( ma<mb && (ma==0 || |
| 708 | match_dline(&A[a],&B[b]) < match_dline(&A[a],&B[b+1]) ) ){ |
| 709 | s.n = 0; |
| 710 | sbsWriteSpace(&s, width + 7); |
| 711 | sbsWrite(&s, " > ", 3); |
| 712 | sbsWriteLineno(&s, b); |
| 713 | s.iStart = 0; |
| 714 | s.zStart = "<span class=\"diffadd\">"; |
| 715 | s.iEnd = s.width; |
| 716 | sbsWriteText(&s, &B[b], SBS_NEWLINE); |
| 717 | blob_append(pOut, s.zLine, s.n); |
| 718 | mb--; |
| 719 | b++; |
| 720 | }else if( ma>mb && (mb==0 || |
| 721 | match_dline(&A[a],&B[b]) < match_dline(&A[a+1],&B[b])) ){ |
| 722 | s.n = 0; |
| 723 | sbsWriteLineno(&s, a); |
| 724 | s.iStart = 0; |
| 725 | s.zStart = "<span class=\"diffrm\">"; |
| 726 | s.iEnd = s.width; |
| 727 | sbsWriteText(&s, &A[a], SBS_PAD); |
| 728 | sbsWrite(&s, " <\n", 3); |
| 729 | blob_append(pOut, s.zLine, s.n); |
| 730 | ma--; |
| 731 | a++; |
| 732 | }else{ |
| 733 | s.n = 0; |
| 734 | sbsWriteLineChange(&s, &A[a], a, &B[b], b); |
| 735 | blob_append(pOut, s.zLine, s.n); |
| 736 | ma--; |
| 737 | mb--; |
| 738 | a++; |
| 739 | b++; |
| 740 | } |
| 741 | } |
| 742 | if( i<nr-1 ){ |
| 743 | m = R[r+i*3+3]; |
| 744 | for(j=0; j<m; j++){ |
| 745 | s.n = 0; |
| 746 | sbsWriteLineno(&s, a+j); |
| @@ -1107,11 +1181,11 @@ | |
| 1107 | p->aEdit[r+3]++; |
| 1108 | cpy--; |
| 1109 | } |
| 1110 | |
| 1111 | /* Shift insertions toward the end of the file */ |
| 1112 | while( p->aEdit[r+3]>0 && del==0 && ins>0 ){ |
| 1113 | DLine *pTop = &p->aTo[lnTo]; /* First line inserted */ |
| 1114 | DLine *pBtm = &p->aTo[lnTo+ins]; /* First line past end of insert */ |
| 1115 | if( same_dline(pTop, pBtm)==0 ) break; |
| 1116 | if( LENGTH(pTop)+LENGTH(pBtm-1)<=LENGTH(pTop+1)+LENGTH(pBtm) ) break; |
| 1117 | lnFrom++; |
| @@ -1133,11 +1207,11 @@ | |
| 1133 | p->aEdit[r+3]++; |
| 1134 | cpy--; |
| 1135 | } |
| 1136 | |
| 1137 | /* Shift deletions toward the end of the file */ |
| 1138 | while( p->aEdit[r+3]>0 && del>0 && ins==0 ){ |
| 1139 | DLine *pTop = &p->aFrom[lnFrom]; /* First line deleted */ |
| 1140 | DLine *pBtm = &p->aFrom[lnFrom+del]; /* First line past end of delete */ |
| 1141 | if( same_dline(pTop, pBtm)==0 ) break; |
| 1142 | if( LENGTH(pTop)+LENGTH(pBtm-1)<=LENGTH(pTop)+LENGTH(pBtm) ) break; |
| 1143 | lnFrom++; |
| 1144 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -162,82 +162,10 @@ | |
| 162 | ** Return true if two DLine elements are identical. |
| 163 | */ |
| 164 | static int same_dline(DLine *pA, DLine *pB){ |
| 165 | return pA->h==pB->h && memcmp(pA->z,pB->z,pA->h & LENGTH_MASK)==0; |
| 166 | } |
| 167 | |
| 168 | /* |
| 169 | ** Append a single line of context-diff output to pOut. |
| 170 | */ |
| 171 | static void appendDiffLine( |
| @@ -602,10 +530,152 @@ | |
| 530 | p->iEnd = nRight - nSuffix; |
| 531 | sbsWriteText(p, pRight, SBS_NEWLINE); |
| 532 | } |
| 533 | } |
| 534 | |
| 535 | /* |
| 536 | ** Return the number between 0 and 100 that is smaller the closer pA and |
| 537 | ** pB match. Return 0 for a perfect match. Return 100 if pA and pB are |
| 538 | ** completely different. |
| 539 | */ |
| 540 | static int match_dline(DLine *pA, DLine *pB){ |
| 541 | const char *zA; |
| 542 | const char *zB; |
| 543 | int nA; |
| 544 | int nB; |
| 545 | int minDist; |
| 546 | int i; |
| 547 | int nMatch; |
| 548 | int score; |
| 549 | |
| 550 | zA = pA->z; |
| 551 | zB = pB->z; |
| 552 | nA = pA->h & LENGTH_MASK; |
| 553 | nB = pB->h & LENGTH_MASK; |
| 554 | while( nA>0 && fossil_isspace(zA[0]) ){ nA--; zA++; } |
| 555 | while( nA>0 && fossil_isspace(zA[nA-1]) ){ nA--; } |
| 556 | while( nB>0 && fossil_isspace(zB[0]) ){ nB--; zB++; } |
| 557 | while( nB>0 && fossil_isspace(zB[nB-1]) ){ nB--; } |
| 558 | minDist = nA; |
| 559 | if( minDist>nB ) minDist = nB; |
| 560 | if( minDist==0 ){ |
| 561 | score = 100; |
| 562 | }else{ |
| 563 | for(i=0; i<nA && i<nB && zA[i]==zB[i]; i++){} |
| 564 | nMatch = i; |
| 565 | for(i=1; i<=nA && i<=nB && zA[nA-i]==zB[nB-i]; i++){} |
| 566 | nMatch += i-1; |
| 567 | score = (nMatch >= minDist) ? 0 : ((minDist - nMatch)*100)/minDist; |
| 568 | } |
| 569 | return score; |
| 570 | } |
| 571 | |
| 572 | /* |
| 573 | ** There is a change block in which nLeft lines of text on the left are |
| 574 | ** converted into nRight lines of text on the right. This routine computes |
| 575 | ** how the lines on the left line up with the lines on the right. |
| 576 | ** |
| 577 | ** The return value is a buffer of unsigned characters, obtained from |
| 578 | ** fossil_malloc(). (The caller needs to free the return value using |
| 579 | ** fossil_free().) Entries in the returned array have values as follows: |
| 580 | ** |
| 581 | ** 1. Delete the next line of pLeft. |
| 582 | ** 2. The next line of pLeft changes into the next line of pRight. |
| 583 | ** 3. Insert the next line of pRight. |
| 584 | ** |
| 585 | ** The length of the returned array will be just large enough to cause |
| 586 | ** all elements of pLeft and pRight to be consumed. |
| 587 | ** |
| 588 | ** Algorithm: Wagner's minimum edit-distance algorithm, modified by |
| 589 | ** adding a cost to each match based on how well the two rows match |
| 590 | ** each other. Insertion and deletion costs are 50. Match costs |
| 591 | ** are between 0 and 100 where 0 is a perfect match 100 is a complete |
| 592 | ** mismatch. |
| 593 | */ |
| 594 | static unsigned char *sbsAlignment( |
| 595 | DLine *aLeft, int nLeft, /* Text on the left */ |
| 596 | DLine *aRight, int nRight /* Text on the right */ |
| 597 | ){ |
| 598 | int i, j, k; /* Loop counters */ |
| 599 | int *a; /* One row of the Wagner matrix */ |
| 600 | int *pToFree; /* Space that needs to be freed */ |
| 601 | unsigned char *aM; /* Wagner result matrix */ |
| 602 | int aBuf[100]; /* Stack space for a[] if nRight not to big */ |
| 603 | |
| 604 | aM = fossil_malloc( (nLeft+1)*(nRight+1) ); |
| 605 | if( nLeft==0 ){ |
| 606 | memset(aM, 3, nRight); |
| 607 | return aM; |
| 608 | } |
| 609 | if( nRight==0 ){ |
| 610 | memset(aM, 1, nLeft); |
| 611 | return aM; |
| 612 | } |
| 613 | if( nRight < (sizeof(aBuf)/sizeof(aBuf[0]))-1 ){ |
| 614 | pToFree = 0; |
| 615 | a = aBuf; |
| 616 | }else{ |
| 617 | a = pToFree = fossil_malloc( sizeof(a[0])*(nRight+1) ); |
| 618 | } |
| 619 | |
| 620 | /* Compute the best alignment */ |
| 621 | for(i=0; i<=nRight; i++){ |
| 622 | aM[i] = 3; |
| 623 | a[i] = i*50; |
| 624 | } |
| 625 | aM[0] = 0; |
| 626 | for(j=1; j<=nLeft; j++){ |
| 627 | int p = a[0]; |
| 628 | a[0] = p+50; |
| 629 | aM[j*(nRight+1)] = 1; |
| 630 | for(i=1; i<=nRight; i++){ |
| 631 | int m = a[i-1]+50; |
| 632 | int d = 3; |
| 633 | if( m>a[i]+50 ){ |
| 634 | m = a[i]+50; |
| 635 | d = 1; |
| 636 | } |
| 637 | if( m>p ){ |
| 638 | int score = match_dline(&aLeft[j-1], &aRight[i-1]); |
| 639 | if( score<50 && m>p+score ){ |
| 640 | m = p+score; |
| 641 | d = 2; |
| 642 | } |
| 643 | } |
| 644 | p = a[i]; |
| 645 | a[i] = m; |
| 646 | aM[j*(nRight+1)+i] = d; |
| 647 | } |
| 648 | } |
| 649 | |
| 650 | /* Compute the lowest-cost path back through the matrix */ |
| 651 | i = nRight; |
| 652 | j = nLeft; |
| 653 | k = (nRight+1)*(nLeft+1)-1; |
| 654 | while( i+j>0 ){ |
| 655 | unsigned char c = aM[k--]; |
| 656 | if( c==2 ){ |
| 657 | assert( i>0 && j>0 ); |
| 658 | i--; |
| 659 | j--; |
| 660 | }else if( c==3 ){ |
| 661 | assert( i>0 ); |
| 662 | i--; |
| 663 | }else{ |
| 664 | assert( j>0 ); |
| 665 | j--; |
| 666 | } |
| 667 | aM[k] = aM[j*(nRight+1)+i]; |
| 668 | } |
| 669 | k++; |
| 670 | i = (nRight+1)*(nLeft+1) - k; |
| 671 | memmove(aM, &aM[k], i); |
| 672 | |
| 673 | /* Return the result */ |
| 674 | fossil_free(pToFree); |
| 675 | return aM; |
| 676 | } |
| 677 | |
| 678 | /* |
| 679 | ** Given a diff context in which the aEdit[] array has been filled |
| 680 | ** in, compute a side-by-side diff into pOut. |
| 681 | */ |
| @@ -699,48 +769,52 @@ | |
| 769 | a += m; |
| 770 | b += m; |
| 771 | |
| 772 | /* Show the differences */ |
| 773 | for(i=0; i<nr; i++){ |
| 774 | unsigned char *alignment; |
| 775 | ma = R[r+i*3+1]; /* Lines on left but not on right */ |
| 776 | mb = R[r+i*3+2]; /* Lines on right but not on left */ |
| 777 | alignment = sbsAlignment(&A[a], ma, &B[b], mb); |
| 778 | for(j=0; ma+mb>0; j++){ |
| 779 | if( alignment[j]==1 ){ |
| 780 | s.n = 0; |
| 781 | sbsWriteLineno(&s, a); |
| 782 | s.iStart = 0; |
| 783 | s.zStart = "<span class=\"diffrm\">"; |
| 784 | s.iEnd = s.width; |
| 785 | sbsWriteText(&s, &A[a], SBS_PAD); |
| 786 | sbsWrite(&s, " <\n", 3); |
| 787 | blob_append(pOut, s.zLine, s.n); |
| 788 | assert( ma>0 ); |
| 789 | ma--; |
| 790 | a++; |
| 791 | }else if( alignment[j]==2 ){ |
| 792 | s.n = 0; |
| 793 | sbsWriteLineChange(&s, &A[a], a, &B[b], b); |
| 794 | blob_append(pOut, s.zLine, s.n); |
| 795 | assert( ma>0 && mb>0 ); |
| 796 | ma--; |
| 797 | mb--; |
| 798 | a++; |
| 799 | b++; |
| 800 | }else{ |
| 801 | s.n = 0; |
| 802 | sbsWriteSpace(&s, width + 7); |
| 803 | sbsWrite(&s, " > ", 3); |
| 804 | sbsWriteLineno(&s, b); |
| 805 | s.iStart = 0; |
| 806 | s.zStart = "<span class=\"diffadd\">"; |
| 807 | s.iEnd = s.width; |
| 808 | sbsWriteText(&s, &B[b], SBS_NEWLINE); |
| 809 | blob_append(pOut, s.zLine, s.n); |
| 810 | assert( mb>0 ); |
| 811 | mb--; |
| 812 | b++; |
| 813 | } |
| 814 | } |
| 815 | fossil_free(alignment); |
| 816 | if( i<nr-1 ){ |
| 817 | m = R[r+i*3+3]; |
| 818 | for(j=0; j<m; j++){ |
| 819 | s.n = 0; |
| 820 | sbsWriteLineno(&s, a+j); |
| @@ -1107,11 +1181,11 @@ | |
| 1181 | p->aEdit[r+3]++; |
| 1182 | cpy--; |
| 1183 | } |
| 1184 | |
| 1185 | /* Shift insertions toward the end of the file */ |
| 1186 | while( r+3<p->nEdit && p->aEdit[r+3]>0 && del==0 && ins>0 ){ |
| 1187 | DLine *pTop = &p->aTo[lnTo]; /* First line inserted */ |
| 1188 | DLine *pBtm = &p->aTo[lnTo+ins]; /* First line past end of insert */ |
| 1189 | if( same_dline(pTop, pBtm)==0 ) break; |
| 1190 | if( LENGTH(pTop)+LENGTH(pBtm-1)<=LENGTH(pTop+1)+LENGTH(pBtm) ) break; |
| 1191 | lnFrom++; |
| @@ -1133,11 +1207,11 @@ | |
| 1207 | p->aEdit[r+3]++; |
| 1208 | cpy--; |
| 1209 | } |
| 1210 | |
| 1211 | /* Shift deletions toward the end of the file */ |
| 1212 | while( r+3<p->nEdit && p->aEdit[r+3]>0 && del>0 && ins==0 ){ |
| 1213 | DLine *pTop = &p->aFrom[lnFrom]; /* First line deleted */ |
| 1214 | DLine *pBtm = &p->aFrom[lnFrom+del]; /* First line past end of delete */ |
| 1215 | if( same_dline(pTop, pBtm)==0 ) break; |
| 1216 | if( LENGTH(pTop)+LENGTH(pBtm-1)<=LENGTH(pTop)+LENGTH(pBtm) ) break; |
| 1217 | lnFrom++; |
| 1218 |