Fossil SCM
Convert the similarity measure for side-by-side diff alignment to use LCS instead of edit distance. LCS is faster and gives comparable results.
Commit
469462b69a560074cac7701f0d46cc30086e698e
Parent
8f85286cffef823…
1 file changed
+11
-47
+11
-47
| --- src/diff.c | ||
| +++ src/diff.c | ||
| @@ -536,19 +536,16 @@ | ||
| 536 | 536 | ** Return the number between 0 and 100 that is smaller the closer pA and |
| 537 | 537 | ** pB match. Return 0 for a perfect match. Return 100 if pA and pB are |
| 538 | 538 | ** completely different. |
| 539 | 539 | */ |
| 540 | 540 | static int match_dline(DLine *pA, DLine *pB){ |
| 541 | - int *pToFree; | |
| 542 | - int *a; | |
| 543 | 541 | const char *zA; |
| 544 | 542 | const char *zB; |
| 545 | 543 | int nA; |
| 546 | 544 | int nB; |
| 547 | 545 | int avg; |
| 548 | - int i, j, dist, score; | |
| 549 | - int aStatic[200]; | |
| 546 | + int i, j, k, best, score; | |
| 550 | 547 | |
| 551 | 548 | zA = pA->z; |
| 552 | 549 | zB = pB->z; |
| 553 | 550 | nA = pA->h & LENGTH_MASK; |
| 554 | 551 | nB = pB->h & LENGTH_MASK; |
| @@ -556,57 +553,24 @@ | ||
| 556 | 553 | while( nA>0 && fossil_isspace(zA[nA-1]) ){ nA--; } |
| 557 | 554 | while( nB>0 && fossil_isspace(zB[0]) ){ nB--; zB++; } |
| 558 | 555 | while( nB>0 && fossil_isspace(zB[nB-1]) ){ nB--; } |
| 559 | 556 | avg = (nA+nB)/2; |
| 560 | 557 | if( avg==0 ) return 100; |
| 561 | - dist = 0; | |
| 562 | - | |
| 563 | - /* Remove any common prefix and suffix */ | |
| 564 | - while( nA && nB && zA[0]==zB[0] ){ | |
| 565 | - nA--; | |
| 566 | - nB--; | |
| 567 | - zA++; | |
| 568 | - zB++; | |
| 569 | - dist++; | |
| 570 | - } | |
| 571 | - while( nA && nB && zA[nA-1]==zB[nB-1] ){ | |
| 572 | - nA--; | |
| 573 | - nB--; | |
| 574 | - dist++; | |
| 575 | - } | |
| 576 | - | |
| 577 | - if( nA>0 && nB>0 ){ | |
| 578 | - /* Allocate space of the dynamic programming array */ | |
| 579 | - if( nB<sizeof(aStatic)/sizeof(aStatic[0]) ){ | |
| 580 | - pToFree = 0; | |
| 581 | - a = aStatic; | |
| 582 | - }else{ | |
| 583 | - pToFree = a = fossil_malloc( (nB+1)*sizeof(a[0]) ); | |
| 584 | - } | |
| 585 | - | |
| 586 | - /* Compute the length of the best sequence of matching characters */ | |
| 587 | - for(i=0; i<=nB; i++) a[i] = 0; | |
| 588 | - for(j=0; j<nA; j++){ | |
| 589 | - int p = 0; | |
| 590 | - for(i=0; i<nB; i++){ | |
| 591 | - int m = a[i]; | |
| 592 | - if( m<a[i+1] ) m = a[i+1]; | |
| 593 | - if( m<p+1 && zA[j]==zB[i] ) m = p+1; | |
| 594 | - p = a[i+1]; | |
| 595 | - a[i+1] = m; | |
| 596 | - } | |
| 597 | - } | |
| 598 | - dist += a[nB]; | |
| 599 | - fossil_free(pToFree); | |
| 600 | - } | |
| 601 | - score = dist>avg ? 0 : (avg - dist)*100/avg; | |
| 558 | + best = 0; | |
| 559 | + for(i=0; i<nA-best; i++){ | |
| 560 | + char c = zA[i]; | |
| 561 | + for(j=0; j<nB-best; j++){ | |
| 562 | + if( c!=zB[j] ) continue; | |
| 563 | + for(k=1; k+i<nA && k+j<nB && zA[k+i]==zB[k+j]; k++){} | |
| 564 | + if( k>best ) best = k; | |
| 565 | + } | |
| 566 | + } | |
| 567 | + score = (best>avg) ? 0 : (avg - best)*100/avg; | |
| 602 | 568 | |
| 603 | 569 | /* Return the result */ |
| 604 | 570 | return score; |
| 605 | 571 | } |
| 606 | - | |
| 607 | - | |
| 608 | 572 | |
| 609 | 573 | /* |
| 610 | 574 | ** There is a change block in which nLeft lines of text on the left are |
| 611 | 575 | ** converted into nRight lines of text on the right. This routine computes |
| 612 | 576 | ** how the lines on the left line up with the lines on the right. |
| 613 | 577 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -536,19 +536,16 @@ | |
| 536 | ** Return the number between 0 and 100 that is smaller the closer pA and |
| 537 | ** pB match. Return 0 for a perfect match. Return 100 if pA and pB are |
| 538 | ** completely different. |
| 539 | */ |
| 540 | static int match_dline(DLine *pA, DLine *pB){ |
| 541 | int *pToFree; |
| 542 | int *a; |
| 543 | const char *zA; |
| 544 | const char *zB; |
| 545 | int nA; |
| 546 | int nB; |
| 547 | int avg; |
| 548 | int i, j, dist, score; |
| 549 | int aStatic[200]; |
| 550 | |
| 551 | zA = pA->z; |
| 552 | zB = pB->z; |
| 553 | nA = pA->h & LENGTH_MASK; |
| 554 | nB = pB->h & LENGTH_MASK; |
| @@ -556,57 +553,24 @@ | |
| 556 | while( nA>0 && fossil_isspace(zA[nA-1]) ){ nA--; } |
| 557 | while( nB>0 && fossil_isspace(zB[0]) ){ nB--; zB++; } |
| 558 | while( nB>0 && fossil_isspace(zB[nB-1]) ){ nB--; } |
| 559 | avg = (nA+nB)/2; |
| 560 | if( avg==0 ) return 100; |
| 561 | dist = 0; |
| 562 | |
| 563 | /* Remove any common prefix and suffix */ |
| 564 | while( nA && nB && zA[0]==zB[0] ){ |
| 565 | nA--; |
| 566 | nB--; |
| 567 | zA++; |
| 568 | zB++; |
| 569 | dist++; |
| 570 | } |
| 571 | while( nA && nB && zA[nA-1]==zB[nB-1] ){ |
| 572 | nA--; |
| 573 | nB--; |
| 574 | dist++; |
| 575 | } |
| 576 | |
| 577 | if( nA>0 && nB>0 ){ |
| 578 | /* Allocate space of the dynamic programming array */ |
| 579 | if( nB<sizeof(aStatic)/sizeof(aStatic[0]) ){ |
| 580 | pToFree = 0; |
| 581 | a = aStatic; |
| 582 | }else{ |
| 583 | pToFree = a = fossil_malloc( (nB+1)*sizeof(a[0]) ); |
| 584 | } |
| 585 | |
| 586 | /* Compute the length of the best sequence of matching characters */ |
| 587 | for(i=0; i<=nB; i++) a[i] = 0; |
| 588 | for(j=0; j<nA; j++){ |
| 589 | int p = 0; |
| 590 | for(i=0; i<nB; i++){ |
| 591 | int m = a[i]; |
| 592 | if( m<a[i+1] ) m = a[i+1]; |
| 593 | if( m<p+1 && zA[j]==zB[i] ) m = p+1; |
| 594 | p = a[i+1]; |
| 595 | a[i+1] = m; |
| 596 | } |
| 597 | } |
| 598 | dist += a[nB]; |
| 599 | fossil_free(pToFree); |
| 600 | } |
| 601 | score = dist>avg ? 0 : (avg - dist)*100/avg; |
| 602 | |
| 603 | /* Return the result */ |
| 604 | return score; |
| 605 | } |
| 606 | |
| 607 | |
| 608 | |
| 609 | /* |
| 610 | ** There is a change block in which nLeft lines of text on the left are |
| 611 | ** converted into nRight lines of text on the right. This routine computes |
| 612 | ** how the lines on the left line up with the lines on the right. |
| 613 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -536,19 +536,16 @@ | |
| 536 | ** Return the number between 0 and 100 that is smaller the closer pA and |
| 537 | ** pB match. Return 0 for a perfect match. Return 100 if pA and pB are |
| 538 | ** completely different. |
| 539 | */ |
| 540 | static int match_dline(DLine *pA, DLine *pB){ |
| 541 | const char *zA; |
| 542 | const char *zB; |
| 543 | int nA; |
| 544 | int nB; |
| 545 | int avg; |
| 546 | int i, j, k, best, score; |
| 547 | |
| 548 | zA = pA->z; |
| 549 | zB = pB->z; |
| 550 | nA = pA->h & LENGTH_MASK; |
| 551 | nB = pB->h & LENGTH_MASK; |
| @@ -556,57 +553,24 @@ | |
| 553 | while( nA>0 && fossil_isspace(zA[nA-1]) ){ nA--; } |
| 554 | while( nB>0 && fossil_isspace(zB[0]) ){ nB--; zB++; } |
| 555 | while( nB>0 && fossil_isspace(zB[nB-1]) ){ nB--; } |
| 556 | avg = (nA+nB)/2; |
| 557 | if( avg==0 ) return 100; |
| 558 | best = 0; |
| 559 | for(i=0; i<nA-best; i++){ |
| 560 | char c = zA[i]; |
| 561 | for(j=0; j<nB-best; j++){ |
| 562 | if( c!=zB[j] ) continue; |
| 563 | for(k=1; k+i<nA && k+j<nB && zA[k+i]==zB[k+j]; k++){} |
| 564 | if( k>best ) best = k; |
| 565 | } |
| 566 | } |
| 567 | score = (best>avg) ? 0 : (avg - best)*100/avg; |
| 568 | |
| 569 | /* Return the result */ |
| 570 | return score; |
| 571 | } |
| 572 | |
| 573 | /* |
| 574 | ** There is a change block in which nLeft lines of text on the left are |
| 575 | ** converted into nRight lines of text on the right. This routine computes |
| 576 | ** how the lines on the left line up with the lines on the right. |
| 577 |