Fossil SCM

Convert the similarity measure for side-by-side diff alignment to use LCS instead of edit distance. LCS is faster and gives comparable results.

drh 2012-02-07 03:57 trunk
Commit 469462b69a560074cac7701f0d46cc30086e698e
1 file changed +11 -47
+11 -47
--- src/diff.c
+++ src/diff.c
@@ -536,19 +536,16 @@
536536
** Return the number between 0 and 100 that is smaller the closer pA and
537537
** pB match. Return 0 for a perfect match. Return 100 if pA and pB are
538538
** completely different.
539539
*/
540540
static int match_dline(DLine *pA, DLine *pB){
541
- int *pToFree;
542
- int *a;
543541
const char *zA;
544542
const char *zB;
545543
int nA;
546544
int nB;
547545
int avg;
548
- int i, j, dist, score;
549
- int aStatic[200];
546
+ int i, j, k, best, score;
550547
551548
zA = pA->z;
552549
zB = pB->z;
553550
nA = pA->h & LENGTH_MASK;
554551
nB = pB->h & LENGTH_MASK;
@@ -556,57 +553,24 @@
556553
while( nA>0 && fossil_isspace(zA[nA-1]) ){ nA--; }
557554
while( nB>0 && fossil_isspace(zB[0]) ){ nB--; zB++; }
558555
while( nB>0 && fossil_isspace(zB[nB-1]) ){ nB--; }
559556
avg = (nA+nB)/2;
560557
if( avg==0 ) return 100;
561
- dist = 0;
562
-
563
- /* Remove any common prefix and suffix */
564
- while( nA && nB && zA[0]==zB[0] ){
565
- nA--;
566
- nB--;
567
- zA++;
568
- zB++;
569
- dist++;
570
- }
571
- while( nA && nB && zA[nA-1]==zB[nB-1] ){
572
- nA--;
573
- nB--;
574
- dist++;
575
- }
576
-
577
- if( nA>0 && nB>0 ){
578
- /* Allocate space of the dynamic programming array */
579
- if( nB<sizeof(aStatic)/sizeof(aStatic[0]) ){
580
- pToFree = 0;
581
- a = aStatic;
582
- }else{
583
- pToFree = a = fossil_malloc( (nB+1)*sizeof(a[0]) );
584
- }
585
-
586
- /* Compute the length of the best sequence of matching characters */
587
- for(i=0; i<=nB; i++) a[i] = 0;
588
- for(j=0; j<nA; j++){
589
- int p = 0;
590
- for(i=0; i<nB; i++){
591
- int m = a[i];
592
- if( m<a[i+1] ) m = a[i+1];
593
- if( m<p+1 && zA[j]==zB[i] ) m = p+1;
594
- p = a[i+1];
595
- a[i+1] = m;
596
- }
597
- }
598
- dist += a[nB];
599
- fossil_free(pToFree);
600
- }
601
- score = dist>avg ? 0 : (avg - dist)*100/avg;
558
+ best = 0;
559
+ for(i=0; i<nA-best; i++){
560
+ char c = zA[i];
561
+ for(j=0; j<nB-best; j++){
562
+ if( c!=zB[j] ) continue;
563
+ for(k=1; k+i<nA && k+j<nB && zA[k+i]==zB[k+j]; k++){}
564
+ if( k>best ) best = k;
565
+ }
566
+ }
567
+ score = (best>avg) ? 0 : (avg - best)*100/avg;
602568
603569
/* Return the result */
604570
return score;
605571
}
606
-
607
-
608572
609573
/*
610574
** There is a change block in which nLeft lines of text on the left are
611575
** converted into nRight lines of text on the right. This routine computes
612576
** how the lines on the left line up with the lines on the right.
613577
--- src/diff.c
+++ src/diff.c
@@ -536,19 +536,16 @@
536 ** Return the number between 0 and 100 that is smaller the closer pA and
537 ** pB match. Return 0 for a perfect match. Return 100 if pA and pB are
538 ** completely different.
539 */
540 static int match_dline(DLine *pA, DLine *pB){
541 int *pToFree;
542 int *a;
543 const char *zA;
544 const char *zB;
545 int nA;
546 int nB;
547 int avg;
548 int i, j, dist, score;
549 int aStatic[200];
550
551 zA = pA->z;
552 zB = pB->z;
553 nA = pA->h & LENGTH_MASK;
554 nB = pB->h & LENGTH_MASK;
@@ -556,57 +553,24 @@
556 while( nA>0 && fossil_isspace(zA[nA-1]) ){ nA--; }
557 while( nB>0 && fossil_isspace(zB[0]) ){ nB--; zB++; }
558 while( nB>0 && fossil_isspace(zB[nB-1]) ){ nB--; }
559 avg = (nA+nB)/2;
560 if( avg==0 ) return 100;
561 dist = 0;
562
563 /* Remove any common prefix and suffix */
564 while( nA && nB && zA[0]==zB[0] ){
565 nA--;
566 nB--;
567 zA++;
568 zB++;
569 dist++;
570 }
571 while( nA && nB && zA[nA-1]==zB[nB-1] ){
572 nA--;
573 nB--;
574 dist++;
575 }
576
577 if( nA>0 && nB>0 ){
578 /* Allocate space of the dynamic programming array */
579 if( nB<sizeof(aStatic)/sizeof(aStatic[0]) ){
580 pToFree = 0;
581 a = aStatic;
582 }else{
583 pToFree = a = fossil_malloc( (nB+1)*sizeof(a[0]) );
584 }
585
586 /* Compute the length of the best sequence of matching characters */
587 for(i=0; i<=nB; i++) a[i] = 0;
588 for(j=0; j<nA; j++){
589 int p = 0;
590 for(i=0; i<nB; i++){
591 int m = a[i];
592 if( m<a[i+1] ) m = a[i+1];
593 if( m<p+1 && zA[j]==zB[i] ) m = p+1;
594 p = a[i+1];
595 a[i+1] = m;
596 }
597 }
598 dist += a[nB];
599 fossil_free(pToFree);
600 }
601 score = dist>avg ? 0 : (avg - dist)*100/avg;
602
603 /* Return the result */
604 return score;
605 }
606
607
608
609 /*
610 ** There is a change block in which nLeft lines of text on the left are
611 ** converted into nRight lines of text on the right. This routine computes
612 ** how the lines on the left line up with the lines on the right.
613
--- src/diff.c
+++ src/diff.c
@@ -536,19 +536,16 @@
536 ** Return the number between 0 and 100 that is smaller the closer pA and
537 ** pB match. Return 0 for a perfect match. Return 100 if pA and pB are
538 ** completely different.
539 */
540 static int match_dline(DLine *pA, DLine *pB){
 
 
541 const char *zA;
542 const char *zB;
543 int nA;
544 int nB;
545 int avg;
546 int i, j, k, best, score;
 
547
548 zA = pA->z;
549 zB = pB->z;
550 nA = pA->h & LENGTH_MASK;
551 nB = pB->h & LENGTH_MASK;
@@ -556,57 +553,24 @@
553 while( nA>0 && fossil_isspace(zA[nA-1]) ){ nA--; }
554 while( nB>0 && fossil_isspace(zB[0]) ){ nB--; zB++; }
555 while( nB>0 && fossil_isspace(zB[nB-1]) ){ nB--; }
556 avg = (nA+nB)/2;
557 if( avg==0 ) return 100;
558 best = 0;
559 for(i=0; i<nA-best; i++){
560 char c = zA[i];
561 for(j=0; j<nB-best; j++){
562 if( c!=zB[j] ) continue;
563 for(k=1; k+i<nA && k+j<nB && zA[k+i]==zB[k+j]; k++){}
564 if( k>best ) best = k;
565 }
566 }
567 score = (best>avg) ? 0 : (avg - best)*100/avg;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
568
569 /* Return the result */
570 return score;
571 }
 
 
572
573 /*
574 ** There is a change block in which nLeft lines of text on the left are
575 ** converted into nRight lines of text on the right. This routine computes
576 ** how the lines on the left line up with the lines on the right.
577

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button