Fossil SCM

Trying out a greedy algorithm for aligning the two sides of a change with side-by-side diff. This helps in some cases, but we could probably benefit from a better algorithm.

drh 2012-02-06 01:55 UTC trunk
Commit 881b65141b48d7229c015e0e528eb4f091ddc66a
1 file changed +112 -37
+112 -37
--- src/diff.c
+++ src/diff.c
@@ -162,10 +162,82 @@
162162
** Return true if two DLine elements are identical.
163163
*/
164164
static int same_dline(DLine *pA, DLine *pB){
165165
return pA->h==pB->h && memcmp(pA->z,pB->z,pA->h & LENGTH_MASK)==0;
166166
}
167
+
168
+/*
169
+** Return the number which is larger the closer pA and pB match.
170
+**
171
+** The number returned of characters that match in pA and pB. The
172
+** number if artifically reduced if neither pA nor pB match very well.
173
+*/
174
+static int match_dline(DLine *pA, DLine *pB){
175
+ int *pToFree;
176
+ int *a;
177
+ const char *zA;
178
+ const char *zB;
179
+ int nA;
180
+ int nB;
181
+ int minDist;
182
+ int i, j, dist;
183
+ int aStatic[200];
184
+
185
+ zA = pA->z;
186
+ zB = pB->z;
187
+ nA = pA->h & LENGTH_MASK;
188
+ nB = pB->h & LENGTH_MASK;
189
+ minDist = nA;
190
+ if( minDist>nB ) minDist = nB;
191
+ minDist = (minDist)/2;
192
+ dist = 0;
193
+
194
+ /* Remove any common prefix and suffix */
195
+ while( nA && nB && zA[0]==zB[0] ){
196
+ nA--;
197
+ nB--;
198
+ zA++;
199
+ zB++;
200
+ dist++;
201
+ }
202
+ while( nA && nB && zA[nA-1]==zB[nB-1] ){
203
+ nA--;
204
+ nB--;
205
+ dist++;
206
+ }
207
+
208
+ /* Early out if one or the other string is empty */
209
+ if( nA==0 ) return dist;
210
+ if( nB==0 ) return dist;
211
+
212
+ /* Allocate space of the dynamic programming array */
213
+ if( nB<sizeof(aStatic)/sizeof(aStatic[0]) ){
214
+ pToFree = 0;
215
+ a = aStatic;
216
+ }else{
217
+ pToFree = a = fossil_malloc( (nB+1)*sizeof(a[0]) );
218
+ }
219
+
220
+ /* Compute the length best sequence of matching characters */
221
+ for(i=0; i<=nB; i++) a[i] = 0;
222
+ for(j=0; j<nA; j++){
223
+ int p = 0;
224
+ for(i=0; i<nB; i++){
225
+ int m = a[i];
226
+ if( m<a[i+1] ) m = a[i+1];
227
+ if( m<p+1 && zA[j]==zB[i] ) m = p+1;
228
+ p = a[i+1];
229
+ a[i+1] = m;
230
+ }
231
+ }
232
+ dist += a[nB];
233
+
234
+ /* Return the result */
235
+ fossil_free(pToFree);
236
+ return dist>minDist ? dist : 0;
237
+}
238
+
167239
168240
/*
169241
** Append a single line of context-diff output to pOut.
170242
*/
171243
static void appendDiffLine(
@@ -538,47 +610,50 @@
538610
a += m;
539611
b += m;
540612
541613
/* Show the differences */
542614
for(i=0; i<nr; i++){
543
- ma = R[r+i*3+1];
544
- mb = R[r+i*3+2];
545
- m = ma<mb ? ma : mb;
546
- for(j=0; j<m; j++){
547
- s.n = 0;
548
- sbsWriteLineno(&s, a+j);
549
- sbsWriteHtml(&s, "<span class=\"diffchng\">");
550
- sbsWriteText(&s, &A[a+j], SBS_PAD | SBS_ENDSPAN);
551
- sbsWrite(&s, " | ", 3);
552
- sbsWriteLineno(&s, b+j);
553
- sbsWriteHtml(&s, "<span class=\"diffchng\">");
554
- sbsWriteText(&s, &B[b+j], SBS_NEWLINE | SBS_ENDSPAN);
555
- blob_append(pOut, s.zLine, s.n);
556
- }
557
- a += m;
558
- b += m;
559
- ma -= m;
560
- mb -= m;
561
- for(j=0; j<ma; j++){
562
- s.n = 0;
563
- sbsWriteLineno(&s, a+j);
564
- sbsWriteHtml(&s, "<span class=\"diffrm\">");
565
- sbsWriteText(&s, &A[a+j], SBS_PAD | SBS_ENDSPAN);
566
- sbsWrite(&s, " <\n", 3);
567
- blob_append(pOut, s.zLine, s.n);
568
- }
569
- a += ma;
570
- for(j=0; j<mb; j++){
571
- s.n = 0;
572
- sbsWriteSpace(&s, width + 7);
573
- sbsWrite(&s, " > ", 3);
574
- sbsWriteLineno(&s, b+j);
575
- sbsWriteHtml(&s, "<span class=\"diffadd\">");
576
- sbsWriteText(&s, &B[b+j], SBS_NEWLINE | SBS_ENDSPAN);
577
- blob_append(pOut, s.zLine, s.n);
578
- }
579
- b += mb;
615
+ ma = R[r+i*3+1]; /* Lines on left but not on right */
616
+ mb = R[r+i*3+2]; /* Lines on right but not on left */
617
+ while( ma+mb>0 ){
618
+ if( ma<mb && (ma==0 ||
619
+ match_dline(&A[a],&B[b]) < match_dline(&A[a],&B[b+1]) ) ){
620
+ s.n = 0;
621
+ sbsWriteSpace(&s, width + 7);
622
+ sbsWrite(&s, " > ", 3);
623
+ sbsWriteLineno(&s, b);
624
+ sbsWriteHtml(&s, "<span class=\"diffadd\">");
625
+ sbsWriteText(&s, &B[b], SBS_NEWLINE | SBS_ENDSPAN);
626
+ blob_append(pOut, s.zLine, s.n);
627
+ mb--;
628
+ b++;
629
+ }else if( ma>mb && (mb==0 ||
630
+ match_dline(&A[a],&B[b]) < match_dline(&A[a+1],&B[b])) ){
631
+ s.n = 0;
632
+ sbsWriteLineno(&s, a);
633
+ sbsWriteHtml(&s, "<span class=\"diffrm\">");
634
+ sbsWriteText(&s, &A[a], SBS_PAD | SBS_ENDSPAN);
635
+ sbsWrite(&s, " <\n", 3);
636
+ blob_append(pOut, s.zLine, s.n);
637
+ ma--;
638
+ a++;
639
+ }else{
640
+ s.n = 0;
641
+ sbsWriteLineno(&s, a);
642
+ sbsWriteHtml(&s, "<span class=\"diffchng\">");
643
+ sbsWriteText(&s, &A[a], SBS_PAD | SBS_ENDSPAN);
644
+ sbsWrite(&s, " | ", 3);
645
+ sbsWriteLineno(&s, b);
646
+ sbsWriteHtml(&s, "<span class=\"diffchng\">");
647
+ sbsWriteText(&s, &B[b], SBS_NEWLINE | SBS_ENDSPAN);
648
+ blob_append(pOut, s.zLine, s.n);
649
+ ma--;
650
+ mb--;
651
+ a++;
652
+ b++;
653
+ }
654
+ }
580655
if( i<nr-1 ){
581656
m = R[r+i*3+3];
582657
for(j=0; j<m; j++){
583658
s.n = 0;
584659
sbsWriteLineno(&s, a+j);
585660
--- src/diff.c
+++ src/diff.c
@@ -162,10 +162,82 @@
162 ** Return true if two DLine elements are identical.
163 */
164 static int same_dline(DLine *pA, DLine *pB){
165 return pA->h==pB->h && memcmp(pA->z,pB->z,pA->h & LENGTH_MASK)==0;
166 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
168 /*
169 ** Append a single line of context-diff output to pOut.
170 */
171 static void appendDiffLine(
@@ -538,47 +610,50 @@
538 a += m;
539 b += m;
540
541 /* Show the differences */
542 for(i=0; i<nr; i++){
543 ma = R[r+i*3+1];
544 mb = R[r+i*3+2];
545 m = ma<mb ? ma : mb;
546 for(j=0; j<m; j++){
547 s.n = 0;
548 sbsWriteLineno(&s, a+j);
549 sbsWriteHtml(&s, "<span class=\"diffchng\">");
550 sbsWriteText(&s, &A[a+j], SBS_PAD | SBS_ENDSPAN);
551 sbsWrite(&s, " | ", 3);
552 sbsWriteLineno(&s, b+j);
553 sbsWriteHtml(&s, "<span class=\"diffchng\">");
554 sbsWriteText(&s, &B[b+j], SBS_NEWLINE | SBS_ENDSPAN);
555 blob_append(pOut, s.zLine, s.n);
556 }
557 a += m;
558 b += m;
559 ma -= m;
560 mb -= m;
561 for(j=0; j<ma; j++){
562 s.n = 0;
563 sbsWriteLineno(&s, a+j);
564 sbsWriteHtml(&s, "<span class=\"diffrm\">");
565 sbsWriteText(&s, &A[a+j], SBS_PAD | SBS_ENDSPAN);
566 sbsWrite(&s, " <\n", 3);
567 blob_append(pOut, s.zLine, s.n);
568 }
569 a += ma;
570 for(j=0; j<mb; j++){
571 s.n = 0;
572 sbsWriteSpace(&s, width + 7);
573 sbsWrite(&s, " > ", 3);
574 sbsWriteLineno(&s, b+j);
575 sbsWriteHtml(&s, "<span class=\"diffadd\">");
576 sbsWriteText(&s, &B[b+j], SBS_NEWLINE | SBS_ENDSPAN);
577 blob_append(pOut, s.zLine, s.n);
578 }
579 b += mb;
 
 
 
580 if( i<nr-1 ){
581 m = R[r+i*3+3];
582 for(j=0; j<m; j++){
583 s.n = 0;
584 sbsWriteLineno(&s, a+j);
585
--- src/diff.c
+++ src/diff.c
@@ -162,10 +162,82 @@
162 ** Return true if two DLine elements are identical.
163 */
164 static int same_dline(DLine *pA, DLine *pB){
165 return pA->h==pB->h && memcmp(pA->z,pB->z,pA->h & LENGTH_MASK)==0;
166 }
167
168 /*
169 ** Return the number which is larger the closer pA and pB match.
170 **
171 ** The number returned of characters that match in pA and pB. The
172 ** number if artifically reduced if neither pA nor pB match very well.
173 */
174 static int match_dline(DLine *pA, DLine *pB){
175 int *pToFree;
176 int *a;
177 const char *zA;
178 const char *zB;
179 int nA;
180 int nB;
181 int minDist;
182 int i, j, dist;
183 int aStatic[200];
184
185 zA = pA->z;
186 zB = pB->z;
187 nA = pA->h & LENGTH_MASK;
188 nB = pB->h & LENGTH_MASK;
189 minDist = nA;
190 if( minDist>nB ) minDist = nB;
191 minDist = (minDist)/2;
192 dist = 0;
193
194 /* Remove any common prefix and suffix */
195 while( nA && nB && zA[0]==zB[0] ){
196 nA--;
197 nB--;
198 zA++;
199 zB++;
200 dist++;
201 }
202 while( nA && nB && zA[nA-1]==zB[nB-1] ){
203 nA--;
204 nB--;
205 dist++;
206 }
207
208 /* Early out if one or the other string is empty */
209 if( nA==0 ) return dist;
210 if( nB==0 ) return dist;
211
212 /* Allocate space of the dynamic programming array */
213 if( nB<sizeof(aStatic)/sizeof(aStatic[0]) ){
214 pToFree = 0;
215 a = aStatic;
216 }else{
217 pToFree = a = fossil_malloc( (nB+1)*sizeof(a[0]) );
218 }
219
220 /* Compute the length best sequence of matching characters */
221 for(i=0; i<=nB; i++) a[i] = 0;
222 for(j=0; j<nA; j++){
223 int p = 0;
224 for(i=0; i<nB; i++){
225 int m = a[i];
226 if( m<a[i+1] ) m = a[i+1];
227 if( m<p+1 && zA[j]==zB[i] ) m = p+1;
228 p = a[i+1];
229 a[i+1] = m;
230 }
231 }
232 dist += a[nB];
233
234 /* Return the result */
235 fossil_free(pToFree);
236 return dist>minDist ? dist : 0;
237 }
238
239
240 /*
241 ** Append a single line of context-diff output to pOut.
242 */
243 static void appendDiffLine(
@@ -538,47 +610,50 @@
610 a += m;
611 b += m;
612
613 /* Show the differences */
614 for(i=0; i<nr; i++){
615 ma = R[r+i*3+1]; /* Lines on left but not on right */
616 mb = R[r+i*3+2]; /* Lines on right but not on left */
617 while( ma+mb>0 ){
618 if( ma<mb && (ma==0 ||
619 match_dline(&A[a],&B[b]) < match_dline(&A[a],&B[b+1]) ) ){
620 s.n = 0;
621 sbsWriteSpace(&s, width + 7);
622 sbsWrite(&s, " > ", 3);
623 sbsWriteLineno(&s, b);
624 sbsWriteHtml(&s, "<span class=\"diffadd\">");
625 sbsWriteText(&s, &B[b], SBS_NEWLINE | SBS_ENDSPAN);
626 blob_append(pOut, s.zLine, s.n);
627 mb--;
628 b++;
629 }else if( ma>mb && (mb==0 ||
630 match_dline(&A[a],&B[b]) < match_dline(&A[a+1],&B[b])) ){
631 s.n = 0;
632 sbsWriteLineno(&s, a);
633 sbsWriteHtml(&s, "<span class=\"diffrm\">");
634 sbsWriteText(&s, &A[a], SBS_PAD | SBS_ENDSPAN);
635 sbsWrite(&s, " <\n", 3);
636 blob_append(pOut, s.zLine, s.n);
637 ma--;
638 a++;
639 }else{
640 s.n = 0;
641 sbsWriteLineno(&s, a);
642 sbsWriteHtml(&s, "<span class=\"diffchng\">");
643 sbsWriteText(&s, &A[a], SBS_PAD | SBS_ENDSPAN);
644 sbsWrite(&s, " | ", 3);
645 sbsWriteLineno(&s, b);
646 sbsWriteHtml(&s, "<span class=\"diffchng\">");
647 sbsWriteText(&s, &B[b], SBS_NEWLINE | SBS_ENDSPAN);
648 blob_append(pOut, s.zLine, s.n);
649 ma--;
650 mb--;
651 a++;
652 b++;
653 }
654 }
655 if( i<nr-1 ){
656 m = R[r+i*3+3];
657 for(j=0; j<m; j++){
658 s.n = 0;
659 sbsWriteLineno(&s, a+j);
660

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button