Fossil SCM

Tweaks to the diff algorithm give a 4x performance increase. Now comparable to command-line diff.

drh 2008-02-04 13:53 trunk
Commit e8cf0061cc47c87521f9401c42c52fd40e664f25
1 file changed +70 -42
+70 -42
--- src/diff.c
+++ src/diff.c
@@ -305,19 +305,24 @@
305305
int iS1, int iE1,
306306
int iS2, int iE2,
307307
int *piSX, int *piEX,
308308
int *piSY, int *piEY
309309
){
310
- int bestScore = -1000000000;
311
- int i, j;
312
- int iSX, iSY, iEX, iEY;
313
- int score, skew, dist, mid;
314
-
315
- *piSX = iS1;
316
- *piEX = iS1;
317
- *piSY = iS2;
318
- *piEY = iS2;
310
+ double bestScore = -1e30; /* Best score seen so far */
311
+ int i, j; /* Loop counters */
312
+ int iSX, iSY, iEX, iEY; /* Current match */
313
+ double score; /* Current score */
314
+ int skew; /* How lopsided is the match */
315
+ int dist; /* Distance of match from center */
316
+ int mid; /* Center of the span */
317
+ int iSXb, iSYb, iEXb, iEYb; /* Best match so far */
318
+ int iSXp, iSYp, iEXp, iEYp; /* Previous match */
319
+
320
+ iSXb = iSXp = iS1;
321
+ iEXb = iEXp = iS1;
322
+ iSYb = iSYp = iS2;
323
+ iEYb = iEYp = iS2;
319324
mid = (iE1 + iS1)/2;
320325
for(i=iS1; i<iE1; i++){
321326
int limit = 0;
322327
j = p->aTo[p->aFrom[i].h % p->nTo].iHash;
323328
while( j>0
@@ -328,10 +333,13 @@
328333
break;
329334
}
330335
j = p->aTo[j-1].iNext;
331336
}
332337
if( j==0 ) continue;
338
+ assert( i>=iSXb && i>=iSXp );
339
+ if( i<iEXb && j>=iSYb && j<iEYb ) continue;
340
+ if( i<iEXp && j>=iSYp && j<iEYp ) continue;
333341
iSX = i;
334342
iSY = j-1;
335343
while( iSX>iS1 && iSY>iS2 && same_dline(&p->aFrom[iSX-1],&p->aTo[iSY-1]) ){
336344
iSX--;
337345
iSY--;
@@ -347,16 +355,25 @@
347355
dist = (iSX+iEX)/2 - mid;
348356
if( dist<0 ) dist = -dist;
349357
score = (iEX - iSX) - 0.05*skew - 0.05*dist;
350358
if( score>bestScore ){
351359
bestScore = score;
352
- *piSX = iSX;
353
- *piSY = iSY;
354
- *piEX = iEX;
355
- *piEY = iEY;
360
+ iSXb = iSX;
361
+ iSYb = iSY;
362
+ iEXb = iEX;
363
+ iEYb = iEY;
364
+ }else{
365
+ iSXp = iSX;
366
+ iSYp = iSY;
367
+ iEXp = iEX;
368
+ iEYp = iEY;
356369
}
357370
}
371
+ *piSX = iSXb;
372
+ *piSY = iSYb;
373
+ *piEX = iEXb;
374
+ *piEY = iEYb;
358375
/* printf("LCS(%d..%d/%d..%d) = %d..%d/%d..%d\n",
359376
iS1, iE1, iS2, iE2, *piSX, *piEX, *piSY, *piEY); */
360377
}
361378
362379
/*
@@ -391,10 +408,45 @@
391408
diff_step(p, iEX, iE1, iEY, iE2);
392409
}else{
393410
appendTriple(p, 0, iE1-iS1, iE2-iS2);
394411
}
395412
}
413
+
414
+/*
415
+** Compute the differences between two files already loaded into
416
+** the DContext structure.
417
+*/
418
+static void diff_all(DContext *p){
419
+ int mnE, iS, iE1, iE2;
420
+
421
+ /* Carve off the common header and footer */
422
+ iE1 = p->nFrom;
423
+ iE2 = p->nTo;
424
+ while( iE1>0 && iE2>0 && same_dline(&p->aFrom[iE1-1], &p->aTo[iE2-1]) ){
425
+ iE1--;
426
+ iE2--;
427
+ }
428
+ mnE = iE1<iE2 ? iE1 : iE2;
429
+ for(iS=0; iS<mnE && same_dline(&p->aFrom[iS],&p->aTo[iS]); iS++){}
430
+
431
+ /* do the difference */
432
+ if( iS>0 ){
433
+ appendTriple(p, iS, 0, 0);
434
+ }
435
+ diff_step(p, iS, iE1, iS, iE2);
436
+ if( iE1<p->nFrom ){
437
+ appendTriple(p, p->nFrom - iE1, 0, 0);
438
+ }
439
+
440
+ /* Terminate the COPY/DELETE/INSERT triples with three zeros */
441
+ expandEdit(p, p->nEdit+3);
442
+ if( p->aEdit ){
443
+ p->aEdit[p->nEdit++] = 0;
444
+ p->aEdit[p->nEdit++] = 0;
445
+ p->aEdit[p->nEdit++] = 0;
446
+ }
447
+}
396448
397449
/*
398450
** Generate a report of the differences between files pA and pB.
399451
** If pOut is not NULL then a unified diff is appended there. It
400452
** is assumed that pOut has already been initialized. If pOut is
@@ -413,12 +465,12 @@
413465
Blob *pB_Blob, /* TO file */
414466
Blob *pOut, /* Write unified diff here if not NULL */
415467
int nContext /* Amount of context to unified diff */
416468
){
417469
DContext c;
418
- int mnE, iS, iE1, iE2;
419
-
470
+
471
+ /* Prepare the input files */
420472
memset(&c, 0, sizeof(c));
421473
c.aFrom = break_into_lines(blob_str(pA_Blob), &c.nFrom);
422474
c.aTo = break_into_lines(blob_str(pB_Blob), &c.nTo);
423475
if( c.aFrom==0 || c.aTo==0 ){
424476
free(c.aFrom);
@@ -427,35 +479,12 @@
427479
blob_appendf(pOut, "cannot compute difference between binary files\n");
428480
}
429481
return 0;
430482
}
431483
432
- /* Carve off the common header and footer */
433
- iE1 = c.nFrom;
434
- iE2 = c.nTo;
435
- while( iE1>0 && iE2>0 && same_dline(&c.aFrom[iE1-1], &c.aTo[iE2-1]) ){
436
- iE1--;
437
- iE2--;
438
- }
439
- mnE = iE1<iE2 ? iE1 : iE2;
440
- for(iS=0; iS<mnE && same_dline(&c.aFrom[iS],&c.aTo[iS]); iS++){}
441
-
442
- /* do the difference */
443
- if( iS>0 ){
444
- appendTriple(&c, iS, 0, 0);
445
- }
446
- diff_step(&c, iS, iE1, iS, iE2);
447
- if( iE1<c.nFrom ){
448
- appendTriple(&c, c.nFrom - iE1, 0, 0);
449
- }
450
-
451
- expandEdit(&c, c.nEdit+3);
452
- if( c.aEdit ){
453
- c.aEdit[c.nEdit++] = 0;
454
- c.aEdit[c.nEdit++] = 0;
455
- c.aEdit[c.nEdit++] = 0;
456
- }
484
+ /* Compute the difference */
485
+ diff_all(&c);
457486
458487
if( pOut ){
459488
/* Compute a context diff if requested */
460489
contextDiff(&c, pOut, nContext);
461490
free(c.aFrom);
@@ -462,12 +491,11 @@
462491
free(c.aTo);
463492
free(c.aEdit);
464493
return 0;
465494
}else{
466495
/* If a context diff is not requested, then return the
467
- ** array of COPY/DELETE/INSERT triples after terminating the
468
- ** array with a triple of all zeros.
496
+ ** array of COPY/DELETE/INSERT triples.
469497
*/
470498
free(c.aFrom);
471499
free(c.aTo);
472500
return c.aEdit;
473501
}
474502
--- src/diff.c
+++ src/diff.c
@@ -305,19 +305,24 @@
305 int iS1, int iE1,
306 int iS2, int iE2,
307 int *piSX, int *piEX,
308 int *piSY, int *piEY
309 ){
310 int bestScore = -1000000000;
311 int i, j;
312 int iSX, iSY, iEX, iEY;
313 int score, skew, dist, mid;
314
315 *piSX = iS1;
316 *piEX = iS1;
317 *piSY = iS2;
318 *piEY = iS2;
 
 
 
 
 
319 mid = (iE1 + iS1)/2;
320 for(i=iS1; i<iE1; i++){
321 int limit = 0;
322 j = p->aTo[p->aFrom[i].h % p->nTo].iHash;
323 while( j>0
@@ -328,10 +333,13 @@
328 break;
329 }
330 j = p->aTo[j-1].iNext;
331 }
332 if( j==0 ) continue;
 
 
 
333 iSX = i;
334 iSY = j-1;
335 while( iSX>iS1 && iSY>iS2 && same_dline(&p->aFrom[iSX-1],&p->aTo[iSY-1]) ){
336 iSX--;
337 iSY--;
@@ -347,16 +355,25 @@
347 dist = (iSX+iEX)/2 - mid;
348 if( dist<0 ) dist = -dist;
349 score = (iEX - iSX) - 0.05*skew - 0.05*dist;
350 if( score>bestScore ){
351 bestScore = score;
352 *piSX = iSX;
353 *piSY = iSY;
354 *piEX = iEX;
355 *piEY = iEY;
 
 
 
 
 
356 }
357 }
 
 
 
 
358 /* printf("LCS(%d..%d/%d..%d) = %d..%d/%d..%d\n",
359 iS1, iE1, iS2, iE2, *piSX, *piEX, *piSY, *piEY); */
360 }
361
362 /*
@@ -391,10 +408,45 @@
391 diff_step(p, iEX, iE1, iEY, iE2);
392 }else{
393 appendTriple(p, 0, iE1-iS1, iE2-iS2);
394 }
395 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
396
397 /*
398 ** Generate a report of the differences between files pA and pB.
399 ** If pOut is not NULL then a unified diff is appended there. It
400 ** is assumed that pOut has already been initialized. If pOut is
@@ -413,12 +465,12 @@
413 Blob *pB_Blob, /* TO file */
414 Blob *pOut, /* Write unified diff here if not NULL */
415 int nContext /* Amount of context to unified diff */
416 ){
417 DContext c;
418 int mnE, iS, iE1, iE2;
419
420 memset(&c, 0, sizeof(c));
421 c.aFrom = break_into_lines(blob_str(pA_Blob), &c.nFrom);
422 c.aTo = break_into_lines(blob_str(pB_Blob), &c.nTo);
423 if( c.aFrom==0 || c.aTo==0 ){
424 free(c.aFrom);
@@ -427,35 +479,12 @@
427 blob_appendf(pOut, "cannot compute difference between binary files\n");
428 }
429 return 0;
430 }
431
432 /* Carve off the common header and footer */
433 iE1 = c.nFrom;
434 iE2 = c.nTo;
435 while( iE1>0 && iE2>0 && same_dline(&c.aFrom[iE1-1], &c.aTo[iE2-1]) ){
436 iE1--;
437 iE2--;
438 }
439 mnE = iE1<iE2 ? iE1 : iE2;
440 for(iS=0; iS<mnE && same_dline(&c.aFrom[iS],&c.aTo[iS]); iS++){}
441
442 /* do the difference */
443 if( iS>0 ){
444 appendTriple(&c, iS, 0, 0);
445 }
446 diff_step(&c, iS, iE1, iS, iE2);
447 if( iE1<c.nFrom ){
448 appendTriple(&c, c.nFrom - iE1, 0, 0);
449 }
450
451 expandEdit(&c, c.nEdit+3);
452 if( c.aEdit ){
453 c.aEdit[c.nEdit++] = 0;
454 c.aEdit[c.nEdit++] = 0;
455 c.aEdit[c.nEdit++] = 0;
456 }
457
458 if( pOut ){
459 /* Compute a context diff if requested */
460 contextDiff(&c, pOut, nContext);
461 free(c.aFrom);
@@ -462,12 +491,11 @@
462 free(c.aTo);
463 free(c.aEdit);
464 return 0;
465 }else{
466 /* If a context diff is not requested, then return the
467 ** array of COPY/DELETE/INSERT triples after terminating the
468 ** array with a triple of all zeros.
469 */
470 free(c.aFrom);
471 free(c.aTo);
472 return c.aEdit;
473 }
474
--- src/diff.c
+++ src/diff.c
@@ -305,19 +305,24 @@
305 int iS1, int iE1,
306 int iS2, int iE2,
307 int *piSX, int *piEX,
308 int *piSY, int *piEY
309 ){
310 double bestScore = -1e30; /* Best score seen so far */
311 int i, j; /* Loop counters */
312 int iSX, iSY, iEX, iEY; /* Current match */
313 double score; /* Current score */
314 int skew; /* How lopsided is the match */
315 int dist; /* Distance of match from center */
316 int mid; /* Center of the span */
317 int iSXb, iSYb, iEXb, iEYb; /* Best match so far */
318 int iSXp, iSYp, iEXp, iEYp; /* Previous match */
319
320 iSXb = iSXp = iS1;
321 iEXb = iEXp = iS1;
322 iSYb = iSYp = iS2;
323 iEYb = iEYp = iS2;
324 mid = (iE1 + iS1)/2;
325 for(i=iS1; i<iE1; i++){
326 int limit = 0;
327 j = p->aTo[p->aFrom[i].h % p->nTo].iHash;
328 while( j>0
@@ -328,10 +333,13 @@
333 break;
334 }
335 j = p->aTo[j-1].iNext;
336 }
337 if( j==0 ) continue;
338 assert( i>=iSXb && i>=iSXp );
339 if( i<iEXb && j>=iSYb && j<iEYb ) continue;
340 if( i<iEXp && j>=iSYp && j<iEYp ) continue;
341 iSX = i;
342 iSY = j-1;
343 while( iSX>iS1 && iSY>iS2 && same_dline(&p->aFrom[iSX-1],&p->aTo[iSY-1]) ){
344 iSX--;
345 iSY--;
@@ -347,16 +355,25 @@
355 dist = (iSX+iEX)/2 - mid;
356 if( dist<0 ) dist = -dist;
357 score = (iEX - iSX) - 0.05*skew - 0.05*dist;
358 if( score>bestScore ){
359 bestScore = score;
360 iSXb = iSX;
361 iSYb = iSY;
362 iEXb = iEX;
363 iEYb = iEY;
364 }else{
365 iSXp = iSX;
366 iSYp = iSY;
367 iEXp = iEX;
368 iEYp = iEY;
369 }
370 }
371 *piSX = iSXb;
372 *piSY = iSYb;
373 *piEX = iEXb;
374 *piEY = iEYb;
375 /* printf("LCS(%d..%d/%d..%d) = %d..%d/%d..%d\n",
376 iS1, iE1, iS2, iE2, *piSX, *piEX, *piSY, *piEY); */
377 }
378
379 /*
@@ -391,10 +408,45 @@
408 diff_step(p, iEX, iE1, iEY, iE2);
409 }else{
410 appendTriple(p, 0, iE1-iS1, iE2-iS2);
411 }
412 }
413
414 /*
415 ** Compute the differences between two files already loaded into
416 ** the DContext structure.
417 */
418 static void diff_all(DContext *p){
419 int mnE, iS, iE1, iE2;
420
421 /* Carve off the common header and footer */
422 iE1 = p->nFrom;
423 iE2 = p->nTo;
424 while( iE1>0 && iE2>0 && same_dline(&p->aFrom[iE1-1], &p->aTo[iE2-1]) ){
425 iE1--;
426 iE2--;
427 }
428 mnE = iE1<iE2 ? iE1 : iE2;
429 for(iS=0; iS<mnE && same_dline(&p->aFrom[iS],&p->aTo[iS]); iS++){}
430
431 /* do the difference */
432 if( iS>0 ){
433 appendTriple(p, iS, 0, 0);
434 }
435 diff_step(p, iS, iE1, iS, iE2);
436 if( iE1<p->nFrom ){
437 appendTriple(p, p->nFrom - iE1, 0, 0);
438 }
439
440 /* Terminate the COPY/DELETE/INSERT triples with three zeros */
441 expandEdit(p, p->nEdit+3);
442 if( p->aEdit ){
443 p->aEdit[p->nEdit++] = 0;
444 p->aEdit[p->nEdit++] = 0;
445 p->aEdit[p->nEdit++] = 0;
446 }
447 }
448
449 /*
450 ** Generate a report of the differences between files pA and pB.
451 ** If pOut is not NULL then a unified diff is appended there. It
452 ** is assumed that pOut has already been initialized. If pOut is
@@ -413,12 +465,12 @@
465 Blob *pB_Blob, /* TO file */
466 Blob *pOut, /* Write unified diff here if not NULL */
467 int nContext /* Amount of context to unified diff */
468 ){
469 DContext c;
470
471 /* Prepare the input files */
472 memset(&c, 0, sizeof(c));
473 c.aFrom = break_into_lines(blob_str(pA_Blob), &c.nFrom);
474 c.aTo = break_into_lines(blob_str(pB_Blob), &c.nTo);
475 if( c.aFrom==0 || c.aTo==0 ){
476 free(c.aFrom);
@@ -427,35 +479,12 @@
479 blob_appendf(pOut, "cannot compute difference between binary files\n");
480 }
481 return 0;
482 }
483
484 /* Compute the difference */
485 diff_all(&c);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
486
487 if( pOut ){
488 /* Compute a context diff if requested */
489 contextDiff(&c, pOut, nContext);
490 free(c.aFrom);
@@ -462,12 +491,11 @@
491 free(c.aTo);
492 free(c.aEdit);
493 return 0;
494 }else{
495 /* If a context diff is not requested, then return the
496 ** array of COPY/DELETE/INSERT triples.
 
497 */
498 free(c.aFrom);
499 free(c.aTo);
500 return c.aEdit;
501 }
502

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button