Fossil SCM
Tweaks to the diff algorithm give a 4x performance increase. Now comparable to command-line diff.
Commit
e8cf0061cc47c87521f9401c42c52fd40e664f25
Parent
97ff24dec72a290…
1 file changed
+70
-42
+70
-42
| --- src/diff.c | ||
| +++ src/diff.c | ||
| @@ -305,19 +305,24 @@ | ||
| 305 | 305 | int iS1, int iE1, |
| 306 | 306 | int iS2, int iE2, |
| 307 | 307 | int *piSX, int *piEX, |
| 308 | 308 | int *piSY, int *piEY |
| 309 | 309 | ){ |
| 310 | - int bestScore = -1000000000; | |
| 311 | - int i, j; | |
| 312 | - int iSX, iSY, iEX, iEY; | |
| 313 | - int score, skew, dist, mid; | |
| 314 | - | |
| 315 | - *piSX = iS1; | |
| 316 | - *piEX = iS1; | |
| 317 | - *piSY = iS2; | |
| 318 | - *piEY = iS2; | |
| 310 | + double bestScore = -1e30; /* Best score seen so far */ | |
| 311 | + int i, j; /* Loop counters */ | |
| 312 | + int iSX, iSY, iEX, iEY; /* Current match */ | |
| 313 | + double score; /* Current score */ | |
| 314 | + int skew; /* How lopsided is the match */ | |
| 315 | + int dist; /* Distance of match from center */ | |
| 316 | + int mid; /* Center of the span */ | |
| 317 | + int iSXb, iSYb, iEXb, iEYb; /* Best match so far */ | |
| 318 | + int iSXp, iSYp, iEXp, iEYp; /* Previous match */ | |
| 319 | + | |
| 320 | + iSXb = iSXp = iS1; | |
| 321 | + iEXb = iEXp = iS1; | |
| 322 | + iSYb = iSYp = iS2; | |
| 323 | + iEYb = iEYp = iS2; | |
| 319 | 324 | mid = (iE1 + iS1)/2; |
| 320 | 325 | for(i=iS1; i<iE1; i++){ |
| 321 | 326 | int limit = 0; |
| 322 | 327 | j = p->aTo[p->aFrom[i].h % p->nTo].iHash; |
| 323 | 328 | while( j>0 |
| @@ -328,10 +333,13 @@ | ||
| 328 | 333 | break; |
| 329 | 334 | } |
| 330 | 335 | j = p->aTo[j-1].iNext; |
| 331 | 336 | } |
| 332 | 337 | if( j==0 ) continue; |
| 338 | + assert( i>=iSXb && i>=iSXp ); | |
| 339 | + if( i<iEXb && j>=iSYb && j<iEYb ) continue; | |
| 340 | + if( i<iEXp && j>=iSYp && j<iEYp ) continue; | |
| 333 | 341 | iSX = i; |
| 334 | 342 | iSY = j-1; |
| 335 | 343 | while( iSX>iS1 && iSY>iS2 && same_dline(&p->aFrom[iSX-1],&p->aTo[iSY-1]) ){ |
| 336 | 344 | iSX--; |
| 337 | 345 | iSY--; |
| @@ -347,16 +355,25 @@ | ||
| 347 | 355 | dist = (iSX+iEX)/2 - mid; |
| 348 | 356 | if( dist<0 ) dist = -dist; |
| 349 | 357 | score = (iEX - iSX) - 0.05*skew - 0.05*dist; |
| 350 | 358 | if( score>bestScore ){ |
| 351 | 359 | bestScore = score; |
| 352 | - *piSX = iSX; | |
| 353 | - *piSY = iSY; | |
| 354 | - *piEX = iEX; | |
| 355 | - *piEY = iEY; | |
| 360 | + iSXb = iSX; | |
| 361 | + iSYb = iSY; | |
| 362 | + iEXb = iEX; | |
| 363 | + iEYb = iEY; | |
| 364 | + }else{ | |
| 365 | + iSXp = iSX; | |
| 366 | + iSYp = iSY; | |
| 367 | + iEXp = iEX; | |
| 368 | + iEYp = iEY; | |
| 356 | 369 | } |
| 357 | 370 | } |
| 371 | + *piSX = iSXb; | |
| 372 | + *piSY = iSYb; | |
| 373 | + *piEX = iEXb; | |
| 374 | + *piEY = iEYb; | |
| 358 | 375 | /* printf("LCS(%d..%d/%d..%d) = %d..%d/%d..%d\n", |
| 359 | 376 | iS1, iE1, iS2, iE2, *piSX, *piEX, *piSY, *piEY); */ |
| 360 | 377 | } |
| 361 | 378 | |
| 362 | 379 | /* |
| @@ -391,10 +408,45 @@ | ||
| 391 | 408 | diff_step(p, iEX, iE1, iEY, iE2); |
| 392 | 409 | }else{ |
| 393 | 410 | appendTriple(p, 0, iE1-iS1, iE2-iS2); |
| 394 | 411 | } |
| 395 | 412 | } |
| 413 | + | |
| 414 | +/* | |
| 415 | +** Compute the differences between two files already loaded into | |
| 416 | +** the DContext structure. | |
| 417 | +*/ | |
| 418 | +static void diff_all(DContext *p){ | |
| 419 | + int mnE, iS, iE1, iE2; | |
| 420 | + | |
| 421 | + /* Carve off the common header and footer */ | |
| 422 | + iE1 = p->nFrom; | |
| 423 | + iE2 = p->nTo; | |
| 424 | + while( iE1>0 && iE2>0 && same_dline(&p->aFrom[iE1-1], &p->aTo[iE2-1]) ){ | |
| 425 | + iE1--; | |
| 426 | + iE2--; | |
| 427 | + } | |
| 428 | + mnE = iE1<iE2 ? iE1 : iE2; | |
| 429 | + for(iS=0; iS<mnE && same_dline(&p->aFrom[iS],&p->aTo[iS]); iS++){} | |
| 430 | + | |
| 431 | + /* do the difference */ | |
| 432 | + if( iS>0 ){ | |
| 433 | + appendTriple(p, iS, 0, 0); | |
| 434 | + } | |
| 435 | + diff_step(p, iS, iE1, iS, iE2); | |
| 436 | + if( iE1<p->nFrom ){ | |
| 437 | + appendTriple(p, p->nFrom - iE1, 0, 0); | |
| 438 | + } | |
| 439 | + | |
| 440 | + /* Terminate the COPY/DELETE/INSERT triples with three zeros */ | |
| 441 | + expandEdit(p, p->nEdit+3); | |
| 442 | + if( p->aEdit ){ | |
| 443 | + p->aEdit[p->nEdit++] = 0; | |
| 444 | + p->aEdit[p->nEdit++] = 0; | |
| 445 | + p->aEdit[p->nEdit++] = 0; | |
| 446 | + } | |
| 447 | +} | |
| 396 | 448 | |
| 397 | 449 | /* |
| 398 | 450 | ** Generate a report of the differences between files pA and pB. |
| 399 | 451 | ** If pOut is not NULL then a unified diff is appended there. It |
| 400 | 452 | ** is assumed that pOut has already been initialized. If pOut is |
| @@ -413,12 +465,12 @@ | ||
| 413 | 465 | Blob *pB_Blob, /* TO file */ |
| 414 | 466 | Blob *pOut, /* Write unified diff here if not NULL */ |
| 415 | 467 | int nContext /* Amount of context to unified diff */ |
| 416 | 468 | ){ |
| 417 | 469 | DContext c; |
| 418 | - int mnE, iS, iE1, iE2; | |
| 419 | - | |
| 470 | + | |
| 471 | + /* Prepare the input files */ | |
| 420 | 472 | memset(&c, 0, sizeof(c)); |
| 421 | 473 | c.aFrom = break_into_lines(blob_str(pA_Blob), &c.nFrom); |
| 422 | 474 | c.aTo = break_into_lines(blob_str(pB_Blob), &c.nTo); |
| 423 | 475 | if( c.aFrom==0 || c.aTo==0 ){ |
| 424 | 476 | free(c.aFrom); |
| @@ -427,35 +479,12 @@ | ||
| 427 | 479 | blob_appendf(pOut, "cannot compute difference between binary files\n"); |
| 428 | 480 | } |
| 429 | 481 | return 0; |
| 430 | 482 | } |
| 431 | 483 | |
| 432 | - /* Carve off the common header and footer */ | |
| 433 | - iE1 = c.nFrom; | |
| 434 | - iE2 = c.nTo; | |
| 435 | - while( iE1>0 && iE2>0 && same_dline(&c.aFrom[iE1-1], &c.aTo[iE2-1]) ){ | |
| 436 | - iE1--; | |
| 437 | - iE2--; | |
| 438 | - } | |
| 439 | - mnE = iE1<iE2 ? iE1 : iE2; | |
| 440 | - for(iS=0; iS<mnE && same_dline(&c.aFrom[iS],&c.aTo[iS]); iS++){} | |
| 441 | - | |
| 442 | - /* do the difference */ | |
| 443 | - if( iS>0 ){ | |
| 444 | - appendTriple(&c, iS, 0, 0); | |
| 445 | - } | |
| 446 | - diff_step(&c, iS, iE1, iS, iE2); | |
| 447 | - if( iE1<c.nFrom ){ | |
| 448 | - appendTriple(&c, c.nFrom - iE1, 0, 0); | |
| 449 | - } | |
| 450 | - | |
| 451 | - expandEdit(&c, c.nEdit+3); | |
| 452 | - if( c.aEdit ){ | |
| 453 | - c.aEdit[c.nEdit++] = 0; | |
| 454 | - c.aEdit[c.nEdit++] = 0; | |
| 455 | - c.aEdit[c.nEdit++] = 0; | |
| 456 | - } | |
| 484 | + /* Compute the difference */ | |
| 485 | + diff_all(&c); | |
| 457 | 486 | |
| 458 | 487 | if( pOut ){ |
| 459 | 488 | /* Compute a context diff if requested */ |
| 460 | 489 | contextDiff(&c, pOut, nContext); |
| 461 | 490 | free(c.aFrom); |
| @@ -462,12 +491,11 @@ | ||
| 462 | 491 | free(c.aTo); |
| 463 | 492 | free(c.aEdit); |
| 464 | 493 | return 0; |
| 465 | 494 | }else{ |
| 466 | 495 | /* If a context diff is not requested, then return the |
| 467 | - ** array of COPY/DELETE/INSERT triples after terminating the | |
| 468 | - ** array with a triple of all zeros. | |
| 496 | + ** array of COPY/DELETE/INSERT triples. | |
| 469 | 497 | */ |
| 470 | 498 | free(c.aFrom); |
| 471 | 499 | free(c.aTo); |
| 472 | 500 | return c.aEdit; |
| 473 | 501 | } |
| 474 | 502 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -305,19 +305,24 @@ | |
| 305 | int iS1, int iE1, |
| 306 | int iS2, int iE2, |
| 307 | int *piSX, int *piEX, |
| 308 | int *piSY, int *piEY |
| 309 | ){ |
| 310 | int bestScore = -1000000000; |
| 311 | int i, j; |
| 312 | int iSX, iSY, iEX, iEY; |
| 313 | int score, skew, dist, mid; |
| 314 | |
| 315 | *piSX = iS1; |
| 316 | *piEX = iS1; |
| 317 | *piSY = iS2; |
| 318 | *piEY = iS2; |
| 319 | mid = (iE1 + iS1)/2; |
| 320 | for(i=iS1; i<iE1; i++){ |
| 321 | int limit = 0; |
| 322 | j = p->aTo[p->aFrom[i].h % p->nTo].iHash; |
| 323 | while( j>0 |
| @@ -328,10 +333,13 @@ | |
| 328 | break; |
| 329 | } |
| 330 | j = p->aTo[j-1].iNext; |
| 331 | } |
| 332 | if( j==0 ) continue; |
| 333 | iSX = i; |
| 334 | iSY = j-1; |
| 335 | while( iSX>iS1 && iSY>iS2 && same_dline(&p->aFrom[iSX-1],&p->aTo[iSY-1]) ){ |
| 336 | iSX--; |
| 337 | iSY--; |
| @@ -347,16 +355,25 @@ | |
| 347 | dist = (iSX+iEX)/2 - mid; |
| 348 | if( dist<0 ) dist = -dist; |
| 349 | score = (iEX - iSX) - 0.05*skew - 0.05*dist; |
| 350 | if( score>bestScore ){ |
| 351 | bestScore = score; |
| 352 | *piSX = iSX; |
| 353 | *piSY = iSY; |
| 354 | *piEX = iEX; |
| 355 | *piEY = iEY; |
| 356 | } |
| 357 | } |
| 358 | /* printf("LCS(%d..%d/%d..%d) = %d..%d/%d..%d\n", |
| 359 | iS1, iE1, iS2, iE2, *piSX, *piEX, *piSY, *piEY); */ |
| 360 | } |
| 361 | |
| 362 | /* |
| @@ -391,10 +408,45 @@ | |
| 391 | diff_step(p, iEX, iE1, iEY, iE2); |
| 392 | }else{ |
| 393 | appendTriple(p, 0, iE1-iS1, iE2-iS2); |
| 394 | } |
| 395 | } |
| 396 | |
| 397 | /* |
| 398 | ** Generate a report of the differences between files pA and pB. |
| 399 | ** If pOut is not NULL then a unified diff is appended there. It |
| 400 | ** is assumed that pOut has already been initialized. If pOut is |
| @@ -413,12 +465,12 @@ | |
| 413 | Blob *pB_Blob, /* TO file */ |
| 414 | Blob *pOut, /* Write unified diff here if not NULL */ |
| 415 | int nContext /* Amount of context to unified diff */ |
| 416 | ){ |
| 417 | DContext c; |
| 418 | int mnE, iS, iE1, iE2; |
| 419 | |
| 420 | memset(&c, 0, sizeof(c)); |
| 421 | c.aFrom = break_into_lines(blob_str(pA_Blob), &c.nFrom); |
| 422 | c.aTo = break_into_lines(blob_str(pB_Blob), &c.nTo); |
| 423 | if( c.aFrom==0 || c.aTo==0 ){ |
| 424 | free(c.aFrom); |
| @@ -427,35 +479,12 @@ | |
| 427 | blob_appendf(pOut, "cannot compute difference between binary files\n"); |
| 428 | } |
| 429 | return 0; |
| 430 | } |
| 431 | |
| 432 | /* Carve off the common header and footer */ |
| 433 | iE1 = c.nFrom; |
| 434 | iE2 = c.nTo; |
| 435 | while( iE1>0 && iE2>0 && same_dline(&c.aFrom[iE1-1], &c.aTo[iE2-1]) ){ |
| 436 | iE1--; |
| 437 | iE2--; |
| 438 | } |
| 439 | mnE = iE1<iE2 ? iE1 : iE2; |
| 440 | for(iS=0; iS<mnE && same_dline(&c.aFrom[iS],&c.aTo[iS]); iS++){} |
| 441 | |
| 442 | /* do the difference */ |
| 443 | if( iS>0 ){ |
| 444 | appendTriple(&c, iS, 0, 0); |
| 445 | } |
| 446 | diff_step(&c, iS, iE1, iS, iE2); |
| 447 | if( iE1<c.nFrom ){ |
| 448 | appendTriple(&c, c.nFrom - iE1, 0, 0); |
| 449 | } |
| 450 | |
| 451 | expandEdit(&c, c.nEdit+3); |
| 452 | if( c.aEdit ){ |
| 453 | c.aEdit[c.nEdit++] = 0; |
| 454 | c.aEdit[c.nEdit++] = 0; |
| 455 | c.aEdit[c.nEdit++] = 0; |
| 456 | } |
| 457 | |
| 458 | if( pOut ){ |
| 459 | /* Compute a context diff if requested */ |
| 460 | contextDiff(&c, pOut, nContext); |
| 461 | free(c.aFrom); |
| @@ -462,12 +491,11 @@ | |
| 462 | free(c.aTo); |
| 463 | free(c.aEdit); |
| 464 | return 0; |
| 465 | }else{ |
| 466 | /* If a context diff is not requested, then return the |
| 467 | ** array of COPY/DELETE/INSERT triples after terminating the |
| 468 | ** array with a triple of all zeros. |
| 469 | */ |
| 470 | free(c.aFrom); |
| 471 | free(c.aTo); |
| 472 | return c.aEdit; |
| 473 | } |
| 474 |
| --- src/diff.c | |
| +++ src/diff.c | |
| @@ -305,19 +305,24 @@ | |
| 305 | int iS1, int iE1, |
| 306 | int iS2, int iE2, |
| 307 | int *piSX, int *piEX, |
| 308 | int *piSY, int *piEY |
| 309 | ){ |
| 310 | double bestScore = -1e30; /* Best score seen so far */ |
| 311 | int i, j; /* Loop counters */ |
| 312 | int iSX, iSY, iEX, iEY; /* Current match */ |
| 313 | double score; /* Current score */ |
| 314 | int skew; /* How lopsided is the match */ |
| 315 | int dist; /* Distance of match from center */ |
| 316 | int mid; /* Center of the span */ |
| 317 | int iSXb, iSYb, iEXb, iEYb; /* Best match so far */ |
| 318 | int iSXp, iSYp, iEXp, iEYp; /* Previous match */ |
| 319 | |
| 320 | iSXb = iSXp = iS1; |
| 321 | iEXb = iEXp = iS1; |
| 322 | iSYb = iSYp = iS2; |
| 323 | iEYb = iEYp = iS2; |
| 324 | mid = (iE1 + iS1)/2; |
| 325 | for(i=iS1; i<iE1; i++){ |
| 326 | int limit = 0; |
| 327 | j = p->aTo[p->aFrom[i].h % p->nTo].iHash; |
| 328 | while( j>0 |
| @@ -328,10 +333,13 @@ | |
| 333 | break; |
| 334 | } |
| 335 | j = p->aTo[j-1].iNext; |
| 336 | } |
| 337 | if( j==0 ) continue; |
| 338 | assert( i>=iSXb && i>=iSXp ); |
| 339 | if( i<iEXb && j>=iSYb && j<iEYb ) continue; |
| 340 | if( i<iEXp && j>=iSYp && j<iEYp ) continue; |
| 341 | iSX = i; |
| 342 | iSY = j-1; |
| 343 | while( iSX>iS1 && iSY>iS2 && same_dline(&p->aFrom[iSX-1],&p->aTo[iSY-1]) ){ |
| 344 | iSX--; |
| 345 | iSY--; |
| @@ -347,16 +355,25 @@ | |
| 355 | dist = (iSX+iEX)/2 - mid; |
| 356 | if( dist<0 ) dist = -dist; |
| 357 | score = (iEX - iSX) - 0.05*skew - 0.05*dist; |
| 358 | if( score>bestScore ){ |
| 359 | bestScore = score; |
| 360 | iSXb = iSX; |
| 361 | iSYb = iSY; |
| 362 | iEXb = iEX; |
| 363 | iEYb = iEY; |
| 364 | }else{ |
| 365 | iSXp = iSX; |
| 366 | iSYp = iSY; |
| 367 | iEXp = iEX; |
| 368 | iEYp = iEY; |
| 369 | } |
| 370 | } |
| 371 | *piSX = iSXb; |
| 372 | *piSY = iSYb; |
| 373 | *piEX = iEXb; |
| 374 | *piEY = iEYb; |
| 375 | /* printf("LCS(%d..%d/%d..%d) = %d..%d/%d..%d\n", |
| 376 | iS1, iE1, iS2, iE2, *piSX, *piEX, *piSY, *piEY); */ |
| 377 | } |
| 378 | |
| 379 | /* |
| @@ -391,10 +408,45 @@ | |
| 408 | diff_step(p, iEX, iE1, iEY, iE2); |
| 409 | }else{ |
| 410 | appendTriple(p, 0, iE1-iS1, iE2-iS2); |
| 411 | } |
| 412 | } |
| 413 | |
| 414 | /* |
| 415 | ** Compute the differences between two files already loaded into |
| 416 | ** the DContext structure. |
| 417 | */ |
| 418 | static void diff_all(DContext *p){ |
| 419 | int mnE, iS, iE1, iE2; |
| 420 | |
| 421 | /* Carve off the common header and footer */ |
| 422 | iE1 = p->nFrom; |
| 423 | iE2 = p->nTo; |
| 424 | while( iE1>0 && iE2>0 && same_dline(&p->aFrom[iE1-1], &p->aTo[iE2-1]) ){ |
| 425 | iE1--; |
| 426 | iE2--; |
| 427 | } |
| 428 | mnE = iE1<iE2 ? iE1 : iE2; |
| 429 | for(iS=0; iS<mnE && same_dline(&p->aFrom[iS],&p->aTo[iS]); iS++){} |
| 430 | |
| 431 | /* do the difference */ |
| 432 | if( iS>0 ){ |
| 433 | appendTriple(p, iS, 0, 0); |
| 434 | } |
| 435 | diff_step(p, iS, iE1, iS, iE2); |
| 436 | if( iE1<p->nFrom ){ |
| 437 | appendTriple(p, p->nFrom - iE1, 0, 0); |
| 438 | } |
| 439 | |
| 440 | /* Terminate the COPY/DELETE/INSERT triples with three zeros */ |
| 441 | expandEdit(p, p->nEdit+3); |
| 442 | if( p->aEdit ){ |
| 443 | p->aEdit[p->nEdit++] = 0; |
| 444 | p->aEdit[p->nEdit++] = 0; |
| 445 | p->aEdit[p->nEdit++] = 0; |
| 446 | } |
| 447 | } |
| 448 | |
| 449 | /* |
| 450 | ** Generate a report of the differences between files pA and pB. |
| 451 | ** If pOut is not NULL then a unified diff is appended there. It |
| 452 | ** is assumed that pOut has already been initialized. If pOut is |
| @@ -413,12 +465,12 @@ | |
| 465 | Blob *pB_Blob, /* TO file */ |
| 466 | Blob *pOut, /* Write unified diff here if not NULL */ |
| 467 | int nContext /* Amount of context to unified diff */ |
| 468 | ){ |
| 469 | DContext c; |
| 470 | |
| 471 | /* Prepare the input files */ |
| 472 | memset(&c, 0, sizeof(c)); |
| 473 | c.aFrom = break_into_lines(blob_str(pA_Blob), &c.nFrom); |
| 474 | c.aTo = break_into_lines(blob_str(pB_Blob), &c.nTo); |
| 475 | if( c.aFrom==0 || c.aTo==0 ){ |
| 476 | free(c.aFrom); |
| @@ -427,35 +479,12 @@ | |
| 479 | blob_appendf(pOut, "cannot compute difference between binary files\n"); |
| 480 | } |
| 481 | return 0; |
| 482 | } |
| 483 | |
| 484 | /* Compute the difference */ |
| 485 | diff_all(&c); |
| 486 | |
| 487 | if( pOut ){ |
| 488 | /* Compute a context diff if requested */ |
| 489 | contextDiff(&c, pOut, nContext); |
| 490 | free(c.aFrom); |
| @@ -462,12 +491,11 @@ | |
| 491 | free(c.aTo); |
| 492 | free(c.aEdit); |
| 493 | return 0; |
| 494 | }else{ |
| 495 | /* If a context diff is not requested, then return the |
| 496 | ** array of COPY/DELETE/INSERT triples. |
| 497 | */ |
| 498 | free(c.aFrom); |
| 499 | free(c.aTo); |
| 500 | return c.aEdit; |
| 501 | } |
| 502 |