Fossil SCM
Improvements to the merge algorithm so that it works better for common changes. Still more work needed.
Commit
ac6bb3ce069523ecc7b664ff41a8b5cc3fc7f657
Parent
929d28e358248b1…
1 file changed
+113
-73
+113
-73
| --- src/merge3.c | ||
| +++ src/merge3.c | ||
| @@ -273,10 +273,11 @@ | ||
| 273 | 273 | const char *zOut, /* The target file */ |
| 274 | 274 | int lenOut, /* Length of the target file */ |
| 275 | 275 | Mapping *pMap /* Write the map of similaries here */ |
| 276 | 276 | ){ |
| 277 | 277 | int i, j, base, prefix; |
| 278 | + int changes; | |
| 278 | 279 | hash h; |
| 279 | 280 | int *collide; |
| 280 | 281 | int origLenOut = lenOut; |
| 281 | 282 | struct Mapping_entry *aMap; |
| 282 | 283 | int landmark[MX_LANDMARK]; |
| @@ -420,84 +421,91 @@ | ||
| 420 | 421 | hash_next(&h, zOut[base+i+NHASH]); |
| 421 | 422 | i++; |
| 422 | 423 | } |
| 423 | 424 | } |
| 424 | 425 | free(collide); |
| 426 | +#if 0 | |
| 425 | 427 | DEBUG1( |
| 426 | 428 | printf("after creation:\n"); |
| 427 | 429 | MappingPrint(pMap); |
| 428 | 430 | ) |
| 431 | +#endif | |
| 429 | 432 | |
| 430 | 433 | /* In this step, we will remove overlapping entries from the mapping. |
| 431 | 434 | ** |
| 432 | 435 | ** We use a greedy algorithm. Select the largest mapping first and |
| 433 | 436 | ** remove all overlapping mappings. Then take the next largest |
| 434 | 437 | ** mapping and remove others that overlap with it. Keep going until |
| 435 | 438 | ** all mappings have been processed. |
| 436 | 439 | */ |
| 437 | 440 | MappingSortSize(pMap); |
| 438 | - for(i=0; i<pMap->nUsed; i++){ | |
| 439 | - int sortNeeded = 0; | |
| 440 | - int purgeNeeded = 0; | |
| 441 | - struct Mapping_entry *pA; | |
| 442 | - pA = &pMap->aMap[i]; | |
| 443 | - for(j=i+1; j<pMap->nUsed; j++){ | |
| 444 | - int diff; | |
| 445 | - struct Mapping_entry *pB; | |
| 446 | - pB = &pMap->aMap[j]; | |
| 447 | - if( pB->fromLast<pA->fromFirst || pB->fromFirst>pA->fromLast ){ | |
| 448 | - /* No overlap. Do nothing */ | |
| 449 | - }else if( pB->fromFirst>=pA->fromFirst && pB->fromLast<=pA->fromLast ){ | |
| 450 | - /* B is contained entirely within A. Drop B */ | |
| 451 | - pB->fromFirst = -1; | |
| 452 | - purgeNeeded = 1; | |
| 453 | - continue; | |
| 454 | - }else if( pB->fromFirst<pA->fromFirst ){ | |
| 455 | - /* The tail B overlaps the head of A */ | |
| 456 | - assert( pB->fromLast>=pA->fromFirst && pB->fromLast<=pA->fromLast ); | |
| 457 | - diff = pB->fromLast + 1 - pA->fromFirst; | |
| 458 | - pB->fromLast -= diff; | |
| 459 | - pB->toLast -= diff; | |
| 460 | - sortNeeded = 1; | |
| 461 | - }else{ | |
| 462 | - /* The head of B overlaps the tail of A */ | |
| 463 | - assert( pB->fromFirst<=pA->fromLast && pB->fromLast>pA->fromLast ); | |
| 464 | - diff = pA->fromLast + 1 - pB->fromFirst; | |
| 465 | - pB->fromFirst += diff; | |
| 466 | - pB->toFirst += diff; | |
| 467 | - sortNeeded = 1; | |
| 468 | - } | |
| 469 | - if( pB->toLast<pA->toFirst || pB->toFirst>pA->toLast ){ | |
| 470 | - /* No overlap. Do nothing */ | |
| 471 | - }else if( pB->toFirst>=pA->toFirst && pB->toLast<=pA->toLast ){ | |
| 472 | - /* B is contained entirely within A. Drop B */ | |
| 473 | - pB->fromFirst = -1; | |
| 474 | - purgeNeeded = 1; | |
| 475 | - }else if( pB->toFirst<pA->toFirst ){ | |
| 476 | - /* The tail of B overlaps the head of A */ | |
| 477 | - assert( pB->toLast>=pA->toFirst && pB->toLast<=pA->toLast ); | |
| 478 | - diff = pB->toLast + 1 - pA->toFirst; | |
| 479 | - pB->fromLast -= diff; | |
| 480 | - pB->toLast -= diff; | |
| 481 | - sortNeeded = 1; | |
| 482 | - }else{ | |
| 483 | - /* The head of B overlaps the tail of A */ | |
| 484 | - assert( pB->toFirst<=pA->toLast && pB->toLast>pA->toLast ); | |
| 485 | - diff = pA->toLast + 1 - pB->toFirst; | |
| 486 | - pB->fromFirst += diff; | |
| 487 | - pB->toFirst += diff; | |
| 488 | - sortNeeded = 1; | |
| 489 | - } | |
| 490 | - } | |
| 491 | - if( purgeNeeded ){ | |
| 492 | - MappingPurgeDeletedEntries(pMap); | |
| 493 | - } | |
| 494 | - if( sortNeeded && i<pMap->nUsed-2 ){ | |
| 495 | - MappingSortSize(pMap); | |
| 496 | - } | |
| 497 | - } | |
| 498 | - | |
| 441 | + do{ | |
| 442 | + changes = 0; | |
| 443 | + for(i=0; i<pMap->nUsed; i++){ | |
| 444 | + int sortNeeded = 0; | |
| 445 | + int purgeNeeded = 0; | |
| 446 | + struct Mapping_entry *pA; | |
| 447 | + pA = &pMap->aMap[i]; | |
| 448 | + for(j=i+1; j<pMap->nUsed; j++){ | |
| 449 | + int diff; | |
| 450 | + struct Mapping_entry *pB; | |
| 451 | + pB = &pMap->aMap[j]; | |
| 452 | + if( pB->fromLast<pA->fromFirst || pB->fromFirst>pA->fromLast ){ | |
| 453 | + /* No overlap. Do nothing */ | |
| 454 | + }else if( pB->fromFirst>=pA->fromFirst && pB->fromLast<=pA->fromLast ){ | |
| 455 | + /* B is contained entirely within A. Drop B */ | |
| 456 | + pB->fromFirst = -1; | |
| 457 | + purgeNeeded = 1; | |
| 458 | + continue; | |
| 459 | + }else if( pB->fromFirst<pA->fromFirst ){ | |
| 460 | + /* The tail B overlaps the head of A */ | |
| 461 | + assert( pB->fromLast>=pA->fromFirst && pB->fromLast<=pA->fromLast ); | |
| 462 | + diff = pB->fromLast + 1 - pA->fromFirst; | |
| 463 | + pB->fromLast -= diff; | |
| 464 | + pB->toLast -= diff; | |
| 465 | + sortNeeded = 1; | |
| 466 | + }else{ | |
| 467 | + /* The head of B overlaps the tail of A */ | |
| 468 | + assert( pB->fromFirst<=pA->fromLast && pB->fromLast>pA->fromLast ); | |
| 469 | + diff = pA->fromLast + 1 - pB->fromFirst; | |
| 470 | + pB->fromFirst += diff; | |
| 471 | + pB->toFirst += diff; | |
| 472 | + sortNeeded = 1; | |
| 473 | + } | |
| 474 | + if( pB->toLast<pA->toFirst || pB->toFirst>pA->toLast ){ | |
| 475 | + /* No overlap. Do nothing */ | |
| 476 | + }else if( pB->toFirst>=pA->toFirst && pB->toLast<=pA->toLast ){ | |
| 477 | + /* B is contained entirely within A. Drop B */ | |
| 478 | + pB->fromFirst = -1; | |
| 479 | + purgeNeeded = 1; | |
| 480 | + }else if( pB->toFirst<pA->toFirst ){ | |
| 481 | + /* The tail of B overlaps the head of A */ | |
| 482 | + assert( pB->toLast>=pA->toFirst && pB->toLast<=pA->toLast ); | |
| 483 | + diff = pB->toLast + 1 - pA->toFirst; | |
| 484 | + pB->fromLast -= diff; | |
| 485 | + pB->toLast -= diff; | |
| 486 | + sortNeeded = 1; | |
| 487 | + }else{ | |
| 488 | + /* The head of B overlaps the tail of A */ | |
| 489 | + assert( pB->toFirst<=pA->toLast && pB->toLast>pA->toLast ); | |
| 490 | + diff = pA->toLast + 1 - pB->toFirst; | |
| 491 | + pB->fromFirst += diff; | |
| 492 | + pB->toFirst += diff; | |
| 493 | + sortNeeded = 1; | |
| 494 | + } | |
| 495 | + } | |
| 496 | + if( purgeNeeded ){ | |
| 497 | + MappingPurgeDeletedEntries(pMap); | |
| 498 | + /* changes++; */ | |
| 499 | + } | |
| 500 | + if( sortNeeded && i<pMap->nUsed-2 ){ | |
| 501 | + MappingSortSize(pMap); | |
| 502 | + /* changes++; */ | |
| 503 | + } | |
| 504 | + } | |
| 505 | + }while( changes ); | |
| 506 | + | |
| 499 | 507 | /* Final step: Arrange the mapping entires so that they are in the |
| 500 | 508 | ** order of the output file. Then fill in the nExtra values. |
| 501 | 509 | */ |
| 502 | 510 | add_nextra: |
| 503 | 511 | MappingSortTo(pMap); |
| @@ -547,36 +555,41 @@ | ||
| 547 | 555 | |
| 548 | 556 | /* |
| 549 | 557 | ** Do a three-way merge. Initialize pOut to contain the result. |
| 550 | 558 | */ |
| 551 | 559 | void blob_merge(Blob *pPivot, Blob *pV1, Blob *pV2, Blob *pOut){ |
| 552 | - Mapping map1, map2; | |
| 553 | - int i; | |
| 560 | + Mapping map1, map2, map3; | |
| 561 | + int i, j; | |
| 554 | 562 | const char *zV1, *zV2; |
| 555 | 563 | blob_zero(pOut); |
| 556 | - DEBUG1( printf("map1:\n"); ) | |
| 557 | 564 | MappingInit( |
| 558 | 565 | blob_buffer(pPivot), blob_size(pPivot), |
| 559 | 566 | blob_buffer(pV1), blob_size(pV1), |
| 560 | 567 | &map1); |
| 561 | 568 | MappingSortFrom(&map1); |
| 562 | 569 | DEBUG1( |
| 563 | 570 | printf("map1-final:\n"); |
| 564 | 571 | MappingPrint(&map1); |
| 565 | - printf("map2:\n"); | |
| 566 | 572 | ) |
| 567 | 573 | MappingInit( |
| 568 | 574 | blob_buffer(pPivot), blob_size(pPivot), |
| 569 | 575 | blob_buffer(pV2), blob_size(pV2), |
| 570 | 576 | &map2); |
| 571 | 577 | DEBUG1( |
| 572 | 578 | printf("map2-final:\n"); |
| 573 | 579 | MappingPrint(&map2); |
| 574 | 580 | ) |
| 581 | + MappingInit( | |
| 582 | + blob_buffer(pV1), blob_size(pV1), | |
| 583 | + blob_buffer(pV2), blob_size(pV2), | |
| 584 | + &map3); | |
| 585 | + DEBUG1( | |
| 586 | + printf("map3-final:\n"); | |
| 587 | + MappingPrint(&map3); | |
| 588 | + ) | |
| 575 | 589 | zV1 = blob_buffer(pV1); |
| 576 | 590 | zV2 = blob_buffer(pV2); |
| 577 | - if( map2.nUsed==0 ) return; | |
| 578 | 591 | if( map1.aMap[0].toFirst>0 ){ |
| 579 | 592 | blob_append(pOut, zV1, map1.aMap[0].toFirst); |
| 580 | 593 | DEBUG1( printf("INSERT %d bytes from V1[0..%d]\n", |
| 581 | 594 | map1.aMap[0].toFirst, map1.aMap[0].toFirst-1); ) |
| 582 | 595 | } |
| @@ -583,22 +596,39 @@ | ||
| 583 | 596 | if( map2.aMap[0].toFirst>0 ){ |
| 584 | 597 | blob_append(pOut, zV2, map2.aMap[0].toFirst); |
| 585 | 598 | DEBUG1( printf("INSERT %d bytes from V2[0..%d]\n", |
| 586 | 599 | map2.aMap[0].toFirst, map2.aMap[0].toFirst-1); ) |
| 587 | 600 | } |
| 588 | - for(i=0; i<map2.nUsed; i++){ | |
| 589 | - int iFirst, iLast, nInsert; | |
| 601 | + for(i=j=0; i<map2.nUsed; i++){ | |
| 602 | + int iFirst, iLast, nInsert, iTail; | |
| 590 | 603 | struct Mapping_entry *p = &map2.aMap[i]; |
| 604 | + while( j<map3.nUsed-1 && map3.aMap[j+1].toFirst>p->toFirst ){ j++; } | |
| 605 | + DEBUG1( | |
| 606 | + printf("map2: %6d..%-6d %6d..%-6d %d\n", | |
| 607 | + p->fromFirst, p->fromLast, p->toFirst, p->toLast, p->nExtra); | |
| 608 | + printf("map3: j=%-6d %6d..%-6d\n", j, map3.aMap[j].toFirst, | |
| 609 | + map3.aMap[j].toLast); | |
| 610 | + ); | |
| 611 | + iTail = p->toLast + p->nExtra; | |
| 612 | + if( i<map2.nUsed-1 && | |
| 613 | + map3.aMap[j].toFirst<=p->toFirst && map3.aMap[j].toLast>=iTail ){ | |
| 614 | + blob_append(pOut, &zV2[p->toFirst], iTail - p->toFirst + 1); | |
| 615 | + DEBUG1( | |
| 616 | + printf("COPY %d bytes from V2[%d..%d]\n", iTail - p->toFirst+1, | |
| 617 | + p->toFirst, iTail); | |
| 618 | + ) | |
| 619 | + continue; | |
| 620 | + } | |
| 591 | 621 | iFirst = MappingTranslate(&map1, p->fromFirst, 0); |
| 592 | 622 | iLast = MappingTranslate(&map1, p->fromLast, &nInsert); |
| 593 | 623 | blob_append(pOut, &zV1[iFirst], iLast - iFirst + 1); |
| 594 | 624 | DEBUG1( |
| 595 | 625 | printf("COPY %d bytes from V1[%d..%d]\n", iLast-iFirst+1, iFirst, iLast); |
| 596 | 626 | ) |
| 597 | 627 | if( p->nExtra>0 ){ |
| 598 | - if( p->nExtra==nInsert | |
| 599 | - && memcmp(&zV2[p->toLast+1], &zV1[iLast-nInsert+1], nInsert)==0 ){ | |
| 628 | + if( p->nExtra==nInsert && | |
| 629 | + memcmp(&zV2[p->toLast+1], &zV1[iLast-nInsert+1], nInsert)==0 ){ | |
| 600 | 630 | /* Omit a duplicate insert */ |
| 601 | 631 | DEBUG1( printf("OMIT duplicate insert\n"); ) |
| 602 | 632 | }else{ |
| 603 | 633 | blob_append(pOut, &zV2[p->toLast+1], p->nExtra); |
| 604 | 634 | DEBUG1( |
| @@ -608,10 +638,11 @@ | ||
| 608 | 638 | } |
| 609 | 639 | } |
| 610 | 640 | } |
| 611 | 641 | MappingClear(&map1); |
| 612 | 642 | MappingClear(&map2); |
| 643 | + MappingClear(&map3); | |
| 613 | 644 | } |
| 614 | 645 | |
| 615 | 646 | /* |
| 616 | 647 | ** COMMAND: test-3-way-merge |
| 617 | 648 | ** |
| @@ -651,11 +682,11 @@ | ||
| 651 | 682 | /* |
| 652 | 683 | ** COMMAND: test-mapping |
| 653 | 684 | */ |
| 654 | 685 | void mapping_test(void){ |
| 655 | 686 | int i; |
| 656 | - const char *z; | |
| 687 | + const char *za, *zb; | |
| 657 | 688 | Blob a, b; |
| 658 | 689 | Mapping map; |
| 659 | 690 | if( g.argc!=4 ){ |
| 660 | 691 | usage("FILE1 FILE2"); |
| 661 | 692 | } |
| @@ -663,15 +694,24 @@ | ||
| 663 | 694 | blob_read_from_file(&b, g.argv[3]); |
| 664 | 695 | memset(&map, 0, sizeof(map)); |
| 665 | 696 | MappingInit(blob_buffer(&a), blob_size(&a), |
| 666 | 697 | blob_buffer(&b), blob_size(&b), |
| 667 | 698 | &map); |
| 668 | - z = blob_buffer(&a); | |
| 699 | + DEBUG1( | |
| 700 | + printf("map-final:\n"); | |
| 701 | + MappingPrint(&map); | |
| 702 | + ) | |
| 703 | + za = blob_buffer(&a); | |
| 704 | + zb = blob_buffer(&b); | |
| 669 | 705 | for(i=0; i<map.nUsed; i++){ |
| 670 | 706 | printf("======= %6d..%-6d %6d..%-6d %d\n", |
| 671 | 707 | map.aMap[i].fromFirst, map.aMap[i].fromLast, |
| 672 | 708 | map.aMap[i].toFirst, map.aMap[i].toLast, |
| 673 | 709 | map.aMap[i].nExtra); |
| 674 | 710 | printf("%.*s\n", map.aMap[i].fromLast - map.aMap[i].fromFirst + 1, |
| 675 | - &z[map.aMap[i].fromFirst]); | |
| 711 | + &za[map.aMap[i].fromFirst]); | |
| 712 | + if( map.aMap[i].nExtra ){ | |
| 713 | + printf("======= EXTRA:\n"); | |
| 714 | + printf("%.*s\n", map.aMap[i].nExtra, &zb[map.aMap[i].toLast+1]); | |
| 715 | + } | |
| 676 | 716 | } |
| 677 | 717 | } |
| 678 | 718 |
| --- src/merge3.c | |
| +++ src/merge3.c | |
| @@ -273,10 +273,11 @@ | |
| 273 | const char *zOut, /* The target file */ |
| 274 | int lenOut, /* Length of the target file */ |
| 275 | Mapping *pMap /* Write the map of similaries here */ |
| 276 | ){ |
| 277 | int i, j, base, prefix; |
| 278 | hash h; |
| 279 | int *collide; |
| 280 | int origLenOut = lenOut; |
| 281 | struct Mapping_entry *aMap; |
| 282 | int landmark[MX_LANDMARK]; |
| @@ -420,84 +421,91 @@ | |
| 420 | hash_next(&h, zOut[base+i+NHASH]); |
| 421 | i++; |
| 422 | } |
| 423 | } |
| 424 | free(collide); |
| 425 | DEBUG1( |
| 426 | printf("after creation:\n"); |
| 427 | MappingPrint(pMap); |
| 428 | ) |
| 429 | |
| 430 | /* In this step, we will remove overlapping entries from the mapping. |
| 431 | ** |
| 432 | ** We use a greedy algorithm. Select the largest mapping first and |
| 433 | ** remove all overlapping mappings. Then take the next largest |
| 434 | ** mapping and remove others that overlap with it. Keep going until |
| 435 | ** all mappings have been processed. |
| 436 | */ |
| 437 | MappingSortSize(pMap); |
| 438 | for(i=0; i<pMap->nUsed; i++){ |
| 439 | int sortNeeded = 0; |
| 440 | int purgeNeeded = 0; |
| 441 | struct Mapping_entry *pA; |
| 442 | pA = &pMap->aMap[i]; |
| 443 | for(j=i+1; j<pMap->nUsed; j++){ |
| 444 | int diff; |
| 445 | struct Mapping_entry *pB; |
| 446 | pB = &pMap->aMap[j]; |
| 447 | if( pB->fromLast<pA->fromFirst || pB->fromFirst>pA->fromLast ){ |
| 448 | /* No overlap. Do nothing */ |
| 449 | }else if( pB->fromFirst>=pA->fromFirst && pB->fromLast<=pA->fromLast ){ |
| 450 | /* B is contained entirely within A. Drop B */ |
| 451 | pB->fromFirst = -1; |
| 452 | purgeNeeded = 1; |
| 453 | continue; |
| 454 | }else if( pB->fromFirst<pA->fromFirst ){ |
| 455 | /* The tail B overlaps the head of A */ |
| 456 | assert( pB->fromLast>=pA->fromFirst && pB->fromLast<=pA->fromLast ); |
| 457 | diff = pB->fromLast + 1 - pA->fromFirst; |
| 458 | pB->fromLast -= diff; |
| 459 | pB->toLast -= diff; |
| 460 | sortNeeded = 1; |
| 461 | }else{ |
| 462 | /* The head of B overlaps the tail of A */ |
| 463 | assert( pB->fromFirst<=pA->fromLast && pB->fromLast>pA->fromLast ); |
| 464 | diff = pA->fromLast + 1 - pB->fromFirst; |
| 465 | pB->fromFirst += diff; |
| 466 | pB->toFirst += diff; |
| 467 | sortNeeded = 1; |
| 468 | } |
| 469 | if( pB->toLast<pA->toFirst || pB->toFirst>pA->toLast ){ |
| 470 | /* No overlap. Do nothing */ |
| 471 | }else if( pB->toFirst>=pA->toFirst && pB->toLast<=pA->toLast ){ |
| 472 | /* B is contained entirely within A. Drop B */ |
| 473 | pB->fromFirst = -1; |
| 474 | purgeNeeded = 1; |
| 475 | }else if( pB->toFirst<pA->toFirst ){ |
| 476 | /* The tail of B overlaps the head of A */ |
| 477 | assert( pB->toLast>=pA->toFirst && pB->toLast<=pA->toLast ); |
| 478 | diff = pB->toLast + 1 - pA->toFirst; |
| 479 | pB->fromLast -= diff; |
| 480 | pB->toLast -= diff; |
| 481 | sortNeeded = 1; |
| 482 | }else{ |
| 483 | /* The head of B overlaps the tail of A */ |
| 484 | assert( pB->toFirst<=pA->toLast && pB->toLast>pA->toLast ); |
| 485 | diff = pA->toLast + 1 - pB->toFirst; |
| 486 | pB->fromFirst += diff; |
| 487 | pB->toFirst += diff; |
| 488 | sortNeeded = 1; |
| 489 | } |
| 490 | } |
| 491 | if( purgeNeeded ){ |
| 492 | MappingPurgeDeletedEntries(pMap); |
| 493 | } |
| 494 | if( sortNeeded && i<pMap->nUsed-2 ){ |
| 495 | MappingSortSize(pMap); |
| 496 | } |
| 497 | } |
| 498 | |
| 499 | /* Final step: Arrange the mapping entires so that they are in the |
| 500 | ** order of the output file. Then fill in the nExtra values. |
| 501 | */ |
| 502 | add_nextra: |
| 503 | MappingSortTo(pMap); |
| @@ -547,36 +555,41 @@ | |
| 547 | |
| 548 | /* |
| 549 | ** Do a three-way merge. Initialize pOut to contain the result. |
| 550 | */ |
| 551 | void blob_merge(Blob *pPivot, Blob *pV1, Blob *pV2, Blob *pOut){ |
| 552 | Mapping map1, map2; |
| 553 | int i; |
| 554 | const char *zV1, *zV2; |
| 555 | blob_zero(pOut); |
| 556 | DEBUG1( printf("map1:\n"); ) |
| 557 | MappingInit( |
| 558 | blob_buffer(pPivot), blob_size(pPivot), |
| 559 | blob_buffer(pV1), blob_size(pV1), |
| 560 | &map1); |
| 561 | MappingSortFrom(&map1); |
| 562 | DEBUG1( |
| 563 | printf("map1-final:\n"); |
| 564 | MappingPrint(&map1); |
| 565 | printf("map2:\n"); |
| 566 | ) |
| 567 | MappingInit( |
| 568 | blob_buffer(pPivot), blob_size(pPivot), |
| 569 | blob_buffer(pV2), blob_size(pV2), |
| 570 | &map2); |
| 571 | DEBUG1( |
| 572 | printf("map2-final:\n"); |
| 573 | MappingPrint(&map2); |
| 574 | ) |
| 575 | zV1 = blob_buffer(pV1); |
| 576 | zV2 = blob_buffer(pV2); |
| 577 | if( map2.nUsed==0 ) return; |
| 578 | if( map1.aMap[0].toFirst>0 ){ |
| 579 | blob_append(pOut, zV1, map1.aMap[0].toFirst); |
| 580 | DEBUG1( printf("INSERT %d bytes from V1[0..%d]\n", |
| 581 | map1.aMap[0].toFirst, map1.aMap[0].toFirst-1); ) |
| 582 | } |
| @@ -583,22 +596,39 @@ | |
| 583 | if( map2.aMap[0].toFirst>0 ){ |
| 584 | blob_append(pOut, zV2, map2.aMap[0].toFirst); |
| 585 | DEBUG1( printf("INSERT %d bytes from V2[0..%d]\n", |
| 586 | map2.aMap[0].toFirst, map2.aMap[0].toFirst-1); ) |
| 587 | } |
| 588 | for(i=0; i<map2.nUsed; i++){ |
| 589 | int iFirst, iLast, nInsert; |
| 590 | struct Mapping_entry *p = &map2.aMap[i]; |
| 591 | iFirst = MappingTranslate(&map1, p->fromFirst, 0); |
| 592 | iLast = MappingTranslate(&map1, p->fromLast, &nInsert); |
| 593 | blob_append(pOut, &zV1[iFirst], iLast - iFirst + 1); |
| 594 | DEBUG1( |
| 595 | printf("COPY %d bytes from V1[%d..%d]\n", iLast-iFirst+1, iFirst, iLast); |
| 596 | ) |
| 597 | if( p->nExtra>0 ){ |
| 598 | if( p->nExtra==nInsert |
| 599 | && memcmp(&zV2[p->toLast+1], &zV1[iLast-nInsert+1], nInsert)==0 ){ |
| 600 | /* Omit a duplicate insert */ |
| 601 | DEBUG1( printf("OMIT duplicate insert\n"); ) |
| 602 | }else{ |
| 603 | blob_append(pOut, &zV2[p->toLast+1], p->nExtra); |
| 604 | DEBUG1( |
| @@ -608,10 +638,11 @@ | |
| 608 | } |
| 609 | } |
| 610 | } |
| 611 | MappingClear(&map1); |
| 612 | MappingClear(&map2); |
| 613 | } |
| 614 | |
| 615 | /* |
| 616 | ** COMMAND: test-3-way-merge |
| 617 | ** |
| @@ -651,11 +682,11 @@ | |
| 651 | /* |
| 652 | ** COMMAND: test-mapping |
| 653 | */ |
| 654 | void mapping_test(void){ |
| 655 | int i; |
| 656 | const char *z; |
| 657 | Blob a, b; |
| 658 | Mapping map; |
| 659 | if( g.argc!=4 ){ |
| 660 | usage("FILE1 FILE2"); |
| 661 | } |
| @@ -663,15 +694,24 @@ | |
| 663 | blob_read_from_file(&b, g.argv[3]); |
| 664 | memset(&map, 0, sizeof(map)); |
| 665 | MappingInit(blob_buffer(&a), blob_size(&a), |
| 666 | blob_buffer(&b), blob_size(&b), |
| 667 | &map); |
| 668 | z = blob_buffer(&a); |
| 669 | for(i=0; i<map.nUsed; i++){ |
| 670 | printf("======= %6d..%-6d %6d..%-6d %d\n", |
| 671 | map.aMap[i].fromFirst, map.aMap[i].fromLast, |
| 672 | map.aMap[i].toFirst, map.aMap[i].toLast, |
| 673 | map.aMap[i].nExtra); |
| 674 | printf("%.*s\n", map.aMap[i].fromLast - map.aMap[i].fromFirst + 1, |
| 675 | &z[map.aMap[i].fromFirst]); |
| 676 | } |
| 677 | } |
| 678 |
| --- src/merge3.c | |
| +++ src/merge3.c | |
| @@ -273,10 +273,11 @@ | |
| 273 | const char *zOut, /* The target file */ |
| 274 | int lenOut, /* Length of the target file */ |
| 275 | Mapping *pMap /* Write the map of similaries here */ |
| 276 | ){ |
| 277 | int i, j, base, prefix; |
| 278 | int changes; |
| 279 | hash h; |
| 280 | int *collide; |
| 281 | int origLenOut = lenOut; |
| 282 | struct Mapping_entry *aMap; |
| 283 | int landmark[MX_LANDMARK]; |
| @@ -420,84 +421,91 @@ | |
| 421 | hash_next(&h, zOut[base+i+NHASH]); |
| 422 | i++; |
| 423 | } |
| 424 | } |
| 425 | free(collide); |
| 426 | #if 0 |
| 427 | DEBUG1( |
| 428 | printf("after creation:\n"); |
| 429 | MappingPrint(pMap); |
| 430 | ) |
| 431 | #endif |
| 432 | |
| 433 | /* In this step, we will remove overlapping entries from the mapping. |
| 434 | ** |
| 435 | ** We use a greedy algorithm. Select the largest mapping first and |
| 436 | ** remove all overlapping mappings. Then take the next largest |
| 437 | ** mapping and remove others that overlap with it. Keep going until |
| 438 | ** all mappings have been processed. |
| 439 | */ |
| 440 | MappingSortSize(pMap); |
| 441 | do{ |
| 442 | changes = 0; |
| 443 | for(i=0; i<pMap->nUsed; i++){ |
| 444 | int sortNeeded = 0; |
| 445 | int purgeNeeded = 0; |
| 446 | struct Mapping_entry *pA; |
| 447 | pA = &pMap->aMap[i]; |
| 448 | for(j=i+1; j<pMap->nUsed; j++){ |
| 449 | int diff; |
| 450 | struct Mapping_entry *pB; |
| 451 | pB = &pMap->aMap[j]; |
| 452 | if( pB->fromLast<pA->fromFirst || pB->fromFirst>pA->fromLast ){ |
| 453 | /* No overlap. Do nothing */ |
| 454 | }else if( pB->fromFirst>=pA->fromFirst && pB->fromLast<=pA->fromLast ){ |
| 455 | /* B is contained entirely within A. Drop B */ |
| 456 | pB->fromFirst = -1; |
| 457 | purgeNeeded = 1; |
| 458 | continue; |
| 459 | }else if( pB->fromFirst<pA->fromFirst ){ |
| 460 | /* The tail B overlaps the head of A */ |
| 461 | assert( pB->fromLast>=pA->fromFirst && pB->fromLast<=pA->fromLast ); |
| 462 | diff = pB->fromLast + 1 - pA->fromFirst; |
| 463 | pB->fromLast -= diff; |
| 464 | pB->toLast -= diff; |
| 465 | sortNeeded = 1; |
| 466 | }else{ |
| 467 | /* The head of B overlaps the tail of A */ |
| 468 | assert( pB->fromFirst<=pA->fromLast && pB->fromLast>pA->fromLast ); |
| 469 | diff = pA->fromLast + 1 - pB->fromFirst; |
| 470 | pB->fromFirst += diff; |
| 471 | pB->toFirst += diff; |
| 472 | sortNeeded = 1; |
| 473 | } |
| 474 | if( pB->toLast<pA->toFirst || pB->toFirst>pA->toLast ){ |
| 475 | /* No overlap. Do nothing */ |
| 476 | }else if( pB->toFirst>=pA->toFirst && pB->toLast<=pA->toLast ){ |
| 477 | /* B is contained entirely within A. Drop B */ |
| 478 | pB->fromFirst = -1; |
| 479 | purgeNeeded = 1; |
| 480 | }else if( pB->toFirst<pA->toFirst ){ |
| 481 | /* The tail of B overlaps the head of A */ |
| 482 | assert( pB->toLast>=pA->toFirst && pB->toLast<=pA->toLast ); |
| 483 | diff = pB->toLast + 1 - pA->toFirst; |
| 484 | pB->fromLast -= diff; |
| 485 | pB->toLast -= diff; |
| 486 | sortNeeded = 1; |
| 487 | }else{ |
| 488 | /* The head of B overlaps the tail of A */ |
| 489 | assert( pB->toFirst<=pA->toLast && pB->toLast>pA->toLast ); |
| 490 | diff = pA->toLast + 1 - pB->toFirst; |
| 491 | pB->fromFirst += diff; |
| 492 | pB->toFirst += diff; |
| 493 | sortNeeded = 1; |
| 494 | } |
| 495 | } |
| 496 | if( purgeNeeded ){ |
| 497 | MappingPurgeDeletedEntries(pMap); |
| 498 | /* changes++; */ |
| 499 | } |
| 500 | if( sortNeeded && i<pMap->nUsed-2 ){ |
| 501 | MappingSortSize(pMap); |
| 502 | /* changes++; */ |
| 503 | } |
| 504 | } |
| 505 | }while( changes ); |
| 506 | |
| 507 | /* Final step: Arrange the mapping entires so that they are in the |
| 508 | ** order of the output file. Then fill in the nExtra values. |
| 509 | */ |
| 510 | add_nextra: |
| 511 | MappingSortTo(pMap); |
| @@ -547,36 +555,41 @@ | |
| 555 | |
| 556 | /* |
| 557 | ** Do a three-way merge. Initialize pOut to contain the result. |
| 558 | */ |
| 559 | void blob_merge(Blob *pPivot, Blob *pV1, Blob *pV2, Blob *pOut){ |
| 560 | Mapping map1, map2, map3; |
| 561 | int i, j; |
| 562 | const char *zV1, *zV2; |
| 563 | blob_zero(pOut); |
| 564 | MappingInit( |
| 565 | blob_buffer(pPivot), blob_size(pPivot), |
| 566 | blob_buffer(pV1), blob_size(pV1), |
| 567 | &map1); |
| 568 | MappingSortFrom(&map1); |
| 569 | DEBUG1( |
| 570 | printf("map1-final:\n"); |
| 571 | MappingPrint(&map1); |
| 572 | ) |
| 573 | MappingInit( |
| 574 | blob_buffer(pPivot), blob_size(pPivot), |
| 575 | blob_buffer(pV2), blob_size(pV2), |
| 576 | &map2); |
| 577 | DEBUG1( |
| 578 | printf("map2-final:\n"); |
| 579 | MappingPrint(&map2); |
| 580 | ) |
| 581 | MappingInit( |
| 582 | blob_buffer(pV1), blob_size(pV1), |
| 583 | blob_buffer(pV2), blob_size(pV2), |
| 584 | &map3); |
| 585 | DEBUG1( |
| 586 | printf("map3-final:\n"); |
| 587 | MappingPrint(&map3); |
| 588 | ) |
| 589 | zV1 = blob_buffer(pV1); |
| 590 | zV2 = blob_buffer(pV2); |
| 591 | if( map1.aMap[0].toFirst>0 ){ |
| 592 | blob_append(pOut, zV1, map1.aMap[0].toFirst); |
| 593 | DEBUG1( printf("INSERT %d bytes from V1[0..%d]\n", |
| 594 | map1.aMap[0].toFirst, map1.aMap[0].toFirst-1); ) |
| 595 | } |
| @@ -583,22 +596,39 @@ | |
| 596 | if( map2.aMap[0].toFirst>0 ){ |
| 597 | blob_append(pOut, zV2, map2.aMap[0].toFirst); |
| 598 | DEBUG1( printf("INSERT %d bytes from V2[0..%d]\n", |
| 599 | map2.aMap[0].toFirst, map2.aMap[0].toFirst-1); ) |
| 600 | } |
| 601 | for(i=j=0; i<map2.nUsed; i++){ |
| 602 | int iFirst, iLast, nInsert, iTail; |
| 603 | struct Mapping_entry *p = &map2.aMap[i]; |
| 604 | while( j<map3.nUsed-1 && map3.aMap[j+1].toFirst>p->toFirst ){ j++; } |
| 605 | DEBUG1( |
| 606 | printf("map2: %6d..%-6d %6d..%-6d %d\n", |
| 607 | p->fromFirst, p->fromLast, p->toFirst, p->toLast, p->nExtra); |
| 608 | printf("map3: j=%-6d %6d..%-6d\n", j, map3.aMap[j].toFirst, |
| 609 | map3.aMap[j].toLast); |
| 610 | ); |
| 611 | iTail = p->toLast + p->nExtra; |
| 612 | if( i<map2.nUsed-1 && |
| 613 | map3.aMap[j].toFirst<=p->toFirst && map3.aMap[j].toLast>=iTail ){ |
| 614 | blob_append(pOut, &zV2[p->toFirst], iTail - p->toFirst + 1); |
| 615 | DEBUG1( |
| 616 | printf("COPY %d bytes from V2[%d..%d]\n", iTail - p->toFirst+1, |
| 617 | p->toFirst, iTail); |
| 618 | ) |
| 619 | continue; |
| 620 | } |
| 621 | iFirst = MappingTranslate(&map1, p->fromFirst, 0); |
| 622 | iLast = MappingTranslate(&map1, p->fromLast, &nInsert); |
| 623 | blob_append(pOut, &zV1[iFirst], iLast - iFirst + 1); |
| 624 | DEBUG1( |
| 625 | printf("COPY %d bytes from V1[%d..%d]\n", iLast-iFirst+1, iFirst, iLast); |
| 626 | ) |
| 627 | if( p->nExtra>0 ){ |
| 628 | if( p->nExtra==nInsert && |
| 629 | memcmp(&zV2[p->toLast+1], &zV1[iLast-nInsert+1], nInsert)==0 ){ |
| 630 | /* Omit a duplicate insert */ |
| 631 | DEBUG1( printf("OMIT duplicate insert\n"); ) |
| 632 | }else{ |
| 633 | blob_append(pOut, &zV2[p->toLast+1], p->nExtra); |
| 634 | DEBUG1( |
| @@ -608,10 +638,11 @@ | |
| 638 | } |
| 639 | } |
| 640 | } |
| 641 | MappingClear(&map1); |
| 642 | MappingClear(&map2); |
| 643 | MappingClear(&map3); |
| 644 | } |
| 645 | |
| 646 | /* |
| 647 | ** COMMAND: test-3-way-merge |
| 648 | ** |
| @@ -651,11 +682,11 @@ | |
| 682 | /* |
| 683 | ** COMMAND: test-mapping |
| 684 | */ |
| 685 | void mapping_test(void){ |
| 686 | int i; |
| 687 | const char *za, *zb; |
| 688 | Blob a, b; |
| 689 | Mapping map; |
| 690 | if( g.argc!=4 ){ |
| 691 | usage("FILE1 FILE2"); |
| 692 | } |
| @@ -663,15 +694,24 @@ | |
| 694 | blob_read_from_file(&b, g.argv[3]); |
| 695 | memset(&map, 0, sizeof(map)); |
| 696 | MappingInit(blob_buffer(&a), blob_size(&a), |
| 697 | blob_buffer(&b), blob_size(&b), |
| 698 | &map); |
| 699 | DEBUG1( |
| 700 | printf("map-final:\n"); |
| 701 | MappingPrint(&map); |
| 702 | ) |
| 703 | za = blob_buffer(&a); |
| 704 | zb = blob_buffer(&b); |
| 705 | for(i=0; i<map.nUsed; i++){ |
| 706 | printf("======= %6d..%-6d %6d..%-6d %d\n", |
| 707 | map.aMap[i].fromFirst, map.aMap[i].fromLast, |
| 708 | map.aMap[i].toFirst, map.aMap[i].toLast, |
| 709 | map.aMap[i].nExtra); |
| 710 | printf("%.*s\n", map.aMap[i].fromLast - map.aMap[i].fromFirst + 1, |
| 711 | &za[map.aMap[i].fromFirst]); |
| 712 | if( map.aMap[i].nExtra ){ |
| 713 | printf("======= EXTRA:\n"); |
| 714 | printf("%.*s\n", map.aMap[i].nExtra, &zb[map.aMap[i].toLast+1]); |
| 715 | } |
| 716 | } |
| 717 | } |
| 718 |