Fossil SCM

Improvements to the merge algorithm so that it works better for common changes. Still more work needed.

drh 2007-11-07 22:22 trunk
Commit ac6bb3ce069523ecc7b664ff41a8b5cc3fc7f657
1 file changed +113 -73
+113 -73
--- src/merge3.c
+++ src/merge3.c
@@ -273,10 +273,11 @@
273273
const char *zOut, /* The target file */
274274
int lenOut, /* Length of the target file */
275275
Mapping *pMap /* Write the map of similaries here */
276276
){
277277
int i, j, base, prefix;
278
+ int changes;
278279
hash h;
279280
int *collide;
280281
int origLenOut = lenOut;
281282
struct Mapping_entry *aMap;
282283
int landmark[MX_LANDMARK];
@@ -420,84 +421,91 @@
420421
hash_next(&h, zOut[base+i+NHASH]);
421422
i++;
422423
}
423424
}
424425
free(collide);
426
+#if 0
425427
DEBUG1(
426428
printf("after creation:\n");
427429
MappingPrint(pMap);
428430
)
431
+#endif
429432
430433
/* In this step, we will remove overlapping entries from the mapping.
431434
**
432435
** We use a greedy algorithm. Select the largest mapping first and
433436
** remove all overlapping mappings. Then take the next largest
434437
** mapping and remove others that overlap with it. Keep going until
435438
** all mappings have been processed.
436439
*/
437440
MappingSortSize(pMap);
438
- for(i=0; i<pMap->nUsed; i++){
439
- int sortNeeded = 0;
440
- int purgeNeeded = 0;
441
- struct Mapping_entry *pA;
442
- pA = &pMap->aMap[i];
443
- for(j=i+1; j<pMap->nUsed; j++){
444
- int diff;
445
- struct Mapping_entry *pB;
446
- pB = &pMap->aMap[j];
447
- if( pB->fromLast<pA->fromFirst || pB->fromFirst>pA->fromLast ){
448
- /* No overlap. Do nothing */
449
- }else if( pB->fromFirst>=pA->fromFirst && pB->fromLast<=pA->fromLast ){
450
- /* B is contained entirely within A. Drop B */
451
- pB->fromFirst = -1;
452
- purgeNeeded = 1;
453
- continue;
454
- }else if( pB->fromFirst<pA->fromFirst ){
455
- /* The tail B overlaps the head of A */
456
- assert( pB->fromLast>=pA->fromFirst && pB->fromLast<=pA->fromLast );
457
- diff = pB->fromLast + 1 - pA->fromFirst;
458
- pB->fromLast -= diff;
459
- pB->toLast -= diff;
460
- sortNeeded = 1;
461
- }else{
462
- /* The head of B overlaps the tail of A */
463
- assert( pB->fromFirst<=pA->fromLast && pB->fromLast>pA->fromLast );
464
- diff = pA->fromLast + 1 - pB->fromFirst;
465
- pB->fromFirst += diff;
466
- pB->toFirst += diff;
467
- sortNeeded = 1;
468
- }
469
- if( pB->toLast<pA->toFirst || pB->toFirst>pA->toLast ){
470
- /* No overlap. Do nothing */
471
- }else if( pB->toFirst>=pA->toFirst && pB->toLast<=pA->toLast ){
472
- /* B is contained entirely within A. Drop B */
473
- pB->fromFirst = -1;
474
- purgeNeeded = 1;
475
- }else if( pB->toFirst<pA->toFirst ){
476
- /* The tail of B overlaps the head of A */
477
- assert( pB->toLast>=pA->toFirst && pB->toLast<=pA->toLast );
478
- diff = pB->toLast + 1 - pA->toFirst;
479
- pB->fromLast -= diff;
480
- pB->toLast -= diff;
481
- sortNeeded = 1;
482
- }else{
483
- /* The head of B overlaps the tail of A */
484
- assert( pB->toFirst<=pA->toLast && pB->toLast>pA->toLast );
485
- diff = pA->toLast + 1 - pB->toFirst;
486
- pB->fromFirst += diff;
487
- pB->toFirst += diff;
488
- sortNeeded = 1;
489
- }
490
- }
491
- if( purgeNeeded ){
492
- MappingPurgeDeletedEntries(pMap);
493
- }
494
- if( sortNeeded && i<pMap->nUsed-2 ){
495
- MappingSortSize(pMap);
496
- }
497
- }
498
-
441
+ do{
442
+ changes = 0;
443
+ for(i=0; i<pMap->nUsed; i++){
444
+ int sortNeeded = 0;
445
+ int purgeNeeded = 0;
446
+ struct Mapping_entry *pA;
447
+ pA = &pMap->aMap[i];
448
+ for(j=i+1; j<pMap->nUsed; j++){
449
+ int diff;
450
+ struct Mapping_entry *pB;
451
+ pB = &pMap->aMap[j];
452
+ if( pB->fromLast<pA->fromFirst || pB->fromFirst>pA->fromLast ){
453
+ /* No overlap. Do nothing */
454
+ }else if( pB->fromFirst>=pA->fromFirst && pB->fromLast<=pA->fromLast ){
455
+ /* B is contained entirely within A. Drop B */
456
+ pB->fromFirst = -1;
457
+ purgeNeeded = 1;
458
+ continue;
459
+ }else if( pB->fromFirst<pA->fromFirst ){
460
+ /* The tail B overlaps the head of A */
461
+ assert( pB->fromLast>=pA->fromFirst && pB->fromLast<=pA->fromLast );
462
+ diff = pB->fromLast + 1 - pA->fromFirst;
463
+ pB->fromLast -= diff;
464
+ pB->toLast -= diff;
465
+ sortNeeded = 1;
466
+ }else{
467
+ /* The head of B overlaps the tail of A */
468
+ assert( pB->fromFirst<=pA->fromLast && pB->fromLast>pA->fromLast );
469
+ diff = pA->fromLast + 1 - pB->fromFirst;
470
+ pB->fromFirst += diff;
471
+ pB->toFirst += diff;
472
+ sortNeeded = 1;
473
+ }
474
+ if( pB->toLast<pA->toFirst || pB->toFirst>pA->toLast ){
475
+ /* No overlap. Do nothing */
476
+ }else if( pB->toFirst>=pA->toFirst && pB->toLast<=pA->toLast ){
477
+ /* B is contained entirely within A. Drop B */
478
+ pB->fromFirst = -1;
479
+ purgeNeeded = 1;
480
+ }else if( pB->toFirst<pA->toFirst ){
481
+ /* The tail of B overlaps the head of A */
482
+ assert( pB->toLast>=pA->toFirst && pB->toLast<=pA->toLast );
483
+ diff = pB->toLast + 1 - pA->toFirst;
484
+ pB->fromLast -= diff;
485
+ pB->toLast -= diff;
486
+ sortNeeded = 1;
487
+ }else{
488
+ /* The head of B overlaps the tail of A */
489
+ assert( pB->toFirst<=pA->toLast && pB->toLast>pA->toLast );
490
+ diff = pA->toLast + 1 - pB->toFirst;
491
+ pB->fromFirst += diff;
492
+ pB->toFirst += diff;
493
+ sortNeeded = 1;
494
+ }
495
+ }
496
+ if( purgeNeeded ){
497
+ MappingPurgeDeletedEntries(pMap);
498
+ /* changes++; */
499
+ }
500
+ if( sortNeeded && i<pMap->nUsed-2 ){
501
+ MappingSortSize(pMap);
502
+ /* changes++; */
503
+ }
504
+ }
505
+ }while( changes );
506
+
499507
/* Final step: Arrange the mapping entires so that they are in the
500508
** order of the output file. Then fill in the nExtra values.
501509
*/
502510
add_nextra:
503511
MappingSortTo(pMap);
@@ -547,36 +555,41 @@
547555
548556
/*
549557
** Do a three-way merge. Initialize pOut to contain the result.
550558
*/
551559
void blob_merge(Blob *pPivot, Blob *pV1, Blob *pV2, Blob *pOut){
552
- Mapping map1, map2;
553
- int i;
560
+ Mapping map1, map2, map3;
561
+ int i, j;
554562
const char *zV1, *zV2;
555563
blob_zero(pOut);
556
- DEBUG1( printf("map1:\n"); )
557564
MappingInit(
558565
blob_buffer(pPivot), blob_size(pPivot),
559566
blob_buffer(pV1), blob_size(pV1),
560567
&map1);
561568
MappingSortFrom(&map1);
562569
DEBUG1(
563570
printf("map1-final:\n");
564571
MappingPrint(&map1);
565
- printf("map2:\n");
566572
)
567573
MappingInit(
568574
blob_buffer(pPivot), blob_size(pPivot),
569575
blob_buffer(pV2), blob_size(pV2),
570576
&map2);
571577
DEBUG1(
572578
printf("map2-final:\n");
573579
MappingPrint(&map2);
574580
)
581
+ MappingInit(
582
+ blob_buffer(pV1), blob_size(pV1),
583
+ blob_buffer(pV2), blob_size(pV2),
584
+ &map3);
585
+ DEBUG1(
586
+ printf("map3-final:\n");
587
+ MappingPrint(&map3);
588
+ )
575589
zV1 = blob_buffer(pV1);
576590
zV2 = blob_buffer(pV2);
577
- if( map2.nUsed==0 ) return;
578591
if( map1.aMap[0].toFirst>0 ){
579592
blob_append(pOut, zV1, map1.aMap[0].toFirst);
580593
DEBUG1( printf("INSERT %d bytes from V1[0..%d]\n",
581594
map1.aMap[0].toFirst, map1.aMap[0].toFirst-1); )
582595
}
@@ -583,22 +596,39 @@
583596
if( map2.aMap[0].toFirst>0 ){
584597
blob_append(pOut, zV2, map2.aMap[0].toFirst);
585598
DEBUG1( printf("INSERT %d bytes from V2[0..%d]\n",
586599
map2.aMap[0].toFirst, map2.aMap[0].toFirst-1); )
587600
}
588
- for(i=0; i<map2.nUsed; i++){
589
- int iFirst, iLast, nInsert;
601
+ for(i=j=0; i<map2.nUsed; i++){
602
+ int iFirst, iLast, nInsert, iTail;
590603
struct Mapping_entry *p = &map2.aMap[i];
604
+ while( j<map3.nUsed-1 && map3.aMap[j+1].toFirst>p->toFirst ){ j++; }
605
+ DEBUG1(
606
+ printf("map2: %6d..%-6d %6d..%-6d %d\n",
607
+ p->fromFirst, p->fromLast, p->toFirst, p->toLast, p->nExtra);
608
+ printf("map3: j=%-6d %6d..%-6d\n", j, map3.aMap[j].toFirst,
609
+ map3.aMap[j].toLast);
610
+ );
611
+ iTail = p->toLast + p->nExtra;
612
+ if( i<map2.nUsed-1 &&
613
+ map3.aMap[j].toFirst<=p->toFirst && map3.aMap[j].toLast>=iTail ){
614
+ blob_append(pOut, &zV2[p->toFirst], iTail - p->toFirst + 1);
615
+ DEBUG1(
616
+ printf("COPY %d bytes from V2[%d..%d]\n", iTail - p->toFirst+1,
617
+ p->toFirst, iTail);
618
+ )
619
+ continue;
620
+ }
591621
iFirst = MappingTranslate(&map1, p->fromFirst, 0);
592622
iLast = MappingTranslate(&map1, p->fromLast, &nInsert);
593623
blob_append(pOut, &zV1[iFirst], iLast - iFirst + 1);
594624
DEBUG1(
595625
printf("COPY %d bytes from V1[%d..%d]\n", iLast-iFirst+1, iFirst, iLast);
596626
)
597627
if( p->nExtra>0 ){
598
- if( p->nExtra==nInsert
599
- && memcmp(&zV2[p->toLast+1], &zV1[iLast-nInsert+1], nInsert)==0 ){
628
+ if( p->nExtra==nInsert &&
629
+ memcmp(&zV2[p->toLast+1], &zV1[iLast-nInsert+1], nInsert)==0 ){
600630
/* Omit a duplicate insert */
601631
DEBUG1( printf("OMIT duplicate insert\n"); )
602632
}else{
603633
blob_append(pOut, &zV2[p->toLast+1], p->nExtra);
604634
DEBUG1(
@@ -608,10 +638,11 @@
608638
}
609639
}
610640
}
611641
MappingClear(&map1);
612642
MappingClear(&map2);
643
+ MappingClear(&map3);
613644
}
614645
615646
/*
616647
** COMMAND: test-3-way-merge
617648
**
@@ -651,11 +682,11 @@
651682
/*
652683
** COMMAND: test-mapping
653684
*/
654685
void mapping_test(void){
655686
int i;
656
- const char *z;
687
+ const char *za, *zb;
657688
Blob a, b;
658689
Mapping map;
659690
if( g.argc!=4 ){
660691
usage("FILE1 FILE2");
661692
}
@@ -663,15 +694,24 @@
663694
blob_read_from_file(&b, g.argv[3]);
664695
memset(&map, 0, sizeof(map));
665696
MappingInit(blob_buffer(&a), blob_size(&a),
666697
blob_buffer(&b), blob_size(&b),
667698
&map);
668
- z = blob_buffer(&a);
699
+ DEBUG1(
700
+ printf("map-final:\n");
701
+ MappingPrint(&map);
702
+ )
703
+ za = blob_buffer(&a);
704
+ zb = blob_buffer(&b);
669705
for(i=0; i<map.nUsed; i++){
670706
printf("======= %6d..%-6d %6d..%-6d %d\n",
671707
map.aMap[i].fromFirst, map.aMap[i].fromLast,
672708
map.aMap[i].toFirst, map.aMap[i].toLast,
673709
map.aMap[i].nExtra);
674710
printf("%.*s\n", map.aMap[i].fromLast - map.aMap[i].fromFirst + 1,
675
- &z[map.aMap[i].fromFirst]);
711
+ &za[map.aMap[i].fromFirst]);
712
+ if( map.aMap[i].nExtra ){
713
+ printf("======= EXTRA:\n");
714
+ printf("%.*s\n", map.aMap[i].nExtra, &zb[map.aMap[i].toLast+1]);
715
+ }
676716
}
677717
}
678718
--- src/merge3.c
+++ src/merge3.c
@@ -273,10 +273,11 @@
273 const char *zOut, /* The target file */
274 int lenOut, /* Length of the target file */
275 Mapping *pMap /* Write the map of similaries here */
276 ){
277 int i, j, base, prefix;
 
278 hash h;
279 int *collide;
280 int origLenOut = lenOut;
281 struct Mapping_entry *aMap;
282 int landmark[MX_LANDMARK];
@@ -420,84 +421,91 @@
420 hash_next(&h, zOut[base+i+NHASH]);
421 i++;
422 }
423 }
424 free(collide);
 
425 DEBUG1(
426 printf("after creation:\n");
427 MappingPrint(pMap);
428 )
 
429
430 /* In this step, we will remove overlapping entries from the mapping.
431 **
432 ** We use a greedy algorithm. Select the largest mapping first and
433 ** remove all overlapping mappings. Then take the next largest
434 ** mapping and remove others that overlap with it. Keep going until
435 ** all mappings have been processed.
436 */
437 MappingSortSize(pMap);
438 for(i=0; i<pMap->nUsed; i++){
439 int sortNeeded = 0;
440 int purgeNeeded = 0;
441 struct Mapping_entry *pA;
442 pA = &pMap->aMap[i];
443 for(j=i+1; j<pMap->nUsed; j++){
444 int diff;
445 struct Mapping_entry *pB;
446 pB = &pMap->aMap[j];
447 if( pB->fromLast<pA->fromFirst || pB->fromFirst>pA->fromLast ){
448 /* No overlap. Do nothing */
449 }else if( pB->fromFirst>=pA->fromFirst && pB->fromLast<=pA->fromLast ){
450 /* B is contained entirely within A. Drop B */
451 pB->fromFirst = -1;
452 purgeNeeded = 1;
453 continue;
454 }else if( pB->fromFirst<pA->fromFirst ){
455 /* The tail B overlaps the head of A */
456 assert( pB->fromLast>=pA->fromFirst && pB->fromLast<=pA->fromLast );
457 diff = pB->fromLast + 1 - pA->fromFirst;
458 pB->fromLast -= diff;
459 pB->toLast -= diff;
460 sortNeeded = 1;
461 }else{
462 /* The head of B overlaps the tail of A */
463 assert( pB->fromFirst<=pA->fromLast && pB->fromLast>pA->fromLast );
464 diff = pA->fromLast + 1 - pB->fromFirst;
465 pB->fromFirst += diff;
466 pB->toFirst += diff;
467 sortNeeded = 1;
468 }
469 if( pB->toLast<pA->toFirst || pB->toFirst>pA->toLast ){
470 /* No overlap. Do nothing */
471 }else if( pB->toFirst>=pA->toFirst && pB->toLast<=pA->toLast ){
472 /* B is contained entirely within A. Drop B */
473 pB->fromFirst = -1;
474 purgeNeeded = 1;
475 }else if( pB->toFirst<pA->toFirst ){
476 /* The tail of B overlaps the head of A */
477 assert( pB->toLast>=pA->toFirst && pB->toLast<=pA->toLast );
478 diff = pB->toLast + 1 - pA->toFirst;
479 pB->fromLast -= diff;
480 pB->toLast -= diff;
481 sortNeeded = 1;
482 }else{
483 /* The head of B overlaps the tail of A */
484 assert( pB->toFirst<=pA->toLast && pB->toLast>pA->toLast );
485 diff = pA->toLast + 1 - pB->toFirst;
486 pB->fromFirst += diff;
487 pB->toFirst += diff;
488 sortNeeded = 1;
489 }
490 }
491 if( purgeNeeded ){
492 MappingPurgeDeletedEntries(pMap);
493 }
494 if( sortNeeded && i<pMap->nUsed-2 ){
495 MappingSortSize(pMap);
496 }
497 }
498
 
 
 
 
 
499 /* Final step: Arrange the mapping entires so that they are in the
500 ** order of the output file. Then fill in the nExtra values.
501 */
502 add_nextra:
503 MappingSortTo(pMap);
@@ -547,36 +555,41 @@
547
548 /*
549 ** Do a three-way merge. Initialize pOut to contain the result.
550 */
551 void blob_merge(Blob *pPivot, Blob *pV1, Blob *pV2, Blob *pOut){
552 Mapping map1, map2;
553 int i;
554 const char *zV1, *zV2;
555 blob_zero(pOut);
556 DEBUG1( printf("map1:\n"); )
557 MappingInit(
558 blob_buffer(pPivot), blob_size(pPivot),
559 blob_buffer(pV1), blob_size(pV1),
560 &map1);
561 MappingSortFrom(&map1);
562 DEBUG1(
563 printf("map1-final:\n");
564 MappingPrint(&map1);
565 printf("map2:\n");
566 )
567 MappingInit(
568 blob_buffer(pPivot), blob_size(pPivot),
569 blob_buffer(pV2), blob_size(pV2),
570 &map2);
571 DEBUG1(
572 printf("map2-final:\n");
573 MappingPrint(&map2);
574 )
 
 
 
 
 
 
 
 
575 zV1 = blob_buffer(pV1);
576 zV2 = blob_buffer(pV2);
577 if( map2.nUsed==0 ) return;
578 if( map1.aMap[0].toFirst>0 ){
579 blob_append(pOut, zV1, map1.aMap[0].toFirst);
580 DEBUG1( printf("INSERT %d bytes from V1[0..%d]\n",
581 map1.aMap[0].toFirst, map1.aMap[0].toFirst-1); )
582 }
@@ -583,22 +596,39 @@
583 if( map2.aMap[0].toFirst>0 ){
584 blob_append(pOut, zV2, map2.aMap[0].toFirst);
585 DEBUG1( printf("INSERT %d bytes from V2[0..%d]\n",
586 map2.aMap[0].toFirst, map2.aMap[0].toFirst-1); )
587 }
588 for(i=0; i<map2.nUsed; i++){
589 int iFirst, iLast, nInsert;
590 struct Mapping_entry *p = &map2.aMap[i];
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
591 iFirst = MappingTranslate(&map1, p->fromFirst, 0);
592 iLast = MappingTranslate(&map1, p->fromLast, &nInsert);
593 blob_append(pOut, &zV1[iFirst], iLast - iFirst + 1);
594 DEBUG1(
595 printf("COPY %d bytes from V1[%d..%d]\n", iLast-iFirst+1, iFirst, iLast);
596 )
597 if( p->nExtra>0 ){
598 if( p->nExtra==nInsert
599 && memcmp(&zV2[p->toLast+1], &zV1[iLast-nInsert+1], nInsert)==0 ){
600 /* Omit a duplicate insert */
601 DEBUG1( printf("OMIT duplicate insert\n"); )
602 }else{
603 blob_append(pOut, &zV2[p->toLast+1], p->nExtra);
604 DEBUG1(
@@ -608,10 +638,11 @@
608 }
609 }
610 }
611 MappingClear(&map1);
612 MappingClear(&map2);
 
613 }
614
615 /*
616 ** COMMAND: test-3-way-merge
617 **
@@ -651,11 +682,11 @@
651 /*
652 ** COMMAND: test-mapping
653 */
654 void mapping_test(void){
655 int i;
656 const char *z;
657 Blob a, b;
658 Mapping map;
659 if( g.argc!=4 ){
660 usage("FILE1 FILE2");
661 }
@@ -663,15 +694,24 @@
663 blob_read_from_file(&b, g.argv[3]);
664 memset(&map, 0, sizeof(map));
665 MappingInit(blob_buffer(&a), blob_size(&a),
666 blob_buffer(&b), blob_size(&b),
667 &map);
668 z = blob_buffer(&a);
 
 
 
 
 
669 for(i=0; i<map.nUsed; i++){
670 printf("======= %6d..%-6d %6d..%-6d %d\n",
671 map.aMap[i].fromFirst, map.aMap[i].fromLast,
672 map.aMap[i].toFirst, map.aMap[i].toLast,
673 map.aMap[i].nExtra);
674 printf("%.*s\n", map.aMap[i].fromLast - map.aMap[i].fromFirst + 1,
675 &z[map.aMap[i].fromFirst]);
 
 
 
 
676 }
677 }
678
--- src/merge3.c
+++ src/merge3.c
@@ -273,10 +273,11 @@
273 const char *zOut, /* The target file */
274 int lenOut, /* Length of the target file */
275 Mapping *pMap /* Write the map of similaries here */
276 ){
277 int i, j, base, prefix;
278 int changes;
279 hash h;
280 int *collide;
281 int origLenOut = lenOut;
282 struct Mapping_entry *aMap;
283 int landmark[MX_LANDMARK];
@@ -420,84 +421,91 @@
421 hash_next(&h, zOut[base+i+NHASH]);
422 i++;
423 }
424 }
425 free(collide);
426 #if 0
427 DEBUG1(
428 printf("after creation:\n");
429 MappingPrint(pMap);
430 )
431 #endif
432
433 /* In this step, we will remove overlapping entries from the mapping.
434 **
435 ** We use a greedy algorithm. Select the largest mapping first and
436 ** remove all overlapping mappings. Then take the next largest
437 ** mapping and remove others that overlap with it. Keep going until
438 ** all mappings have been processed.
439 */
440 MappingSortSize(pMap);
441 do{
442 changes = 0;
443 for(i=0; i<pMap->nUsed; i++){
444 int sortNeeded = 0;
445 int purgeNeeded = 0;
446 struct Mapping_entry *pA;
447 pA = &pMap->aMap[i];
448 for(j=i+1; j<pMap->nUsed; j++){
449 int diff;
450 struct Mapping_entry *pB;
451 pB = &pMap->aMap[j];
452 if( pB->fromLast<pA->fromFirst || pB->fromFirst>pA->fromLast ){
453 /* No overlap. Do nothing */
454 }else if( pB->fromFirst>=pA->fromFirst && pB->fromLast<=pA->fromLast ){
455 /* B is contained entirely within A. Drop B */
456 pB->fromFirst = -1;
457 purgeNeeded = 1;
458 continue;
459 }else if( pB->fromFirst<pA->fromFirst ){
460 /* The tail B overlaps the head of A */
461 assert( pB->fromLast>=pA->fromFirst && pB->fromLast<=pA->fromLast );
462 diff = pB->fromLast + 1 - pA->fromFirst;
463 pB->fromLast -= diff;
464 pB->toLast -= diff;
465 sortNeeded = 1;
466 }else{
467 /* The head of B overlaps the tail of A */
468 assert( pB->fromFirst<=pA->fromLast && pB->fromLast>pA->fromLast );
469 diff = pA->fromLast + 1 - pB->fromFirst;
470 pB->fromFirst += diff;
471 pB->toFirst += diff;
472 sortNeeded = 1;
473 }
474 if( pB->toLast<pA->toFirst || pB->toFirst>pA->toLast ){
475 /* No overlap. Do nothing */
476 }else if( pB->toFirst>=pA->toFirst && pB->toLast<=pA->toLast ){
477 /* B is contained entirely within A. Drop B */
478 pB->fromFirst = -1;
479 purgeNeeded = 1;
480 }else if( pB->toFirst<pA->toFirst ){
481 /* The tail of B overlaps the head of A */
482 assert( pB->toLast>=pA->toFirst && pB->toLast<=pA->toLast );
483 diff = pB->toLast + 1 - pA->toFirst;
484 pB->fromLast -= diff;
485 pB->toLast -= diff;
486 sortNeeded = 1;
487 }else{
488 /* The head of B overlaps the tail of A */
489 assert( pB->toFirst<=pA->toLast && pB->toLast>pA->toLast );
490 diff = pA->toLast + 1 - pB->toFirst;
491 pB->fromFirst += diff;
492 pB->toFirst += diff;
493 sortNeeded = 1;
494 }
495 }
496 if( purgeNeeded ){
497 MappingPurgeDeletedEntries(pMap);
498 /* changes++; */
499 }
500 if( sortNeeded && i<pMap->nUsed-2 ){
501 MappingSortSize(pMap);
502 /* changes++; */
503 }
504 }
505 }while( changes );
506
507 /* Final step: Arrange the mapping entires so that they are in the
508 ** order of the output file. Then fill in the nExtra values.
509 */
510 add_nextra:
511 MappingSortTo(pMap);
@@ -547,36 +555,41 @@
555
556 /*
557 ** Do a three-way merge. Initialize pOut to contain the result.
558 */
559 void blob_merge(Blob *pPivot, Blob *pV1, Blob *pV2, Blob *pOut){
560 Mapping map1, map2, map3;
561 int i, j;
562 const char *zV1, *zV2;
563 blob_zero(pOut);
 
564 MappingInit(
565 blob_buffer(pPivot), blob_size(pPivot),
566 blob_buffer(pV1), blob_size(pV1),
567 &map1);
568 MappingSortFrom(&map1);
569 DEBUG1(
570 printf("map1-final:\n");
571 MappingPrint(&map1);
 
572 )
573 MappingInit(
574 blob_buffer(pPivot), blob_size(pPivot),
575 blob_buffer(pV2), blob_size(pV2),
576 &map2);
577 DEBUG1(
578 printf("map2-final:\n");
579 MappingPrint(&map2);
580 )
581 MappingInit(
582 blob_buffer(pV1), blob_size(pV1),
583 blob_buffer(pV2), blob_size(pV2),
584 &map3);
585 DEBUG1(
586 printf("map3-final:\n");
587 MappingPrint(&map3);
588 )
589 zV1 = blob_buffer(pV1);
590 zV2 = blob_buffer(pV2);
 
591 if( map1.aMap[0].toFirst>0 ){
592 blob_append(pOut, zV1, map1.aMap[0].toFirst);
593 DEBUG1( printf("INSERT %d bytes from V1[0..%d]\n",
594 map1.aMap[0].toFirst, map1.aMap[0].toFirst-1); )
595 }
@@ -583,22 +596,39 @@
596 if( map2.aMap[0].toFirst>0 ){
597 blob_append(pOut, zV2, map2.aMap[0].toFirst);
598 DEBUG1( printf("INSERT %d bytes from V2[0..%d]\n",
599 map2.aMap[0].toFirst, map2.aMap[0].toFirst-1); )
600 }
601 for(i=j=0; i<map2.nUsed; i++){
602 int iFirst, iLast, nInsert, iTail;
603 struct Mapping_entry *p = &map2.aMap[i];
604 while( j<map3.nUsed-1 && map3.aMap[j+1].toFirst>p->toFirst ){ j++; }
605 DEBUG1(
606 printf("map2: %6d..%-6d %6d..%-6d %d\n",
607 p->fromFirst, p->fromLast, p->toFirst, p->toLast, p->nExtra);
608 printf("map3: j=%-6d %6d..%-6d\n", j, map3.aMap[j].toFirst,
609 map3.aMap[j].toLast);
610 );
611 iTail = p->toLast + p->nExtra;
612 if( i<map2.nUsed-1 &&
613 map3.aMap[j].toFirst<=p->toFirst && map3.aMap[j].toLast>=iTail ){
614 blob_append(pOut, &zV2[p->toFirst], iTail - p->toFirst + 1);
615 DEBUG1(
616 printf("COPY %d bytes from V2[%d..%d]\n", iTail - p->toFirst+1,
617 p->toFirst, iTail);
618 )
619 continue;
620 }
621 iFirst = MappingTranslate(&map1, p->fromFirst, 0);
622 iLast = MappingTranslate(&map1, p->fromLast, &nInsert);
623 blob_append(pOut, &zV1[iFirst], iLast - iFirst + 1);
624 DEBUG1(
625 printf("COPY %d bytes from V1[%d..%d]\n", iLast-iFirst+1, iFirst, iLast);
626 )
627 if( p->nExtra>0 ){
628 if( p->nExtra==nInsert &&
629 memcmp(&zV2[p->toLast+1], &zV1[iLast-nInsert+1], nInsert)==0 ){
630 /* Omit a duplicate insert */
631 DEBUG1( printf("OMIT duplicate insert\n"); )
632 }else{
633 blob_append(pOut, &zV2[p->toLast+1], p->nExtra);
634 DEBUG1(
@@ -608,10 +638,11 @@
638 }
639 }
640 }
641 MappingClear(&map1);
642 MappingClear(&map2);
643 MappingClear(&map3);
644 }
645
646 /*
647 ** COMMAND: test-3-way-merge
648 **
@@ -651,11 +682,11 @@
682 /*
683 ** COMMAND: test-mapping
684 */
685 void mapping_test(void){
686 int i;
687 const char *za, *zb;
688 Blob a, b;
689 Mapping map;
690 if( g.argc!=4 ){
691 usage("FILE1 FILE2");
692 }
@@ -663,15 +694,24 @@
694 blob_read_from_file(&b, g.argv[3]);
695 memset(&map, 0, sizeof(map));
696 MappingInit(blob_buffer(&a), blob_size(&a),
697 blob_buffer(&b), blob_size(&b),
698 &map);
699 DEBUG1(
700 printf("map-final:\n");
701 MappingPrint(&map);
702 )
703 za = blob_buffer(&a);
704 zb = blob_buffer(&b);
705 for(i=0; i<map.nUsed; i++){
706 printf("======= %6d..%-6d %6d..%-6d %d\n",
707 map.aMap[i].fromFirst, map.aMap[i].fromLast,
708 map.aMap[i].toFirst, map.aMap[i].toLast,
709 map.aMap[i].nExtra);
710 printf("%.*s\n", map.aMap[i].fromLast - map.aMap[i].fromFirst + 1,
711 &za[map.aMap[i].fromFirst]);
712 if( map.aMap[i].nExtra ){
713 printf("======= EXTRA:\n");
714 printf("%.*s\n", map.aMap[i].nExtra, &zb[map.aMap[i].toLast+1]);
715 }
716 }
717 }
718

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button