Fossil SCM

Steps toward doing a better job of automatically resolving merge conflicts. Compiles but does not work. This is an incremental check-in.

drh 2024-12-05 12:15 trunk
Commit 849c7eb6ca68061212b4f944e7f57fe11f265215c4fffb4cc1cef5389a5ac16d
3 files changed +48 -1 +135 -5 +100 -1
+48 -1
--- src/blob.c
+++ src/blob.c
@@ -665,11 +665,12 @@
665665
pBlob->nUsed = dehttpize(pBlob->aData);
666666
}
667667
668668
/*
669669
** Extract N bytes from blob pFrom and use it to initialize blob pTo.
670
-** Return the actual number of bytes extracted.
670
+** Return the actual number of bytes extracted. The cursor position
671
+** is advanced by the number of bytes extracted.
671672
**
672673
** After this call completes, pTo will be an ephemeral blob.
673674
*/
674675
int blob_extract(Blob *pFrom, int N, Blob *pTo){
675676
blob_is_init(pFrom);
@@ -687,10 +688,56 @@
687688
pTo->iCursor = 0;
688689
pTo->xRealloc = blobReallocStatic;
689690
pFrom->iCursor += N;
690691
return N;
691692
}
693
+
694
+/*
695
+** Extract N **lines** of text from blob pFrom beginning at the current
696
+** cursor position and use that text to initialize blob pTo. Unlike the
697
+** blob_extract() routine, the cursor position is unchanged.
698
+**
699
+** pTo is assumed to be uninitialized.
700
+**
701
+** After this call completes, pTo will be an ephemeral blob.
702
+*/
703
+int blob_extract_lines(Blob *pFrom, int N, Blob *pTo){
704
+ int i;
705
+ int mx;
706
+ int iStart;
707
+ int n;
708
+ const char *z;
709
+
710
+ blob_zero(pTo);
711
+ z = pFrom->aData;
712
+ i = pFrom->iCursor;
713
+ mx = pFrom->nUsed;
714
+ while( N>0 ){
715
+ while( i<mx && z[i]!='\n' ){ i++; }
716
+ if( i>=mx ) break;
717
+ i++;
718
+ }
719
+ iStart = pFrom->iCursor;
720
+ n = blob_extract(pFrom, i-pFrom->iCursor, pTo);
721
+ pFrom->iCursor = iStart;
722
+ return n;
723
+}
724
+
725
+/*
726
+** Return the number of lines of text in the blob. If the last
727
+** line is incomplete (if it does not have a \n at the end) then
728
+** it still counts.
729
+*/
730
+int blob_linecount(Blob *p){
731
+ int n = 0;
732
+ int i;
733
+ for(i=0; i<p->nUsed; i++){
734
+ if( p->aData[i]=='\n' ) n++;
735
+ }
736
+ if( p->nUsed>0 && p->aData[p->nUsed-1]!='\n' ) n++;
737
+ return n;
738
+}
692739
693740
/*
694741
** Rewind the cursor on a blob back to the beginning.
695742
*/
696743
void blob_rewind(Blob *p){
697744
--- src/blob.c
+++ src/blob.c
@@ -665,11 +665,12 @@
665 pBlob->nUsed = dehttpize(pBlob->aData);
666 }
667
668 /*
669 ** Extract N bytes from blob pFrom and use it to initialize blob pTo.
670 ** Return the actual number of bytes extracted.
 
671 **
672 ** After this call completes, pTo will be an ephemeral blob.
673 */
674 int blob_extract(Blob *pFrom, int N, Blob *pTo){
675 blob_is_init(pFrom);
@@ -687,10 +688,56 @@
687 pTo->iCursor = 0;
688 pTo->xRealloc = blobReallocStatic;
689 pFrom->iCursor += N;
690 return N;
691 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
692
693 /*
694 ** Rewind the cursor on a blob back to the beginning.
695 */
696 void blob_rewind(Blob *p){
697
--- src/blob.c
+++ src/blob.c
@@ -665,11 +665,12 @@
665 pBlob->nUsed = dehttpize(pBlob->aData);
666 }
667
668 /*
669 ** Extract N bytes from blob pFrom and use it to initialize blob pTo.
670 ** Return the actual number of bytes extracted. The cursor position
671 ** is advanced by the number of bytes extracted.
672 **
673 ** After this call completes, pTo will be an ephemeral blob.
674 */
675 int blob_extract(Blob *pFrom, int N, Blob *pTo){
676 blob_is_init(pFrom);
@@ -687,10 +688,56 @@
688 pTo->iCursor = 0;
689 pTo->xRealloc = blobReallocStatic;
690 pFrom->iCursor += N;
691 return N;
692 }
693
694 /*
695 ** Extract N **lines** of text from blob pFrom beginning at the current
696 ** cursor position and use that text to initialize blob pTo. Unlike the
697 ** blob_extract() routine, the cursor position is unchanged.
698 **
699 ** pTo is assumed to be uninitialized.
700 **
701 ** After this call completes, pTo will be an ephemeral blob.
702 */
703 int blob_extract_lines(Blob *pFrom, int N, Blob *pTo){
704 int i;
705 int mx;
706 int iStart;
707 int n;
708 const char *z;
709
710 blob_zero(pTo);
711 z = pFrom->aData;
712 i = pFrom->iCursor;
713 mx = pFrom->nUsed;
714 while( N>0 ){
715 while( i<mx && z[i]!='\n' ){ i++; }
716 if( i>=mx ) break;
717 i++;
718 }
719 iStart = pFrom->iCursor;
720 n = blob_extract(pFrom, i-pFrom->iCursor, pTo);
721 pFrom->iCursor = iStart;
722 return n;
723 }
724
725 /*
726 ** Return the number of lines of text in the blob. If the last
727 ** line is incomplete (if it does not have a \n at the end) then
728 ** it still counts.
729 */
730 int blob_linecount(Blob *p){
731 int n = 0;
732 int i;
733 for(i=0; i<p->nUsed; i++){
734 if( p->aData[i]=='\n' ) n++;
735 }
736 if( p->nUsed>0 && p->aData[p->nUsed-1]!='\n' ) n++;
737 return n;
738 }
739
740 /*
741 ** Rewind the cursor on a blob back to the beginning.
742 */
743 void blob_rewind(Blob *p){
744
+135 -5
--- src/diff.c
+++ src/diff.c
@@ -50,10 +50,11 @@
5050
#define DIFF_RAW 0x00040000 /* Raw triples - for debugging */
5151
#define DIFF_TCL 0x00080000 /* For the --tk option */
5252
#define DIFF_INCBINARY 0x00100000 /* The --diff-binary option */
5353
#define DIFF_SHOW_VERS 0x00200000 /* Show compared versions */
5454
#define DIFF_DARKMODE 0x00400000 /* Use dark mode for HTML */
55
+#define DIFF_BY_TOKEN 0x01000000 /* Split on tokens, not lines */
5556
5657
/*
5758
** Per file information that may influence output.
5859
*/
5960
#define DIFF_FILE_ADDED 0x40000000 /* Added or rename destination */
@@ -319,10 +320,113 @@
319320
320321
/* Return results */
321322
*pnLine = nLine;
322323
return a;
323324
}
325
+
326
+/*
327
+** Character classes for the purpose of tokenization.
328
+**
329
+** 1 - alphanumeric
330
+** 2 - whitespace
331
+** 3 - punctuation
332
+*/
333
+static char aTCharClass[256] = {
334
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
335
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
336
+ 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
337
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3,
338
+
339
+ 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
340
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3,
341
+ 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
342
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3,
343
+
344
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
345
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
346
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
347
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
348
+
349
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
350
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
351
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
352
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
353
+};
354
+
355
+/*
356
+** Count the number of tokens in the given string.
357
+*/
358
+static int count_tokens(const unsigned char *p, int n){
359
+ int nToken = 0;
360
+ int iPrev = 0;
361
+ int i;
362
+ for(i=0; i<n; i++){
363
+ char x = aTCharClass[p[i]];
364
+ if( x!=iPrev ){
365
+ iPrev = x;
366
+ nToken++;
367
+ }
368
+ }
369
+ return nToken;
370
+}
371
+
372
+/*
373
+** Return an array of DLine objects containing a pointer to the
374
+** start of each token and a hash of that token. The lower
375
+** bits of the hash store the length of each token.
376
+**
377
+** This is like break_into_lines() except that it works with tokens
378
+** instead of lines. A token is:
379
+**
380
+** * A contiguous sequence of alphanumeric characters.
381
+** * A contiguous sequence of whitespace
382
+** * A contiguous sequence of punctuation characters.
383
+**
384
+** Return 0 if the file is binary or contains a line that is
385
+** too long.
386
+*/
387
+static DLine *break_into_tokens(
388
+ const char *z,
389
+ int n,
390
+ int *pnToken,
391
+ u64 diffFlags
392
+){
393
+ int nToken, i, k;
394
+ u64 h, h2;
395
+ DLine *a;
396
+ unsigned char *p = (unsigned char*)z;
397
+
398
+ nToken = count_tokens(p, n);
399
+ a = fossil_malloc( sizeof(a[0])*(nToken+1) );
400
+ memset(a, 0, sizeof(a[0])*(nToken+1));
401
+ if( n==0 ){
402
+ *pnToken = 0;
403
+ return a;
404
+ }
405
+ i = 0;
406
+ while( n>0 ){
407
+ char x = aTCharClass[*p];
408
+ h = 0xcbf29ce484222325LL;
409
+ for(k=1; k<n && aTCharClass[p[k]]==x; k++){
410
+ h ^= p[k];
411
+ h *= 0x100000001b3LL;
412
+ }
413
+ a[i].z = (char*)p;
414
+ a[i].n = k;
415
+ a[i].h = h = ((h%281474976710597LL)<<LENGTH_MASK_SZ) | k;
416
+ h2 = h % nToken;
417
+ a[i].iNext = a[h2].iHash;
418
+ a[h2].iHash = i+1;
419
+ p += k; n -= k;
420
+ i++;
421
+ };
422
+ assert( i==nToken );
423
+
424
+ /* Return results */
425
+ *pnToken = nToken;
426
+ return a;
427
+}
324428
325429
/*
326430
** Return zero if two DLine elements are identical.
327431
*/
328432
static int compare_dline(const DLine *pA, const DLine *pB){
@@ -2997,14 +3101,21 @@
29973101
if( (pCfg->diffFlags & DIFF_IGNORE_ALLWS)==DIFF_IGNORE_ALLWS ){
29983102
c.xDiffer = compare_dline_ignore_allws;
29993103
}else{
30003104
c.xDiffer = compare_dline;
30013105
}
3002
- c.aFrom = break_into_lines(blob_str(pA_Blob), blob_size(pA_Blob),
3003
- &c.nFrom, pCfg->diffFlags);
3004
- c.aTo = break_into_lines(blob_str(pB_Blob), blob_size(pB_Blob),
3005
- &c.nTo, pCfg->diffFlags);
3106
+ if( pCfg->diffFlags & DIFF_BY_TOKEN ){
3107
+ c.aFrom = break_into_tokens(blob_str(pA_Blob), blob_size(pA_Blob),
3108
+ &c.nFrom, pCfg->diffFlags);
3109
+ c.aTo = break_into_tokens(blob_str(pB_Blob), blob_size(pB_Blob),
3110
+ &c.nTo, pCfg->diffFlags);
3111
+ }else{
3112
+ c.aFrom = break_into_lines(blob_str(pA_Blob), blob_size(pA_Blob),
3113
+ &c.nFrom, pCfg->diffFlags);
3114
+ c.aTo = break_into_lines(blob_str(pB_Blob), blob_size(pB_Blob),
3115
+ &c.nTo, pCfg->diffFlags);
3116
+ }
30063117
if( c.aFrom==0 || c.aTo==0 ){
30073118
fossil_free(c.aFrom);
30083119
fossil_free(c.aTo);
30093120
if( pOut ){
30103121
diff_errmsg(pOut, DIFF_CANNOT_COMPUTE_BINARY, pCfg->diffFlags);
@@ -3035,10 +3146,26 @@
30353146
}
30363147
}
30373148
if( (pCfg->diffFlags & DIFF_NOOPT)==0 ){
30383149
diff_optimize(&c);
30393150
}
3151
+ if( (pCfg->diffFlags & DIFF_BY_TOKEN)!=0 ){
3152
+ /* Convert token counts into byte counts. */
3153
+ int i;
3154
+ int iA = 0;
3155
+ int iB = 0;
3156
+ for(i=0; c.aEdit[i] || c.aEdit[i+1] || c.aEdit[i+2]; i+=3){
3157
+ int k, sum;
3158
+ for(k=0, sum=0; k<c.aEdit[i]; k++) sum += c.aFrom[iA++].n;
3159
+ iB += c.aEdit[i];
3160
+ c.aEdit[i] = sum;
3161
+ for(k=0, sum=0; k<c.aEdit[i+1]; k++) sum += c.aFrom[iA++].n;
3162
+ c.aEdit[i+1] = sum;
3163
+ for(k=0, sum=0; k<c.aEdit[i+2]; k++) sum += c.aTo[iB++].n;
3164
+ c.aEdit[i+2] = sum;
3165
+ }
3166
+ }
30403167
30413168
if( pOut ){
30423169
if( pCfg->diffFlags & DIFF_NUMSTAT ){
30433170
int nDel = 0, nIns = 0, i;
30443171
for(i=0; c.aEdit[i] || c.aEdit[i+1] || c.aEdit[i+2]; i+=3){
@@ -3049,11 +3176,11 @@
30493176
g.diffCnt[2] += nDel;
30503177
if( nIns+nDel ){
30513178
g.diffCnt[0]++;
30523179
blob_appendf(pOut, "%10d %10d", nIns, nDel);
30533180
}
3054
- }else if( pCfg->diffFlags & DIFF_RAW ){
3181
+ }else if( pCfg->diffFlags & (DIFF_RAW|DIFF_BY_TOKEN) ){
30553182
const int *R = c.aEdit;
30563183
unsigned int r;
30573184
for(r=0; R[r] || R[r+1] || R[r+2]; r += 3){
30583185
blob_appendf(pOut, " copy %6d delete %6d insert %6d\n",
30593186
R[r], R[r+1], R[r+2]);
@@ -3157,10 +3284,13 @@
31573284
31583285
/* Undocumented and unsupported flags used for development
31593286
** debugging and analysis: */
31603287
if( find_option("debug",0,0)!=0 ) diffFlags |= DIFF_DEBUG;
31613288
if( find_option("raw",0,0)!=0 ) diffFlags |= DIFF_RAW;
3289
+ if( find_option("bytoken",0,0)!=0 ){
3290
+ diffFlags = DIFF_RAW|DIFF_BY_TOKEN;
3291
+ }
31623292
}
31633293
if( (z = find_option("context","c",1))!=0 ){
31643294
char *zEnd;
31653295
f = (int)strtol(z, &zEnd, 10);
31663296
if( zEnd[0]==0 && errno!=ERANGE ){
31673297
--- src/diff.c
+++ src/diff.c
@@ -50,10 +50,11 @@
50 #define DIFF_RAW 0x00040000 /* Raw triples - for debugging */
51 #define DIFF_TCL 0x00080000 /* For the --tk option */
52 #define DIFF_INCBINARY 0x00100000 /* The --diff-binary option */
53 #define DIFF_SHOW_VERS 0x00200000 /* Show compared versions */
54 #define DIFF_DARKMODE 0x00400000 /* Use dark mode for HTML */
 
55
56 /*
57 ** Per file information that may influence output.
58 */
59 #define DIFF_FILE_ADDED 0x40000000 /* Added or rename destination */
@@ -319,10 +320,113 @@
319
320 /* Return results */
321 *pnLine = nLine;
322 return a;
323 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
325 /*
326 ** Return zero if two DLine elements are identical.
327 */
328 static int compare_dline(const DLine *pA, const DLine *pB){
@@ -2997,14 +3101,21 @@
2997 if( (pCfg->diffFlags & DIFF_IGNORE_ALLWS)==DIFF_IGNORE_ALLWS ){
2998 c.xDiffer = compare_dline_ignore_allws;
2999 }else{
3000 c.xDiffer = compare_dline;
3001 }
3002 c.aFrom = break_into_lines(blob_str(pA_Blob), blob_size(pA_Blob),
3003 &c.nFrom, pCfg->diffFlags);
3004 c.aTo = break_into_lines(blob_str(pB_Blob), blob_size(pB_Blob),
3005 &c.nTo, pCfg->diffFlags);
 
 
 
 
 
 
 
3006 if( c.aFrom==0 || c.aTo==0 ){
3007 fossil_free(c.aFrom);
3008 fossil_free(c.aTo);
3009 if( pOut ){
3010 diff_errmsg(pOut, DIFF_CANNOT_COMPUTE_BINARY, pCfg->diffFlags);
@@ -3035,10 +3146,26 @@
3035 }
3036 }
3037 if( (pCfg->diffFlags & DIFF_NOOPT)==0 ){
3038 diff_optimize(&c);
3039 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3040
3041 if( pOut ){
3042 if( pCfg->diffFlags & DIFF_NUMSTAT ){
3043 int nDel = 0, nIns = 0, i;
3044 for(i=0; c.aEdit[i] || c.aEdit[i+1] || c.aEdit[i+2]; i+=3){
@@ -3049,11 +3176,11 @@
3049 g.diffCnt[2] += nDel;
3050 if( nIns+nDel ){
3051 g.diffCnt[0]++;
3052 blob_appendf(pOut, "%10d %10d", nIns, nDel);
3053 }
3054 }else if( pCfg->diffFlags & DIFF_RAW ){
3055 const int *R = c.aEdit;
3056 unsigned int r;
3057 for(r=0; R[r] || R[r+1] || R[r+2]; r += 3){
3058 blob_appendf(pOut, " copy %6d delete %6d insert %6d\n",
3059 R[r], R[r+1], R[r+2]);
@@ -3157,10 +3284,13 @@
3157
3158 /* Undocumented and unsupported flags used for development
3159 ** debugging and analysis: */
3160 if( find_option("debug",0,0)!=0 ) diffFlags |= DIFF_DEBUG;
3161 if( find_option("raw",0,0)!=0 ) diffFlags |= DIFF_RAW;
 
 
 
3162 }
3163 if( (z = find_option("context","c",1))!=0 ){
3164 char *zEnd;
3165 f = (int)strtol(z, &zEnd, 10);
3166 if( zEnd[0]==0 && errno!=ERANGE ){
3167
--- src/diff.c
+++ src/diff.c
@@ -50,10 +50,11 @@
50 #define DIFF_RAW 0x00040000 /* Raw triples - for debugging */
51 #define DIFF_TCL 0x00080000 /* For the --tk option */
52 #define DIFF_INCBINARY 0x00100000 /* The --diff-binary option */
53 #define DIFF_SHOW_VERS 0x00200000 /* Show compared versions */
54 #define DIFF_DARKMODE 0x00400000 /* Use dark mode for HTML */
55 #define DIFF_BY_TOKEN 0x01000000 /* Split on tokens, not lines */
56
57 /*
58 ** Per file information that may influence output.
59 */
60 #define DIFF_FILE_ADDED 0x40000000 /* Added or rename destination */
@@ -319,10 +320,113 @@
320
321 /* Return results */
322 *pnLine = nLine;
323 return a;
324 }
325
/*
** Character classes for the purpose of tokenization.  The table is
** indexed by byte value.
**
**    1 - alphanumeric
**    2 - whitespace
**    3 - punctuation
**
** Every byte from 0x00 through 0x20 (all control characters and space)
** is treated as whitespace.  Every byte from 0x80 through 0xff is
** treated as alphanumeric.  The digits '0' through '9' and the letters
** 'A'-'Z' and 'a'-'z' are alphanumeric.  All other ASCII bytes,
** including '_' and 0x7f, are punctuation.
*/
static const char aTCharClass[256] = {
  /* 0x00 - 0x0f */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x10 - 0x1f */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x20 - 0x2f:  space ! " # $ % & ' ( ) * + , - . / */
  2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  /* 0x30 - 0x3f:  0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3,

  /* 0x40 - 0x4f:  @ A-O */
  3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50 - 0x5f:  P-Z [ \ ] ^ _ */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3,
  /* 0x60 - 0x6f:  ` a-o */
  3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70 - 0x7f:  p-z { | } ~ DEL */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3,

  /* 0x80 - 0xbf */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

  /* 0xc0 - 0xff */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
};
354
355 /*
356 ** Count the number of tokens in the given string.
357 */
358 static int count_tokens(const unsigned char *p, int n){
359 int nToken = 0;
360 int iPrev = 0;
361 int i;
362 for(i=0; i<n; i++){
363 char x = aTCharClass[p[i]];
364 if( x!=iPrev ){
365 iPrev = x;
366 nToken++;
367 }
368 }
369 return nToken;
370 }
371
372 /*
373 ** Return an array of DLine objects containing a pointer to the
374 ** start of each token and a hash of that token. The lower
375 ** bits of the hash store the length of each token.
376 **
377 ** This is like break_into_lines() except that it works with tokens
378 ** instead of lines. A token is:
379 **
380 ** * A contiguous sequence of alphanumeric characters.
381 ** * A contiguous sequence of whitespace
382 ** * A contiguous sequence of punctuation characters.
383 **
384 ** Return 0 if the file is binary or contains a line that is
385 ** too long.
386 */
387 static DLine *break_into_tokens(
388 const char *z,
389 int n,
390 int *pnToken,
391 u64 diffFlags
392 ){
393 int nToken, i, k;
394 u64 h, h2;
395 DLine *a;
396 unsigned char *p = (unsigned char*)z;
397
398 nToken = count_tokens(p, n);
399 a = fossil_malloc( sizeof(a[0])*(nToken+1) );
400 memset(a, 0, sizeof(a[0])*(nToken+1));
401 if( n==0 ){
402 *pnToken = 0;
403 return a;
404 }
405 i = 0;
406 while( n>0 ){
407 char x = aTCharClass[*p];
408 h = 0xcbf29ce484222325LL;
409 for(k=1; k<n && aTCharClass[p[k]]==x; k++){
410 h ^= p[k];
411 h *= 0x100000001b3LL;
412 }
413 a[i].z = (char*)p;
414 a[i].n = k;
415 a[i].h = h = ((h%281474976710597LL)<<LENGTH_MASK_SZ) | k;
416 h2 = h % nToken;
417 a[i].iNext = a[h2].iHash;
418 a[h2].iHash = i+1;
419 p += k; n -= k;
420 i++;
421 };
422 assert( i==nToken );
423
424 /* Return results */
425 *pnToken = nToken;
426 return a;
427 }
428
429 /*
430 ** Return zero if two DLine elements are identical.
431 */
432 static int compare_dline(const DLine *pA, const DLine *pB){
@@ -2997,14 +3101,21 @@
3101 if( (pCfg->diffFlags & DIFF_IGNORE_ALLWS)==DIFF_IGNORE_ALLWS ){
3102 c.xDiffer = compare_dline_ignore_allws;
3103 }else{
3104 c.xDiffer = compare_dline;
3105 }
3106 if( pCfg->diffFlags & DIFF_BY_TOKEN ){
3107 c.aFrom = break_into_tokens(blob_str(pA_Blob), blob_size(pA_Blob),
3108 &c.nFrom, pCfg->diffFlags);
3109 c.aTo = break_into_tokens(blob_str(pB_Blob), blob_size(pB_Blob),
3110 &c.nTo, pCfg->diffFlags);
3111 }else{
3112 c.aFrom = break_into_lines(blob_str(pA_Blob), blob_size(pA_Blob),
3113 &c.nFrom, pCfg->diffFlags);
3114 c.aTo = break_into_lines(blob_str(pB_Blob), blob_size(pB_Blob),
3115 &c.nTo, pCfg->diffFlags);
3116 }
3117 if( c.aFrom==0 || c.aTo==0 ){
3118 fossil_free(c.aFrom);
3119 fossil_free(c.aTo);
3120 if( pOut ){
3121 diff_errmsg(pOut, DIFF_CANNOT_COMPUTE_BINARY, pCfg->diffFlags);
@@ -3035,10 +3146,26 @@
3146 }
3147 }
3148 if( (pCfg->diffFlags & DIFF_NOOPT)==0 ){
3149 diff_optimize(&c);
3150 }
3151 if( (pCfg->diffFlags & DIFF_BY_TOKEN)!=0 ){
3152 /* Convert token counts into byte counts. */
3153 int i;
3154 int iA = 0;
3155 int iB = 0;
3156 for(i=0; c.aEdit[i] || c.aEdit[i+1] || c.aEdit[i+2]; i+=3){
3157 int k, sum;
3158 for(k=0, sum=0; k<c.aEdit[i]; k++) sum += c.aFrom[iA++].n;
3159 iB += c.aEdit[i];
3160 c.aEdit[i] = sum;
3161 for(k=0, sum=0; k<c.aEdit[i+1]; k++) sum += c.aFrom[iA++].n;
3162 c.aEdit[i+1] = sum;
3163 for(k=0, sum=0; k<c.aEdit[i+2]; k++) sum += c.aTo[iB++].n;
3164 c.aEdit[i+2] = sum;
3165 }
3166 }
3167
3168 if( pOut ){
3169 if( pCfg->diffFlags & DIFF_NUMSTAT ){
3170 int nDel = 0, nIns = 0, i;
3171 for(i=0; c.aEdit[i] || c.aEdit[i+1] || c.aEdit[i+2]; i+=3){
@@ -3049,11 +3176,11 @@
3176 g.diffCnt[2] += nDel;
3177 if( nIns+nDel ){
3178 g.diffCnt[0]++;
3179 blob_appendf(pOut, "%10d %10d", nIns, nDel);
3180 }
3181 }else if( pCfg->diffFlags & (DIFF_RAW|DIFF_BY_TOKEN) ){
3182 const int *R = c.aEdit;
3183 unsigned int r;
3184 for(r=0; R[r] || R[r+1] || R[r+2]; r += 3){
3185 blob_appendf(pOut, " copy %6d delete %6d insert %6d\n",
3186 R[r], R[r+1], R[r+2]);
@@ -3157,10 +3284,13 @@
3284
3285 /* Undocumented and unsupported flags used for development
3286 ** debugging and analysis: */
3287 if( find_option("debug",0,0)!=0 ) diffFlags |= DIFF_DEBUG;
3288 if( find_option("raw",0,0)!=0 ) diffFlags |= DIFF_RAW;
3289 if( find_option("bytoken",0,0)!=0 ){
3290 diffFlags = DIFF_RAW|DIFF_BY_TOKEN;
3291 }
3292 }
3293 if( (z = find_option("context","c",1))!=0 ){
3294 char *zEnd;
3295 f = (int)strtol(z, &zEnd, 10);
3296 if( zEnd[0]==0 && errno!=ERANGE ){
3297
+100 -1
--- src/merge3.c
+++ src/merge3.c
@@ -261,10 +261,98 @@
261261
p->xChngBoth = dbgChngBoth;
262262
p->xConflict = dbgConflict;
263263
p->xEnd = dbgStartEnd;
264264
p->xDestroy = dbgDestroy;
265265
}
266
+
267
+/************************* MergeBuilderToken ********************************/
268
+/* This version of MergeBuilder actually performs a merge on file that
269
+** are broken up into tokens instead of lines, and puts the result in pOut.
270
+*/
271
+static void tokenSame(MergeBuilder *p, unsigned int N){
272
+ blob_append(p->pOut, p->pPivot->aData+p->pPivot->iCursor, N);
273
+ p->pPivot->iCursor += N;
274
+ p->pV1->iCursor += N;
275
+ p->pV2->iCursor += N;
276
+}
277
+static void tokenChngV1(MergeBuilder *p, unsigned int nPivot, unsigned nV1){
278
+ blob_append(p->pOut, p->pV1->aData+p->pV1->iCursor, nV1);
279
+ p->pPivot->iCursor += nPivot;
280
+ p->pV1->iCursor += nV1;
281
+ p->pV2->iCursor += nPivot;
282
+}
283
+static void tokenChngV2(MergeBuilder *p, unsigned int nPivot, unsigned nV2){
284
+ blob_append(p->pOut, p->pV2->aData+p->pV2->iCursor, nV2);
285
+ p->pPivot->iCursor += nPivot;
286
+ p->pV1->iCursor += nPivot;
287
+ p->pV2->iCursor += nV2;
288
+}
289
+static void tokenChngBoth(MergeBuilder *p, unsigned int nPivot, unsigned nV){
290
+ blob_append(p->pOut, p->pV2->aData+p->pV2->iCursor, nV);
291
+ p->pPivot->iCursor += nPivot;
292
+ p->pV1->iCursor += nV;
293
+ p->pV2->iCursor += nV;
294
+}
295
+static void tokenConflict(
296
+ MergeBuilder *p,
297
+ unsigned int nPivot,
298
+ unsigned int nV1,
299
+ unsigned int nV2
300
+){
301
+ blob_append(p->pOut, p->pV1->aData+p->pV1->iCursor, nV1);
302
+ p->pPivot->iCursor += nPivot;
303
+ p->pV1->iCursor += nV1;
304
+ p->pV2->iCursor += nV2;
305
+}
306
+static void mergebuilder_init_token(MergeBuilder *p){
307
+ mergebuilder_init(p);
308
+ p->xSame = tokenSame;
309
+ p->xChngV1 = tokenChngV1;
310
+ p->xChngV2 = tokenChngV2;
311
+ p->xChngBoth = tokenChngBoth;
312
+ p->xConflict = tokenConflict;
313
+}
314
+
315
+/*
316
+** Attempt to do a low-level merge on a conflict. The conflict is
317
+** described by the first four parameters, which are the same as the
318
+** arguments to the xConflict method of the MergeBuilder object.
319
+** This routine attempts to resolve the conflict by looking at
320
+** elements of the conflict region that are finer grain than complete
321
+** lines of text.
322
+**
323
+** The result is written into Blob pOut. pOut is initialized by this
324
+** routine.
325
+*/
326
+int merge_try_to_resolve_conflict(
327
+ MergeBuilder *pMB, /* MergeBuilder that encounter conflict */
328
+ unsigned int nPivot, /* Lines of conflict in the pivot */
329
+ unsigned int nV1, /* Lines of conflict in V1 */
330
+ unsigned int nV2, /* Lines of conflict in V2 */
331
+ Blob *pOut /* Write resolution text here */
332
+){
333
+ int nConflict;
334
+ MergeBuilder mb;
335
+ Blob pv, v1, v2;
336
+ mergebuilder_init_token(&mb);
337
+ blob_extract_lines(pMB->pPivot, nPivot, &pv);
338
+ blob_extract_lines(pMB->pV1, nV1, &v1);
339
+ blob_extract_lines(pMB->pV2, nV2, &v2);
340
+ blob_zero(pOut);
341
+ mb.pPivot = &pv;
342
+ mb.pV1 = &v1;
343
+ mb.pV2 = &v2;
344
+ mb.pOut = pOut;
345
+ nConflict = merge_three_blobs(&mb);
346
+ /* pv, v1, and v2 should all be ephemeral blobs, so they do not
347
+ ** need to be freed. */
348
+ /* mb has not allocated any resources, so we do not need to invoke
349
+ ** the xDestroy method. */
350
+ blob_add_final_newline(pOut);
351
+ return nConflict;
352
+}
353
+
266354
267355
/************************* MergeBuilderText **********************************/
268356
/* This version of MergeBuilder actually performs a merge on file and puts
269357
** the result in pOut
270358
*/
@@ -520,12 +608,18 @@
520608
unsigned int nV1,
521609
unsigned int nV2
522610
){
523611
int mx = nPivot;
524612
int i;
613
+ int nOut;
614
+ Blob out;
615
+
616
+ merge_try_to_resolve_conflict(p, nPivot, nV1, nV2, &out);
617
+ nOut = blob_linecount(&out);
525618
if( nV1>mx ) mx = nV1;
526619
if( nV2>mx ) mx = nV2;
620
+ if( nOut>mx ) mx = nOut;
527621
for(i=0; i<mx; i++){
528622
if( i<nPivot ){
529623
tclLineOfText(p->pOut, p->pPivot);
530624
}else{
531625
blob_append_char(p->pOut, '.');
@@ -540,12 +634,17 @@
540634
if( i<nV2 ){
541635
tclLineOfText(p->pOut, p->pV2);
542636
}else{
543637
blob_append_char(p->pOut, '.');
544638
}
545
- blob_append(p->pOut, " X\n", 3);
639
+ if( i<nOut ){
640
+ tclLineOfText(p->pOut, &out);
641
+ }else{
642
+ blob_append(p->pOut, " X\n", 3);
643
+ }
546644
}
645
+ blob_reset(&out);
547646
p->lnPivot += nPivot;
548647
p->lnV1 += nV1;
549648
p->lnV2 += nV2;
550649
}
551650
void mergebuilder_init_tcl(MergeBuilder *p){
552651
--- src/merge3.c
+++ src/merge3.c
@@ -261,10 +261,98 @@
261 p->xChngBoth = dbgChngBoth;
262 p->xConflict = dbgConflict;
263 p->xEnd = dbgStartEnd;
264 p->xDestroy = dbgDestroy;
265 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
267 /************************* MergeBuilderText **********************************/
268 /* This version of MergeBuilder actually performs a merge on file and puts
269 ** the result in pOut
270 */
@@ -520,12 +608,18 @@
520 unsigned int nV1,
521 unsigned int nV2
522 ){
523 int mx = nPivot;
524 int i;
 
 
 
 
 
525 if( nV1>mx ) mx = nV1;
526 if( nV2>mx ) mx = nV2;
 
527 for(i=0; i<mx; i++){
528 if( i<nPivot ){
529 tclLineOfText(p->pOut, p->pPivot);
530 }else{
531 blob_append_char(p->pOut, '.');
@@ -540,12 +634,17 @@
540 if( i<nV2 ){
541 tclLineOfText(p->pOut, p->pV2);
542 }else{
543 blob_append_char(p->pOut, '.');
544 }
545 blob_append(p->pOut, " X\n", 3);
 
 
 
 
546 }
 
547 p->lnPivot += nPivot;
548 p->lnV1 += nV1;
549 p->lnV2 += nV2;
550 }
551 void mergebuilder_init_tcl(MergeBuilder *p){
552
--- src/merge3.c
+++ src/merge3.c
@@ -261,10 +261,98 @@
261 p->xChngBoth = dbgChngBoth;
262 p->xConflict = dbgConflict;
263 p->xEnd = dbgStartEnd;
264 p->xDestroy = dbgDestroy;
265 }
266
267 /************************* MergeBuilderToken ********************************/
268 /* This version of MergeBuilder actually performs a merge on files that
269 ** are broken up into tokens instead of lines, and puts the result in pOut.
270 */
271 static void tokenSame(MergeBuilder *p, unsigned int N){
272 blob_append(p->pOut, p->pPivot->aData+p->pPivot->iCursor, N);
273 p->pPivot->iCursor += N;
274 p->pV1->iCursor += N;
275 p->pV2->iCursor += N;
276 }
277 static void tokenChngV1(MergeBuilder *p, unsigned int nPivot, unsigned nV1){
278 blob_append(p->pOut, p->pV1->aData+p->pV1->iCursor, nV1);
279 p->pPivot->iCursor += nPivot;
280 p->pV1->iCursor += nV1;
281 p->pV2->iCursor += nPivot;
282 }
283 static void tokenChngV2(MergeBuilder *p, unsigned int nPivot, unsigned nV2){
284 blob_append(p->pOut, p->pV2->aData+p->pV2->iCursor, nV2);
285 p->pPivot->iCursor += nPivot;
286 p->pV1->iCursor += nPivot;
287 p->pV2->iCursor += nV2;
288 }
289 static void tokenChngBoth(MergeBuilder *p, unsigned int nPivot, unsigned nV){
290 blob_append(p->pOut, p->pV2->aData+p->pV2->iCursor, nV);
291 p->pPivot->iCursor += nPivot;
292 p->pV1->iCursor += nV;
293 p->pV2->iCursor += nV;
294 }
295 static void tokenConflict(
296 MergeBuilder *p,
297 unsigned int nPivot,
298 unsigned int nV1,
299 unsigned int nV2
300 ){
301 blob_append(p->pOut, p->pV1->aData+p->pV1->iCursor, nV1);
302 p->pPivot->iCursor += nPivot;
303 p->pV1->iCursor += nV1;
304 p->pV2->iCursor += nV2;
305 }
306 static void mergebuilder_init_token(MergeBuilder *p){
307 mergebuilder_init(p);
308 p->xSame = tokenSame;
309 p->xChngV1 = tokenChngV1;
310 p->xChngV2 = tokenChngV2;
311 p->xChngBoth = tokenChngBoth;
312 p->xConflict = tokenConflict;
313 }
314
315 /*
316 ** Attempt to do a low-level merge on a conflict. The conflict is
317 ** described by the first four parameters, which are the same as the
318 ** arguments to the xConflict method of the MergeBuilder object.
319 ** This routine attempts to resolve the conflict by looking at
320 ** elements of the conflict region that are finer grain than complete
321 ** lines of text.
322 **
323 ** The result is written into Blob pOut. pOut is initialized by this
324 ** routine.
325 */
326 int merge_try_to_resolve_conflict(
327 MergeBuilder *pMB, /* MergeBuilder that encounter conflict */
328 unsigned int nPivot, /* Lines of conflict in the pivot */
329 unsigned int nV1, /* Lines of conflict in V1 */
330 unsigned int nV2, /* Lines of conflict in V2 */
331 Blob *pOut /* Write resolution text here */
332 ){
333 int nConflict;
334 MergeBuilder mb;
335 Blob pv, v1, v2;
336 mergebuilder_init_token(&mb);
337 blob_extract_lines(pMB->pPivot, nPivot, &pv);
338 blob_extract_lines(pMB->pV1, nV1, &v1);
339 blob_extract_lines(pMB->pV2, nV2, &v2);
340 blob_zero(pOut);
341 mb.pPivot = &pv;
342 mb.pV1 = &v1;
343 mb.pV2 = &v2;
344 mb.pOut = pOut;
345 nConflict = merge_three_blobs(&mb);
346 /* pv, v1, and v2 should all be ephemeral blobs, so they do not
347 ** need to be freed. */
348 /* mb has not allocated any resources, so we do not need to invoke
349 ** the xDestroy method. */
350 blob_add_final_newline(pOut);
351 return nConflict;
352 }
353
354
355 /************************* MergeBuilderText **********************************/
356 /* This version of MergeBuilder actually performs a merge on file and puts
357 ** the result in pOut
358 */
@@ -520,12 +608,18 @@
608 unsigned int nV1,
609 unsigned int nV2
610 ){
611 int mx = nPivot;
612 int i;
613 int nOut;
614 Blob out;
615
616 merge_try_to_resolve_conflict(p, nPivot, nV1, nV2, &out);
617 nOut = blob_linecount(&out);
618 if( nV1>mx ) mx = nV1;
619 if( nV2>mx ) mx = nV2;
620 if( nOut>mx ) mx = nOut;
621 for(i=0; i<mx; i++){
622 if( i<nPivot ){
623 tclLineOfText(p->pOut, p->pPivot);
624 }else{
625 blob_append_char(p->pOut, '.');
@@ -540,12 +634,17 @@
634 if( i<nV2 ){
635 tclLineOfText(p->pOut, p->pV2);
636 }else{
637 blob_append_char(p->pOut, '.');
638 }
639 if( i<nOut ){
640 tclLineOfText(p->pOut, &out);
641 }else{
642 blob_append(p->pOut, " X\n", 3);
643 }
644 }
645 blob_reset(&out);
646 p->lnPivot += nPivot;
647 p->lnV1 += nV1;
648 p->lnV2 += nV2;
649 }
650 void mergebuilder_init_tcl(MergeBuilder *p){
651

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button