Fossil SCM

Rework the merge algorithm. It now only works for text files. But, it no longer gets confused by line endings (\r\n versus \n) and it reports conflicts.

drh 2007-11-16 20:42 trunk

Commit 36b96b861614cd44073a9e33c9d6f91608d03158

Parent f1b55da0ac3d3c3…

7 files changed +45 +87 -39 +11 -3 +174 -649 +11 -2 +15 -34 +5 -1

~ src/blob.c ~ src/diff.c ~ src/merge.c ~ src/merge3.c ~ src/update.c ~ test/merge1.test ~ test/tester.tcl

M src/blob.c

+45

		--- src/blob.c
		+++ src/blob.c
		@@ -400,10 +400,25 @@
400	400	i++;
401	401	}
402	402	blob_extract(pFrom, i-pFrom->iCursor, pTo);
403	403	return pTo->nUsed;
404	404	}
	405	+
	406	+/*
	407	+** Trim whitespace off of the end of a blob. Return the number
	408	+** of characters remaining.
	409	+**
	410	+** All this does is reduce the length counter. This routine does
	411	+** not insert a new zero terminator.
	412	+*/
	413	+int blob_trim(Blob *p){
	414	+ char *z = p->aData;
	415	+ int n = p->nUsed;
	416	+ while( n>0 && isspace(z[n-1]) ){ n--; }
	417	+ p->nUsed = n;
	418	+ return n;
	419	+}
405	420
406	421	/*
407	422	** Extract a single token from pFrom and use it to initialize pTo.
408	423	** Return the number of bytes in the token. If no token is found,
409	424	** return 0.
		@@ -439,10 +454,40 @@
439	454	int iCursor = pFrom->iCursor;
440	455	blob_extract(pFrom, pFrom->nUsed-pFrom->iCursor, pTo);
441	456	pFrom->iCursor = iCursor;
442	457	return pTo->nUsed;
443	458	}
	459	+
	460	+/*
	461	+** Copy N lines of text from pFrom into pTo. The copy begins at the
	462	+** current cursor position of pIn. The pIn cursor is left pointing
	463	+** at the first character past the last \n copied.
	464	+**
	465	+** If pTo==NULL then this routine simply skips over N lines.
	466	+*/
	467	+void blob_copy_lines(Blob pTo, Blob pFrom, int N){
	468	+ char *z = pFrom->aData;
	469	+ int i = pFrom->iCursor;
	470	+ int n = pFrom->nUsed;
	471	+ int cnt = 0;
	472	+
	473	+ if( N==0 ) return;
	474	+ while( i<n ){
	475	+ if( z[i]=='\n' ){
	476	+ cnt++;
	477	+ if( cnt==N ){
	478	+ i++;
	479	+ break;
	480	+ }
	481	+ }
	482	+ i++;
	483	+ }
	484	+ if( pTo ){
	485	+ blob_append(pTo, &pFrom->aData[pFrom->iCursor], i - pFrom->iCursor);
	486	+ }
	487	+ pFrom->iCursor = i;
	488	+}
444	489
445	490	/*
446	491	** Return true if the blob contains a valid UUID_SIZE-digit base16 identifier.
447	492	*/
448	493	int blob_is_uuid(Blob *pBlob){
449	494

	--- src/blob.c
	+++ src/blob.c
	@@ -400,10 +400,25 @@
400	i++;
401	}
402	blob_extract(pFrom, i-pFrom->iCursor, pTo);
403	return pTo->nUsed;
404	}















405
406	/*
407	** Extract a single token from pFrom and use it to initialize pTo.
408	** Return the number of bytes in the token. If no token is found,
409	** return 0.
	@@ -439,10 +454,40 @@
439	int iCursor = pFrom->iCursor;
440	blob_extract(pFrom, pFrom->nUsed-pFrom->iCursor, pTo);
441	pFrom->iCursor = iCursor;
442	return pTo->nUsed;
443	}






























444
445	/*
446	** Return true if the blob contains a valid UUID_SIZE-digit base16 identifier.
447	*/
448	int blob_is_uuid(Blob *pBlob){
449

	--- src/blob.c
	+++ src/blob.c
	@@ -400,10 +400,25 @@
400	i++;
401	}
402	blob_extract(pFrom, i-pFrom->iCursor, pTo);
403	return pTo->nUsed;
404	}
405
406	/*
407	** Trim whitespace off of the end of a blob. Return the number
408	** of characters remaining.
409	**
410	** All this does is reduce the length counter. This routine does
411	** not insert a new zero terminator.
412	*/
413	int blob_trim(Blob *p){
414	char *z = p->aData;
415	int n = p->nUsed;
416	while( n>0 && isspace(z[n-1]) ){ n--; }
417	p->nUsed = n;
418	return n;
419	}
420
421	/*
422	** Extract a single token from pFrom and use it to initialize pTo.
423	** Return the number of bytes in the token. If no token is found,
424	** return 0.
	@@ -439,10 +454,40 @@
454	int iCursor = pFrom->iCursor;
455	blob_extract(pFrom, pFrom->nUsed-pFrom->iCursor, pTo);
456	pFrom->iCursor = iCursor;
457	return pTo->nUsed;
458	}
459
460	/*
461	** Copy N lines of text from pFrom into pTo. The copy begins at the
462	** current cursor position of pIn. The pIn cursor is left pointing
463	** at the first character past the last \n copied.
464	**
465	** If pTo==NULL then this routine simply skips over N lines.
466	*/
467	void blob_copy_lines(Blob pTo, Blob pFrom, int N){
468	char *z = pFrom->aData;
469	int i = pFrom->iCursor;
470	int n = pFrom->nUsed;
471	int cnt = 0;
472
473	if( N==0 ) return;
474	while( i<n ){
475	if( z[i]=='\n' ){
476	cnt++;
477	if( cnt==N ){
478	i++;
479	break;
480	}
481	}
482	i++;
483	}
484	if( pTo ){
485	blob_append(pTo, &pFrom->aData[pFrom->iCursor], i - pFrom->iCursor);
486	}
487	pFrom->iCursor = i;
488	}
489
490	/*
491	** Return true if the blob contains a valid UUID_SIZE-digit base16 identifier.
492	*/
493	int blob_is_uuid(Blob *pBlob){
494

M src/diff.c

+87 -39

		--- src/diff.c
		+++ src/diff.c
		@@ -26,50 +26,78 @@
26	26	*/
27	27	#include "config.h"
28	28	#include "diff2.h"
29	29	#include <assert.h>
30	30
	31	+
	32	+#if 0
	33	+#define DEBUG(X) X
	34	+#else
	35	+#define DEBUG(X)
	36	+#endif
	37	+
31	38	/*
32	39	** Information about each line of a file being diffed.
	40	+**
	41	+** The lower 20 bits of the hash are the length of the
	42	+** line. If any line is longer than 1048575 characters,
	43	+** the file is considered binary.
33	44	*/
34	45	typedef struct DLine DLine;
35	46	struct DLine {
36	47	const char z; / The text of the line */
37	48	unsigned int h; /* Hash of the line */
38	49	};
39	50
	51	+#define LENGTH_MASK 0x000fffff
	52	+
40	53	/*
41		-** Break a blob into lines by converting inserting \000 characters.
42	54	** Return an array of DLine objects containing a pointer to the
43		-** start of each line and a hash of that line.
	55	+** start of each line and a hash of that line. The lower
	56	+** bits of the hash store the length of each line.
44	57	**
45	58	** Trailing whitespace is removed from each line.
	59	+**
	60	+** Return 0 if the file is binary or contains a line that is
	61	+** longer than 1048575 bytes.
46	62	*/
47	63	static DLine break_into_lines(char z, int *pnLine){
48	64	int nLine, i, j, k, x;
49	65	unsigned int h;
50	66	DLine *a;
51	67
52	68	/* Count the number of lines. Allocate space to hold
53	69	** the returned array.
54	70	*/
55		- for(i=0, nLine=1; z[i]; i++){
56		- if( z[i]=='\n' && z[i+1]!=0 ) nLine++;
	71	+ for(i=j=0, nLine=1; z[i]; i++, j++){
	72	+ int c = z[i];
	73	+ if( c==0 ){
	74	+ return 0;
	75	+ }
	76	+ if( c=='\n' && z[i+1]!=0 ){
	77	+ nLine++;
	78	+ if( j>1048575 ){
	79	+ return 0;
	80	+ }
	81	+ j = 0;
	82	+ }
	83	+ }
	84	+ if( j>1048575 ){
	85	+ return 0;
57	86	}
58	87	a = malloc( nLine*sizeof(a[0]) );
59	88	if( a==0 ) fossil_panic("out of memory");
60	89
61	90	/* Fill in the array */
62	91	for(i=0; i<nLine; i++){
63	92	a[i].z = z;
64	93	for(j=0; z[j] && z[j]!='\n'; j++){}
65	94	for(k=j; k>0 && isspace(z[k-1]); k--){}
66		- z[k] = 0;
67	95	for(h=0, x=0; x<k; x++){
68	96	h = h ^ (h<<2) ^ z[x];
69	97	}
70		- a[i].h = h;
	98	+ a[i].h = (h<<20) \| k;;
71	99	z += j+1;
72	100	}
73	101
74	102	/* Return results */
75	103	*pnLine = nLine;
		@@ -78,26 +106,28 @@
78	106
79	107	/*
80	108	** Return true if two DLine elements are identical.
81	109	*/
82	110	static int same_dline(DLine pA, DLine pB){
83		- return pA->h==pB->h && strcmp(pA->z,pB->z)==0;
	111	+ return pA->h==pB->h && memcmp(pA->z,pB->z,pA->h & LENGTH_MASK)==0;
84	112	}
85	113
86	114	/*
87	115	** Generate a report of the differences between files pA and pB.
88		-** The line ending (\r\n versus \n) is ignored - the two line
89		-** endings are considered to be equivalent.
90		-**
91		-** The return is a pointer to an array of integers that describes
92		-** the difference. Integers come in triples. For each triple,
	116	+** If pOut is not NULL then a unified diff is appended there. It
	117	+** is assumed that pOut has already been initialized. If pOut is
	118	+** NULL, then a pointer to an array of integers is returned.
	119	+** The integers come in triples. For each triple,
93	120	** the elements are the number of lines copied, the number of
94		-** lines delete, and the number of lines inserted. The vector
	121	+** lines deleted, and the number of lines inserted. The vector
95	122	** is terminated by a triple of all zeros.
96	123	**
97		-** The two blobs is destroyed ('\000' values are inserted)
98		-** by the diffing process.
	124	+** This diff utility does not work on binary files. If a binary
	125	+** file is encountered, 0 is returned and pOut is written with
	126	+** text "cannot compute difference between binary files".
	127	+**
	128	+****************************************************************************
99	129	**
100	130	** The core algorithm is a variation on the classic Wagner
101	131	** minimum edit distance with enhancements to reduce the runtime
102	132	** to be almost linear in the common case where the two files
103	133	** have a lot in common. For additional information see
		@@ -171,29 +201,43 @@
171	201	** compared to X and Y so little memory is required. The
172	202	** original Wagner algorithm requires X*Y memory, which for
173	203	** larger files (100K lines) is more memory than we have at
174	204	** hand.
175	205	*/
176		-int text_diff(Blob pA_Blob, Blob pB_Blob, Blob pOut, int nContext){
	206	+int *text_diff(
	207	+ Blob pA_Blob, / FROM file */
	208	+ Blob pB_Blob, / TO file */
	209	+ Blob pOut, / Write unified diff here if not NULL */
	210	+ int nContext /* Amount of context to unified diff */
	211	+){
177	212	DLine A, B; /* Files being compared */
178	213	int X, Y; /* Number of elements in A and B */
179	214	int x, y; /* Indices: A[x] and B[y] */
180	215	int szM = 0; /* Number of rows and columns in M */
181	216	int *M = 0; / Myers matrix */
182	217	int i, d; /* Indices on M. M[d][i] */
183	218	int k, q; /* Diagonal number and distinct from (0,0) */
184	219	int K, D; /* The diagonal and d for the final solution */
185		- int R; / Result vector */
	220	+ int R = 0; / Result vector */
186	221	int r; /* Loop variables */
187	222	int go = 1; /* Outer loop control */
188	223	int MAX; /* Largest of X and Y */
189	224
190	225	/* Break the two files being diffed into individual lines.
191	226	** Compute hashes on each line for fast comparison.
192	227	*/
193	228	A = break_into_lines(blob_str(pA_Blob), &X);
194	229	B = break_into_lines(blob_str(pB_Blob), &Y);
	230	+
	231	+ if( A==0 \|\| B==0 ){
	232	+ free(A);
	233	+ free(B);
	234	+ if( pOut ){
	235	+ blob_appendf(pOut, "cannot compute difference between binary files\n");
	236	+ }
	237	+ return 0;
	238	+ }
195	239
196	240	szM = 0;
197	241	MAX = X>Y ? X : Y;
198	242	for(d=0; go && d<=MAX; d++){
199	243	if( szM<d+1 ){
		@@ -211,11 +255,11 @@
211	255	k = 2*i - d;
212	256	if( d==0 ){
213	257	q = 0;
214	258	}else if( i==0 ){
215	259	q = M[d-1][0];
216		- }else if( M[d-1][i-1] < M[d-1][i] ){
	260	+ }else if( i<d-1 && M[d-1][i-1] < M[d-1][i] ){
217	261	q = M[d-1][i];
218	262	}else{
219	263	q = M[d-1][i-1];
220	264	}
221	265	x = (k + q + 1)/2;
		@@ -224,11 +268,11 @@
224	268	x = y = 0;
225	269	}else{
226	270	while( x<X && y<Y && same_dline(&A[x],&B[y]) ){ x++; y++; }
227	271	}
228	272	M[d][i] = x + y;
229		- /* printf("M[%d][%i] = %d k=%d (%d,%d)\n", d, i, x+y, k, x, y); */
	273	+ DEBUG( printf("M[%d][%i] = %d k=%d (%d,%d)\n", d, i, x+y, k, x, y); )
230	274	if( x==X && y==Y ){
231	275	go = 0;
232	276	break;
233	277	}
234	278	}
		@@ -249,11 +293,11 @@
249	293	**
250	294	*/
251	295	D = d - 1;
252	296	K = X - Y;
253	297	for(d=D, i=(K+D)/2; d>0; d--){
254		- /* printf("d=%d i=%d\n", d, i); */
	298	+ DEBUG( printf("d=%d i=%d\n", d, i); )
255	299	if( i==d \|\| (i>0 && M[d-1][i-1] > M[d-1][i]) ){
256	300	M[d][0] = M[d][i] - M[d-1][i-1] - 1;
257	301	M[d][1] = 0;
258	302	i--;
259	303	}else{
		@@ -260,17 +304,16 @@
260	304	M[d][0] = M[d][i] - M[d-1][i] - 1;
261	305	M[d][1] = 1;
262	306	}
263	307	}
264	308
265		-
266		- /*
267		- printf("---------------\nM[0][0] = %5d\n", M[0][0]);
268		- for(d=1; d<=D; d++){
269		- printf("M[%d][0] = %5d M[%d][1] = %d\n",d,M[d][0],d,M[d][1]);
270		- }
271		- */
	309	+ DEBUG(
	310	+ printf("---------------\nM[0][0] = %5d\n", M[0][0]);
	311	+ for(d=1; d<=D; d++){
	312	+ printf("M[%d][0] = %5d M[%d][1] = %d\n",d,M[d][0],d,M[d][1]);
	313	+ }
	314	+ )
272	315
273	316	/* Allocate the output vector
274	317	*/
275	318	R = malloc( sizeof(R[0])(D+2)3 );
276	319	if( R==0 ){
		@@ -335,11 +378,11 @@
335	378	int skip; /* Number of lines to skip */
336	379
337	380	for(r=0; R[r+1] \|\| R[r+2] \|\| R[r+3]; r += 3*nr){
338	381	/* Figure out how many triples to show in a single block */
339	382	for(nr=1; R[r+nr3]>0 && R[r+nr3]<nContext*2; nr++){}
340		- /* printf("r=%d nr=%d\n", r, nr); */
	383	+ DEBUG( printf("r=%d nr=%d\n", r, nr); )
341	384
342	385	/* For the current block comprising nr triples, figure out
343	386	** how many lines of A and B are to be displayed
344	387	*/
345	388	if( R[r]>nContext ){
		@@ -369,31 +412,31 @@
369	412	/* Show the initial common area */
370	413	a += skip;
371	414	b += skip;
372	415	m = R[r] - skip;
373	416	for(j=0; j<m; j++){
374		- blob_appendf(pOut," %s\n", A[a+j].z);
	417	+ blob_appendf(pOut," %.*s\n", A[a+j].h & LENGTH_MASK, A[a+j].z);
375	418	}
376	419	a += m;
377	420	b += m;
378	421
379	422	/* Show the differences */
380	423	for(i=0; i<nr; i++){
381	424	m = R[r+i*3+1];
382	425	for(j=0; j<m; j++){
383		- blob_appendf(pOut,"-%s\n", A[a+j].z);
	426	+ blob_appendf(pOut,"-%.*s\n", A[a+j].h & LENGTH_MASK, A[a+j].z);
384	427	}
385	428	a += m;
386	429	m = R[r+i*3+2];
387	430	for(j=0; j<m; j++){
388		- blob_appendf(pOut,"+%s\n", B[b+j].z);
	431	+ blob_appendf(pOut,"+%.*s\n", B[b+j].h & LENGTH_MASK, B[b+j].z);
389	432	}
390	433	b += m;
391	434	if( i<nr-1 ){
392	435	m = R[r+i*3+3];
393	436	for(j=0; j<m; j++){
394		- blob_appendf(pOut," %s\n", B[b+j].z);
	437	+ blob_appendf(pOut," %.*s\n", B[b+j].h & LENGTH_MASK, B[b+j].z);
395	438	}
396	439	b += m;
397	440	a += m;
398	441	}
399	442	}
		@@ -401,11 +444,11 @@
401	444	/* Show the final common area */
402	445	assert( nr==i );
403	446	m = R[r+nr*3];
404	447	if( m>nContext ) m = nContext;
405	448	for(j=0; j<m; j++){
406		- blob_appendf(pOut," %s\n", B[b+j].z);
	449	+ blob_appendf(pOut," %.*s\n", B[b+j].h & LENGTH_MASK, B[b+j].z);
407	450	}
408	451	}
409	452	free(R);
410	453	R = 0;
411	454	}
		@@ -422,19 +465,24 @@
422	465	** COMMAND: test-rawdiff
423	466	*/
424	467	void test_rawdiff_cmd(void){
425	468	Blob a, b;
426	469	int r;
	470	+ int i;
427	471	int *R;
428		- if( g.argc!=4 ) usage("FILE1 FILE2");
	472	+ if( g.argc<4 ) usage("FILE1 FILE2 ...");
429	473	blob_read_from_file(&a, g.argv[2]);
430		- blob_read_from_file(&b, g.argv[3]);
431		- R = text_diff(&a, &b, 0, 0);
432		- for(r=0; R[r] \|\| R[r+1] \|\| R[r+2]; r += 3){
433		- printf(" copy %4d delete %4d insert %4d\n", R[r], R[r+1], R[r+2]);
	474	+ for(i=3; i<g.argc; i++){
	475	+ if( i>3 ) printf("-------------------------------\n");
	476	+ blob_read_from_file(&b, g.argv[i]);
	477	+ R = text_diff(&a, &b, 0, 0);
	478	+ for(r=0; R[r] \|\| R[r+1] \|\| R[r+2]; r += 3){
	479	+ printf(" copy %4d delete %4d insert %4d\n", R[r], R[r+1], R[r+2]);
	480	+ }
	481	+ /* free(R); */
	482	+ blob_reset(&b);
434	483	}
435		- free(R);
436	484	}
437	485
438	486	/*
439	487	** COMMAND: test-udiff
440	488	*/
441	489

	--- src/diff.c
	+++ src/diff.c
	@@ -26,50 +26,78 @@
26	*/
27	#include "config.h"
28	#include "diff2.h"
29	#include <assert.h>
30







31	/*
32	** Information about each line of a file being diffed.




33	*/
34	typedef struct DLine DLine;
35	struct DLine {
36	const char z; / The text of the line */
37	unsigned int h; /* Hash of the line */
38	};
39


40	/*
41	** Break a blob into lines by converting inserting \000 characters.
42	** Return an array of DLine objects containing a pointer to the
43	** start of each line and a hash of that line.

44	**
45	** Trailing whitespace is removed from each line.



46	*/
47	static DLine break_into_lines(char z, int *pnLine){
48	int nLine, i, j, k, x;
49	unsigned int h;
50	DLine *a;
51
52	/* Count the number of lines. Allocate space to hold
53	** the returned array.
54	*/
55	for(i=0, nLine=1; z[i]; i++){
56	if( z[i]=='\n' && z[i+1]!=0 ) nLine++;













57	}
58	a = malloc( nLine*sizeof(a[0]) );
59	if( a==0 ) fossil_panic("out of memory");
60
61	/* Fill in the array */
62	for(i=0; i<nLine; i++){
63	a[i].z = z;
64	for(j=0; z[j] && z[j]!='\n'; j++){}
65	for(k=j; k>0 && isspace(z[k-1]); k--){}
66	z[k] = 0;
67	for(h=0, x=0; x<k; x++){
68	h = h ^ (h<<2) ^ z[x];
69	}
70	a[i].h = h;
71	z += j+1;
72	}
73
74	/* Return results */
75	*pnLine = nLine;
	@@ -78,26 +106,28 @@
78
79	/*
80	** Return true if two DLine elements are identical.
81	*/
82	static int same_dline(DLine pA, DLine pB){
83	return pA->h==pB->h && strcmp(pA->z,pB->z)==0;
84	}
85
86	/*
87	** Generate a report of the differences between files pA and pB.
88	** The line ending (\r\n versus \n) is ignored - the two line
89	** endings are considered to be equivalent.
90	**
91	** The return is a pointer to an array of integers that describes
92	** the difference. Integers come in triples. For each triple,
93	** the elements are the number of lines copied, the number of
94	** lines delete, and the number of lines inserted. The vector
95	** is terminated by a triple of all zeros.
96	**
97	** The two blobs is destroyed ('\000' values are inserted)
98	** by the diffing process.



99	**
100	** The core algorithm is a variation on the classic Wagner
101	** minimum edit distance with enhancements to reduce the runtime
102	** to be almost linear in the common case where the two files
103	** have a lot in common. For additional information see
	@@ -171,29 +201,43 @@
171	** compared to X and Y so little memory is required. The
172	** original Wagner algorithm requires X*Y memory, which for
173	** larger files (100K lines) is more memory than we have at
174	** hand.
175	*/
176	int text_diff(Blob pA_Blob, Blob pB_Blob, Blob pOut, int nContext){





177	DLine A, B; /* Files being compared */
178	int X, Y; /* Number of elements in A and B */
179	int x, y; /* Indices: A[x] and B[y] */
180	int szM = 0; /* Number of rows and columns in M */
181	int *M = 0; / Myers matrix */
182	int i, d; /* Indices on M. M[d][i] */
183	int k, q; /* Diagonal number and distinct from (0,0) */
184	int K, D; /* The diagonal and d for the final solution */
185	int R; / Result vector */
186	int r; /* Loop variables */
187	int go = 1; /* Outer loop control */
188	int MAX; /* Largest of X and Y */
189
190	/* Break the two files being diffed into individual lines.
191	** Compute hashes on each line for fast comparison.
192	*/
193	A = break_into_lines(blob_str(pA_Blob), &X);
194	B = break_into_lines(blob_str(pB_Blob), &Y);









195
196	szM = 0;
197	MAX = X>Y ? X : Y;
198	for(d=0; go && d<=MAX; d++){
199	if( szM<d+1 ){
	@@ -211,11 +255,11 @@
211	k = 2*i - d;
212	if( d==0 ){
213	q = 0;
214	}else if( i==0 ){
215	q = M[d-1][0];
216	}else if( M[d-1][i-1] < M[d-1][i] ){
217	q = M[d-1][i];
218	}else{
219	q = M[d-1][i-1];
220	}
221	x = (k + q + 1)/2;
	@@ -224,11 +268,11 @@
224	x = y = 0;
225	}else{
226	while( x<X && y<Y && same_dline(&A[x],&B[y]) ){ x++; y++; }
227	}
228	M[d][i] = x + y;
229	/* printf("M[%d][%i] = %d k=%d (%d,%d)\n", d, i, x+y, k, x, y); */
230	if( x==X && y==Y ){
231	go = 0;
232	break;
233	}
234	}
	@@ -249,11 +293,11 @@
249	**
250	*/
251	D = d - 1;
252	K = X - Y;
253	for(d=D, i=(K+D)/2; d>0; d--){
254	/* printf("d=%d i=%d\n", d, i); */
255	if( i==d \|\| (i>0 && M[d-1][i-1] > M[d-1][i]) ){
256	M[d][0] = M[d][i] - M[d-1][i-1] - 1;
257	M[d][1] = 0;
258	i--;
259	}else{
	@@ -260,17 +304,16 @@
260	M[d][0] = M[d][i] - M[d-1][i] - 1;
261	M[d][1] = 1;
262	}
263	}
264
265
266	/*
267	printf("---------------\nM[0][0] = %5d\n", M[0][0]);
268	for(d=1; d<=D; d++){
269	printf("M[%d][0] = %5d M[%d][1] = %d\n",d,M[d][0],d,M[d][1]);
270	}
271	*/
272
273	/* Allocate the output vector
274	*/
275	R = malloc( sizeof(R[0])(D+2)3 );
276	if( R==0 ){
	@@ -335,11 +378,11 @@
335	int skip; /* Number of lines to skip */
336
337	for(r=0; R[r+1] \|\| R[r+2] \|\| R[r+3]; r += 3*nr){
338	/* Figure out how many triples to show in a single block */
339	for(nr=1; R[r+nr3]>0 && R[r+nr3]<nContext*2; nr++){}
340	/* printf("r=%d nr=%d\n", r, nr); */
341
342	/* For the current block comprising nr triples, figure out
343	** how many lines of A and B are to be displayed
344	*/
345	if( R[r]>nContext ){
	@@ -369,31 +412,31 @@
369	/* Show the initial common area */
370	a += skip;
371	b += skip;
372	m = R[r] - skip;
373	for(j=0; j<m; j++){
374	blob_appendf(pOut," %s\n", A[a+j].z);
375	}
376	a += m;
377	b += m;
378
379	/* Show the differences */
380	for(i=0; i<nr; i++){
381	m = R[r+i*3+1];
382	for(j=0; j<m; j++){
383	blob_appendf(pOut,"-%s\n", A[a+j].z);
384	}
385	a += m;
386	m = R[r+i*3+2];
387	for(j=0; j<m; j++){
388	blob_appendf(pOut,"+%s\n", B[b+j].z);
389	}
390	b += m;
391	if( i<nr-1 ){
392	m = R[r+i*3+3];
393	for(j=0; j<m; j++){
394	blob_appendf(pOut," %s\n", B[b+j].z);
395	}
396	b += m;
397	a += m;
398	}
399	}
	@@ -401,11 +444,11 @@
401	/* Show the final common area */
402	assert( nr==i );
403	m = R[r+nr*3];
404	if( m>nContext ) m = nContext;
405	for(j=0; j<m; j++){
406	blob_appendf(pOut," %s\n", B[b+j].z);
407	}
408	}
409	free(R);
410	R = 0;
411	}
	@@ -422,19 +465,24 @@
422	** COMMAND: test-rawdiff
423	*/
424	void test_rawdiff_cmd(void){
425	Blob a, b;
426	int r;

427	int *R;
428	if( g.argc!=4 ) usage("FILE1 FILE2");
429	blob_read_from_file(&a, g.argv[2]);
430	blob_read_from_file(&b, g.argv[3]);
431	R = text_diff(&a, &b, 0, 0);
432	for(r=0; R[r] \|\| R[r+1] \|\| R[r+2]; r += 3){
433	printf(" copy %4d delete %4d insert %4d\n", R[r], R[r+1], R[r+2]);





434	}
435	free(R);
436	}
437
438	/*
439	** COMMAND: test-udiff
440	*/
441

	--- src/diff.c
	+++ src/diff.c
	@@ -26,50 +26,78 @@
26	*/
27	#include "config.h"
28	#include "diff2.h"
29	#include <assert.h>
30
31
32	#if 0
33	#define DEBUG(X) X
34	#else
35	#define DEBUG(X)
36	#endif
37
38	/*
39	** Information about each line of a file being diffed.
40	**
41	** The lower 20 bits of the hash are the length of the
42	** line. If any line is longer than 1048575 characters,
43	** the file is considered binary.
44	*/
45	typedef struct DLine DLine;
46	struct DLine {
47	const char z; / The text of the line */
48	unsigned int h; /* Hash of the line */
49	};
50
51	#define LENGTH_MASK 0x000fffff
52
53	/*

54	** Return an array of DLine objects containing a pointer to the
55	** start of each line and a hash of that line. The lower
56	** bits of the hash store the length of each line.
57	**
58	** Trailing whitespace is removed from each line.
59	**
60	** Return 0 if the file is binary or contains a line that is
61	** longer than 1048575 bytes.
62	*/
63	static DLine break_into_lines(char z, int *pnLine){
64	int nLine, i, j, k, x;
65	unsigned int h;
66	DLine *a;
67
68	/* Count the number of lines. Allocate space to hold
69	** the returned array.
70	*/
71	for(i=j=0, nLine=1; z[i]; i++, j++){
72	int c = z[i];
73	if( c==0 ){
74	return 0;
75	}
76	if( c=='\n' && z[i+1]!=0 ){
77	nLine++;
78	if( j>1048575 ){
79	return 0;
80	}
81	j = 0;
82	}
83	}
84	if( j>1048575 ){
85	return 0;
86	}
87	a = malloc( nLine*sizeof(a[0]) );
88	if( a==0 ) fossil_panic("out of memory");
89
90	/* Fill in the array */
91	for(i=0; i<nLine; i++){
92	a[i].z = z;
93	for(j=0; z[j] && z[j]!='\n'; j++){}
94	for(k=j; k>0 && isspace(z[k-1]); k--){}

95	for(h=0, x=0; x<k; x++){
96	h = h ^ (h<<2) ^ z[x];
97	}
98	a[i].h = (h<<20) \| k;;
99	z += j+1;
100	}
101
102	/* Return results */
103	*pnLine = nLine;
	@@ -78,26 +106,28 @@
106
107	/*
108	** Return true if two DLine elements are identical.
109	*/
110	static int same_dline(DLine pA, DLine pB){
111	return pA->h==pB->h && memcmp(pA->z,pB->z,pA->h & LENGTH_MASK)==0;
112	}
113
114	/*
115	** Generate a report of the differences between files pA and pB.
116	** If pOut is not NULL then a unified diff is appended there. It
117	** is assumed that pOut has already been initialized. If pOut is
118	** NULL, then a pointer to an array of integers is returned.
119	** The integers come in triples. For each triple,

120	** the elements are the number of lines copied, the number of
121	** lines deleted, and the number of lines inserted. The vector
122	** is terminated by a triple of all zeros.
123	**
124	** This diff utility does not work on binary files. If a binary
125	** file is encountered, 0 is returned and pOut is written with
126	** text "cannot compute difference between binary files".
127	**
128	****************************************************************************
129	**
130	** The core algorithm is a variation on the classic Wagner
131	** minimum edit distance with enhancements to reduce the runtime
132	** to be almost linear in the common case where the two files
133	** have a lot in common. For additional information see
	@@ -171,29 +201,43 @@
201	** compared to X and Y so little memory is required. The
202	** original Wagner algorithm requires X*Y memory, which for
203	** larger files (100K lines) is more memory than we have at
204	** hand.
205	*/
206	int *text_diff(
207	Blob pA_Blob, / FROM file */
208	Blob pB_Blob, / TO file */
209	Blob pOut, / Write unified diff here if not NULL */
210	int nContext /* Amount of context to unified diff */
211	){
212	DLine A, B; /* Files being compared */
213	int X, Y; /* Number of elements in A and B */
214	int x, y; /* Indices: A[x] and B[y] */
215	int szM = 0; /* Number of rows and columns in M */
216	int *M = 0; / Myers matrix */
217	int i, d; /* Indices on M. M[d][i] */
218	int k, q; /* Diagonal number and distinct from (0,0) */
219	int K, D; /* The diagonal and d for the final solution */
220	int R = 0; / Result vector */
221	int r; /* Loop variables */
222	int go = 1; /* Outer loop control */
223	int MAX; /* Largest of X and Y */
224
225	/* Break the two files being diffed into individual lines.
226	** Compute hashes on each line for fast comparison.
227	*/
228	A = break_into_lines(blob_str(pA_Blob), &X);
229	B = break_into_lines(blob_str(pB_Blob), &Y);
230
231	if( A==0 \|\| B==0 ){
232	free(A);
233	free(B);
234	if( pOut ){
235	blob_appendf(pOut, "cannot compute difference between binary files\n");
236	}
237	return 0;
238	}
239
240	szM = 0;
241	MAX = X>Y ? X : Y;
242	for(d=0; go && d<=MAX; d++){
243	if( szM<d+1 ){
	@@ -211,11 +255,11 @@
255	k = 2*i - d;
256	if( d==0 ){
257	q = 0;
258	}else if( i==0 ){
259	q = M[d-1][0];
260	}else if( i<d-1 && M[d-1][i-1] < M[d-1][i] ){
261	q = M[d-1][i];
262	}else{
263	q = M[d-1][i-1];
264	}
265	x = (k + q + 1)/2;
	@@ -224,11 +268,11 @@
268	x = y = 0;
269	}else{
270	while( x<X && y<Y && same_dline(&A[x],&B[y]) ){ x++; y++; }
271	}
272	M[d][i] = x + y;
273	DEBUG( printf("M[%d][%i] = %d k=%d (%d,%d)\n", d, i, x+y, k, x, y); )
274	if( x==X && y==Y ){
275	go = 0;
276	break;
277	}
278	}
	@@ -249,11 +293,11 @@
293	**
294	*/
295	D = d - 1;
296	K = X - Y;
297	for(d=D, i=(K+D)/2; d>0; d--){
298	DEBUG( printf("d=%d i=%d\n", d, i); )
299	if( i==d \|\| (i>0 && M[d-1][i-1] > M[d-1][i]) ){
300	M[d][0] = M[d][i] - M[d-1][i-1] - 1;
301	M[d][1] = 0;
302	i--;
303	}else{
	@@ -260,17 +304,16 @@
304	M[d][0] = M[d][i] - M[d-1][i] - 1;
305	M[d][1] = 1;
306	}
307	}
308
309	DEBUG(
310	printf("---------------\nM[0][0] = %5d\n", M[0][0]);
311	for(d=1; d<=D; d++){
312	printf("M[%d][0] = %5d M[%d][1] = %d\n",d,M[d][0],d,M[d][1]);
313	}
314	)

315
316	/* Allocate the output vector
317	*/
318	R = malloc( sizeof(R[0])(D+2)3 );
319	if( R==0 ){
	@@ -335,11 +378,11 @@
378	int skip; /* Number of lines to skip */
379
380	for(r=0; R[r+1] \|\| R[r+2] \|\| R[r+3]; r += 3*nr){
381	/* Figure out how many triples to show in a single block */
382	for(nr=1; R[r+nr3]>0 && R[r+nr3]<nContext*2; nr++){}
383	DEBUG( printf("r=%d nr=%d\n", r, nr); )
384
385	/* For the current block comprising nr triples, figure out
386	** how many lines of A and B are to be displayed
387	*/
388	if( R[r]>nContext ){
	@@ -369,31 +412,31 @@
412	/* Show the initial common area */
413	a += skip;
414	b += skip;
415	m = R[r] - skip;
416	for(j=0; j<m; j++){
417	blob_appendf(pOut," %.*s\n", A[a+j].h & LENGTH_MASK, A[a+j].z);
418	}
419	a += m;
420	b += m;
421
422	/* Show the differences */
423	for(i=0; i<nr; i++){
424	m = R[r+i*3+1];
425	for(j=0; j<m; j++){
426	blob_appendf(pOut,"-%.*s\n", A[a+j].h & LENGTH_MASK, A[a+j].z);
427	}
428	a += m;
429	m = R[r+i*3+2];
430	for(j=0; j<m; j++){
431	blob_appendf(pOut,"+%.*s\n", B[b+j].h & LENGTH_MASK, B[b+j].z);
432	}
433	b += m;
434	if( i<nr-1 ){
435	m = R[r+i*3+3];
436	for(j=0; j<m; j++){
437	blob_appendf(pOut," %.*s\n", B[b+j].h & LENGTH_MASK, B[b+j].z);
438	}
439	b += m;
440	a += m;
441	}
442	}
	@@ -401,11 +444,11 @@
444	/* Show the final common area */
445	assert( nr==i );
446	m = R[r+nr*3];
447	if( m>nContext ) m = nContext;
448	for(j=0; j<m; j++){
449	blob_appendf(pOut," %.*s\n", B[b+j].h & LENGTH_MASK, B[b+j].z);
450	}
451	}
452	free(R);
453	R = 0;
454	}
	@@ -422,19 +465,24 @@
465	** COMMAND: test-rawdiff
466	*/
467	void test_rawdiff_cmd(void){
468	Blob a, b;
469	int r;
470	int i;
471	int *R;
472	if( g.argc<4 ) usage("FILE1 FILE2 ...");
473	blob_read_from_file(&a, g.argv[2]);
474	for(i=3; i<g.argc; i++){
475	if( i>3 ) printf("-------------------------------\n");
476	blob_read_from_file(&b, g.argv[i]);
477	R = text_diff(&a, &b, 0, 0);
478	for(r=0; R[r] \|\| R[r+1] \|\| R[r+2]; r += 3){
479	printf(" copy %4d delete %4d insert %4d\n", R[r], R[r+1], R[r+2]);
480	}
481	/* free(R); */
482	blob_reset(&b);
483	}

484	}
485
486	/*
487	** COMMAND: test-udiff
488	*/
489

M src/merge.c

+11 -3

		--- src/merge.c
		+++ src/merge.c
		@@ -226,10 +226,11 @@
226	226	while( db_step(&q)==SQLITE_ROW ){
227	227	int ridm = db_column_int(&q, 0);
228	228	int idv = db_column_int(&q, 1);
229	229	int ridp = db_column_int(&q, 2);
230	230	int ridv = db_column_int(&q, 3);
	231	+ int rc;
231	232	char *zName = db_text(0, "SELECT pathname FROM vfile WHERE id=%d", idv);
232	233	char *zFullPath;
233	234	Blob m, p, v, r;
234	235	/* Do a 3-way merge of idp->idm into idp->idv. The results go into idv. */
235	236	if( detailFlag ){
		@@ -237,17 +238,24 @@
237	238	}else{
238	239	printf("MERGE %s\n", zName);
239	240	}
240	241	undo_save(zName);
241	242	zFullPath = mprintf("%s/%s", g.zLocalRoot, zName);
242		- free(zName);
243	243	content_get(ridp, &p);
244	244	content_get(ridm, &m);
245	245	blob_zero(&v);
246	246	blob_read_from_file(&v, zFullPath);
247		- blob_merge(&p, &m, &v, &r);
248		- blob_write_to_file(&r, zFullPath);
	247	+ rc = blob_merge(&p, &m, &v, &r);
	248	+ if( rc>=0 ){
	249	+ blob_write_to_file(&r, zFullPath);
	250	+ if( rc>0 ){
	251	+ printf("***** %d merge conflicts in %s\n", rc, zName);
	252	+ }
	253	+ }else{
	254	+ printf("***** Cannot merge binary file %s\n", zName);
	255	+ }
	256	+ free(zName);
249	257	blob_reset(&p);
250	258	blob_reset(&m);
251	259	blob_reset(&v);
252	260	blob_reset(&r);
253	261	db_multi_exec("INSERT OR IGNORE INTO vmerge(id,merge) VALUES(%d,%d)",
254	262

	--- src/merge.c
	+++ src/merge.c
	@@ -226,10 +226,11 @@
226	while( db_step(&q)==SQLITE_ROW ){
227	int ridm = db_column_int(&q, 0);
228	int idv = db_column_int(&q, 1);
229	int ridp = db_column_int(&q, 2);
230	int ridv = db_column_int(&q, 3);

231	char *zName = db_text(0, "SELECT pathname FROM vfile WHERE id=%d", idv);
232	char *zFullPath;
233	Blob m, p, v, r;
234	/* Do a 3-way merge of idp->idm into idp->idv. The results go into idv. */
235	if( detailFlag ){
	@@ -237,17 +238,24 @@
237	}else{
238	printf("MERGE %s\n", zName);
239	}
240	undo_save(zName);
241	zFullPath = mprintf("%s/%s", g.zLocalRoot, zName);
242	free(zName);
243	content_get(ridp, &p);
244	content_get(ridm, &m);
245	blob_zero(&v);
246	blob_read_from_file(&v, zFullPath);
247	blob_merge(&p, &m, &v, &r);
248	blob_write_to_file(&r, zFullPath);








249	blob_reset(&p);
250	blob_reset(&m);
251	blob_reset(&v);
252	blob_reset(&r);
253	db_multi_exec("INSERT OR IGNORE INTO vmerge(id,merge) VALUES(%d,%d)",
254

	--- src/merge.c
	+++ src/merge.c
	@@ -226,10 +226,11 @@
226	while( db_step(&q)==SQLITE_ROW ){
227	int ridm = db_column_int(&q, 0);
228	int idv = db_column_int(&q, 1);
229	int ridp = db_column_int(&q, 2);
230	int ridv = db_column_int(&q, 3);
231	int rc;
232	char *zName = db_text(0, "SELECT pathname FROM vfile WHERE id=%d", idv);
233	char *zFullPath;
234	Blob m, p, v, r;
235	/* Do a 3-way merge of idp->idm into idp->idv. The results go into idv. */
236	if( detailFlag ){
	@@ -237,17 +238,24 @@
238	}else{
239	printf("MERGE %s\n", zName);
240	}
241	undo_save(zName);
242	zFullPath = mprintf("%s/%s", g.zLocalRoot, zName);

243	content_get(ridp, &p);
244	content_get(ridm, &m);
245	blob_zero(&v);
246	blob_read_from_file(&v, zFullPath);
247	rc = blob_merge(&p, &m, &v, &r);
248	if( rc>=0 ){
249	blob_write_to_file(&r, zFullPath);
250	if( rc>0 ){
251	printf("***** %d merge conflicts in %s\n", rc, zName);
252	}
253	}else{
254	printf("***** Cannot merge binary file %s\n", zName);
255	}
256	free(zName);
257	blob_reset(&p);
258	blob_reset(&m);
259	blob_reset(&v);
260	blob_reset(&r);
261	db_multi_exec("INSERT OR IGNORE INTO vmerge(id,merge) VALUES(%d,%d)",
262

M src/merge3.c

+174 -649

		--- src/merge3.c
		+++ src/merge3.c
		@@ -24,625 +24,188 @@
24	24	** This module implements a 3-way merge
25	25	*/
26	26	#include "config.h"
27	27	#include "merge3.h"
28	28
29		-#if 1
30		-# define DEBUG1(X) X
31		-#else
32		-# define DEBUG1(X)
33		-#endif
34		-#if 0
35		-#define DEBUG2(X) X
36		-/*
37		-** For debugging:
38		-** Print 16 characters of text from zBuf
39		-*/
40		-static const char print16(const char z){
41		- int i;
42		- static char zBuf[20];
43		- for(i=0; i<16; i++){
44		- if( z[i]>=0x20 && z[i]<=0x7e ){
45		- zBuf[i] = z[i];
46		- }else{
47		- zBuf[i] = '.';
48		- }
49		- }
50		- zBuf[i] = 0;
51		- return zBuf;
52		-}
53		-#else
54		-# define DEBUG2(X)
55		-#endif
56		-
57		-/*
58		-** Must be a 32-bit integer
59		-*/
60		-typedef unsigned int u32;
61		-
62		-/*
63		-** Must be a 16-bit value
64		-*/
65		-typedef unsigned short int u16;
66		-
67		-/*
68		-** The width of a hash window in bytes. The algorithm only works if this
69		-** is a power of 2.
70		-*/
71		-#define NHASH 16
72		-
73		-/*
74		-** The current state of the rolling hash.
75		-**
76		-** z[] holds the values that have been hashed. z[] is a circular buffer.
77		-** z[i] is the first entry and z[(i+NHASH-1)%NHASH] is the last entry of
78		-** the window.
79		-**
80		-** Hash.a is the sum of all elements of hash.z[]. Hash.b is a weighted
81		-** sum. Hash.b is z[i]NHASH + z[i+1](NHASH-1) + ... + z[i+NHASH-1]*1.
82		-** (Each index for z[] should be module NHASH, of course. The %NHASH operator
83		-** is omitted in the prior expression for brevity.)
84		-*/
85		-typedef struct hash hash;
86		-struct hash {
87		- u16 a, b; /* Hash values */
88		- u16 i; /* Start of the hash window */
89		- char z[NHASH]; /* The values that have been hashed */
90		-};
91		-
92		-/*
93		-** Initialize the rolling hash using the first NHASH characters of z[]
94		-*/
95		-static void hash_init(hash pHash, const char z){
96		- u16 a, b, i;
97		- a = b = 0;
98		- for(i=0; i<NHASH; i++){
99		- a += z[i];
100		- b += (NHASH-i)*z[i];
101		- pHash->z[i] = z[i];
102		- }
103		- pHash->a = a & 0xffff;
104		- pHash->b = b & 0xffff;
105		- pHash->i = 0;
106		-}
107		-
108		-/*
109		-** Advance the rolling hash by a single character "c"
110		-*/
111		-static void hash_next(hash *pHash, int c){
112		- u16 old = pHash->z[pHash->i];
113		- pHash->z[pHash->i] = c;
114		- pHash->i = (pHash->i+1)&(NHASH-1);
115		- pHash->a = pHash->a - old + c;
116		- pHash->b = pHash->b - NHASH*old + pHash->a;
117		-}
118		-
119		-/*
120		-** Return a 32-bit hash value
121		-*/
122		-static u32 hash_32bit(hash *pHash){
123		- return (pHash->a & 0xffff) \| (((u32)(pHash->b & 0xffff))<<16);
124		-}
125		-
126		-/*
127		-** Maximum number of landmarks to set in the source file.
128		-*/
129		-#define MX_LANDMARK (1024*128)
130		-
131		-/*
132		-** A mapping structure is used to record which parts of two
133		-** files contain the same text. There are zero or more mapping
134		-** entries in a mapping. Each entry maps a segment of text in
135		-** the source file into a segment of the output file.
136		-**
137		-** fromFirst...fromLast -> toFirst...toLast
138		-**
139		-** Extra text might be inserted in the output file after a
140		-** mapping. The nExtra parameter records the number of bytes
141		-** of extra text to insert.
142		-*/
143		-typedef struct Mapping Mapping;
144		-struct Mapping {
145		- int nMap;
146		- int nUsed;
147		- struct Mapping_entry {
148		- int fromFirst, fromLast;
149		- int toFirst, toLast;
150		- int nExtra;
151		- } *aMap;
152		-};
153		-
154		-/*
155		-** Free malloced memory associated with a mapping.
156		-*/
157		-static void MappingClear(Mapping *p){
158		- free(p->aMap);
159		- memset(p, 0, sizeof(*p));
160		-}
161		-
162		-/*
163		-** Add an entry to a mapping structure. The mapping is:
164		-**
165		-** a...b -> c...d
166		-**
167		-** The nExtra parameter is initially zero. It will be changed
168		-** later if necessary.
169		-*/
170		-static void MappingInsert(Mapping *p, int a, int b, int c, int d){
171		- struct Mapping_entry *pEntry;
172		- int i;
173		- for(i=0, pEntry=p->aMap; i<p->nUsed; i++, pEntry++){
174		- if( pEntry->fromFirst==a && pEntry->fromLast==b && pEntry->toFirst==c ){
175		- DEBUG2( printf("DUPLICATE: %6d..%-6d %6d..%d\n", a, b, c, d); )
176		- return;
177		- }
178		- }
179		- if( p->nUsed>=p->nMap ){
180		- p->nMap = p->nMap * 2 + 10;
181		- p->aMap = realloc(p->aMap, p->nMap*sizeof(p->aMap[0]) );
182		- if( p->aMap==0 ) exit(1);
183		- }
184		- pEntry = &p->aMap[p->nUsed];
185		- pEntry->fromFirst = a;
186		- pEntry->fromLast = b;
187		- pEntry->toFirst = c;
188		- pEntry->toLast = d;
189		- pEntry->nExtra = 0;
190		- p->nUsed++;
191		-}
192		-
193		-DEBUG1(
194		-/*
195		-** For debugging purposes:
196		-** Print the content of a mapping.
197		-*/
198		-static void MappingPrint(Mapping *pMap){
199		- int i;
200		- struct Mapping_entry *p;
201		- for(i=0, p=pMap->aMap; i<pMap->nUsed; i++, p++){
202		- printf("%6d..%-6d %6d..%-6d %d\n",
203		- p->fromFirst, p->fromLast,
204		- p->toFirst, p->toLast, p->nExtra);
205		- }
206		-}
207		-)
208		-
209		-/*
210		-** Remove deleted entries from a mapping. Deleted enties have
211		-** an fromFirst of less than 0.
212		-*/
213		-static void MappingPurgeDeletedEntries(Mapping *p){
214		- int i, j;
215		- for(i=j=0; i<p->nUsed; i++){
216		- if( p->aMap[i].fromFirst<0 ) continue;
217		- if( j<i ){
218		- p->aMap[j] = p->aMap[i];
219		- }
220		- j++;
221		- }
222		- p->nUsed = j;
223		-}
224		-
225		-/*
226		-** Comparisons functions used for sorting elements of a Mapping
227		-*/
228		-static int intAbs(int x){ return x<0 ? -x : x; }
229		-static int compareSize(const void a, const void b){
230		- const struct Mapping_entry A = (const struct Mapping_entry)a;
231		- const struct Mapping_entry B = (const struct Mapping_entry)b;
232		- int rc;
233		- rc = (B->fromLast - B->fromFirst) - (A->fromLast - A->fromFirst);
234		- if( rc==0 ){
235		- rc = intAbs(A->toFirst - A->fromFirst) -
236		- intAbs(B->toFirst - B->fromFirst);
237		- }
238		- return rc;
239		-}
240		-static int compareFrom(const void a, const void b){
241		- const struct Mapping_entry A = (const struct Mapping_entry)a;
242		- const struct Mapping_entry B = (const struct Mapping_entry)b;
243		- return A->fromFirst - B->fromFirst;
244		-}
245		-static int compareTo(const void a, const void b){
246		- const struct Mapping_entry A = (const struct Mapping_entry)a;
247		- const struct Mapping_entry B = (const struct Mapping_entry)b;
248		- return A->toFirst - B->toFirst;
249		-}
250		-
251		-/*
252		-** Routines for sorting the entries of a mapping. SortSize sorts
253		-** the entries in order of decreasing size (largest first.)
254		-** SortFrom and SortTo sort the entries in order of increasing
255		-** fromFirst and toFirst.
256		-*/
257		-static void MappingSortSize(Mapping *p){
258		- qsort(p->aMap, p->nUsed, sizeof(p->aMap[0]), compareSize);
259		-}
260		-static void MappingSortFrom(Mapping *p){
261		- qsort(p->aMap, p->nUsed, sizeof(p->aMap[0]), compareFrom);
262		-}
263		-static void MappingSortTo(Mapping *p){
264		- qsort(p->aMap, p->nUsed, sizeof(p->aMap[0]), compareTo);
265		-}
266		-
267		-/*
268		-** Initialize pMap to contain a set of similarities between two files.
269		-*/
270		-static void MappingInit(
271		- const char zSrc, / The source or pattern file */
272		- int lenSrc, /* Length of the source file */
273		- const char zOut, / The target file */
274		- int lenOut, /* Length of the target file */
275		- Mapping pMap / Write the map of similaries here */
276		-){
277		- int i, j, base, prefix;
278		- int changes;
279		- hash h;
280		- int *collide;
281		- int origLenOut = lenOut;
282		- struct Mapping_entry *aMap;
283		- int landmark[MX_LANDMARK];
284		-
285		- /*
286		- ** Initialize the map
287		- */
288		- memset(pMap, 0, sizeof(*pMap));
289		-
290		- /*
291		- ** Find common prefix and suffix
292		- */
293		- if( lenSrc<=NHASH \|\| lenOut<=NHASH ){
294		- MappingInsert(pMap, 0, 0, 0, 0);
295		- goto add_nextra;
296		- }
297		- for(i=0; i<lenSrc && i<lenOut && zSrc[i]==zOut[i]; i++){}
298		- if( i>=NHASH ){
299		- MappingInsert(pMap, 0, i-1, 0, i-1);
300		- lenSrc -= i;
301		- zSrc += i;
302		- lenOut -= i;
303		- zOut += i;
304		- if( lenSrc<=0 \|\| lenOut<=0 ) goto add_nextra;
305		- prefix = i;
306		- }else{
307		- prefix = 0;
308		- }
309		- for(i=1; i<=lenSrc && i<=lenOut && zSrc[lenSrc-i]==zOut[lenOut-i]; i++){}
310		- if( i>NHASH ){
311		- MappingInsert(pMap, prefix+lenSrc-i+1, prefix+lenSrc-1,
312		- prefix+lenOut-i+1, prefix+lenOut-1);
313		- lenSrc -= i;
314		- lenOut -= i;
315		- }
316		-
317		- /* If the source file is very small, it means that we have no
318		- ** chance of ever finding any matches. We can leave early.
319		- */
320		- if( lenSrc<=NHASH ) goto add_nextra;
321		-
322		- /* Compute the hash table used to locate matching sections in the
323		- ** source file.
324		- */
325		- collide = malloc( lenSrc*sizeof(int)/NHASH );
326		- if( collide==0 ) return;
327		- memset(landmark, -1, sizeof(landmark));
328		- memset(collide, -1, lenSrc*sizeof(int)/NHASH );
329		- for(i=0; i<lenSrc-NHASH; i+=NHASH){
330		- int hv;
331		- hash_init(&h, &zSrc[i]);
332		- hv = hash_32bit(&h) & (MX_LANDMARK-1);
333		- collide[i/NHASH] = landmark[hv];
334		- landmark[hv] = i/NHASH;
335		- }
336		-
337		- /* Begin scanning the target file and generating mappings. In this
338		- ** step, we generate as many mapping entries is we can. Many of these
339		- ** entries might overlap. The overlapping entries are removed by
340		- ** the loop the follows.
341		- */
342		- base = 0; /* We have already checked everything before zOut[base] */
343		- while( base<lenOut-NHASH ){
344		- int iSrc, iBlock, nextBase, nextBaseI;
345		- hash_init(&h, &zOut[base]);
346		- i = 0; /* Trying to match a landmark against zOut[base+i] */
347		- nextBaseI = NHASH;
348		- nextBase = base;
349		- while(1){
350		- int hv;
351		-
352		- hv = hash_32bit(&h) & (MX_LANDMARK-1);
353		- DEBUG2( printf("LOOKING: %d+%d+%d=%d [%s]\n",
354		- prefix,base,i,prefix+base+i, print16(&zOut[base+i])); )
355		- iBlock = landmark[hv];
356		- while( iBlock>=0 ){
357		- /*
358		- ** The hash window has identified a potential match against
359		- ** landmark block iBlock. But we need to investigate further.
360		- **
361		- ** Look for a region in zOut that matches zSrc. Anchor the search
362		- ** at zSrc[iSrc] and zOut[base+i].
363		- **
364		- ** Set cnt equal to the length of the match and set ofst so that
365		- ** zSrc[ofst] is the first element of the match.
366		- */
367		- int cnt, ofstSrc;
368		- int j, k, x, y;
369		-
370		- /* Beginning at iSrc, match forwards as far as we can. j counts
371		- ** the number of characters that match */
372		- iSrc = iBlock*NHASH;
373		- for(j=0, x=iSrc, y=base+i; x<lenSrc && y<lenOut; j++, x++, y++){
374		- if( zSrc[x]!=zOut[y] ) break;
375		- }
376		- j--;
377		-
378		- /* Beginning at iSrc-1, match backwards as far as we can. k counts
379		- ** the number of characters that match */
380		- for(k=1; k<iSrc && k<=base+i; k++){
381		- if( zSrc[iSrc-k]!=zOut[base+i-k] ) break;
382		- }
383		- k--;
384		-
385		- /* Compute the offset and size of the matching region zSrc */
386		- ofstSrc = iSrc-k;
387		- cnt = j+k+1;
388		- DEBUG2( printf("MATCH %d bytes at SRC[%d..%d]: [%s]\n",
389		- cnt, ofstSrc, ofstSrc+cnt-1, print16(&zSrc[ofstSrc])); )
390		- if( cnt>NHASH ){
391		- int ofstOut = base+i-k;
392		- DEBUG2( printf("COPY %6d..%-6d %6d..%d\n",
393		- prefix+ofstSrc, prefix+ofstSrc+cnt-1,
394		- prefix+ofstOut, prefix+ofstOut+cnt-1); )
395		- MappingInsert(pMap,
396		- prefix+ofstSrc, prefix+ofstSrc+cnt-1,
397		- prefix+ofstOut, prefix+ofstOut+cnt-1);
398		- if( nextBase < ofstOut+cnt-1 ){
399		- nextBase = ofstOut+cnt-1;
400		- nextBaseI = i+NHASH;
401		- }
402		- }
403		-
404		- /* Check the next matching block */
405		- iBlock = collide[iBlock];
406		- }
407		-
408		- /* If we found a match, then jump out to the outer loop and begin
409		- ** a new cycle.
410		- */
411		- if( nextBase>base && i>=nextBaseI ){
412		- base = nextBase;
413		- break;
414		- }
415		-
416		- /* Advance the hash by one character. Keep looking for a match */
417		- if( base+i+NHASH>=lenOut ){
418		- base = lenOut;
419		- break;
420		- }
421		- hash_next(&h, zOut[base+i+NHASH]);
422		- i++;
423		- }
424		- }
425		- free(collide);
426		-#if 0
427		- DEBUG1(
428		- printf("after creation:\n");
429		- MappingPrint(pMap);
430		- )
431		-#endif
432		-
433		- /* In this step, we will remove overlapping entries from the mapping.
434		- **
435		- ** We use a greedy algorithm. Select the largest mapping first and
436		- ** remove all overlapping mappings. Then take the next largest
437		- ** mapping and remove others that overlap with it. Keep going until
438		- ** all mappings have been processed.
439		- */
440		- MappingSortSize(pMap);
441		- do{
442		- changes = 0;
443		- for(i=0; i<pMap->nUsed; i++){
444		- int sortNeeded = 0;
445		- int purgeNeeded = 0;
446		- struct Mapping_entry *pA;
447		- pA = &pMap->aMap[i];
448		- for(j=i+1; j<pMap->nUsed; j++){
449		- int diff;
450		- struct Mapping_entry *pB;
451		- pB = &pMap->aMap[j];
452		- if( pB->fromLast<pA->fromFirst \|\| pB->fromFirst>pA->fromLast ){
453		- /* No overlap. Do nothing */
454		- }else if( pB->fromFirst>=pA->fromFirst && pB->fromLast<=pA->fromLast ){
455		- /* B is contained entirely within A. Drop B */
456		- pB->fromFirst = -1;
457		- purgeNeeded = 1;
458		- continue;
459		- }else if( pB->fromFirst<pA->fromFirst ){
460		- /* The tail B overlaps the head of A */
461		- assert( pB->fromLast>=pA->fromFirst && pB->fromLast<=pA->fromLast );
462		- diff = pB->fromLast + 1 - pA->fromFirst;
463		- pB->fromLast -= diff;
464		- pB->toLast -= diff;
465		- sortNeeded = 1;
466		- }else{
467		- /* The head of B overlaps the tail of A */
468		- assert( pB->fromFirst<=pA->fromLast && pB->fromLast>pA->fromLast );
469		- diff = pA->fromLast + 1 - pB->fromFirst;
470		- pB->fromFirst += diff;
471		- pB->toFirst += diff;
472		- sortNeeded = 1;
473		- }
474		- if( pB->toLast<pA->toFirst \|\| pB->toFirst>pA->toLast ){
475		- /* No overlap. Do nothing */
476		- }else if( pB->toFirst>=pA->toFirst && pB->toLast<=pA->toLast ){
477		- /* B is contained entirely within A. Drop B */
478		- pB->fromFirst = -1;
479		- purgeNeeded = 1;
480		- }else if( pB->toFirst<pA->toFirst ){
481		- /* The tail of B overlaps the head of A */
482		- assert( pB->toLast>=pA->toFirst && pB->toLast<=pA->toLast );
483		- diff = pB->toLast + 1 - pA->toFirst;
484		- pB->fromLast -= diff;
485		- pB->toLast -= diff;
486		- sortNeeded = 1;
487		- }else{
488		- /* The head of B overlaps the tail of A */
489		- assert( pB->toFirst<=pA->toLast && pB->toLast>pA->toLast );
490		- diff = pA->toLast + 1 - pB->toFirst;
491		- pB->fromFirst += diff;
492		- pB->toFirst += diff;
493		- sortNeeded = 1;
494		- }
495		- }
496		- if( purgeNeeded ){
497		- MappingPurgeDeletedEntries(pMap);
498		- /* changes++; */
499		- }
500		- if( sortNeeded && i<pMap->nUsed-2 ){
501		- MappingSortSize(pMap);
502		- /* changes++; */
503		- }
504		- }
505		- }while( changes );
506		-
507		- /* Final step: Arrange the mapping entires so that they are in the
508		- ** order of the output file. Then fill in the nExtra values.
509		- */
510		-add_nextra:
511		- MappingSortTo(pMap);
512		- aMap = pMap->aMap;
513		- for(i=0; i<pMap->nUsed-1; i++){
514		- aMap[i].nExtra = aMap[i+1].toFirst - aMap[i].toLast - 1;
515		- }
516		- if( pMap->nUsed>0 && origLenOut > aMap[i].toLast+1 ){
517		- aMap[i].nExtra = origLenOut - aMap[i].toLast - 1;
518		- }
519		-}
520		-
521		-/*
522		-** Translate an index into a file using a mapping.
523		-**
524		-** The mapping "p" shows how blocks in the input file map into blocks
525		-** of the output file. The index iFrom is an index into the input file.
526		-** This routine returns the index into the output file of the corresponding
527		-** character.
528		-**
529		-** If pInserted!=0 and iFrom points to the last character before a
530		-** insert in the output file, then the return value is adjusted forward
531		-** so that it points to the end of the insertion and the number of
532		-** bytes inserted is written into *pInserted. If pInserted==0 then
533		-** iFrom always maps directly in the corresponding output file
534		-** index regardless of whether or not it points to the last character
535		-** before an insertion.
536		-*/
537		-static int MappingTranslate(Mapping p, int iFrom, int pInserted){
538		- int i;
539		- for(i=0; i<p->nUsed; i++){
540		- if( iFrom>p->aMap[i].fromLast ) continue;
541		- if( iFrom<=p->aMap[i].fromFirst ){
542		- return p->aMap[i].toFirst;
543		- }
544		- if( pInserted && iFrom==p->aMap[i].fromLast ){
545		- int n = p->aMap[i].nExtra;
546		- *pInserted = n;
547		- return p->aMap[i].toLast + n;
548		- }else{
549		- return p->aMap[i].toFirst + iFrom - p->aMap[i].fromFirst;
550		- }
551		- }
552		- i--;
553		- return p->aMap[i].toLast + p->aMap[i].nExtra;
	29	+#if 0
	30	+#define DEBUG(X) X
	31	+#else
	32	+#define DEBUG(X)
	33	+#endif
	34	+
	35	+/*
	36	+** Opcodes
	37	+*/
	38	+#define CPY 0
	39	+#define DEL 1
	40	+#define INS 2
	41	+#define END 3
	42	+#define UNK 4
	43	+
	44	+/*
	45	+** Compare a single line of text from pV1 and pV2. If the lines
	46	+** are the same, return true. Return false if they are different.
	47	+**
	48	+** The cursor on both pV1 and pV2 is unchanged.
	49	+*/
	50	+static int sameLine(Blob pV1, Blob pV2){
	51	+ char z1, z2;
	52	+ int i;
	53	+
	54	+ z1 = blob_buffer(pV1);
	55	+ z2 = blob_buffer(pV2);
	56	+ for(i=0; z1[i]!='\n' && z1[i]==z2[i]; i++){}
	57	+ return z2[i]=='\n' \|\| (z2[i]=='\r' && z2[i+1]=='\n')
	58	+ \|\| (z1[i]=='\r' && z2[i]=='\n' && z1[i+1]=='\n');
554	59	}
555	60
556	61	/*
557	62	** Do a three-way merge. Initialize pOut to contain the result.
558		-*/
559		-void blob_merge(Blob pPivot, Blob pV1, Blob pV2, Blob pOut){
560		- Mapping map1, map2, map3;
561		- int i, j;
562		- const char zV1, zV2;
563		- blob_zero(pOut);
564		- MappingInit(
565		- blob_buffer(pPivot), blob_size(pPivot),
566		- blob_buffer(pV1), blob_size(pV1),
567		- &map1);
568		- MappingSortFrom(&map1);
569		- DEBUG1(
570		- printf("map1-final:\n");
571		- MappingPrint(&map1);
572		- )
573		- MappingInit(
574		- blob_buffer(pPivot), blob_size(pPivot),
575		- blob_buffer(pV2), blob_size(pV2),
576		- &map2);
577		- DEBUG1(
578		- printf("map2-final:\n");
579		- MappingPrint(&map2);
580		- )
581		- MappingInit(
582		- blob_buffer(pV1), blob_size(pV1),
583		- blob_buffer(pV2), blob_size(pV2),
584		- &map3);
585		- DEBUG1(
586		- printf("map3-final:\n");
587		- MappingPrint(&map3);
588		- )
589		- zV1 = blob_buffer(pV1);
590		- zV2 = blob_buffer(pV2);
591		- if( map1.aMap[0].toFirst>0 ){
592		- blob_append(pOut, zV1, map1.aMap[0].toFirst);
593		- DEBUG1( printf("INSERT %d bytes from V1[0..%d]\n",
594		- map1.aMap[0].toFirst, map1.aMap[0].toFirst-1); )
595		- }
596		- if( map2.aMap[0].toFirst>0 ){
597		- blob_append(pOut, zV2, map2.aMap[0].toFirst);
598		- DEBUG1( printf("INSERT %d bytes from V2[0..%d]\n",
599		- map2.aMap[0].toFirst, map2.aMap[0].toFirst-1); )
600		- }
601		- for(i=j=0; i<map2.nUsed; i++){
602		- int iFirst, iLast, nInsert, iTail;
603		- struct Mapping_entry *p = &map2.aMap[i];
604		- while( j<map3.nUsed-1 && map3.aMap[j+1].toFirst>p->toFirst ){ j++; }
605		- DEBUG1(
606		- printf("map2: %6d..%-6d %6d..%-6d %d\n",
607		- p->fromFirst, p->fromLast, p->toFirst, p->toLast, p->nExtra);
608		- printf("map3: j=%-6d %6d..%-6d\n", j, map3.aMap[j].toFirst,
609		- map3.aMap[j].toLast);
610		- );
611		- iTail = p->toLast + p->nExtra;
612		- if( i<map2.nUsed-1 &&
613		- map3.aMap[j].toFirst<=p->toFirst && map3.aMap[j].toLast>=iTail ){
614		- blob_append(pOut, &zV2[p->toFirst], iTail - p->toFirst + 1);
615		- DEBUG1(
616		- printf("COPY %d bytes from V2[%d..%d]\n", iTail - p->toFirst+1,
617		- p->toFirst, iTail);
618		- )
619		- continue;
620		- }
621		- iFirst = MappingTranslate(&map1, p->fromFirst, 0);
622		- iLast = MappingTranslate(&map1, p->fromLast, &nInsert);
623		- blob_append(pOut, &zV1[iFirst], iLast - iFirst + 1);
624		- DEBUG1(
625		- printf("COPY %d bytes from V1[%d..%d]\n", iLast-iFirst+1, iFirst, iLast);
626		- )
627		- if( p->nExtra>0 ){
628		- if( p->nExtra==nInsert &&
629		- memcmp(&zV2[p->toLast+1], &zV1[iLast-nInsert+1], nInsert)==0 ){
630		- /* Omit a duplicate insert */
631		- DEBUG1( printf("OMIT duplicate insert\n"); )
632		- }else{
633		- blob_append(pOut, &zV2[p->toLast+1], p->nExtra);
634		- DEBUG1(
635		- printf("INSERT %d bytes from V2[%d..%d]\n",
636		- p->nExtra, p->toLast+1, p->toLast+p->nExtra);
637		- )
638		- }
639		- }
640		- }
641		- MappingClear(&map1);
642		- MappingClear(&map2);
643		- MappingClear(&map3);
	63	+**
	64	+** The merge is an edit against pV2. Both pV1 and pV2 have a
	65	+** common origin at pPivot. Apply the changes of pPivot ==> pV1
	66	+** to pV2.
	67	+**
	68	+** The return is 0 upon complete success. If any input file is binary,
	69	+** -1 is returned and pOut is unmodified. If there are merge
	70	+** conflicts, the merge proceeds as best as it can and the number
	71	+** of conflicts is returns
	72	+*/
	73	+int blob_merge(Blob pPivot, Blob pV1, Blob pV2, Blob pOut){
	74	+ int aC1; / Changes from pPivot to pV1 */
	75	+ int aC2; / Changes from pPivot to pV2 */
	76	+ int i1, i2;
	77	+ int op1, op2;
	78	+ int limit1, limit2;
	79	+ int inConflict = 0;
	80	+ int nConflict = 0;
	81	+ static const char zBegin[] = ">>>>>>>> BEGIN MERGE CONFLICT <<<<<<<<\n";
	82	+ static const char zEnd[] = ">>>>>>>>> END MERGE CONFLICT <<<<<<<<<\n";
	83	+
	84	+ aC1 = text_diff(pPivot, pV1, 0, 0);
	85	+ aC2 = text_diff(pPivot, pV2, 0, 0);
	86	+
	87	+ if( aC2==0 \|\| aC2==0 ){
	88	+ free(aC1);
	89	+ free(aC2);
	90	+ return -1;
	91	+ }
	92	+ blob_zero(pOut);
	93	+ blob_rewind(pV1);
	94	+ blob_rewind(pV2);
	95	+ blob_rewind(pPivot);
	96	+
	97	+ for(i1=0; aC1[i1] \|\| aC1[i1+1] \|\| aC1[i1+2]; i1+=3){}
	98	+ limit1 = i1;
	99	+ for(i2=0; aC2[i2] \|\| aC2[i2+1] \|\| aC2[i2+2]; i2+=3){}
	100	+ limit2 = i2;
	101	+
	102	+ DEBUG(
	103	+ for(i1=0; i1<limit1; i1+=3){
	104	+ printf("c1: %4d %4d %4d\n", aC1[i1], aC1[i1+1], aC1[i1+2]);
	105	+ }
	106	+ for(i2=0; i2<limit2; i2+=3){
	107	+ printf("c2: %4d %4d %4d\n", aC2[i2], aC2[i2+1], aC2[i2+2]);
	108	+ }
	109	+ )
	110	+
	111	+ op1 = op2 = UNK;
	112	+ i1 = i2 = 0;
	113	+ while( i1<limit1 && aC1[i1]==0 ){ i1++; }
	114	+ while( i2<limit2 && aC2[i2]==0 ){ i2++; }
	115	+
	116	+ while(1){
	117	+ if( op1==UNK ){
	118	+ if( i1>=limit1 ){
	119	+ op1 = END;
	120	+ DEBUG( printf("get op1=END\n"); )
	121	+ }else{
	122	+ op1 = i1 % 3;
	123	+ aC1[i1]--;
	124	+ DEBUG( printf("get op1=%d from %d (%d->%d)\n",
	125	+ op1,i1,aC1[i1]+1,aC1[i1]); )
	126	+ while( i1<limit1 && aC1[i1]==0 ){ i1++; }
	127	+ }
	128	+ }
	129	+ if( op2==UNK ){
	130	+ if( i2>=limit2 ){
	131	+ op2 = END;
	132	+ DEBUG( printf("get op2=END\n"); )
	133	+ }else{
	134	+ op2 = i2 % 3;
	135	+ aC2[i2]--;
	136	+ DEBUG( printf("get op2=%d from %d (%d->%d)\n",
	137	+ op2,i2,aC2[i2]+1,aC2[i2]); )
	138	+ while( i2<limit2 && aC2[i2]==0 ){ i2++; }
	139	+ }
	140	+ }
	141	+ DEBUG( printf("op1=%d op2=%d\n", op1, op2); )
	142	+ if( op1==END ){
	143	+ if( op2==INS ){
	144	+ blob_copy_lines(pOut, pV2, 1);
	145	+ op2 = UNK;
	146	+ }else{
	147	+ break;
	148	+ }
	149	+ }else if( op2==END ){
	150	+ if( op1==INS ){
	151	+ blob_copy_lines(pOut, pV1, 1);
	152	+ op1 = UNK;
	153	+ }else{
	154	+ break;
	155	+ }
	156	+ }else if( op1==INS && op2==INS ){
	157	+ if( !inConflict && sameLine(pV1, pV2) ){
	158	+ blob_copy_lines(pOut, pV2, 1);
	159	+ blob_copy_lines(0, pV1, 0);
	160	+ op1 = UNK;
	161	+ op2 = UNK;
	162	+ }else{
	163	+ if( !inConflict ){
	164	+ inConflict = 1;
	165	+ nConflict++;
	166	+ blob_appendf(pOut, zBegin);
	167	+ }
	168	+ blob_copy_lines(pOut, pV1, 1);
	169	+ op1 = UNK;
	170	+ }
	171	+ }else if( op1==INS ){
	172	+ blob_copy_lines(pOut, pV1, 1);
	173	+ op1 = UNK;
	174	+ }else if( op2==INS ){
	175	+ blob_copy_lines(pOut, pV2, 1);
	176	+ op2 = UNK;
	177	+ }else{
	178	+ if( inConflict ){
	179	+ inConflict = 0;
	180	+ blob_appendf(pOut, zEnd);
	181	+ }
	182	+ if( op1==DEL \|\| op2==DEL ){
	183	+ blob_copy_lines(0, pPivot, 1);
	184	+ if( op2==CPY ){
	185	+ blob_copy_lines(0, pV2, 1);
	186	+ }
	187	+ if( op1==CPY ){
	188	+ blob_copy_lines(0, pV1, 1);
	189	+ }
	190	+ }else{
	191	+ assert( op1==CPY && op2==CPY );
	192	+ blob_copy_lines(pOut, pPivot, 1);
	193	+ blob_copy_lines(0, pV1, 1);
	194	+ blob_copy_lines(0, pV2, 1);
	195	+ }
	196	+ op1 = UNK;
	197	+ op2 = UNK;
	198	+ }
	199	+ }
	200	+ if( inConflict ){
	201	+ blob_appendf(pOut, zEnd);
	202	+ }
	203	+
	204	+ free(aC1);
	205	+ free(aC2);
	206	+ return nConflict;
644	207	}
645	208
646	209	/*
647	210	** COMMAND: test-3-way-merge
648	211	**
		@@ -675,43 +238,5 @@
675	238	blob_reset(&pivot);
676	239	blob_reset(&v1);
677	240	blob_reset(&v2);
678	241	blob_reset(&merged);
679	242	}
680		-
681		-
682		-/*
683		-** COMMAND: test-mapping
684		-*/
685		-void mapping_test(void){
686		- int i;
687		- const char za, zb;
688		- Blob a, b;
689		- Mapping map;
690		- if( g.argc!=4 ){
691		- usage("FILE1 FILE2");
692		- }
693		- blob_read_from_file(&a, g.argv[2]);
694		- blob_read_from_file(&b, g.argv[3]);
695		- memset(&map, 0, sizeof(map));
696		- MappingInit(blob_buffer(&a), blob_size(&a),
697		- blob_buffer(&b), blob_size(&b),
698		- &map);
699		- DEBUG1(
700		- printf("map-final:\n");
701		- MappingPrint(&map);
702		- )
703		- za = blob_buffer(&a);
704		- zb = blob_buffer(&b);
705		- for(i=0; i<map.nUsed; i++){
706		- printf("======= %6d..%-6d %6d..%-6d %d\n",
707		- map.aMap[i].fromFirst, map.aMap[i].fromLast,
708		- map.aMap[i].toFirst, map.aMap[i].toLast,
709		- map.aMap[i].nExtra);
710		- printf("%.*s\n", map.aMap[i].fromLast - map.aMap[i].fromFirst + 1,
711		- &za[map.aMap[i].fromFirst]);
712		- if( map.aMap[i].nExtra ){
713		- printf("======= EXTRA:\n");
714		- printf("%.*s\n", map.aMap[i].nExtra, &zb[map.aMap[i].toLast+1]);
715		- }
716		- }
717		-}
718	243

	--- src/merge3.c
	+++ src/merge3.c
	@@ -24,625 +24,188 @@
24	** This module implements a 3-way merge
25	*/
26	#include "config.h"
27	#include "merge3.h"
28
29	#if 1
30	# define DEBUG1(X) X
31	#else
32	# define DEBUG1(X)
33	#endif
34	#if 0
35	#define DEBUG2(X) X
36	/*
37	** For debugging:
38	** Print 16 characters of text from zBuf
39	*/
40	static const char print16(const char z){
41	int i;
42	static char zBuf[20];
43	for(i=0; i<16; i++){
44	if( z[i]>=0x20 && z[i]<=0x7e ){
45	zBuf[i] = z[i];
46	}else{
47	zBuf[i] = '.';
48	}
49	}
50	zBuf[i] = 0;
51	return zBuf;
52	}
53	#else
54	# define DEBUG2(X)
55	#endif
56
57	/*
58	** Must be a 32-bit integer
59	*/
60	typedef unsigned int u32;
61
62	/*
63	** Must be a 16-bit value
64	*/
65	typedef unsigned short int u16;
66
67	/*
68	** The width of a hash window in bytes. The algorithm only works if this
69	** is a power of 2.
70	*/
71	#define NHASH 16
72
73	/*
74	** The current state of the rolling hash.
75	**
76	** z[] holds the values that have been hashed. z[] is a circular buffer.
77	** z[i] is the first entry and z[(i+NHASH-1)%NHASH] is the last entry of
78	** the window.
79	**
80	** Hash.a is the sum of all elements of hash.z[]. Hash.b is a weighted
81	** sum. Hash.b is z[i]NHASH + z[i+1](NHASH-1) + ... + z[i+NHASH-1]*1.
82	** (Each index for z[] should be module NHASH, of course. The %NHASH operator
83	** is omitted in the prior expression for brevity.)
84	*/
85	typedef struct hash hash;
86	struct hash {
87	u16 a, b; /* Hash values */
88	u16 i; /* Start of the hash window */
89	char z[NHASH]; /* The values that have been hashed */
90	};
91
92	/*
93	** Initialize the rolling hash using the first NHASH characters of z[]
94	*/
95	static void hash_init(hash pHash, const char z){
96	u16 a, b, i;
97	a = b = 0;
98	for(i=0; i<NHASH; i++){
99	a += z[i];
100	b += (NHASH-i)*z[i];
101	pHash->z[i] = z[i];
102	}
103	pHash->a = a & 0xffff;
104	pHash->b = b & 0xffff;
105	pHash->i = 0;
106	}
107
108	/*
109	** Advance the rolling hash by a single character "c"
110	*/
111	static void hash_next(hash *pHash, int c){
112	u16 old = pHash->z[pHash->i];
113	pHash->z[pHash->i] = c;
114	pHash->i = (pHash->i+1)&(NHASH-1);
115	pHash->a = pHash->a - old + c;
116	pHash->b = pHash->b - NHASH*old + pHash->a;
117	}
118
119	/*
120	** Return a 32-bit hash value
121	*/
122	static u32 hash_32bit(hash *pHash){
123	return (pHash->a & 0xffff) \| (((u32)(pHash->b & 0xffff))<<16);
124	}
125
126	/*
127	** Maximum number of landmarks to set in the source file.
128	*/
129	#define MX_LANDMARK (1024*128)
130
131	/*
132	** A mapping structure is used to record which parts of two
133	** files contain the same text. There are zero or more mapping
134	** entries in a mapping. Each entry maps a segment of text in
135	** the source file into a segment of the output file.
136	**
137	** fromFirst...fromLast -> toFirst...toLast
138	**
139	** Extra text might be inserted in the output file after a
140	** mapping. The nExtra parameter records the number of bytes
141	** of extra text to insert.
142	*/
143	typedef struct Mapping Mapping;
144	struct Mapping {
145	int nMap;
146	int nUsed;
147	struct Mapping_entry {
148	int fromFirst, fromLast;
149	int toFirst, toLast;
150	int nExtra;
151	} *aMap;
152	};
153
154	/*
155	** Free malloced memory associated with a mapping.
156	*/
157	static void MappingClear(Mapping *p){
158	free(p->aMap);
159	memset(p, 0, sizeof(*p));
160	}
161
162	/*
163	** Add an entry to a mapping structure. The mapping is:
164	**
165	** a...b -> c...d
166	**
167	** The nExtra parameter is initially zero. It will be changed
168	** later if necessary.
169	*/
170	static void MappingInsert(Mapping *p, int a, int b, int c, int d){
171	struct Mapping_entry *pEntry;
172	int i;
173	for(i=0, pEntry=p->aMap; i<p->nUsed; i++, pEntry++){
174	if( pEntry->fromFirst==a && pEntry->fromLast==b && pEntry->toFirst==c ){
175	DEBUG2( printf("DUPLICATE: %6d..%-6d %6d..%d\n", a, b, c, d); )
176	return;
177	}
178	}
179	if( p->nUsed>=p->nMap ){
180	p->nMap = p->nMap * 2 + 10;
181	p->aMap = realloc(p->aMap, p->nMap*sizeof(p->aMap[0]) );
182	if( p->aMap==0 ) exit(1);
183	}
184	pEntry = &p->aMap[p->nUsed];
185	pEntry->fromFirst = a;
186	pEntry->fromLast = b;
187	pEntry->toFirst = c;
188	pEntry->toLast = d;
189	pEntry->nExtra = 0;
190	p->nUsed++;
191	}
192
193	DEBUG1(
194	/*
195	** For debugging purposes:
196	** Print the content of a mapping.
197	*/
198	static void MappingPrint(Mapping *pMap){
199	int i;
200	struct Mapping_entry *p;
201	for(i=0, p=pMap->aMap; i<pMap->nUsed; i++, p++){
202	printf("%6d..%-6d %6d..%-6d %d\n",
203	p->fromFirst, p->fromLast,
204	p->toFirst, p->toLast, p->nExtra);
205	}
206	}
207	)
208
209	/*
210	** Remove deleted entries from a mapping. Deleted enties have
211	** an fromFirst of less than 0.
212	*/
213	static void MappingPurgeDeletedEntries(Mapping *p){
214	int i, j;
215	for(i=j=0; i<p->nUsed; i++){
216	if( p->aMap[i].fromFirst<0 ) continue;
217	if( j<i ){
218	p->aMap[j] = p->aMap[i];
219	}
220	j++;
221	}
222	p->nUsed = j;
223	}
224
225	/*
226	** Comparisons functions used for sorting elements of a Mapping
227	*/
228	static int intAbs(int x){ return x<0 ? -x : x; }
229	static int compareSize(const void a, const void b){
230	const struct Mapping_entry A = (const struct Mapping_entry)a;
231	const struct Mapping_entry B = (const struct Mapping_entry)b;
232	int rc;
233	rc = (B->fromLast - B->fromFirst) - (A->fromLast - A->fromFirst);
234	if( rc==0 ){
235	rc = intAbs(A->toFirst - A->fromFirst) -
236	intAbs(B->toFirst - B->fromFirst);
237	}
238	return rc;
239	}
240	static int compareFrom(const void a, const void b){
241	const struct Mapping_entry A = (const struct Mapping_entry)a;
242	const struct Mapping_entry B = (const struct Mapping_entry)b;
243	return A->fromFirst - B->fromFirst;
244	}
245	static int compareTo(const void a, const void b){
246	const struct Mapping_entry A = (const struct Mapping_entry)a;
247	const struct Mapping_entry B = (const struct Mapping_entry)b;
248	return A->toFirst - B->toFirst;
249	}
250
251	/*
252	** Routines for sorting the entries of a mapping. SortSize sorts
253	** the entries in order of decreasing size (largest first.)
254	** SortFrom and SortTo sort the entries in order of increasing
255	** fromFirst and toFirst.
256	*/
257	static void MappingSortSize(Mapping *p){
258	qsort(p->aMap, p->nUsed, sizeof(p->aMap[0]), compareSize);
259	}
260	static void MappingSortFrom(Mapping *p){
261	qsort(p->aMap, p->nUsed, sizeof(p->aMap[0]), compareFrom);
262	}
263	static void MappingSortTo(Mapping *p){
264	qsort(p->aMap, p->nUsed, sizeof(p->aMap[0]), compareTo);
265	}
266
267	/*
268	** Initialize pMap to contain a set of similarities between two files.
269	*/
270	static void MappingInit(
271	const char zSrc, / The source or pattern file */
272	int lenSrc, /* Length of the source file */
273	const char zOut, / The target file */
274	int lenOut, /* Length of the target file */
275	Mapping pMap / Write the map of similaries here */
276	){
277	int i, j, base, prefix;
278	int changes;
279	hash h;
280	int *collide;
281	int origLenOut = lenOut;
282	struct Mapping_entry *aMap;
283	int landmark[MX_LANDMARK];
284
285	/*
286	** Initialize the map
287	*/
288	memset(pMap, 0, sizeof(*pMap));
289
290	/*
291	** Find common prefix and suffix
292	*/
293	if( lenSrc<=NHASH \|\| lenOut<=NHASH ){
294	MappingInsert(pMap, 0, 0, 0, 0);
295	goto add_nextra;
296	}
297	for(i=0; i<lenSrc && i<lenOut && zSrc[i]==zOut[i]; i++){}
298	if( i>=NHASH ){
299	MappingInsert(pMap, 0, i-1, 0, i-1);
300	lenSrc -= i;
301	zSrc += i;
302	lenOut -= i;
303	zOut += i;
304	if( lenSrc<=0 \|\| lenOut<=0 ) goto add_nextra;
305	prefix = i;
306	}else{
307	prefix = 0;
308	}
309	for(i=1; i<=lenSrc && i<=lenOut && zSrc[lenSrc-i]==zOut[lenOut-i]; i++){}
310	if( i>NHASH ){
311	MappingInsert(pMap, prefix+lenSrc-i+1, prefix+lenSrc-1,
312	prefix+lenOut-i+1, prefix+lenOut-1);
313	lenSrc -= i;
314	lenOut -= i;
315	}
316
317	/* If the source file is very small, it means that we have no
318	** chance of ever finding any matches. We can leave early.
319	*/
320	if( lenSrc<=NHASH ) goto add_nextra;
321
322	/* Compute the hash table used to locate matching sections in the
323	** source file.
324	*/
325	collide = malloc( lenSrc*sizeof(int)/NHASH );
326	if( collide==0 ) return;
327	memset(landmark, -1, sizeof(landmark));
328	memset(collide, -1, lenSrc*sizeof(int)/NHASH );
329	for(i=0; i<lenSrc-NHASH; i+=NHASH){
330	int hv;
331	hash_init(&h, &zSrc[i]);
332	hv = hash_32bit(&h) & (MX_LANDMARK-1);
333	collide[i/NHASH] = landmark[hv];
334	landmark[hv] = i/NHASH;
335	}
336
337	/* Begin scanning the target file and generating mappings. In this
338	** step, we generate as many mapping entries is we can. Many of these
339	** entries might overlap. The overlapping entries are removed by
340	** the loop the follows.
341	*/
342	base = 0; /* We have already checked everything before zOut[base] */
343	while( base<lenOut-NHASH ){
344	int iSrc, iBlock, nextBase, nextBaseI;
345	hash_init(&h, &zOut[base]);
346	i = 0; /* Trying to match a landmark against zOut[base+i] */
347	nextBaseI = NHASH;
348	nextBase = base;
349	while(1){
350	int hv;
351
352	hv = hash_32bit(&h) & (MX_LANDMARK-1);
353	DEBUG2( printf("LOOKING: %d+%d+%d=%d [%s]\n",
354	prefix,base,i,prefix+base+i, print16(&zOut[base+i])); )
355	iBlock = landmark[hv];
356	while( iBlock>=0 ){
357	/*
358	** The hash window has identified a potential match against
359	** landmark block iBlock. But we need to investigate further.
360	**
361	** Look for a region in zOut that matches zSrc. Anchor the search
362	** at zSrc[iSrc] and zOut[base+i].
363	**
364	** Set cnt equal to the length of the match and set ofst so that
365	** zSrc[ofst] is the first element of the match.
366	*/
367	int cnt, ofstSrc;
368	int j, k, x, y;
369
370	/* Beginning at iSrc, match forwards as far as we can. j counts
371	** the number of characters that match */
372	iSrc = iBlock*NHASH;
373	for(j=0, x=iSrc, y=base+i; x<lenSrc && y<lenOut; j++, x++, y++){
374	if( zSrc[x]!=zOut[y] ) break;
375	}
376	j--;
377
378	/* Beginning at iSrc-1, match backwards as far as we can. k counts
379	** the number of characters that match */
380	for(k=1; k<iSrc && k<=base+i; k++){
381	if( zSrc[iSrc-k]!=zOut[base+i-k] ) break;
382	}
383	k--;
384
385	/* Compute the offset and size of the matching region zSrc */
386	ofstSrc = iSrc-k;
387	cnt = j+k+1;
388	DEBUG2( printf("MATCH %d bytes at SRC[%d..%d]: [%s]\n",
389	cnt, ofstSrc, ofstSrc+cnt-1, print16(&zSrc[ofstSrc])); )
390	if( cnt>NHASH ){
391	int ofstOut = base+i-k;
392	DEBUG2( printf("COPY %6d..%-6d %6d..%d\n",
393	prefix+ofstSrc, prefix+ofstSrc+cnt-1,
394	prefix+ofstOut, prefix+ofstOut+cnt-1); )
395	MappingInsert(pMap,
396	prefix+ofstSrc, prefix+ofstSrc+cnt-1,
397	prefix+ofstOut, prefix+ofstOut+cnt-1);
398	if( nextBase < ofstOut+cnt-1 ){
399	nextBase = ofstOut+cnt-1;
400	nextBaseI = i+NHASH;
401	}
402	}
403
404	/* Check the next matching block */
405	iBlock = collide[iBlock];
406	}
407
408	/* If we found a match, then jump out to the outer loop and begin
409	** a new cycle.
410	*/
411	if( nextBase>base && i>=nextBaseI ){
412	base = nextBase;
413	break;
414	}
415
416	/* Advance the hash by one character. Keep looking for a match */
417	if( base+i+NHASH>=lenOut ){
418	base = lenOut;
419	break;
420	}
421	hash_next(&h, zOut[base+i+NHASH]);
422	i++;
423	}
424	}
425	free(collide);
426	#if 0
427	DEBUG1(
428	printf("after creation:\n");
429	MappingPrint(pMap);
430	)
431	#endif
432
433	/* In this step, we will remove overlapping entries from the mapping.
434	**
435	** We use a greedy algorithm. Select the largest mapping first and
436	** remove all overlapping mappings. Then take the next largest
437	** mapping and remove others that overlap with it. Keep going until
438	** all mappings have been processed.
439	*/
440	MappingSortSize(pMap);
441	do{
442	changes = 0;
443	for(i=0; i<pMap->nUsed; i++){
444	int sortNeeded = 0;
445	int purgeNeeded = 0;
446	struct Mapping_entry *pA;
447	pA = &pMap->aMap[i];
448	for(j=i+1; j<pMap->nUsed; j++){
449	int diff;
450	struct Mapping_entry *pB;
451	pB = &pMap->aMap[j];
452	if( pB->fromLast<pA->fromFirst \|\| pB->fromFirst>pA->fromLast ){
453	/* No overlap. Do nothing */
454	}else if( pB->fromFirst>=pA->fromFirst && pB->fromLast<=pA->fromLast ){
455	/* B is contained entirely within A. Drop B */
456	pB->fromFirst = -1;
457	purgeNeeded = 1;
458	continue;
459	}else if( pB->fromFirst<pA->fromFirst ){
460	/* The tail B overlaps the head of A */
461	assert( pB->fromLast>=pA->fromFirst && pB->fromLast<=pA->fromLast );
462	diff = pB->fromLast + 1 - pA->fromFirst;
463	pB->fromLast -= diff;
464	pB->toLast -= diff;
465	sortNeeded = 1;
466	}else{
467	/* The head of B overlaps the tail of A */
468	assert( pB->fromFirst<=pA->fromLast && pB->fromLast>pA->fromLast );
469	diff = pA->fromLast + 1 - pB->fromFirst;
470	pB->fromFirst += diff;
471	pB->toFirst += diff;
472	sortNeeded = 1;
473	}
474	if( pB->toLast<pA->toFirst \|\| pB->toFirst>pA->toLast ){
475	/* No overlap. Do nothing */
476	}else if( pB->toFirst>=pA->toFirst && pB->toLast<=pA->toLast ){
477	/* B is contained entirely within A. Drop B */
478	pB->fromFirst = -1;
479	purgeNeeded = 1;
480	}else if( pB->toFirst<pA->toFirst ){
481	/* The tail of B overlaps the head of A */
482	assert( pB->toLast>=pA->toFirst && pB->toLast<=pA->toLast );
483	diff = pB->toLast + 1 - pA->toFirst;
484	pB->fromLast -= diff;
485	pB->toLast -= diff;
486	sortNeeded = 1;
487	}else{
488	/* The head of B overlaps the tail of A */
489	assert( pB->toFirst<=pA->toLast && pB->toLast>pA->toLast );
490	diff = pA->toLast + 1 - pB->toFirst;
491	pB->fromFirst += diff;
492	pB->toFirst += diff;
493	sortNeeded = 1;
494	}
495	}
496	if( purgeNeeded ){
497	MappingPurgeDeletedEntries(pMap);
498	/* changes++; */
499	}
500	if( sortNeeded && i<pMap->nUsed-2 ){
501	MappingSortSize(pMap);
502	/* changes++; */
503	}
504	}
505	}while( changes );
506
507	/* Final step: Arrange the mapping entires so that they are in the
508	** order of the output file. Then fill in the nExtra values.
509	*/
510	add_nextra:
511	MappingSortTo(pMap);
512	aMap = pMap->aMap;
513	for(i=0; i<pMap->nUsed-1; i++){
514	aMap[i].nExtra = aMap[i+1].toFirst - aMap[i].toLast - 1;
515	}
516	if( pMap->nUsed>0 && origLenOut > aMap[i].toLast+1 ){
517	aMap[i].nExtra = origLenOut - aMap[i].toLast - 1;
518	}
519	}
520
521	/*
522	** Translate an index into a file using a mapping.
523	**
524	** The mapping "p" shows how blocks in the input file map into blocks
525	** of the output file. The index iFrom is an index into the input file.
526	** This routine returns the index into the output file of the corresponding
527	** character.
528	**
529	** If pInserted!=0 and iFrom points to the last character before a
530	** insert in the output file, then the return value is adjusted forward
531	** so that it points to the end of the insertion and the number of
532	** bytes inserted is written into *pInserted. If pInserted==0 then
533	** iFrom always maps directly in the corresponding output file
534	** index regardless of whether or not it points to the last character
535	** before an insertion.
536	*/
537	static int MappingTranslate(Mapping p, int iFrom, int pInserted){
538	int i;
539	for(i=0; i<p->nUsed; i++){
540	if( iFrom>p->aMap[i].fromLast ) continue;
541	if( iFrom<=p->aMap[i].fromFirst ){
542	return p->aMap[i].toFirst;
543	}
544	if( pInserted && iFrom==p->aMap[i].fromLast ){
545	int n = p->aMap[i].nExtra;
546	*pInserted = n;
547	return p->aMap[i].toLast + n;
548	}else{
549	return p->aMap[i].toFirst + iFrom - p->aMap[i].fromFirst;
550	}
551	}
552	i--;
553	return p->aMap[i].toLast + p->aMap[i].nExtra;
554	}
555
556	/*
557	** Do a three-way merge. Initialize pOut to contain the result.
558	*/
559	void blob_merge(Blob pPivot, Blob pV1, Blob pV2, Blob pOut){
560	Mapping map1, map2, map3;
561	int i, j;
562	const char zV1, zV2;
563	blob_zero(pOut);
564	MappingInit(
565	blob_buffer(pPivot), blob_size(pPivot),
566	blob_buffer(pV1), blob_size(pV1),
567	&map1);
568	MappingSortFrom(&map1);
569	DEBUG1(
570	printf("map1-final:\n");
571	MappingPrint(&map1);
572	)
573	MappingInit(
574	blob_buffer(pPivot), blob_size(pPivot),
575	blob_buffer(pV2), blob_size(pV2),
576	&map2);
577	DEBUG1(
578	printf("map2-final:\n");
579	MappingPrint(&map2);
580	)
581	MappingInit(
582	blob_buffer(pV1), blob_size(pV1),
583	blob_buffer(pV2), blob_size(pV2),
584	&map3);
585	DEBUG1(
586	printf("map3-final:\n");
587	MappingPrint(&map3);
588	)
589	zV1 = blob_buffer(pV1);
590	zV2 = blob_buffer(pV2);
591	if( map1.aMap[0].toFirst>0 ){
592	blob_append(pOut, zV1, map1.aMap[0].toFirst);
593	DEBUG1( printf("INSERT %d bytes from V1[0..%d]\n",
594	map1.aMap[0].toFirst, map1.aMap[0].toFirst-1); )
595	}
596	if( map2.aMap[0].toFirst>0 ){
597	blob_append(pOut, zV2, map2.aMap[0].toFirst);
598	DEBUG1( printf("INSERT %d bytes from V2[0..%d]\n",
599	map2.aMap[0].toFirst, map2.aMap[0].toFirst-1); )
600	}
601	for(i=j=0; i<map2.nUsed; i++){
602	int iFirst, iLast, nInsert, iTail;
603	struct Mapping_entry *p = &map2.aMap[i];
604	while( j<map3.nUsed-1 && map3.aMap[j+1].toFirst>p->toFirst ){ j++; }
605	DEBUG1(
606	printf("map2: %6d..%-6d %6d..%-6d %d\n",
607	p->fromFirst, p->fromLast, p->toFirst, p->toLast, p->nExtra);
608	printf("map3: j=%-6d %6d..%-6d\n", j, map3.aMap[j].toFirst,
609	map3.aMap[j].toLast);
610	);
611	iTail = p->toLast + p->nExtra;
612	if( i<map2.nUsed-1 &&
613	map3.aMap[j].toFirst<=p->toFirst && map3.aMap[j].toLast>=iTail ){
614	blob_append(pOut, &zV2[p->toFirst], iTail - p->toFirst + 1);
615	DEBUG1(
616	printf("COPY %d bytes from V2[%d..%d]\n", iTail - p->toFirst+1,
617	p->toFirst, iTail);
618	)
619	continue;
620	}
621	iFirst = MappingTranslate(&map1, p->fromFirst, 0);
622	iLast = MappingTranslate(&map1, p->fromLast, &nInsert);
623	blob_append(pOut, &zV1[iFirst], iLast - iFirst + 1);
624	DEBUG1(
625	printf("COPY %d bytes from V1[%d..%d]\n", iLast-iFirst+1, iFirst, iLast);
626	)
627	if( p->nExtra>0 ){
628	if( p->nExtra==nInsert &&
629	memcmp(&zV2[p->toLast+1], &zV1[iLast-nInsert+1], nInsert)==0 ){
630	/* Omit a duplicate insert */
631	DEBUG1( printf("OMIT duplicate insert\n"); )
632	}else{
633	blob_append(pOut, &zV2[p->toLast+1], p->nExtra);
634	DEBUG1(
635	printf("INSERT %d bytes from V2[%d..%d]\n",
636	p->nExtra, p->toLast+1, p->toLast+p->nExtra);
637	)
638	}
639	}
640	}
641	MappingClear(&map1);
642	MappingClear(&map2);
643	MappingClear(&map3);


























































644	}
645
646	/*
647	** COMMAND: test-3-way-merge
648	**
	@@ -675,43 +238,5 @@
675	blob_reset(&pivot);
676	blob_reset(&v1);
677	blob_reset(&v2);
678	blob_reset(&merged);
679	}
680
681
682	/*
683	** COMMAND: test-mapping
684	*/
685	void mapping_test(void){
686	int i;
687	const char za, zb;
688	Blob a, b;
689	Mapping map;
690	if( g.argc!=4 ){
691	usage("FILE1 FILE2");
692	}
693	blob_read_from_file(&a, g.argv[2]);
694	blob_read_from_file(&b, g.argv[3]);
695	memset(&map, 0, sizeof(map));
696	MappingInit(blob_buffer(&a), blob_size(&a),
697	blob_buffer(&b), blob_size(&b),
698	&map);
699	DEBUG1(
700	printf("map-final:\n");
701	MappingPrint(&map);
702	)
703	za = blob_buffer(&a);
704	zb = blob_buffer(&b);
705	for(i=0; i<map.nUsed; i++){
706	printf("======= %6d..%-6d %6d..%-6d %d\n",
707	map.aMap[i].fromFirst, map.aMap[i].fromLast,
708	map.aMap[i].toFirst, map.aMap[i].toLast,
709	map.aMap[i].nExtra);
710	printf("%.*s\n", map.aMap[i].fromLast - map.aMap[i].fromFirst + 1,
711	&za[map.aMap[i].fromFirst]);
712	if( map.aMap[i].nExtra ){
713	printf("======= EXTRA:\n");
714	printf("%.*s\n", map.aMap[i].nExtra, &zb[map.aMap[i].toLast+1]);
715	}
716	}
717	}
718

	--- src/merge3.c
	+++ src/merge3.c
	@@ -24,625 +24,188 @@
24	** This module implements a 3-way merge
25	*/
26	#include "config.h"
27	#include "merge3.h"
28
29	#if 0
30	#define DEBUG(X) X
31	#else
32	#define DEBUG(X)
33	#endif
34
35	/*
36	** Opcodes
37	*/
38	#define CPY 0
39	#define DEL 1
40	#define INS 2
41	#define END 3
42	#define UNK 4
43
44	/*
45	** Compare a single line of text from pV1 and pV2. If the lines
46	** are the same, return true. Return false if they are different.
47	**
48	** The cursor on both pV1 and pV2 is unchanged.
49	*/
50	static int sameLine(Blob pV1, Blob pV2){
51	char z1, z2;
52	int i;
53
54	z1 = blob_buffer(pV1);
55	z2 = blob_buffer(pV2);
56	for(i=0; z1[i]!='\n' && z1[i]==z2[i]; i++){}
57	return z2[i]=='\n' \|\| (z2[i]=='\r' && z2[i+1]=='\n')
58	\|\| (z1[i]=='\r' && z2[i]=='\n' && z1[i+1]=='\n');















































































































































































































































































































































































































































































































59	}
60
61	/*
62	** Do a three-way merge. Initialize pOut to contain the result.
63	**
64	** The merge is an edit against pV2. Both pV1 and pV2 have a
65	** common origin at pPivot. Apply the changes of pPivot ==> pV1
66	** to pV2.
67	**
68	** The return is 0 upon complete success. If any input file is binary,
69	** -1 is returned and pOut is unmodified. If there are merge
70	** conflicts, the merge proceeds as best as it can and the number
71	** of conflicts is returns
72	*/
73	int blob_merge(Blob pPivot, Blob pV1, Blob pV2, Blob pOut){
74	int aC1; / Changes from pPivot to pV1 */
75	int aC2; / Changes from pPivot to pV2 */
76	int i1, i2;
77	int op1, op2;
78	int limit1, limit2;
79	int inConflict = 0;
80	int nConflict = 0;
81	static const char zBegin[] = ">>>>>>>> BEGIN MERGE CONFLICT <<<<<<<<\n";
82	static const char zEnd[] = ">>>>>>>>> END MERGE CONFLICT <<<<<<<<<\n";
83
84	aC1 = text_diff(pPivot, pV1, 0, 0);
85	aC2 = text_diff(pPivot, pV2, 0, 0);
86
87	if( aC2==0 \|\| aC2==0 ){
88	free(aC1);
89	free(aC2);
90	return -1;
91	}
92	blob_zero(pOut);
93	blob_rewind(pV1);
94	blob_rewind(pV2);
95	blob_rewind(pPivot);
96
97	for(i1=0; aC1[i1] \|\| aC1[i1+1] \|\| aC1[i1+2]; i1+=3){}
98	limit1 = i1;
99	for(i2=0; aC2[i2] \|\| aC2[i2+1] \|\| aC2[i2+2]; i2+=3){}
100	limit2 = i2;
101
102	DEBUG(
103	for(i1=0; i1<limit1; i1+=3){
104	printf("c1: %4d %4d %4d\n", aC1[i1], aC1[i1+1], aC1[i1+2]);
105	}
106	for(i2=0; i2<limit2; i2+=3){
107	printf("c2: %4d %4d %4d\n", aC2[i2], aC2[i2+1], aC2[i2+2]);
108	}
109	)
110
111	op1 = op2 = UNK;
112	i1 = i2 = 0;
113	while( i1<limit1 && aC1[i1]==0 ){ i1++; }
114	while( i2<limit2 && aC2[i2]==0 ){ i2++; }
115
116	while(1){
117	if( op1==UNK ){
118	if( i1>=limit1 ){
119	op1 = END;
120	DEBUG( printf("get op1=END\n"); )
121	}else{
122	op1 = i1 % 3;
123	aC1[i1]--;
124	DEBUG( printf("get op1=%d from %d (%d->%d)\n",
125	op1,i1,aC1[i1]+1,aC1[i1]); )
126	while( i1<limit1 && aC1[i1]==0 ){ i1++; }
127	}
128	}
129	if( op2==UNK ){
130	if( i2>=limit2 ){
131	op2 = END;
132	DEBUG( printf("get op2=END\n"); )
133	}else{
134	op2 = i2 % 3;
135	aC2[i2]--;
136	DEBUG( printf("get op2=%d from %d (%d->%d)\n",
137	op2,i2,aC2[i2]+1,aC2[i2]); )
138	while( i2<limit2 && aC2[i2]==0 ){ i2++; }
139	}
140	}
141	DEBUG( printf("op1=%d op2=%d\n", op1, op2); )
142	if( op1==END ){
143	if( op2==INS ){
144	blob_copy_lines(pOut, pV2, 1);
145	op2 = UNK;
146	}else{
147	break;
148	}
149	}else if( op2==END ){
150	if( op1==INS ){
151	blob_copy_lines(pOut, pV1, 1);
152	op1 = UNK;
153	}else{
154	break;
155	}
156	}else if( op1==INS && op2==INS ){
157	if( !inConflict && sameLine(pV1, pV2) ){
158	blob_copy_lines(pOut, pV2, 1);
159	blob_copy_lines(0, pV1, 0);
160	op1 = UNK;
161	op2 = UNK;
162	}else{
163	if( !inConflict ){
164	inConflict = 1;
165	nConflict++;
166	blob_appendf(pOut, zBegin);
167	}
168	blob_copy_lines(pOut, pV1, 1);
169	op1 = UNK;
170	}
171	}else if( op1==INS ){
172	blob_copy_lines(pOut, pV1, 1);
173	op1 = UNK;
174	}else if( op2==INS ){
175	blob_copy_lines(pOut, pV2, 1);
176	op2 = UNK;
177	}else{
178	if( inConflict ){
179	inConflict = 0;
180	blob_appendf(pOut, zEnd);
181	}
182	if( op1==DEL \|\| op2==DEL ){
183	blob_copy_lines(0, pPivot, 1);
184	if( op2==CPY ){
185	blob_copy_lines(0, pV2, 1);
186	}
187	if( op1==CPY ){
188	blob_copy_lines(0, pV1, 1);
189	}
190	}else{
191	assert( op1==CPY && op2==CPY );
192	blob_copy_lines(pOut, pPivot, 1);
193	blob_copy_lines(0, pV1, 1);
194	blob_copy_lines(0, pV2, 1);
195	}
196	op1 = UNK;
197	op2 = UNK;
198	}
199	}
200	if( inConflict ){
201	blob_appendf(pOut, zEnd);
202	}
203
204	free(aC1);
205	free(aC2);
206	return nConflict;
207	}
208
209	/*
210	** COMMAND: test-3-way-merge
211	**
	@@ -675,43 +238,5 @@
238	blob_reset(&pivot);
239	blob_reset(&v1);
240	blob_reset(&v2);
241	blob_reset(&merged);
242	}






































243

M src/update.c

+11 -2

		--- src/update.c
		+++ src/update.c
		@@ -206,25 +206,34 @@
206	206	free(zFullPath);
207	207	}
208	208	}else if( idt>0 && idv>0 && ridt!=ridv && chnged ){
209	209	/* Merge the changes in the current tree into the target version */
210	210	Blob e, r, t, v;
	211	+ int rc;
211	212	char *zFullPath;
212	213	printf("MERGE %s\n", zName);
213	214	undo_save(zName);
214	215	zFullPath = mprintf("%s/%s", g.zLocalRoot, zName);
215	216	content_get(ridt, &t);
216	217	content_get(ridv, &v);
217	218	blob_zero(&e);
218	219	blob_read_from_file(&e, zFullPath);
219		- blob_merge(&v, &e, &t, &r);
220		- blob_write_to_file(&r, zFullPath);
	220	+ rc = blob_merge(&v, &e, &t, &r);
	221	+ if( rc>=0 ){
	222	+ blob_write_to_file(&r, zFullPath);
	223	+ if( rc>0 ){
	224	+ printf("***** %d merge conflicts in %s\n", rc, zName);
	225	+ }
	226	+ }else{
	227	+ printf("***** Cannot merge binary file %s\n", zName);
	228	+ }
221	229	free(zFullPath);
222	230	blob_reset(&v);
223	231	blob_reset(&e);
224	232	blob_reset(&t);
225	233	blob_reset(&r);
	234	+
226	235	}
227	236	}
228	237	db_finalize(&q);
229	238
230	239	/*
231	240

	--- src/update.c
	+++ src/update.c
	@@ -206,25 +206,34 @@
206	free(zFullPath);
207	}
208	}else if( idt>0 && idv>0 && ridt!=ridv && chnged ){
209	/* Merge the changes in the current tree into the target version */
210	Blob e, r, t, v;

211	char *zFullPath;
212	printf("MERGE %s\n", zName);
213	undo_save(zName);
214	zFullPath = mprintf("%s/%s", g.zLocalRoot, zName);
215	content_get(ridt, &t);
216	content_get(ridv, &v);
217	blob_zero(&e);
218	blob_read_from_file(&e, zFullPath);
219	blob_merge(&v, &e, &t, &r);
220	blob_write_to_file(&r, zFullPath);







221	free(zFullPath);
222	blob_reset(&v);
223	blob_reset(&e);
224	blob_reset(&t);
225	blob_reset(&r);

226	}
227	}
228	db_finalize(&q);
229
230	/*
231

	--- src/update.c
	+++ src/update.c
	@@ -206,25 +206,34 @@
206	free(zFullPath);
207	}
208	}else if( idt>0 && idv>0 && ridt!=ridv && chnged ){
209	/* Merge the changes in the current tree into the target version */
210	Blob e, r, t, v;
211	int rc;
212	char *zFullPath;
213	printf("MERGE %s\n", zName);
214	undo_save(zName);
215	zFullPath = mprintf("%s/%s", g.zLocalRoot, zName);
216	content_get(ridt, &t);
217	content_get(ridv, &v);
218	blob_zero(&e);
219	blob_read_from_file(&e, zFullPath);
220	rc = blob_merge(&v, &e, &t, &r);
221	if( rc>=0 ){
222	blob_write_to_file(&r, zFullPath);
223	if( rc>0 ){
224	printf("***** %d merge conflicts in %s\n", rc, zName);
225	}
226	}else{
227	printf("***** Cannot merge binary file %s\n", zName);
228	}
229	free(zFullPath);
230	blob_reset(&v);
231	blob_reset(&e);
232	blob_reset(&t);
233	blob_reset(&r);
234
235	}
236	}
237	db_finalize(&q);
238
239	/*
240

M test/merge1.test

+15 -34

		--- test/merge1.test
		+++ test/merge1.test
		@@ -77,18 +77,31 @@
77	77	333 - This is a test of the merging algohm - 3333
78	78	444 - If all goes well, we will be pleased - 4444
79	79	555 - we think it well and other stuff too - 5555
80	80	}
81	81	write_file_indented t23 {
82		- 111 - This is line ONE OF the demo program - 1111
	82	+ >>>>>>>> BEGIN MERGE CONFLICT <<<<<<<<
	83	+ 111 - This is line ONE of the demo program - 1111
	84	+ 111 - This is line one OF the demo program - 1111
	85	+ >>>>>>>>> END MERGE CONFLICT <<<<<<<<<
	86	+ 222 - The second line program line in code - 2222
	87	+ 333 - This is a test of the merging algohm - 3333
	88	+ 444 - If all goes well, we will be pleased - 4444
	89	+ 555 - we think it well and other stuff too - 5555
	90	+}
	91	+write_file_indented t32 {
	92	+ >>>>>>>> BEGIN MERGE CONFLICT <<<<<<<<
	93	+ 111 - This is line one OF the demo program - 1111
	94	+ 111 - This is line ONE of the demo program - 1111
	95	+ >>>>>>>>> END MERGE CONFLICT <<<<<<<<<
83	96	222 - The second line program line in code - 2222
84	97	333 - This is a test of the merging algohm - 3333
85	98	444 - If all goes well, we will be pleased - 4444
86	99	555 - we think it well and other stuff too - 5555
87	100	}
88	101	fossil test-3 t1 t3 t2 a32
89		-test merge1-2.1 {[same_file t23 a32]}
	102	+test merge1-2.1 {[same_file t32 a32]}
90	103	fossil test-3 t1 t2 t3 a23
91	104	test merge1-2.2 {[same_file t23 a23]}
92	105
93	106	write_file_indented t1 {
94	107	111 - This is line one of the demo program - 1111
		@@ -222,37 +235,5 @@
222	235	}
223	236	fossil test-3 t1 t3 t2 a32
224	237	test merge1-6.1 {[same_file t32 a32]}
225	238	fossil test-3 t1 t2 t3 a23
226	239	test merge1-6.2 {[same_file t32 a23]}
227		-
228		-# 123456789 123456789 123456789 123456789 123456789 123456789
229		-write_file_indented t1 {
230		- 111 - This is line one of the demo program - 1111
231		- 222 - The second line program line in code - 2222
232		- 333 - This is a test of the merging algohm - 3333
233		- 444 - If all goes well, we will be pleased - 4444
234		- 555 - we think it well and other stuff too - 5555
235		-}
236		-write_file_indented t2 {
237		- 222 - The second line program line in code - 2222
238		- 333 - This is a test of THREE rging algohm - 3333
239		- 444 - If all goes well, we will be pleased - 4444
240		- 111 - This is line one of the demo program - 1111
241		- 555 - we think it well and other stuff too - 5555
242		-}
243		-write_file_indented t3 {
244		- 111 - This is line ONEONE the demo program - 1111
245		- 222 - The second line program line in code - 2222
246		- 333 - This is a test of the merging algohm - 3333
247		- 444 - If all goes well, we will be pleased - 4444
248		- 555 - we think it FIVEFIVE other stuff too - 5555
249		-}
250		-write_file_indented t32 {
251		- 222 - The second line program line in code - 2222
252		- 333 - This is a test of THREE rging algohm - 3333
253		- 444 - If all goes well, we will be pleased - 4444
254		- 111 - This is line ONEONE the demo program - 1111
255		- 555 - we think it FIVEFIVE other stuff too - 5555
256		-}
257		-fossil test-3 t1 t3 t2 a32
258		-test merge1-6.1 {[same_file t32 a32]}
259	240

	--- test/merge1.test
	+++ test/merge1.test
	@@ -77,18 +77,31 @@
77	333 - This is a test of the merging algohm - 3333
78	444 - If all goes well, we will be pleased - 4444
79	555 - we think it well and other stuff too - 5555
80	}
81	write_file_indented t23 {
82	111 - This is line ONE OF the demo program - 1111













83	222 - The second line program line in code - 2222
84	333 - This is a test of the merging algohm - 3333
85	444 - If all goes well, we will be pleased - 4444
86	555 - we think it well and other stuff too - 5555
87	}
88	fossil test-3 t1 t3 t2 a32
89	test merge1-2.1 {[same_file t23 a32]}
90	fossil test-3 t1 t2 t3 a23
91	test merge1-2.2 {[same_file t23 a23]}
92
93	write_file_indented t1 {
94	111 - This is line one of the demo program - 1111
	@@ -222,37 +235,5 @@
222	}
223	fossil test-3 t1 t3 t2 a32
224	test merge1-6.1 {[same_file t32 a32]}
225	fossil test-3 t1 t2 t3 a23
226	test merge1-6.2 {[same_file t32 a23]}
227
228	# 123456789 123456789 123456789 123456789 123456789 123456789
229	write_file_indented t1 {
230	111 - This is line one of the demo program - 1111
231	222 - The second line program line in code - 2222
232	333 - This is a test of the merging algohm - 3333
233	444 - If all goes well, we will be pleased - 4444
234	555 - we think it well and other stuff too - 5555
235	}
236	write_file_indented t2 {
237	222 - The second line program line in code - 2222
238	333 - This is a test of THREE rging algohm - 3333
239	444 - If all goes well, we will be pleased - 4444
240	111 - This is line one of the demo program - 1111
241	555 - we think it well and other stuff too - 5555
242	}
243	write_file_indented t3 {
244	111 - This is line ONEONE the demo program - 1111
245	222 - The second line program line in code - 2222
246	333 - This is a test of the merging algohm - 3333
247	444 - If all goes well, we will be pleased - 4444
248	555 - we think it FIVEFIVE other stuff too - 5555
249	}
250	write_file_indented t32 {
251	222 - The second line program line in code - 2222
252	333 - This is a test of THREE rging algohm - 3333
253	444 - If all goes well, we will be pleased - 4444
254	111 - This is line ONEONE the demo program - 1111
255	555 - we think it FIVEFIVE other stuff too - 5555
256	}
257	fossil test-3 t1 t3 t2 a32
258	test merge1-6.1 {[same_file t32 a32]}
259

	--- test/merge1.test
	+++ test/merge1.test
	@@ -77,18 +77,31 @@
77	333 - This is a test of the merging algohm - 3333
78	444 - If all goes well, we will be pleased - 4444
79	555 - we think it well and other stuff too - 5555
80	}
81	write_file_indented t23 {
82	>>>>>>>> BEGIN MERGE CONFLICT <<<<<<<<
83	111 - This is line ONE of the demo program - 1111
84	111 - This is line one OF the demo program - 1111
85	>>>>>>>>> END MERGE CONFLICT <<<<<<<<<
86	222 - The second line program line in code - 2222
87	333 - This is a test of the merging algohm - 3333
88	444 - If all goes well, we will be pleased - 4444
89	555 - we think it well and other stuff too - 5555
90	}
91	write_file_indented t32 {
92	>>>>>>>> BEGIN MERGE CONFLICT <<<<<<<<
93	111 - This is line one OF the demo program - 1111
94	111 - This is line ONE of the demo program - 1111
95	>>>>>>>>> END MERGE CONFLICT <<<<<<<<<
96	222 - The second line program line in code - 2222
97	333 - This is a test of the merging algohm - 3333
98	444 - If all goes well, we will be pleased - 4444
99	555 - we think it well and other stuff too - 5555
100	}
101	fossil test-3 t1 t3 t2 a32
102	test merge1-2.1 {[same_file t32 a32]}
103	fossil test-3 t1 t2 t3 a23
104	test merge1-2.2 {[same_file t23 a23]}
105
106	write_file_indented t1 {
107	111 - This is line one of the demo program - 1111
	@@ -222,37 +235,5 @@
235	}
236	fossil test-3 t1 t3 t2 a32
237	test merge1-6.1 {[same_file t32 a32]}
238	fossil test-3 t1 t2 t3 a23
239	test merge1-6.2 {[same_file t32 a23]}
































240

M test/tester.tcl

+5 -1

		--- test/tester.tcl
		+++ test/tester.tcl
		@@ -87,11 +87,15 @@
87	87	}
88	88
89	89	# Return true if two files are the same
90	90	#
91	91	proc same_file {a b} {
92		- return [expr {[read_file $a]==[read_file $b]}]
	92	+ set x [read_file $a]
	93	+ regsub -all { +\n} $x \n x
	94	+ set y [read_file $b]
	95	+ regsub -all { +\n} $y \n y
	96	+ return [expr {$x==$y}]
93	97	}
94	98
95	99	# Perform a test
96	100	#
97	101	proc test {name expr} {
98	102

	--- test/tester.tcl
	+++ test/tester.tcl
	@@ -87,11 +87,15 @@
87	}
88
89	# Return true if two files are the same
90	#
91	proc same_file {a b} {
92	return [expr {[read_file $a]==[read_file $b]}]




93	}
94
95	# Perform a test
96	#
97	proc test {name expr} {
98

	--- test/tester.tcl
	+++ test/tester.tcl
	@@ -87,11 +87,15 @@
87	}
88
89	# Return true if two files are the same
90	#
91	proc same_file {a b} {
92	set x [read_file $a]
93	regsub -all { +\n} $x \n x
94	set y [read_file $b]
95	regsub -all { +\n} $y \n y
96	return [expr {$x==$y}]
97	}
98
99	# Perform a test
100	#
101	proc test {name expr} {
102

Fossil SCM

Keyboard Shortcuts