Fossil SCM

Steps toward doing a better job of automatically resolving merge conflicts. Compiles but does not work. This is an incremental check-in.

drh 2024-12-05 12:15 trunk

Commit 849c7eb6ca68061212b4f944e7f57fe11f265215c4fffb4cc1cef5389a5ac16d

Parent 2ec8a7ae388d067…

3 files changed +48 -1 +135 -5 +100 -1

~ src/blob.c ~ src/diff.c ~ src/merge3.c

M src/blob.c

+48 -1

		--- src/blob.c
		+++ src/blob.c
		@@ -665,11 +665,12 @@
665	665	pBlob->nUsed = dehttpize(pBlob->aData);
666	666	}
667	667
668	668	/*
669	669	** Extract N bytes from blob pFrom and use it to initialize blob pTo.
670		-** Return the actual number of bytes extracted.
	670	+** Return the actual number of bytes extracted. The cursor position
	671	+** is advanced by the number of bytes extracted.
671	672	**
672	673	** After this call completes, pTo will be an ephemeral blob.
673	674	*/
674	675	int blob_extract(Blob pFrom, int N, Blob pTo){
675	676	blob_is_init(pFrom);
		@@ -687,10 +688,56 @@
687	688	pTo->iCursor = 0;
688	689	pTo->xRealloc = blobReallocStatic;
689	690	pFrom->iCursor += N;
690	691	return N;
691	692	}
	693	+
	694	+/*
	695	+** Extract N **lines** of text from blob pFrom beginning at the current
	696	+** cursor position and use that text to initialize blob pTo. Unlike the
	697	+** blob_extract() routine, the cursor position is unchanged.
	698	+**
	699	+** pTo is assumed to be uninitialized.
	700	+**
	701	+** After this call completes, pTo will be an ephemeral blob.
	702	+*/
	703	+int blob_extract_lines(Blob pFrom, int N, Blob pTo){
	704	+ int i;
	705	+ int mx;
	706	+ int iStart;
	707	+ int n;
	708	+ const char *z;
	709	+
	710	+ blob_zero(pTo);
	711	+ z = pFrom->aData;
	712	+ i = pFrom->iCursor;
	713	+ mx = pFrom->nUsed;
	714	+ while( N>0 ){
	715	+ while( i<mx && z[i]!='\n' ){ i++; }
	716	+ if( i>=mx ) break;
	717	+ i++;
	718	+ }
	719	+ iStart = pFrom->iCursor;
	720	+ n = blob_extract(pFrom, i-pFrom->iCursor, pTo);
	721	+ pFrom->iCursor = iStart;
	722	+ return n;
	723	+}
	724	+
	725	+/*
	726	+** Return the number of lines of text in the blob. If the last
	727	+** line is incomplete (if it does not have a \n at the end) then
	728	+** it still counts.
	729	+*/
	730	+int blob_linecount(Blob *p){
	731	+ int n = 0;
	732	+ int i;
	733	+ for(i=0; i<p->nUsed; i++){
	734	+ if( p->aData[i]=='\n' ) n++;
	735	+ }
	736	+ if( p->nUsed>0 && p->aData[p->nUsed-1]!='\n' ) n++;
	737	+ return n;
	738	+}
692	739
693	740	/*
694	741	** Rewind the cursor on a blob back to the beginning.
695	742	*/
696	743	void blob_rewind(Blob *p){
697	744

	--- src/blob.c
	+++ src/blob.c
	@@ -665,11 +665,12 @@
665	pBlob->nUsed = dehttpize(pBlob->aData);
666	}
667
668	/*
669	** Extract N bytes from blob pFrom and use it to initialize blob pTo.
670	** Return the actual number of bytes extracted.

671	**
672	** After this call completes, pTo will be an ephemeral blob.
673	*/
674	int blob_extract(Blob pFrom, int N, Blob pTo){
675	blob_is_init(pFrom);
	@@ -687,10 +688,56 @@
687	pTo->iCursor = 0;
688	pTo->xRealloc = blobReallocStatic;
689	pFrom->iCursor += N;
690	return N;
691	}














































692
693	/*
694	** Rewind the cursor on a blob back to the beginning.
695	*/
696	void blob_rewind(Blob *p){
697

	--- src/blob.c
	+++ src/blob.c
	@@ -665,11 +665,12 @@
665	pBlob->nUsed = dehttpize(pBlob->aData);
666	}
667
668	/*
669	** Extract N bytes from blob pFrom and use it to initialize blob pTo.
670	** Return the actual number of bytes extracted. The cursor position
671	** is advanced by the number of bytes extracted.
672	**
673	** After this call completes, pTo will be an ephemeral blob.
674	*/
675	int blob_extract(Blob pFrom, int N, Blob pTo){
676	blob_is_init(pFrom);
	@@ -687,10 +688,56 @@
688	pTo->iCursor = 0;
689	pTo->xRealloc = blobReallocStatic;
690	pFrom->iCursor += N;
691	return N;
692	}
693
694	/*
695	Extract N lines** of text from blob pFrom beginning at the current
696	** cursor position and use that text to initialize blob pTo. Unlike the
697	** blob_extract() routine, the cursor position is unchanged.
698	**
699	** pTo is assumed to be uninitialized.
700	**
701	** After this call completes, pTo will be an ephemeral blob.
702	*/
703	int blob_extract_lines(Blob pFrom, int N, Blob pTo){
704	int i;
705	int mx;
706	int iStart;
707	int n;
708	const char *z;
709
710	blob_zero(pTo);
711	z = pFrom->aData;
712	i = pFrom->iCursor;
713	mx = pFrom->nUsed;
714	while( N>0 ){
715	while( i<mx && z[i]!='\n' ){ i++; }
716	if( i>=mx ) break;
717	i++;
718	}
719	iStart = pFrom->iCursor;
720	n = blob_extract(pFrom, i-pFrom->iCursor, pTo);
721	pFrom->iCursor = iStart;
722	return n;
723	}
724
725	/*
726	** Return the number of lines of text in the blob. If the last
727	** line is incomplete (if it does not have a \n at the end) then
728	** it still counts.
729	*/
730	int blob_linecount(Blob *p){
731	int n = 0;
732	int i;
733	for(i=0; i<p->nUsed; i++){
734	if( p->aData[i]=='\n' ) n++;
735	}
736	if( p->nUsed>0 && p->aData[p->nUsed-1]!='\n' ) n++;
737	return n;
738	}
739
740	/*
741	** Rewind the cursor on a blob back to the beginning.
742	*/
743	void blob_rewind(Blob *p){
744

M src/diff.c

+135 -5

		--- src/diff.c
		+++ src/diff.c
		@@ -50,10 +50,11 @@
50	50	#define DIFF_RAW 0x00040000 /* Raw triples - for debugging */
51	51	#define DIFF_TCL 0x00080000 /* For the --tk option */
52	52	#define DIFF_INCBINARY 0x00100000 /* The --diff-binary option */
53	53	#define DIFF_SHOW_VERS 0x00200000 /* Show compared versions */
54	54	#define DIFF_DARKMODE 0x00400000 /* Use dark mode for HTML */
	55	+#define DIFF_BY_TOKEN 0x01000000 /* Split on tokens, not lines */
55	56
56	57	/*
57	58	** Per file information that may influence output.
58	59	*/
59	60	#define DIFF_FILE_ADDED 0x40000000 /* Added or rename destination */
		@@ -319,10 +320,113 @@
319	320
320	321	/* Return results */
321	322	*pnLine = nLine;
322	323	return a;
323	324	}
	325	+
	326	+/*
	327	+** Character classes for the purpose of tokenization.
	328	+**
	329	+** 1 - alphanumeric
	330	+** 2 - whitespace
	331	+** 3 - punctuation
	332	+*/
	333	+static char aTCharClass[256] = {
	334	+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	335	+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	336	+ 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	337	+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3,
	338	+
	339	+ 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	340	+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3,
	341	+ 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	342	+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3,
	343	+
	344	+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	345	+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	346	+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	347	+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	348	+
	349	+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	350	+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	351	+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	352	+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	353	+};
	354	+
	355	+/*
	356	+** Count the number of tokens in the given string.
	357	+*/
	358	+static int count_tokens(const unsigned char *p, int n){
	359	+ int nToken = 0;
	360	+ int iPrev = 0;
	361	+ int i;
	362	+ for(i=0; i<n; i++){
	363	+ char x = aTCharClass[p[i]];
	364	+ if( x!=iPrev ){
	365	+ iPrev = x;
	366	+ nToken++;
	367	+ }
	368	+ }
	369	+ return nToken;
	370	+}
	371	+
	372	+/*
	373	+** Return an array of DLine objects containing a pointer to the
	374	+** start of each token and a hash of that token. The lower
	375	+** bits of the hash store the length of each token.
	376	+**
	377	+** This is like break_into_lines() except that it works with tokens
	378	+** instead of lines. A token is:
	379	+**
	380	+** * A contiguous sequence of alphanumeric characters.
	381	+** * A contiguous sequence of whitespace
	382	+** * A contiguous sequence of punctuation characters.
	383	+**
	384	+** Return 0 if the file is binary or contains a line that is
	385	+** too long.
	386	+*/
	387	+static DLine *break_into_tokens(
	388	+ const char *z,
	389	+ int n,
	390	+ int *pnToken,
	391	+ u64 diffFlags
	392	+){
	393	+ int nToken, i, k;
	394	+ u64 h, h2;
	395	+ DLine *a;
	396	+ unsigned char p = (unsigned char)z;
	397	+
	398	+ nToken = count_tokens(p, n);
	399	+ a = fossil_malloc( sizeof(a[0])*(nToken+1) );
	400	+ memset(a, 0, sizeof(a[0])*(nToken+1));
	401	+ if( n==0 ){
	402	+ *pnToken = 0;
	403	+ return a;
	404	+ }
	405	+ i = 0;
	406	+ while( n>0 ){
	407	+ char x = aTCharClass[*p];
	408	+ h = 0xcbf29ce484222325LL;
	409	+ for(k=1; k<n && aTCharClass[p[k]]==x; k++){
	410	+ h ^= p[k];
	411	+ h *= 0x100000001b3LL;
	412	+ }
	413	+ a[i].z = (char*)p;
	414	+ a[i].n = k;
	415	+ a[i].h = h = ((h%281474976710597LL)<<LENGTH_MASK_SZ) \| k;
	416	+ h2 = h % nToken;
	417	+ a[i].iNext = a[h2].iHash;
	418	+ a[h2].iHash = i+1;
	419	+ p += k; n -= k;
	420	+ i++;
	421	+ };
	422	+ assert( i==nToken );
	423	+
	424	+ /* Return results */
	425	+ *pnToken = nToken;
	426	+ return a;
	427	+}
324	428
325	429	/*
326	430	** Return zero if two DLine elements are identical.
327	431	*/
328	432	static int compare_dline(const DLine pA, const DLine pB){
		@@ -2997,14 +3101,21 @@
2997	3101	if( (pCfg->diffFlags & DIFF_IGNORE_ALLWS)==DIFF_IGNORE_ALLWS ){
2998	3102	c.xDiffer = compare_dline_ignore_allws;
2999	3103	}else{
3000	3104	c.xDiffer = compare_dline;
3001	3105	}
3002		- c.aFrom = break_into_lines(blob_str(pA_Blob), blob_size(pA_Blob),
3003		- &c.nFrom, pCfg->diffFlags);
3004		- c.aTo = break_into_lines(blob_str(pB_Blob), blob_size(pB_Blob),
3005		- &c.nTo, pCfg->diffFlags);
	3106	+ if( pCfg->diffFlags & DIFF_BY_TOKEN ){
	3107	+ c.aFrom = break_into_tokens(blob_str(pA_Blob), blob_size(pA_Blob),
	3108	+ &c.nFrom, pCfg->diffFlags);
	3109	+ c.aTo = break_into_tokens(blob_str(pB_Blob), blob_size(pB_Blob),
	3110	+ &c.nTo, pCfg->diffFlags);
	3111	+ }else{
	3112	+ c.aFrom = break_into_lines(blob_str(pA_Blob), blob_size(pA_Blob),
	3113	+ &c.nFrom, pCfg->diffFlags);
	3114	+ c.aTo = break_into_lines(blob_str(pB_Blob), blob_size(pB_Blob),
	3115	+ &c.nTo, pCfg->diffFlags);
	3116	+ }
3006	3117	if( c.aFrom==0 \|\| c.aTo==0 ){
3007	3118	fossil_free(c.aFrom);
3008	3119	fossil_free(c.aTo);
3009	3120	if( pOut ){
3010	3121	diff_errmsg(pOut, DIFF_CANNOT_COMPUTE_BINARY, pCfg->diffFlags);
		@@ -3035,10 +3146,26 @@
3035	3146	}
3036	3147	}
3037	3148	if( (pCfg->diffFlags & DIFF_NOOPT)==0 ){
3038	3149	diff_optimize(&c);
3039	3150	}
	3151	+ if( (pCfg->diffFlags & DIFF_BY_TOKEN)!=0 ){
	3152	+ /* Convert token counts into byte counts. */
	3153	+ int i;
	3154	+ int iA = 0;
	3155	+ int iB = 0;
	3156	+ for(i=0; c.aEdit[i] \|\| c.aEdit[i+1] \|\| c.aEdit[i+2]; i+=3){
	3157	+ int k, sum;
	3158	+ for(k=0, sum=0; k<c.aEdit[i]; k++) sum += c.aFrom[iA++].n;
	3159	+ iB += c.aEdit[i];
	3160	+ c.aEdit[i] = sum;
	3161	+ for(k=0, sum=0; k<c.aEdit[i+1]; k++) sum += c.aFrom[iA++].n;
	3162	+ c.aEdit[i+1] = sum;
	3163	+ for(k=0, sum=0; k<c.aEdit[i+2]; k++) sum += c.aTo[iB++].n;
	3164	+ c.aEdit[i+2] = sum;
	3165	+ }
	3166	+ }
3040	3167
3041	3168	if( pOut ){
3042	3169	if( pCfg->diffFlags & DIFF_NUMSTAT ){
3043	3170	int nDel = 0, nIns = 0, i;
3044	3171	for(i=0; c.aEdit[i] \|\| c.aEdit[i+1] \|\| c.aEdit[i+2]; i+=3){
		@@ -3049,11 +3176,11 @@
3049	3176	g.diffCnt[2] += nDel;
3050	3177	if( nIns+nDel ){
3051	3178	g.diffCnt[0]++;
3052	3179	blob_appendf(pOut, "%10d %10d", nIns, nDel);
3053	3180	}
3054		- }else if( pCfg->diffFlags & DIFF_RAW ){
	3181	+ }else if( pCfg->diffFlags & (DIFF_RAW\|DIFF_BY_TOKEN) ){
3055	3182	const int *R = c.aEdit;
3056	3183	unsigned int r;
3057	3184	for(r=0; R[r] \|\| R[r+1] \|\| R[r+2]; r += 3){
3058	3185	blob_appendf(pOut, " copy %6d delete %6d insert %6d\n",
3059	3186	R[r], R[r+1], R[r+2]);
		@@ -3157,10 +3284,13 @@
3157	3284
3158	3285	/* Undocumented and unsupported flags used for development
3159	3286	** debugging and analysis: */
3160	3287	if( find_option("debug",0,0)!=0 ) diffFlags \|= DIFF_DEBUG;
3161	3288	if( find_option("raw",0,0)!=0 ) diffFlags \|= DIFF_RAW;
	3289	+ if( find_option("bytoken",0,0)!=0 ){
	3290	+ diffFlags = DIFF_RAW\|DIFF_BY_TOKEN;
	3291	+ }
3162	3292	}
3163	3293	if( (z = find_option("context","c",1))!=0 ){
3164	3294	char *zEnd;
3165	3295	f = (int)strtol(z, &zEnd, 10);
3166	3296	if( zEnd[0]==0 && errno!=ERANGE ){
3167	3297

	--- src/diff.c
	+++ src/diff.c
	@@ -50,10 +50,11 @@
50	#define DIFF_RAW 0x00040000 /* Raw triples - for debugging */
51	#define DIFF_TCL 0x00080000 /* For the --tk option */
52	#define DIFF_INCBINARY 0x00100000 /* The --diff-binary option */
53	#define DIFF_SHOW_VERS 0x00200000 /* Show compared versions */
54	#define DIFF_DARKMODE 0x00400000 /* Use dark mode for HTML */

55
56	/*
57	** Per file information that may influence output.
58	*/
59	#define DIFF_FILE_ADDED 0x40000000 /* Added or rename destination */
	@@ -319,10 +320,113 @@
319
320	/* Return results */
321	*pnLine = nLine;
322	return a;
323	}







































































































324
325	/*
326	** Return zero if two DLine elements are identical.
327	*/
328	static int compare_dline(const DLine pA, const DLine pB){
	@@ -2997,14 +3101,21 @@
2997	if( (pCfg->diffFlags & DIFF_IGNORE_ALLWS)==DIFF_IGNORE_ALLWS ){
2998	c.xDiffer = compare_dline_ignore_allws;
2999	}else{
3000	c.xDiffer = compare_dline;
3001	}
3002	c.aFrom = break_into_lines(blob_str(pA_Blob), blob_size(pA_Blob),
3003	&c.nFrom, pCfg->diffFlags);
3004	c.aTo = break_into_lines(blob_str(pB_Blob), blob_size(pB_Blob),
3005	&c.nTo, pCfg->diffFlags);







3006	if( c.aFrom==0 \|\| c.aTo==0 ){
3007	fossil_free(c.aFrom);
3008	fossil_free(c.aTo);
3009	if( pOut ){
3010	diff_errmsg(pOut, DIFF_CANNOT_COMPUTE_BINARY, pCfg->diffFlags);
	@@ -3035,10 +3146,26 @@
3035	}
3036	}
3037	if( (pCfg->diffFlags & DIFF_NOOPT)==0 ){
3038	diff_optimize(&c);
3039	}
















3040
3041	if( pOut ){
3042	if( pCfg->diffFlags & DIFF_NUMSTAT ){
3043	int nDel = 0, nIns = 0, i;
3044	for(i=0; c.aEdit[i] \|\| c.aEdit[i+1] \|\| c.aEdit[i+2]; i+=3){
	@@ -3049,11 +3176,11 @@
3049	g.diffCnt[2] += nDel;
3050	if( nIns+nDel ){
3051	g.diffCnt[0]++;
3052	blob_appendf(pOut, "%10d %10d", nIns, nDel);
3053	}
3054	}else if( pCfg->diffFlags & DIFF_RAW ){
3055	const int *R = c.aEdit;
3056	unsigned int r;
3057	for(r=0; R[r] \|\| R[r+1] \|\| R[r+2]; r += 3){
3058	blob_appendf(pOut, " copy %6d delete %6d insert %6d\n",
3059	R[r], R[r+1], R[r+2]);
	@@ -3157,10 +3284,13 @@
3157
3158	/* Undocumented and unsupported flags used for development
3159	** debugging and analysis: */
3160	if( find_option("debug",0,0)!=0 ) diffFlags \|= DIFF_DEBUG;
3161	if( find_option("raw",0,0)!=0 ) diffFlags \|= DIFF_RAW;



3162	}
3163	if( (z = find_option("context","c",1))!=0 ){
3164	char *zEnd;
3165	f = (int)strtol(z, &zEnd, 10);
3166	if( zEnd[0]==0 && errno!=ERANGE ){
3167

	--- src/diff.c
	+++ src/diff.c
	@@ -50,10 +50,11 @@
50	#define DIFF_RAW 0x00040000 /* Raw triples - for debugging */
51	#define DIFF_TCL 0x00080000 /* For the --tk option */
52	#define DIFF_INCBINARY 0x00100000 /* The --diff-binary option */
53	#define DIFF_SHOW_VERS 0x00200000 /* Show compared versions */
54	#define DIFF_DARKMODE 0x00400000 /* Use dark mode for HTML */
55	#define DIFF_BY_TOKEN 0x01000000 /* Split on tokens, not lines */
56
57	/*
58	** Per file information that may influence output.
59	*/
60	#define DIFF_FILE_ADDED 0x40000000 /* Added or rename destination */
	@@ -319,10 +320,113 @@
320
321	/* Return results */
322	*pnLine = nLine;
323	return a;
324	}
325
/*
** Character classes for the purpose of tokenization, indexed by byte
** value.
**
**    1 - alphanumeric  (0-9, A-Z, a-z, and every byte 0x80 and above)
**    2 - whitespace    (space and all control characters below 0x20)
**    3 - punctuation   (everything else, including '_' and 0x7f)
*/
static char aTCharClass[256] = {
  /* 0x00 - 0x0f: control characters */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x10 - 0x1f: control characters */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x20 - 0x2f: space, then punctuation */
  2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  /* 0x30 - 0x3f: the ten digits '0'..'9', then six punctuation marks */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3,

  /* 0x40 - 0x4f: '@', then 'A'..'O' */
  3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50 - 0x5f: 'P'..'Z', then five punctuation marks */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3,
  /* 0x60 - 0x6f: backtick, then 'a'..'o' */
  3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70 - 0x7f: 'p'..'z', then four punctuation marks and 0x7f */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3,

  /* 0x80 - 0xbf: high-bit bytes are all treated as alphanumeric */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

  /* 0xc0 - 0xff: high-bit bytes are all treated as alphanumeric */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
};
354
355	/*
356	** Count the number of tokens in the given string.
357	*/
358	static int count_tokens(const unsigned char *p, int n){
359	int nToken = 0;
360	int iPrev = 0;
361	int i;
362	for(i=0; i<n; i++){
363	char x = aTCharClass[p[i]];
364	if( x!=iPrev ){
365	iPrev = x;
366	nToken++;
367	}
368	}
369	return nToken;
370	}
371
372	/*
373	** Return an array of DLine objects containing a pointer to the
374	** start of each token and a hash of that token. The lower
375	** bits of the hash store the length of each token.
376	**
377	** This is like break_into_lines() except that it works with tokens
378	** instead of lines. A token is:
379	**
380	** * A contiguous sequence of alphanumeric characters.
381	** * A contiguous sequence of whitespace
382	** * A contiguous sequence of punctuation characters.
383	**
384	** Return 0 if the file is binary or contains a line that is
385	** too long.
386	*/
387	static DLine *break_into_tokens(
388	const char *z,
389	int n,
390	int *pnToken,
391	u64 diffFlags
392	){
393	int nToken, i, k;
394	u64 h, h2;
395	DLine *a;
396	unsigned char p = (unsigned char)z;
397
398	nToken = count_tokens(p, n);
399	a = fossil_malloc( sizeof(a[0])*(nToken+1) );
400	memset(a, 0, sizeof(a[0])*(nToken+1));
401	if( n==0 ){
402	*pnToken = 0;
403	return a;
404	}
405	i = 0;
406	while( n>0 ){
407	char x = aTCharClass[*p];
408	h = 0xcbf29ce484222325LL;
409	for(k=1; k<n && aTCharClass[p[k]]==x; k++){
410	h ^= p[k];
411	h *= 0x100000001b3LL;
412	}
413	a[i].z = (char*)p;
414	a[i].n = k;
415	a[i].h = h = ((h%281474976710597LL)<<LENGTH_MASK_SZ) \| k;
416	h2 = h % nToken;
417	a[i].iNext = a[h2].iHash;
418	a[h2].iHash = i+1;
419	p += k; n -= k;
420	i++;
421	};
422	assert( i==nToken );
423
424	/* Return results */
425	*pnToken = nToken;
426	return a;
427	}
428
429	/*
430	** Return zero if two DLine elements are identical.
431	*/
432	static int compare_dline(const DLine pA, const DLine pB){
	@@ -2997,14 +3101,21 @@
3101	if( (pCfg->diffFlags & DIFF_IGNORE_ALLWS)==DIFF_IGNORE_ALLWS ){
3102	c.xDiffer = compare_dline_ignore_allws;
3103	}else{
3104	c.xDiffer = compare_dline;
3105	}
3106	if( pCfg->diffFlags & DIFF_BY_TOKEN ){
3107	c.aFrom = break_into_tokens(blob_str(pA_Blob), blob_size(pA_Blob),
3108	&c.nFrom, pCfg->diffFlags);
3109	c.aTo = break_into_tokens(blob_str(pB_Blob), blob_size(pB_Blob),
3110	&c.nTo, pCfg->diffFlags);
3111	}else{
3112	c.aFrom = break_into_lines(blob_str(pA_Blob), blob_size(pA_Blob),
3113	&c.nFrom, pCfg->diffFlags);
3114	c.aTo = break_into_lines(blob_str(pB_Blob), blob_size(pB_Blob),
3115	&c.nTo, pCfg->diffFlags);
3116	}
3117	if( c.aFrom==0 \|\| c.aTo==0 ){
3118	fossil_free(c.aFrom);
3119	fossil_free(c.aTo);
3120	if( pOut ){
3121	diff_errmsg(pOut, DIFF_CANNOT_COMPUTE_BINARY, pCfg->diffFlags);
	@@ -3035,10 +3146,26 @@
3146	}
3147	}
3148	if( (pCfg->diffFlags & DIFF_NOOPT)==0 ){
3149	diff_optimize(&c);
3150	}
3151	if( (pCfg->diffFlags & DIFF_BY_TOKEN)!=0 ){
3152	/* Convert token counts into byte counts. */
3153	int i;
3154	int iA = 0;
3155	int iB = 0;
3156	for(i=0; c.aEdit[i] \|\| c.aEdit[i+1] \|\| c.aEdit[i+2]; i+=3){
3157	int k, sum;
3158	for(k=0, sum=0; k<c.aEdit[i]; k++) sum += c.aFrom[iA++].n;
3159	iB += c.aEdit[i];
3160	c.aEdit[i] = sum;
3161	for(k=0, sum=0; k<c.aEdit[i+1]; k++) sum += c.aFrom[iA++].n;
3162	c.aEdit[i+1] = sum;
3163	for(k=0, sum=0; k<c.aEdit[i+2]; k++) sum += c.aTo[iB++].n;
3164	c.aEdit[i+2] = sum;
3165	}
3166	}
3167
3168	if( pOut ){
3169	if( pCfg->diffFlags & DIFF_NUMSTAT ){
3170	int nDel = 0, nIns = 0, i;
3171	for(i=0; c.aEdit[i] \|\| c.aEdit[i+1] \|\| c.aEdit[i+2]; i+=3){
	@@ -3049,11 +3176,11 @@
3176	g.diffCnt[2] += nDel;
3177	if( nIns+nDel ){
3178	g.diffCnt[0]++;
3179	blob_appendf(pOut, "%10d %10d", nIns, nDel);
3180	}
3181	}else if( pCfg->diffFlags & (DIFF_RAW\|DIFF_BY_TOKEN) ){
3182	const int *R = c.aEdit;
3183	unsigned int r;
3184	for(r=0; R[r] \|\| R[r+1] \|\| R[r+2]; r += 3){
3185	blob_appendf(pOut, " copy %6d delete %6d insert %6d\n",
3186	R[r], R[r+1], R[r+2]);
	@@ -3157,10 +3284,13 @@
3284
3285	/* Undocumented and unsupported flags used for development
3286	** debugging and analysis: */
3287	if( find_option("debug",0,0)!=0 ) diffFlags \|= DIFF_DEBUG;
3288	if( find_option("raw",0,0)!=0 ) diffFlags \|= DIFF_RAW;
3289	if( find_option("bytoken",0,0)!=0 ){
3290	diffFlags = DIFF_RAW\|DIFF_BY_TOKEN;
3291	}
3292	}
3293	if( (z = find_option("context","c",1))!=0 ){
3294	char *zEnd;
3295	f = (int)strtol(z, &zEnd, 10);
3296	if( zEnd[0]==0 && errno!=ERANGE ){
3297

M src/merge3.c

+100 -1

		--- src/merge3.c
		+++ src/merge3.c
		@@ -261,10 +261,98 @@
261	261	p->xChngBoth = dbgChngBoth;
262	262	p->xConflict = dbgConflict;
263	263	p->xEnd = dbgStartEnd;
264	264	p->xDestroy = dbgDestroy;
265	265	}
	266	+
	267	+/*********************** MergeBuilderToken ******************************/
	268	+/* This version of MergeBuilder actually performs a merge on file that
	269	+** are broken up into tokens instead of lines, and puts the result in pOut.
	270	+*/
	271	+static void tokenSame(MergeBuilder *p, unsigned int N){
	272	+ blob_append(p->pOut, p->pPivot->aData+p->pPivot->iCursor, N);
	273	+ p->pPivot->iCursor += N;
	274	+ p->pV1->iCursor += N;
	275	+ p->pV2->iCursor += N;
	276	+}
	277	+static void tokenChngV1(MergeBuilder *p, unsigned int nPivot, unsigned nV1){
	278	+ blob_append(p->pOut, p->pV1->aData+p->pV1->iCursor, nV1);
	279	+ p->pPivot->iCursor += nPivot;
	280	+ p->pV1->iCursor += nV1;
	281	+ p->pV2->iCursor += nPivot;
	282	+}
	283	+static void tokenChngV2(MergeBuilder *p, unsigned int nPivot, unsigned nV2){
	284	+ blob_append(p->pOut, p->pV2->aData+p->pV2->iCursor, nV2);
	285	+ p->pPivot->iCursor += nPivot;
	286	+ p->pV1->iCursor += nPivot;
	287	+ p->pV2->iCursor += nV2;
	288	+}
	289	+static void tokenChngBoth(MergeBuilder *p, unsigned int nPivot, unsigned nV){
	290	+ blob_append(p->pOut, p->pV2->aData+p->pV2->iCursor, nV);
	291	+ p->pPivot->iCursor += nPivot;
	292	+ p->pV1->iCursor += nV;
	293	+ p->pV2->iCursor += nV;
	294	+}
	295	+static void tokenConflict(
	296	+ MergeBuilder *p,
	297	+ unsigned int nPivot,
	298	+ unsigned int nV1,
	299	+ unsigned int nV2
	300	+){
	301	+ blob_append(p->pOut, p->pV1->aData+p->pV1->iCursor, nV1);
	302	+ p->pPivot->iCursor += nPivot;
	303	+ p->pV1->iCursor += nV1;
	304	+ p->pV2->iCursor += nV2;
	305	+}
	306	+static void mergebuilder_init_token(MergeBuilder *p){
	307	+ mergebuilder_init(p);
	308	+ p->xSame = tokenSame;
	309	+ p->xChngV1 = tokenChngV1;
	310	+ p->xChngV2 = tokenChngV2;
	311	+ p->xChngBoth = tokenChngBoth;
	312	+ p->xConflict = tokenConflict;
	313	+}
	314	+
	315	+/*
	316	+** Attempt to do a low-level merge on a conflict. The conflict is
	317	+** described by the first four parameters, which are the same as the
	318	+** arguments to the xConflict method of the MergeBuilder object.
	319	+** This routine attempts to resolve the conflict by looking at
	320	+** elements of the conflict region that are finer grain than complete
	321	+** lines of text.
	322	+**
	323	+** The result is written into Blob pOut. pOut is initialized by this
	324	+** routine.
	325	+*/
	326	+int merge_try_to_resolve_conflict(
	327	+ MergeBuilder pMB, / MergeBuilder that encounter conflict */
	328	+ unsigned int nPivot, /* Lines of conflict in the pivot */
	329	+ unsigned int nV1, /* Lines of conflict in V1 */
	330	+ unsigned int nV2, /* Lines of conflict in V2 */
	331	+ Blob pOut / Write resolution text here */
	332	+){
	333	+ int nConflict;
	334	+ MergeBuilder mb;
	335	+ Blob pv, v1, v2;
	336	+ mergebuilder_init_token(&mb);
	337	+ blob_extract_lines(pMB->pPivot, nPivot, &pv);
	338	+ blob_extract_lines(pMB->pV1, nV1, &v1);
	339	+ blob_extract_lines(pMB->pV2, nV2, &v2);
	340	+ blob_zero(pOut);
	341	+ mb.pPivot = &pv;
	342	+ mb.pV1 = &v1;
	343	+ mb.pV2 = &v2;
	344	+ mb.pOut = pOut;
	345	+ nConflict = merge_three_blobs(&mb);
	346	+ /* pv, v1, and v2 should all be ephemeral blobs, so they do not
	347	+ ** need to be freed. */
	348	+ /* mb has not allocated any resources, so we do not need to invoke
	349	+ ** the xDestroy method. */
	350	+ blob_add_final_newline(pOut);
	351	+ return nConflict;
	352	+}
	353	+
266	354
267	355	/*********************** MergeBuilderText ********************************/
268	356	/* This version of MergeBuilder actually performs a merge on file and puts
269	357	** the result in pOut
270	358	*/
		@@ -520,12 +608,18 @@
520	608	unsigned int nV1,
521	609	unsigned int nV2
522	610	){
523	611	int mx = nPivot;
524	612	int i;
	613	+ int nOut;
	614	+ Blob out;
	615	+
	616	+ merge_try_to_resolve_conflict(p, nPivot, nV1, nV2, &out);
	617	+ nOut = blob_linecount(&out);
525	618	if( nV1>mx ) mx = nV1;
526	619	if( nV2>mx ) mx = nV2;
	620	+ if( nOut>mx ) mx = nOut;
527	621	for(i=0; i<mx; i++){
528	622	if( i<nPivot ){
529	623	tclLineOfText(p->pOut, p->pPivot);
530	624	}else{
531	625	blob_append_char(p->pOut, '.');
		@@ -540,12 +634,17 @@
540	634	if( i<nV2 ){
541	635	tclLineOfText(p->pOut, p->pV2);
542	636	}else{
543	637	blob_append_char(p->pOut, '.');
544	638	}
545		- blob_append(p->pOut, " X\n", 3);
	639	+ if( i<nOut ){
	640	+ tclLineOfText(p->pOut, &out);
	641	+ }else{
	642	+ blob_append(p->pOut, " X\n", 3);
	643	+ }
546	644	}
	645	+ blob_reset(&out);
547	646	p->lnPivot += nPivot;
548	647	p->lnV1 += nV1;
549	648	p->lnV2 += nV2;
550	649	}
551	650	void mergebuilder_init_tcl(MergeBuilder *p){
552	651

	--- src/merge3.c
	+++ src/merge3.c
	@@ -261,10 +261,98 @@
261	p->xChngBoth = dbgChngBoth;
262	p->xConflict = dbgConflict;
263	p->xEnd = dbgStartEnd;
264	p->xDestroy = dbgDestroy;
265	}
























































































266
267	/*********************** MergeBuilderText ********************************/
268	/* This version of MergeBuilder actually performs a merge on file and puts
269	** the result in pOut
270	*/
	@@ -520,12 +608,18 @@
520	unsigned int nV1,
521	unsigned int nV2
522	){
523	int mx = nPivot;
524	int i;





525	if( nV1>mx ) mx = nV1;
526	if( nV2>mx ) mx = nV2;

527	for(i=0; i<mx; i++){
528	if( i<nPivot ){
529	tclLineOfText(p->pOut, p->pPivot);
530	}else{
531	blob_append_char(p->pOut, '.');
	@@ -540,12 +634,17 @@
540	if( i<nV2 ){
541	tclLineOfText(p->pOut, p->pV2);
542	}else{
543	blob_append_char(p->pOut, '.');
544	}
545	blob_append(p->pOut, " X\n", 3);




546	}

547	p->lnPivot += nPivot;
548	p->lnV1 += nV1;
549	p->lnV2 += nV2;
550	}
551	void mergebuilder_init_tcl(MergeBuilder *p){
552

	--- src/merge3.c
	+++ src/merge3.c
	@@ -261,10 +261,98 @@
261	p->xChngBoth = dbgChngBoth;
262	p->xConflict = dbgConflict;
263	p->xEnd = dbgStartEnd;
264	p->xDestroy = dbgDestroy;
265	}
266
267	/*********************** MergeBuilderToken ******************************/
268	/* This version of MergeBuilder actually performs a merge on files that
269	** are broken up into tokens instead of lines, and puts the result in pOut.
270	*/
271	static void tokenSame(MergeBuilder *p, unsigned int N){
272	blob_append(p->pOut, p->pPivot->aData+p->pPivot->iCursor, N);
273	p->pPivot->iCursor += N;
274	p->pV1->iCursor += N;
275	p->pV2->iCursor += N;
276	}
277	static void tokenChngV1(MergeBuilder *p, unsigned int nPivot, unsigned nV1){
278	blob_append(p->pOut, p->pV1->aData+p->pV1->iCursor, nV1);
279	p->pPivot->iCursor += nPivot;
280	p->pV1->iCursor += nV1;
281	p->pV2->iCursor += nPivot;
282	}
283	static void tokenChngV2(MergeBuilder *p, unsigned int nPivot, unsigned nV2){
284	blob_append(p->pOut, p->pV2->aData+p->pV2->iCursor, nV2);
285	p->pPivot->iCursor += nPivot;
286	p->pV1->iCursor += nPivot;
287	p->pV2->iCursor += nV2;
288	}
289	static void tokenChngBoth(MergeBuilder *p, unsigned int nPivot, unsigned nV){
290	blob_append(p->pOut, p->pV2->aData+p->pV2->iCursor, nV);
291	p->pPivot->iCursor += nPivot;
292	p->pV1->iCursor += nV;
293	p->pV2->iCursor += nV;
294	}
295	static void tokenConflict(
296	MergeBuilder *p,
297	unsigned int nPivot,
298	unsigned int nV1,
299	unsigned int nV2
300	){
301	blob_append(p->pOut, p->pV1->aData+p->pV1->iCursor, nV1);
302	p->pPivot->iCursor += nPivot;
303	p->pV1->iCursor += nV1;
304	p->pV2->iCursor += nV2;
305	}
306	static void mergebuilder_init_token(MergeBuilder *p){
307	mergebuilder_init(p);
308	p->xSame = tokenSame;
309	p->xChngV1 = tokenChngV1;
310	p->xChngV2 = tokenChngV2;
311	p->xChngBoth = tokenChngBoth;
312	p->xConflict = tokenConflict;
313	}
314
315	/*
316	** Attempt to do a low-level merge on a conflict. The conflict is
317	** described by the first four parameters, which are the same as the
318	** arguments to the xConflict method of the MergeBuilder object.
319	** This routine attempts to resolve the conflict by looking at
320	** elements of the conflict region that are finer grain than complete
321	** lines of text.
322	**
323	** The result is written into Blob pOut. pOut is initialized by this
324	** routine.
325	*/
326	int merge_try_to_resolve_conflict(
327	MergeBuilder pMB, / MergeBuilder that encounter conflict */
328	unsigned int nPivot, /* Lines of conflict in the pivot */
329	unsigned int nV1, /* Lines of conflict in V1 */
330	unsigned int nV2, /* Lines of conflict in V2 */
331	Blob pOut / Write resolution text here */
332	){
333	int nConflict;
334	MergeBuilder mb;
335	Blob pv, v1, v2;
336	mergebuilder_init_token(&mb);
337	blob_extract_lines(pMB->pPivot, nPivot, &pv);
338	blob_extract_lines(pMB->pV1, nV1, &v1);
339	blob_extract_lines(pMB->pV2, nV2, &v2);
340	blob_zero(pOut);
341	mb.pPivot = &pv;
342	mb.pV1 = &v1;
343	mb.pV2 = &v2;
344	mb.pOut = pOut;
345	nConflict = merge_three_blobs(&mb);
346	/* pv, v1, and v2 should all be ephemeral blobs, so they do not
347	** need to be freed. */
348	/* mb has not allocated any resources, so we do not need to invoke
349	** the xDestroy method. */
350	blob_add_final_newline(pOut);
351	return nConflict;
352	}
353
354
355	/*********************** MergeBuilderText ********************************/
356	/* This version of MergeBuilder actually performs a merge on file and puts
357	** the result in pOut
358	*/
	@@ -520,12 +608,18 @@
608	unsigned int nV1,
609	unsigned int nV2
610	){
611	int mx = nPivot;
612	int i;
613	int nOut;
614	Blob out;
615
616	merge_try_to_resolve_conflict(p, nPivot, nV1, nV2, &out);
617	nOut = blob_linecount(&out);
618	if( nV1>mx ) mx = nV1;
619	if( nV2>mx ) mx = nV2;
620	if( nOut>mx ) mx = nOut;
621	for(i=0; i<mx; i++){
622	if( i<nPivot ){
623	tclLineOfText(p->pOut, p->pPivot);
624	}else{
625	blob_append_char(p->pOut, '.');
	@@ -540,12 +634,17 @@
634	if( i<nV2 ){
635	tclLineOfText(p->pOut, p->pV2);
636	}else{
637	blob_append_char(p->pOut, '.');
638	}
639	if( i<nOut ){
640	tclLineOfText(p->pOut, &out);
641	}else{
642	blob_append(p->pOut, " X\n", 3);
643	}
644	}
645	blob_reset(&out);
646	p->lnPivot += nPivot;
647	p->lnV1 += nV1;
648	p->lnV2 += nV2;
649	}
650	void mergebuilder_init_tcl(MergeBuilder *p){
651

Fossil SCM

Keyboard Shortcuts